Commits

Matt Chaput  committed efde0b2 Merge

Merging bug fixes from default branch.

  • Participants
  • Parent commits 9ba5c1a, a733ff9
  • Branches 2.5.x

Comments (0)

Files changed (7)

File src/whoosh/qparser/default.py

         prev = pos
         # Prioritized list of taggers provided by the parser's plugins
         taggers = self.taggers()
-        print_debug(debug, "Taggers: %r" % taggers)
+        if debug:
+            print_debug(debug, "Taggers: %r" % taggers)
 
         # Define a function that will make a WordNode from the "interstitial"
         # text between matches
                                         " (%r, %s)" % (tagger, text, pos))
                     if prev < pos:
                         tween = inter(prev, pos)
-                        print_debug(debug, "Tween: %r" % tween)
+                        if debug:
+                            print_debug(debug, "Tween: %r" % tween)
                         stack.append(tween)
 
-                    print_debug(debug, "Tagger: %r at %s: %r"
-                                % (tagger, pos, node))
+                    if debug:
+                        print_debug(debug, "Tagger: %r at %s: %r"
+                                    % (tagger, pos, node))
                     stack.append(node)
                     prev = pos = node.endchar
                     break
 
         # Wrap the list of nodes in a group node
         group = self.group(stack)
-        print_debug(debug, "Tagged group: %r" % group)
+        if debug:
+            print_debug(debug, "Tagged group: %r" % group)
         return group
 
     def filterize(self, nodes, debug=False):
         """
 
         # Call each filter in the prioritized list of plugin filters
-        print_debug(debug, "Pre-filtered group: %r" % nodes)
+        if debug:
+            print_debug(debug, "Pre-filtered group: %r" % nodes)
         for f in self.filters():
-            print_debug(debug, "..Applying: %r" % f)
+            if debug:
+                print_debug(debug, "..Applying: %r" % f)
             nodes = f(self, nodes)
-            print_debug(debug, "..Result: %r" % nodes)
+            if debug:
+                print_debug(debug, "..Result: %r" % nodes)
             if nodes is None:
                 raise Exception("Filter %r did not return anything" % f)
         return nodes
             text = text.decode("latin1")
 
         nodes = self.process(text, debug=debug)
-        print_debug(debug, "Syntax tree: %r" % nodes)
+        if debug:
+            print_debug(debug, "Syntax tree: %r" % nodes)
 
         q = nodes.query(self)
         if not q:
             q = query.NullQuery
-        print_debug(debug, "Pre-normalized query: %r" % q)
+        if debug:
+            print_debug(debug, "Pre-normalized query: %r" % q)
 
         if normalize:
             q = q.normalize()
-            print_debug(debug, "Normalized query: %r" % q)
+            if debug:
+                print_debug(debug, "Normalized query: %r" % q)
         return q
 
     def parse_(self, text, normalize=True):

File src/whoosh/qparser/plugins.py

             self.prefix = prefix
 
         def r(self):
-            return "%s ~%d" % (self.text, self.maxdist)
+            return "%r ~%d" % (self.text, self.maxdist)
 
         def query(self, parser):
             # Use the superclass's query() method to create a FuzzyTerm query

File src/whoosh/query/terms.py

         return r
 
     def __unicode__(self):
-        t = u("%s:%s") % (self.fieldname, self.text)
+        text = self.text
+        if isinstance(text, bytes_type):
+            try:
+                text = text.decode("ascii")
+            except UnicodeDecodeError:
+                text = repr(text)
+
+        t = u("%s:%s") % (self.fieldname, text)
         if self.boost != 1:
             t += u("^") + text_type(self.boost)
         return t
             for btext in self._btexts(ixreader):
                 yield (fieldname, btext)
 
-    def tokens(self, boost=1.0):
-        yield Token(fieldname=self.fieldname, text=self.text,
-                    boost=boost * self.boost, startchar=self.startchar,
-                    endchar=self.endchar, chars=True)
+    def tokens(self, boost=1.0, exreader=None):
+        fieldname = self.field()
+        if exreader is None:
+            btexts = [self.text]
+        else:
+            btexts = self._btexts(exreader)
+
+        for btext in btexts:
+            yield Token(fieldname=fieldname, text=btext,
+                        boost=boost * self.boost, startchar=self.startchar,
+                        endchar=self.endchar, chars=True)
 
     def simplify(self, ixreader):
-        if self.fieldname not in ixreader.schema:
+        fieldname = self.field()
+
+        if fieldname not in ixreader.schema:
             return qcore.NullQuery()
-        field = ixreader.schema[self.fieldname]
+        field = ixreader.schema[fieldname]
 
         existing = []
         for btext in sorted(set(self._btexts(ixreader))):
             text = field.from_bytes(btext)
-            existing.append(Term(self.fieldname, text, boost=self.boost))
+            existing.append(Term(fieldname, text, boost=self.boost))
 
         if len(existing) == 1:
             return existing[0]
             return qcore.NullQuery
 
     def estimate_size(self, ixreader):
-        return sum(ixreader.doc_frequency(self.fieldname, btext)
+        fieldname = self.field()
+        return sum(ixreader.doc_frequency(fieldname, btext)
                    for btext in self._btexts(ixreader))
 
     def estimate_min_size(self, ixreader):
-        return min(ixreader.doc_frequency(self.fieldname, text)
+        fieldname = self.field()
+        return min(ixreader.doc_frequency(fieldname, text)
                    for text in self._btexts(ixreader))
 
     def matcher(self, searcher, context=None):
         from whoosh.query import Or
-        from whoosh.util import now
 
-        fieldname = self.fieldname
+        fieldname = self.field()
         constantscore = self.constantscore
 
         reader = searcher.reader()

File src/whoosh/searching.py

         :rtype: :class:`whoosh.spelling.Correction`
         """
 
+        reader = self.reader()
+
         # Dictionary of custom per-field correctors
         if correctors is None:
             correctors = {}
             if fieldname not in correctors:
                 correctors[fieldname] = self.reader().corrector(fieldname)
 
-        # Get any terms in the query in the fields we're correcting
+        # Get any missing terms in the query in the fields we're correcting
         if terms is None:
             terms = []
             for token in q.all_tokens():
-                if token.fieldname in correctors:
+                fieldname = token.fieldname
+                text = token.text
+                if fieldname in correctors and (fieldname, text) not in reader:
                     terms.append((token.fieldname, token.text))
 
         # Make a query corrector

File src/whoosh/spelling.py

     ``tokens``
         A list of token objects representing the corrected words.
 
-    You can also use the :meth:`Correction.format_string` to reformat the
+    You can also use the :meth:`Correction.format_string` method to reformat the
     corrected query string using a :class:`whoosh.highlight.Formatter` class.
     For example, to display the corrected query string as HTML with the
     changed words emphasized::
         self.original_string = qstring
         self.tokens = tokens
 
-        if self.original_string and self.tokens:
+        if self.original_string:
             self.string = self.format_string(highlight.NullFormatter())
         else:
-            self.string = None
+            self.string = ''
 
     def __repr__(self):
         return "%s(%r, %r)" % (self.__class__.__name__, self.query,
                                self.string)
 
     def format_string(self, formatter):
-        if not (self.original_string and self.tokens):
-            raise Exception("The original query isn't available")
+        """
+        Highlights the corrected words in the original query string using the
+        given :class:`~whoosh.highlight.Formatter`.
+
+        :param formatter: A :class:`whoosh.highlight.Formatter` instance.
+        :return: the output of the formatter (usually a string).
+        """
+
+        if not self.original_string:
+            return ''
         if isinstance(formatter, type):
             formatter = formatter()
 
         prefix = self.prefix
         maxdist = self.maxdist
 
+        # A list of tokens that were changed by a corrector
         corrected_tokens = []
+
+        # The corrected query tree. We don't need to deepcopy the original
+        # because we use Query.replace() to find-and-replace the corrected
+        # words and it returns a copy of the query tree.
         corrected_q = q
+
+        # For every word in the original query...
+        # Note we can't put these in a set, because we must preserve WHERE
+        # in the query each token occurred so we can format them later
         for token in q.all_tokens():
             fname = token.fieldname
+
+            # If this is one of the words we're supposed to correct...
             if (fname, token.text) in termset:
                 sugs = correctors[fname].suggest(token.text, prefix=prefix,
                                                  maxdist=maxdist)
                 if sugs:
+                    # This is a "simple" corrector, so we just pick the first
+                    # suggestion :/
                     sug = sugs[0]
+
+                    # Return a new copy of the original query with this word
+                    # replaced by the correction
                     corrected_q = corrected_q.replace(token.fieldname,
                                                       token.text, sug)
+                    # Add the token to the list of corrected tokens (for the
+                    # formatter to use later)
+                    token.original = token.text
                     token.text = sug
                     corrected_tokens.append(token)
 

File tests/test_parsing.py

+import pytest
+
 from whoosh import analysis, fields, query
 from whoosh.compat import u, text_type
 from whoosh.qparser import default
     assert len(q) == 2
     assert q[0] == query.Term("f", "A")
     assert q[1] == query.Term("f", "B")
+
+
+def test_unicode_num():
+    schema = fields.Schema(num=fields.NUMERIC)
+    parser = default.QueryParser(u"num", schema=schema)
+    q = parser.parse(u"num:1")
+
+    _ = text_type(q)

File tests/test_spelling.py

         assert c.suggest("cell") == ["cells"]
 
 
+def test_correct_correct():
+    from whoosh import qparser
 
+    schema = fields.Schema(a=fields.TEXT(spelling=True))
+    ix = RamStorage().create_index(schema)
+    ix_writer = ix.writer()
 
+    ix_writer.add_document(a=u('dworska'))
+    ix_writer.add_document(a=u('swojska'))
+
+    ix_writer.commit()
+
+    s = ix.searcher()
+    qtext = u('dworska')
+
+    qp = qparser.QueryParser('a', ix.schema)
+    q = qp.parse(qtext, ix.schema)
+    c = s.correct_query(q, qtext)
+
+    assert c.string == "dworska"
+    assert c.format_string(highlight.UppercaseFormatter()) == "dworska"
+
+