Commits

Matt Chaput committed 7b64638

Fixed bug in correcting code when all words were in the index.

  • Participants
  • Parent commits 83676c9

Comments (0)

Files changed (3)

File src/whoosh/searching.py

         :rtype: :class:`whoosh.spelling.Correction`
         """
 
+        reader = self.reader()
+
         # Dictionary of custom per-field correctors
         if correctors is None:
             correctors = {}
             if fieldname not in correctors:
                 correctors[fieldname] = self.reader().corrector(fieldname)
 
-        # Get any terms in the query in the fields we're correcting
+        # Get any missing terms in the query in the fields we're correcting
         if terms is None:
             terms = []
             for token in q.all_tokens():
-                if token.fieldname in correctors:
+                fieldname = token.fieldname
+                text = token.text
+                if fieldname in correctors and (fieldname, text) not in reader:
                     terms.append((token.fieldname, token.text))
 
         # Make a query corrector

File src/whoosh/spelling.py

     ``tokens``
         A list of token objects representing the corrected words.
 
-    You can also use the :meth:`Correction.format_string` to reformat the
+    You can also use the :meth:`Correction.format_string` method to reformat the
     corrected query string using a :class:`whoosh.highlight.Formatter` class.
     For example, to display the corrected query string as HTML with the
     changed words emphasized::
         self.original_string = qstring
         self.tokens = tokens
 
-        if self.original_string and self.tokens:
+        if self.original_string:
             self.string = self.format_string(highlight.NullFormatter())
         else:
-            self.string = None
+            self.string = ''
 
     def __repr__(self):
         return "%s(%r, %r)" % (self.__class__.__name__, self.query,
                                self.string)
 
     def format_string(self, formatter):
-        if not (self.original_string and self.tokens):
-            raise Exception("The original query isn't available")
+        """
+        Highlights the corrected words in the original query string using the
+        given :class:`~whoosh.highlight.Formatter`.
+
+        :param formatter: A :class:`whoosh.highlight.Formatter` instance.
+        :return: the output of the formatter (usually a string).
+        """
+
+        if not self.original_string:
+            return ''
         if isinstance(formatter, type):
             formatter = formatter()
 
         prefix = self.prefix
         maxdist = self.maxdist
 
+        # A list of tokens that were changed by a corrector
         corrected_tokens = []
+
+        # The corrected query tree. We don't need to deepcopy the original
+        # because we use Query.replace() to find-and-replace the corrected
+        # words and it returns a copy of the query tree.
         corrected_q = q
+
+        # For every word in the original query...
+        # Note we can't put these in a set, because we must preserve WHERE
+        # in the query each token occurred so we can format them later
         for token in q.all_tokens():
             fname = token.fieldname
+
+            # If this is one of the words we're supposed to correct...
             if (fname, token.text) in termset:
                 sugs = correctors[fname].suggest(token.text, prefix=prefix,
                                                  maxdist=maxdist)
                 if sugs:
+                    # This is a "simple" corrector, so we just pick the first
+                    # suggestion :/
                     sug = sugs[0]
+
+                    # Return a new copy of the original query with this word
+                    # replaced by the correction
                     corrected_q = corrected_q.replace(token.fieldname,
                                                       token.text, sug)
+                    # Add the token to the list of corrected tokens (for the
+                    # formatter to use later)
+                    token.original = token.text
                     token.text = sug
                     corrected_tokens.append(token)
 

File tests/test_spelling.py

         assert c.suggest("cell") == ["cells"]
 
 
+def test_correct_correct():
+    from whoosh import qparser
 
+    schema = fields.Schema(a=fields.TEXT(spelling=True))
+    ix = RamStorage().create_index(schema)
+    ix_writer = ix.writer()
 
+    ix_writer.add_document(a=u('dworska'))
+    ix_writer.add_document(a=u('swojska'))
+
+    ix_writer.commit()
+
+    s = ix.searcher()
+    qtext = u('dworska')
+
+    qp = qparser.QueryParser('a', ix.schema)
+    q = qp.parse(qtext, ix.schema)
+    c = s.correct_query(q, qtext)
+
+    assert c.string == "dworska"
+    assert c.format_string(highlight.UppercaseFormatter()) == "dworska"
+
+