Commits

Matt Chaput committed 1ea83e3

Fuzzy plugin used "prefix" and "prefixlength" to mean the same thing.
Added unit test for setting prefix length in fuzzy syntax.

Comments (0)

Files changed (2)

src/whoosh/qparser/plugins.py

     """, verbose=True)
 
     class FuzzinessNode(syntax.SyntaxNode):
-        def __init__(self, maxdist, prefix, original):
+        def __init__(self, maxdist, prefixlength, original):
             self.maxdist = maxdist
-            self.prefix = prefix
+            self.prefixlength = prefixlength
             self.original = original
 
         def __repr__(self):
-            return "<~%d>" % (self.maxdist,)
+            return "<~%d/%d>" % (self.maxdist, self.prefixlength)
 
     class FuzzyTermNode(syntax.TextNode):
         qclass = query.FuzzyTerm
 
-        def __init__(self, wordnode, maxdist, prefix):
+        def __init__(self, wordnode, maxdist, prefixlength):
             self.fieldname = wordnode.fieldname
             self.text = wordnode.text
             self.boost = wordnode.boost
             self.startchar = wordnode.startchar
             self.endchar = wordnode.endchar
             self.maxdist = maxdist
-            self.prefix = prefix
+            self.prefixlength = prefixlength
 
         def r(self):
-            return "%r ~%d" % (self.text, self.maxdist)
+            return "%r ~%d/%d" % (self.text, self.maxdist, self.prefixlength)
 
         def query(self, parser):
             # Use the superclass's query() method to create a FuzzyTerm query
             q = syntax.TextNode.query(self, parser)
             # Set FuzzyTerm-specific attributes
             q.maxdist = self.maxdist
-            q.prefix = self.prefix
+            q.prefixlength = self.prefixlength
             return q
 
     def create(self, parser, match):
         maxdist = int(mdstr) if mdstr else 1
 
         pstr = match.group("prefix")
-        prefix = int(pstr) if pstr else 0
+        prefixlength = int(pstr) if pstr else 0
 
-        return self.FuzzinessNode(maxdist, prefix, match.group(0))
+        return self.FuzzinessNode(maxdist, prefixlength, match.group(0))
 
     def filters(self, parser):
         return [(self.do_fuzzyterms, 0)]
                 nextnode = group[i + 1]
                 if isinstance(nextnode, self.FuzzinessNode):
                     node = self.FuzzyTermNode(node, nextnode.maxdist,
-                                              nextnode.prefix)
+                                              nextnode.prefixlength)
                     i += 1
             if isinstance(node, self.FuzzinessNode):
                 node = syntax.to_word(node)

tests/test_parse_plugins.py

     assert q.text == "bob~"
 
 
+def test_fuzzy_prefix():
+    """Check the ``term~maxdist/prefixlength`` fuzzy syntax end to end.
+
+    Parses ``first~2/3 OR zeroth`` and verifies that the resulting
+    FuzzyTerm carries ``maxdist == 2`` and ``prefixlength == 3``, then
+    runs the query against a small in-memory index and checks which
+    documents are returned and in what order.
+    """
+
+    from whoosh import scoring
+    from whoosh.qparser import QueryParser, FuzzyTermPlugin
+
+    schema = fields.Schema(title=fields.TEXT(stored=True),
+                           content=fields.TEXT(spelling=True))
+
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        # Match -> contains "first" itself
+        # (wrapped in u() like every other literal here; a bare u"..."
+        # literal is a syntax error on Python 3.0-3.2)
+        w.add_document(title=u("First"),
+                       content=u("This is the first document we've added!"))
+        # No match expected -> "filst" is one edit from "first" but differs
+        # within the first 3 characters
+        w.add_document(title=u("Second"),
+                       content=u("The second one is even more interesting! "
+                                 "filst"))
+        # Match -> contains "first" itself
+        w.add_document(title=u("Third"),
+                       content=u("The world first line we've added!"))
+        # Match -> contains "zeroth" (the second clause of the OR)
+        w.add_document(title=u("Fourth"),
+                       content=u("The second one is alaways comes after "
+                                 "zeroth!"))
+        # Match -> "fire" shares the prefix "fir" with "first" and is within
+        # 2 edits of it (substitute s -> e, delete t)
+        w.add_document(title=u("Fifth"),
+                       content=u("The fire is beautiful"))
+
+    parser = QueryParser("content", ix.schema)
+    parser.add_plugin(FuzzyTermPlugin())
+    q = parser.parse("first~2/3 OR zeroth", debug=False)
+
+    # The first OR clause must be the fuzzy term with both numbers applied
+    assert isinstance(q, query.Or)
+    ft = q[0]
+    assert isinstance(ft, query.FuzzyTerm)
+    assert ft.maxdist == 2
+    assert ft.prefixlength == 3
+
+    with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
+        results = searcher.search(q)
+        assert len(results) == 4
+        titles = " ".join(hit["title"] for hit in results)
+        assert titles == "Fourth First Third Fifth"
+
+
+
+
 def test_function_plugin():
     class FakeQuery(query.Query):
         def __init__(self, children, *args, **kwargs):