Commits

Matt Chaput committed 1ea83e3

Fuzzy plugin used "prefix" and "prefixlength" to mean the same thing.
Added unit test for setting prefix length in fuzzy syntax.

Comments (0)

Files changed (2)

src/whoosh/qparser/plugins.py

     """, verbose=True)
 
     class FuzzinessNode(syntax.SyntaxNode):
-        def __init__(self, maxdist, prefix, original):
+        def __init__(self, maxdist, prefixlength, original):
             self.maxdist = maxdist
-            self.prefix = prefix
+            self.prefixlength = prefixlength
             self.original = original
 
         def __repr__(self):
-            return "<~%d>" % (self.maxdist,)
+            return "<~%d/%d>" % (self.maxdist, self.prefixlength)
 
     class FuzzyTermNode(syntax.TextNode):
         qclass = query.FuzzyTerm
 
-        def __init__(self, wordnode, maxdist, prefix):
+        def __init__(self, wordnode, maxdist, prefixlength):
             self.fieldname = wordnode.fieldname
             self.text = wordnode.text
             self.boost = wordnode.boost
             self.startchar = wordnode.startchar
             self.endchar = wordnode.endchar
             self.maxdist = maxdist
-            self.prefix = prefix
+            self.prefixlength = prefixlength
 
         def r(self):
-            return "%r ~%d" % (self.text, self.maxdist)
+            return "%r ~%d/%d" % (self.text, self.maxdist, self.prefixlength)
 
         def query(self, parser):
             # Use the superclass's query() method to create a FuzzyTerm query
             q = syntax.TextNode.query(self, parser)
             # Set FuzzyTerm-specific attributes
             q.maxdist = self.maxdist
-            q.prefix = self.prefix
+            q.prefixlength = self.prefixlength
             return q
 
     def create(self, parser, match):
         maxdist = int(mdstr) if mdstr else 1
 
         pstr = match.group("prefix")
-        prefix = int(pstr) if pstr else 0
+        prefixlength = int(pstr) if pstr else 0
 
-        return self.FuzzinessNode(maxdist, prefix, match.group(0))
+        return self.FuzzinessNode(maxdist, prefixlength, match.group(0))
 
     def filters(self, parser):
         return [(self.do_fuzzyterms, 0)]
                 nextnode = group[i + 1]
                 if isinstance(nextnode, self.FuzzinessNode):
                     node = self.FuzzyTermNode(node, nextnode.maxdist,
-                                              nextnode.prefix)
+                                              nextnode.prefixlength)
                     i += 1
             if isinstance(node, self.FuzzinessNode):
                 node = syntax.to_word(node)

tests/test_parse_plugins.py

     assert q.text == "bob~"
 
 
+def test_fuzzy_prefix():
+    from whoosh import scoring
+
+    schema = fields.Schema(title=fields.TEXT(stored=True),
+                           content=fields.TEXT(spelling=True))
+
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        # Match -> first
+        w.add_document(title=u("First"),
+                       content=u"This is the first document we've added!")
+        # No match
+        w.add_document(title=u("Second"),
+                       content=u("The second one is even more interesting! filst"))
+        # Match -> first
+        w.add_document(title=u("Third"),
+                       content=u("The world first line we've added!"))
+        # Match -> zeroth
+        w.add_document(title=u("Fourth"),
+                       content=u("The second one is alaways comes after zeroth!"))
+        # Match -> fire is within 2 edits (transpose + delete) of first
+        w.add_document(title=u("Fifth"),
+                       content=u("The fire is beautiful"))
+
+    from whoosh.qparser import QueryParser, FuzzyTermPlugin #, BoundedFuzzyTermPlugin
+    parser = QueryParser("content", ix.schema)
+    parser.add_plugin(FuzzyTermPlugin())
+    q = parser.parse("first~2/3 OR zeroth", debug=False)
+
+    assert isinstance(q, query.Or)
+    ft = q[0]
+    assert isinstance(ft, query.FuzzyTerm)
+    assert ft.maxdist == 2
+    assert ft.prefixlength == 3
+
+    with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
+        results = searcher.search(q)
+        print(len(results))
+        assert len(results) == 4
+        assert " ".join(hit["title"] for hit in results) == "Fourth First Third Fifth"
+
+
 def test_function_plugin():
     class FakeQuery(query.Query):
         def __init__(self, children, *args, **kwargs):
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.