1. Matt Chaput
  2. whoosh

Commits

Matt Chaput  committed 1ea83e3

Fuzzy plugin used both "prefix" and "prefixlength" to mean the same thing; standardized on "prefixlength".
Added unit test for setting prefix length in fuzzy syntax.

  • Participants
  • Parent commits 92ca283
  • Branches default

Comments (0)

Files changed (2)

File src/whoosh/qparser/plugins.py

View file
  • Ignore whitespace
     """, verbose=True)
 
     class FuzzinessNode(syntax.SyntaxNode):
-        def __init__(self, maxdist, prefix, original):
+        def __init__(self, maxdist, prefixlength, original):
             self.maxdist = maxdist
-            self.prefix = prefix
+            self.prefixlength = prefixlength
             self.original = original
 
         def __repr__(self):
-            return "<~%d>" % (self.maxdist,)
+            return "<~%d/%d>" % (self.maxdist, self.prefixlength)
 
     class FuzzyTermNode(syntax.TextNode):
         qclass = query.FuzzyTerm
 
-        def __init__(self, wordnode, maxdist, prefix):
+        def __init__(self, wordnode, maxdist, prefixlength):
             self.fieldname = wordnode.fieldname
             self.text = wordnode.text
             self.boost = wordnode.boost
             self.startchar = wordnode.startchar
             self.endchar = wordnode.endchar
             self.maxdist = maxdist
-            self.prefix = prefix
+            self.prefixlength = prefixlength
 
         def r(self):
-            return "%r ~%d" % (self.text, self.maxdist)
+            return "%r ~%d/%d" % (self.text, self.maxdist, self.prefixlength)
 
         def query(self, parser):
             # Use the superclass's query() method to create a FuzzyTerm query
             q = syntax.TextNode.query(self, parser)
             # Set FuzzyTerm-specific attributes
             q.maxdist = self.maxdist
-            q.prefix = self.prefix
+            q.prefixlength = self.prefixlength
             return q
 
     def create(self, parser, match):
         maxdist = int(mdstr) if mdstr else 1
 
         pstr = match.group("prefix")
-        prefix = int(pstr) if pstr else 0
+        prefixlength = int(pstr) if pstr else 0
 
-        return self.FuzzinessNode(maxdist, prefix, match.group(0))
+        return self.FuzzinessNode(maxdist, prefixlength, match.group(0))
 
     def filters(self, parser):
         return [(self.do_fuzzyterms, 0)]
                 nextnode = group[i + 1]
                 if isinstance(nextnode, self.FuzzinessNode):
                     node = self.FuzzyTermNode(node, nextnode.maxdist,
-                                              nextnode.prefix)
+                                              nextnode.prefixlength)
                     i += 1
             if isinstance(node, self.FuzzinessNode):
                 node = syntax.to_word(node)

File tests/test_parse_plugins.py

View file
  • Ignore whitespace
     assert q.text == "bob~"
 
 
+def test_fuzzy_prefix():
+    from whoosh import scoring
+
+    schema = fields.Schema(title=fields.TEXT(stored=True),
+                           content=fields.TEXT(spelling=True))
+
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        # Match -> first
+        w.add_document(title=u("First"),
+                       content=u"This is the first document we've added!")
+        # No match
+        w.add_document(title=u("Second"),
+                       content=u("The second one is even more interesting! filst"))
+        # Match -> first
+        w.add_document(title=u("Third"),
+                       content=u("The world first line we've added!"))
+        # Match -> zeroth
+        w.add_document(title=u("Fourth"),
+                       content=u("The second one is alaways comes after zeroth!"))
+        # Match -> fire is within 2 edits (transpose + delete) of first
+        w.add_document(title=u("Fifth"),
+                       content=u("The fire is beautiful"))
+
+    from whoosh.qparser import QueryParser, FuzzyTermPlugin #, BoundedFuzzyTermPlugin
+    parser = QueryParser("content", ix.schema)
+    parser.add_plugin(FuzzyTermPlugin())
+    q = parser.parse("first~2/3 OR zeroth", debug=False)
+
+    assert isinstance(q, query.Or)
+    ft = q[0]
+    assert isinstance(ft, query.FuzzyTerm)
+    assert ft.maxdist == 2
+    assert ft.prefixlength == 3
+
+    with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
+        results = searcher.search(q)
+        print(len(results))
+        assert len(results) == 4
+        assert " ".join(hit["title"] for hit in results) == "Fourth First Third Fifth"
+
+
 def test_function_plugin():
     class FakeQuery(query.Query):
         def __init__(self, children, *args, **kwargs):