Commits

Matt Chaput committed b2186fb

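Added the hyphen to the set of word characters accepted by the wildcard regex, so that a
hyphenated wildcard term such as "*Ben-Hayden*" is matched as one wildcard instead of the
match stopping at the hyphen. A true fix will have to wait for a more advanced parser.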

Comments (0)

Files changed (2)

src/whoosh/qparser/plugins.py

 
 class WildcardPlugin(TaggingPlugin):
     class WildcardNode(syntax.TextNode):
+        # Note that this node inherits tokenize = False from TextNode,
+        # so the text in this node will not be analyzed... just passed
+        # straight to the query
+
+        # TODO: instead of parsing a "wildcard word", create marker nodes for
+        # individual ? and * characters. This will have to wait for a more
+        # advanced wikiparser-like parser.
+
         qclass = query.Wildcard
 
         def r(self):
     # \u061F = Arabic question mark
     # \u1367 = Ethiopic question mark
     qms = u("\u055E\u061F\u1367")
-    expr = u("(?P<text>\\w*[*?%s](\\w|[*?%s])*)") % (qms, qms)
+    expr = u("(?P<text>(\\w|[-])*[*?%s](\\w|[-*?%s])*)") % (qms, qms)
     nodetype = WildcardNode
 
 

tests/test_parsing.py

     assert_equal(q[1].fieldname, "title")
     assert_equal(q[0].text, "*john*")
     assert_equal(q[1].text, "blog")
+
+def test_dash():
+    ana = analysis.StandardAnalyzer("[ \t\r\n()*?]+")
+    schema = fields.Schema(title=fields.TEXT(analyzer=ana),
+                           text=fields.TEXT(analyzer=ana), time=fields.ID)
+    qtext = u("*Ben-Hayden*")
+
+    qp = default.QueryParser("text", schema)
+    q = qp.parse(qtext)
+    assert_equal(repr(q), "Wildcard('text', u'*ben-hayden*')")
+
+    qp = default.MultifieldParser(["title", "text", "time"], schema)
+    q = qp.parse(qtext)
+    assert_equal(repr(q), "Or([Wildcard('title', u'*ben-hayden*'), Wildcard('text', u'*ben-hayden*'), Wildcard('time', u'*Ben-Hayden*')])")
+
+