Commits

Matt Chaput committed 12655f5 Merge

Merging changes.

Comments (0)

Files changed (2)

src/whoosh/analysis/tokenizers.py

     def __init__(self, expression="[^/]+"):
         self.expr = rcompile(expression)
 
-    def __call__(self, value, **kwargs):
-        assert isinstance(value, text_type), "%r is not unicode" % value
-        token = Token(**kwargs)
-        for match in self.expr.finditer(value):
-            token.text = value[:match.end()]
-            yield token
+    def __call__(self, value, positions=False, start_pos=0, **kwargs):
+         assert isinstance(value, text_type), "%r is not unicode" % value
+         token = Token(positions, **kwargs)
+         pos = start_pos
+         for match in self.expr.finditer(value):
+             token.text = value[:match.end()]
+             if positions:
+                 token.pos = pos
+                 pos += 1
+             yield token
+

tests/test_analysis.py

                                            "/alfa/bravo/charlie",
                                            "/alfa/bravo/charlie/delta"]
 
+def test_path_tokenizer2():
+    path_field = fields.TEXT(analyzer=analysis.PathTokenizer())
+    st = RamStorage()
+    schema = fields.Schema(path=path_field)
+    index = st.create_index(schema)
+
+    with index.writer() as writer:
+        writer.add_document(path=u'/alfa/brvo/charlie/delta/')
+        writer.add_document(path=u'/home/user/file.txt')
+    assert not index.is_empty()
+
+    with index.reader() as reader:
+        items = list(reader.all_terms())
+    assert 'path' in [field for field, value in items]
+    assert '/alfa' in [value for field, value in items]
 
 def test_composition1():
     ca = analysis.RegexTokenizer() | analysis.LowercaseFilter()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use the ↑ and ↓ arrow keys to navigate, and press Return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.