Matt Chaput avatar Matt Chaput committed 14de108

Raise an error when a bad analyzer chain is constructed.
See issue #358.

Comments (0)

Files changed (3)

src/whoosh/analysis/acore.py

 from whoosh.compat import iteritems
 
 
+# Exceptions
+
+class CompositionError(Exception):
+    """Raised when an analyzer chain is composed in an invalid order."""
+
+    pass
+
+
 # Utility functions
 
 def unstopped(tokenstream):

src/whoosh/analysis/analyzers.py

 # those of the authors and should not be interpreted as representing official
 # policies, either expressed or implied, of Matt Chaput.
 
-from whoosh.analysis.acore import Composable
+from whoosh.analysis.acore import Composable, CompositionError
+from whoosh.analysis.tokenizers import Tokenizer
 from whoosh.analysis.filters import LowercaseFilter
 from whoosh.analysis.filters import StopFilter, STOP_WORDS
 from whoosh.analysis.morph import StemFilter
 class CompositeAnalyzer(Analyzer):
     def __init__(self, *composables):
+        """Build a flat chain from the given composables.
+
+        Nested ``CompositeAnalyzer`` arguments are unpacked so that
+        ``self.items`` is a single flat list.
+
+        :raises CompositionError: if a ``Tokenizer`` appears anywhere after
+            the first position in the flattened chain.
+        """
         self.items = []
+
         for comp in composables:
             if isinstance(comp, CompositeAnalyzer):
                 self.items.extend(comp.items)
             else:
                 self.items.append(comp)
 
+        # A tokenizer may only appear at the start of a chain: tokenizers (and
+        # analyzers, which begin with one) take a string and return a generator
+        # of tokens, whereas filters take and return generators of tokens.
+        for item in self.items[1:]:
+            if isinstance(item, Tokenizer):
+                raise CompositionError("Only one tokenizer allowed at the start"
+                                       " of the analyzer: %r" % self.items)
+
     def __repr__(self):
         """Return ``ClassName(item1, item2, ...)`` using each item's repr."""
         return "%s(%s)" % (self.__class__.__name__,
                            ", ".join(repr(item) for item in self.items))

tests/test_analysis.py

     es_stopper = analysis.RegexTokenizer() | analysis.StopFilter(lang="es")
     ls = [token.text for token in es_stopper(u("el lapiz es en la mesa"))]
     assert ls == ["lapiz", "mesa"]
+
+
+def test_issue358():
+    t = analysis.RegexTokenizer("\w+")
+    with pytest.raises(analysis.CompositionError):
+        _ = t | analysis.StandardAnalyzer()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.