Commits

Matt Chaput committed fb14e84

Fixed pickling of LanguageAnalyzer.

  • Participants
  • Parent commits 31a721c

Comments (0)

Files changed (3)

src/whoosh/analysis/analyzers.py

     """
 
     from whoosh.lang import NoStemmer, NoStopWords
-    from whoosh.lang import stemmer_for_language
     from whoosh.lang import stopwords_for_language
 
     # Make the start of the chain
 
     # Add a stemming filter
     try:
-        stemfn = stemmer_for_language(lang)
-        chain = chain | StemFilter(stemfn=stemfn, cachesize=cachesize)
+        chain = chain | StemFilter(lang=lang, cachesize=cachesize)
     except NoStemmer:
         pass
 

src/whoosh/analysis/morph.py

 
     is_morph = True
 
-    def __init__(self, stemfn=stem, ignore=None, cachesize=50000):
+    def __init__(self, stemfn=stem, lang=None, ignore=None, cachesize=50000):
         """
         :param stemfn: the function to use for stemming.
+        :param lang: if not None, overrides the stemfn with a language stemmer
+            from the ``whoosh.lang.snowball`` package.
         :param ignore: a set/list of words that should not be stemmed. This is
             converted into a frozenset. If you omit this argument, all tokens
             are stemmed.
         """
 
         self.stemfn = stemfn
+        self.lang = lang
         self.ignore = frozenset() if ignore is None else frozenset(ignore)
         self.cachesize = cachesize
         # clear() sets the _stem attr to a cached wrapper around self.stemfn
         self.clear()
 
     def clear(self):
+        if self.lang:
+            from whoosh.lang import stemmer_for_language
+            stemfn = stemmer_for_language(self.lang)
+        else:
+            stemfn = self.stemfn
+
         if isinstance(self.cachesize, integer_types) and self.cachesize != 0:
             if self.cachesize < 0:
-                self._stem = unbound_cache(self.stemfn)
+                self._stem = unbound_cache(stemfn)
             elif self.cachesize > 1:
-                self._stem = lru_cache(self.cachesize)(self.stemfn)
+                self._stem = lru_cache(self.cachesize)(stemfn)
         else:
-            self._stem = self.stemfn
+            self._stem = stemfn
 
     def cache_info(self):
         if self.cachesize <= 1:

tests/test_analysis.py

 
 from whoosh import analysis, fields, qparser
 from whoosh.compat import u, unichr, text_type
+from whoosh.compat import dumps
 from whoosh.filedb.filestore import RamStorage
 from whoosh.util.testing import skip_if_unavailable
 
         assert_equal(words, target)
 
 
+def test_pickleability():
+    ana = analysis.LanguageAnalyzer("en")
+    pick = dumps(ana, -1)
 
-