Commits

Matt Chaput committed 9238dd8

Fixed output of charset_table_to_dict to be a dict. Fixes issue #298.
Fixed missing parens in stemming.rst. Fixes issue #299.
Minor doc fixes.

Comments (0)

Files changed (5)

docs/source/stemming.rst

     from whoosh.support.charset import accent_map
 
     # For example, to add an accent-folding filter to a stemming analyzer:
-    my_analyzer = StemmingAnalyzer | CharsetFilter(accent_map)
+    my_analyzer = StemmingAnalyzer() | CharsetFilter(accent_map)
 
     # To use this analyzer in your schema:
     my_schema = fields.Schema(content=fields.TEXT(analyzer=my_analyzer))
     from whoosh.analysis import CharsetFilter
     from whoosh.support.charset import default_charset, charset_table_to_dict
     charmap = charset_table_to_dict(default_charset)
-    my_analyzer = StemmingAnalyzer | CharsetFilter(charmap)
+    my_analyzer = StemmingAnalyzer() | CharsetFilter(charmap)
 
 (The Sphinx charset table format is described at
 http://www.sphinxsearch.com/docs/current.html#conf-charset-table )

src/whoosh/analysis/analyzers.py

     :param ignore: a set of words to not stem.
     :param cachesize: the maximum number of stemmed words to cache. The larger
         this number, the faster stemming will be but the more memory it will
-        use.
+        use. Use None for no cache, or -1 for an unbounded cache.
     """
 
     ret = RegexTokenizer(expression=expression, gaps=gaps)

src/whoosh/analysis/filters.py

         :param charmap: a dictionary mapping from integer character numbers to
             unicode characters, as required by the unicode.translate() method.
         """
+
         self.charmap = charmap
 
     def __eq__(self, other):

src/whoosh/support/charset.py

                 continue
 
             raise Exception("Don't know what to do with %r" % item)
-    return map
+    return dict(map)

tests/test_analysis.py

         assert words == target
 
 
-def test_pickleability():
+def test_la_pickleability():
     ana = analysis.LanguageAnalyzer("en")
     _ = dumps(ana, -1)
 
 
+def test_charset_pickleability():
+    from whoosh.support import charset
+    charmap = charset.charset_table_to_dict(charset.default_charset)
+    ana = analysis.StandardAnalyzer() | analysis.CharsetFilter(charmap)
+    _ = dumps(ana, -1)
+
+    ana = analysis.CharsetTokenizer(charmap)
+    _ = dumps(ana, -1)
+
+