1. coady
  2. lupyne


coady committed 738589b

Deprecated expunge and optimize as per Lucene.

  • Participants
  • Parent commits 38ef959
  • Branches default

Comments (0)

Files changed (4)

File lupyne/engine/indexers.py

View file
  • Ignore whitespace
 import itertools, operator
 import contextlib
 import abc, collections
+import warnings
 import lucene
 from .queries import Query, Collector, SortField, Highlighter, FastVectorHighlighter, SpellChecker, SpellParser
 from .documents import Field, Document, Hits
     def segments(self):
         "segment filenames with document counts"
         return dict((lucene.SegmentReader.cast_(reader).segmentName, reader.numDocs()) for reader in self.sequentialSubReaders)
-    def copy(self, dest, query=None, exclude=None, optimize=False):
+    def copy(self, dest, query=None, exclude=None, optimize=False, merge=0):
         """Copy the index to the destination directory.
         Optimized to use hard links if the destination is a file system path.
         :param dest: destination directory path or lucene Directory
         :param query: optional lucene Query to select documents
         :param exclude: optional lucene Query to exclude documents
-        :param optimize: optionally optimize destination index
+        :param merge: optionally merge into maximum number of segments
+        :param optimize: .. deprecated:: 1.1+ use **merge** param instead
         copy(self.indexCommit, dest)
         with contextlib.closing(IndexWriter(dest)) as writer:
             if optimize:
-                writer.optimize(optimize)
+                warnings.warn("Use merge=int instead of optimize parameter.", DeprecationWarning)
+                merge = int(optimize)
+            if merge:
+                writer.optimize(merge)
             return len(writer)
     def count(self, name, value):
         "Return number of documents with given term."
     def refresh(self, **caches):
         "Store refreshed searcher with :meth:`IndexSearcher.reopen` caches."
         self.indexSearcher = self.indexSearcher.reopen(**caches)
-    def commit(self, expunge=False, optimize=False, **caches):
+    def commit(self, expunge=False, optimize=False, merge=False, **caches):
         """Commit writes and :meth:`refresh` searcher.
-        :param expunge: expunge deletes
-        :param optimize: optimize index, optionally supply number of segments
+        :param merge: merge segments with deletes, or optionally specify maximum number of segments
+        :param expunge,optimize: .. deprecated:: 1.1+ use **merge** param instead
         if expunge:
+            warnings.warn("Use merge=True instead of expunge parameter.", DeprecationWarning)
         if optimize:
-            self.optimize(optimize)
+            warnings.warn("Use merge=int instead of optimize parameter.", DeprecationWarning)
+            merge = int(optimize)
+        if merge:
+            if isinstance(merge, bool):
+                self.expungeDeletes()
+            else:
+                self.optimize(merge)

File lupyne/server.py

View file
  • Ignore whitespace
 def multi(value):
     return value and value.split(',')
+def mapping(value):
+    if isinstance(value, dict):
+        return value
+    cherrypy.response.headers['warning'] = '199 lupyne "use an object instead of an array"'
+    return dict.fromkeys(value, True)
 class params:
     "Parameter parsing."
         readers = reader.sequentialSubReaders if lucene.MultiReader.instance_(reader) else [reader]
         return dict((unicode(reader.directory()), reader.numDocs()) for reader in readers)
-    @cherrypy.tools.json_in(process_body=lambda body: dict.fromkeys(body, True))
+    @cherrypy.tools.json_in(process_body=mapping)
     def update(self, **caches):
         """Refresh index version.
         **POST** /update
             Reopen searcher, optionally reloading caches, and return document count.
-            ["filters"|"sorters"|"spellcheckers",... ]
+            {"filters"|"sorters"|"spellcheckers": true,... }
+            .. versionchanged:: 1.1+ request body is an object instead of an array
             :return: *int*
             cherrypy.response.status = httplib.ACCEPTED
-    @cherrypy.tools.json_in(process_body=lambda body: {'directories': list(body)})
     @cherrypy.tools.allow(methods=['GET', 'POST'])
-    def index(self, directories=()):
+    def index(self):
         """Add indexes.  See :meth:`WebSearcher.index` for GET method.
         **POST** /[index]
             [*string*,... ]
-        if cherrypy.request.method == 'POST':
-            for directory in directories:
+        request = cherrypy.serving.request
+        if request.method == 'POST':
+            for directory in getattr(request, 'json', ()):
                 self.indexer += directory
         return {unicode(self.indexer.directory): len(self.indexer)}
-    @cherrypy.tools.json_in(process_body=lambda body: dict.fromkeys(body, True))
+    @cherrypy.tools.json_in(process_body=mapping)
     @cherrypy.tools.allow(paths=[('POST',), ('GET', 'PUT', 'DELETE'), ('GET',)])
     def update(self, id='', name='', **options):
         """Commit index changes and refresh index version.
         **POST** /update
             Commit write operations and return document count.  See :meth:`WebSearcher.update` for caching options.
-            ["expunge"|"optimize",... ]
+            {"merge": true|\ *int*,... }
+            .. versionchanged:: 1.1+ request body is an object instead of an array
             :return: *int*

File test/local.py

View file
  • Ignore whitespace
         assert reader[0].dict() == {} and reader.count('text', '?') == 1
         assert len(reader.comparator('text')) == 4
         indexer.delete('text', '?')
-        indexer.commit(expunge=True)
+        with assertWarns(DeprecationWarning):
+            indexer.commit(expunge=True)
+        indexer.commit(merge=True)
         assert not indexer.hasDeletions()
-        indexer.commit(optimize=2)
-        indexer.commit(optimize=True)
+        indexer.commit(merge=2)
+        with assertWarns(DeprecationWarning):
+            indexer.commit(optimize=True)
         assert indexer.optimized
         del reader.indexReader
         self.assertRaises(AttributeError, getattr, reader, 'maxDoc')
         files = set(os.listdir(self.tempdir))
         path = os.path.join(self.tempdir, 'temp')
         with indexer.snapshot('backup') as commit:
-            indexer.commit(optimize=True)
+            indexer.commit(merge=1)
             assert indexer.indexCommit.generation > commit.generation
             engine.indexers.copy(commit, path)
             assert set(os.listdir(path)) == set(commit.fileNames) < files < set(os.listdir(self.tempdir))
         searcher = engine.IndexSearcher(directory)
         assert len(searcher) == size and list(searcher.terms('state')) == ['CA']
         path = os.path.join(self.tempdir, 'temp')
-        size = indexer.copy(path, exclude=query, optimize=True)
+        with assertWarns(DeprecationWarning):
+            size = indexer.copy(path, exclude=query, optimize=True)
         assert len(searcher) + size == len(indexer)
         searcher = engine.IndexSearcher(path)
         assert searcher.optimized and 'CA' not in searcher.terms('state')

File test/remote.py

View file
  • Ignore whitespace
         with assertRaises(httplib.HTTPException, httplib.METHOD_NOT_ALLOWED):
+        with local.assertWarns(UserWarning):
+            resource.post('/update', [])
         with assertRaises(httplib.HTTPException, httplib.METHOD_NOT_ALLOWED):
         with assertRaises(httplib.HTTPException, httplib.METHOD_NOT_ALLOWED):
         resource = client.Resource('localhost', self.ports[0])
         assert not resource.delete('/search', q='sample', **{'q.field': 'name', 'q.type': 'term'})
         assert resource.get('/docs') == [0]
-        assert not resource.post('/update', ['expunge'])
+        assert not resource.post('/update', {'merge': True})
         assert resource.get('/docs') == []
         assert not resource.put('/docs/name/sample')
         assert resource.post('/update')
             assert sorted(resource.get('/fields/' + name)) == ['index', 'store', 'termvector']
         resource.post('/docs', list(fixture.constitution.docs()))
         assert resource.get('/').values() == [35]
-        resource.post('/update', ['optimize', 'spellcheckers'])
+        resource.post('/update', {'spellcheckers': True, 'merge': 1})
         assert resource.get('/docs/0', **{'fields.indexed': 'amendment:int'}) == {'amendment': 0, 'article': 'Preamble'}
         doc = resource.get('/docs/0', **{'fields.vector': 'text,missing'})
         assert doc['missing'] == [] and doc['text'].index('states') < doc['text'].index('united')