Commits

Matt Chaput committed ec4185e

Added functionality to clear the existing index contents when writing. Fixes issue #279.

Comments (0)

Files changed (3)

docs/source/indexing.rst

   * If a path is in the set of paths to re-index, we need to index it.
 
   * Otherwise, we can skip indexing the file.
+
+
+Clearing the index
+==================
+
+In some cases you may want to re-index from scratch. To clear the index without
+disrupting any existing readers::
+
+    from whoosh import writing
+
+    with myindex.writer() as mywriter:
+        # You can optionally add documents to the writer here
+        # e.g. mywriter.add_document(...)
+
+        # Using mergetype=CLEAR clears all existing segments, so the index
+        # will contain only the documents you've added to this writer
+        mywriter.mergetype = writing.CLEAR
+
+Or, if you call ``commit()`` directly instead of using the writer as a
+context manager, do it like this::
+
+    mywriter = myindex.writer()
+    # ...
+    mywriter.commit(mergetype=writing.CLEAR)
+
+.. note::
+    If you don't need to worry about existing readers, a more efficient method
+    is to simply delete the contents of the index directory and start over.

src/whoosh/writing.py

     return []
 
 
+def CLEAR(writer, segments):
+    """This policy DELETES all existing segments and only writes the new
+    segment.
+    """
+
+    return []
+
+
 # Customized sorting pool for postings
 
 class PostingPool(SortingPool):
 
         self.merge = True
         self.optimize = False
+        self.mergetype = None
 
     def __repr__(self):
         return "<%s %r>" % (self.__class__.__name__, self.newsegment)
     # pieces to allow MpWriter to call them individually
 
     def _merge_segments(self, mergetype, optimize, merge):
+        # The writer supports two ways of setting mergetype/optimize/merge:
+        # as attributes or as keyword arguments to commit(). Originally there
+        # were just the keyword arguments, but then I added the ability to use
+        # the writer as a context manager using "with", where the user never
+        # explicitly calls commit(), hence the attributes
+        mergetype = mergetype if mergetype is not None else self.mergetype
         optimize = optimize if optimize is not None else self.optimize
         merge = merge if merge is not None else self.merge
 

tests/test_writing.py

 import pytest
 
 from whoosh import analysis, fields, query, writing
-from whoosh.compat import u, xrange, text_type
+from whoosh.compat import b, u, xrange, text_type
 from whoosh.filedb.filestore import RamStorage
 from whoosh.util.testing import TempIndex
 
                                                   "kilo", "lima"]
 
 
-class test_add_reader_spelling():
+def test_add_reader_spelling():
     # Test whether add_spell_word() items get copied over in a merge
 
     # Because b is stemming and spelled, it will use add_spell_word()
                                                       "modeling opening polling pressing quitting "
                                                       "rendering ripping rolling timing tying undoing "
                                                       "writing yelling")
+
+def test_clear():
+    schema = fields.Schema(a=fields.KEYWORD)
+    ix = RamStorage().create_index(schema)
+
+    # Add some segments
+    with ix.writer() as w:
+        w.add_document(a=u("one two three"))
+        w.merge = False
+    with ix.writer() as w:
+        w.add_document(a=u("two three four"))
+        w.merge = False
+    with ix.writer() as w:
+        w.add_document(a=u("three four five"))
+        w.merge = False
+
+    # Clear
+    with ix.writer() as w:
+        w.add_document(a=u("foo bar baz"))
+        w.mergetype = writing.CLEAR
+
+    with ix.searcher() as s:
+        assert s.doc_count_all() == 1
+        assert list(s.reader().lexicon("a")) == [b("bar"), b("baz"), b("foo")]
+
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.