Commits

Matt Chaput committed 2a34e3d

Added methods to clear the index.

Comments (0)

Files changed (5)

src/whoosh/codec/clod.py

 class ClodDocWriter(codec.DocWriter):
     def __init__(self, txn):
         self._txn = txn
+        self._postinglimit = 100000
 
         self._docid = -1
         self._lastid = -1
         self._postings = defaultdict(list)
         # Caches doc field length statistics
         self._lengths = LengthsCache(self._txn)
-        # Caches fieldna
-        # me -> fieldbytes encodings
+        # Caches fieldname -> fieldbytes encodings
         self._fieldcache = {}
         # Track the IDs of deleted documents to remove them from posting lists
         self._deleted = set()
         # self._tagmaps = TagmapCache(self._txn)
 
         self._postingcount = 0
-        self._postinglimit = 100000
+
+    def clear(self):
+        """
+        Clears the underlying transaction and then resets this writer's
+        in-memory caches and counters.
+
+        Asserts that ``self._indoc`` is false (presumably meaning that no
+        document is currently being written -- TODO confirm).
+        """
+
+        assert not self._indoc
+        self._txn.clear()
+        self._reset()
+
+    def _reset(self):
+        """
+        Replaces this writer's in-memory caches and counters with fresh, empty
+        objects bound to the same transaction.
+        """
+
+        # NOTE(review): self._docid and self._lastid are not reset here --
+        # confirm that is intended
+        self._docmaps = DocmapCache(self._txn)
+        self._postings = defaultdict(list)
+        # Caches doc field length statistics
+        self._lengths = LengthsCache(self._txn)
+        # Caches fieldname -> fieldbytes encodings
+        self._fieldcache = {}
+        # Track the IDs of deleted documents to remove them from posting lists
+        self._deleted = set()
+        self._postingcount = 0
 
     def next_doc_id(self):
         # Delegates to the docmap cache, which hands out the next document ID
         return self._docmaps.next_doc_id()

src/whoosh/kv/blueline.py

         self._mm = db.map_from_file(self._datafile, use_mmap)
         self._datafile.seek(0, 2)
         self._startlength = self._datafile.tell()
+        self._cachesize = cachesize
         self._cache = BlockCache(self.load, self.save, self.new_tag,
                                  self._leaving, cachesize)
         self._cursorpool = []
         self._buffersize = buffersize
         self._bufferkeys = None
         self._buffered = 0
-        self._settime = 0.0
         self.closed = False
 
     def __len__(self):
 
     def __setitem__(self, key, value):
         # Stores the key/value pair in the in-memory buffer and adds their
         # combined length to the running count of buffered bytes. Both the key
         # and the value must be bytes objects.
         assert isinstance(key, bytes_type) and isinstance(value, bytes_type)
-        t = now()
         self._buffer[key] = value
         # presumably invalidates a cached list of the buffer's keys -- confirm
         self._bufferkeys = None
         self._buffered += len(key) + len(value)
         # block[key] = value
         # if len(block) > 2 and len(block) > self._blocksize:
         #     self._split(i, block)
-        self._settime += now() - t
 
     def __delitem__(self, key):
         if key in self._buffer:
                 self._cache.remove(block.tag)
                 del self._toc[i]
 
-    def dump_blocks(self):
-        self._cache.dump_blocks()
+    def clear(self):
+        """
+        Discards the current contents: replaces the table of contents with a
+        new one built from the same filename and an empty list, obtains a new
+        data file from ``self._db.create_file``, and starts over with a fresh
+        block cache, cursor pool and write buffer.
+        """
+
+        self._toc = Toc(self._toc.filename, [])
+        # NOTE(review): the previous self._datafile is not closed here, and
+        # self._mm / self._startlength (set up from the old data file) are not
+        # refreshed -- confirm that is intended
+        self._datafile = self._db.create_file(self._toc.filename, mode="r+b")
+        # Same callbacks and cache size that the original cache was built with
+        self._cache = BlockCache(self.load, self.save, self.new_tag,
+                                 self._leaving, self._cachesize)
+        self._cursorpool = []
+        self._buffer = {}
+        self._bufferkeys = None
+        self._buffered = 0
 
     def new_tag(self):
         # Asks the database object for a new tag, passing it the tag set of
         # the current table of contents
         return self._db.new_tag(self._toc.tagset())
             yield ([], keys[left:], len(keys) - left)
 
     def flush(self):
-        t = now()
         toc = self._toc
         cache = self._cache
         buff = self._buffer

src/whoosh/kv/db.py

     def __delitem__(self, key):
         # Not implemented at this level; always raises NotImplementedError
         raise NotImplementedError
 
+    def clear(self):
+        for key in self:
+            del self[key]
+
     def delete_by_prefix(self, prefix):
         # Hopefully you can override this implementation in your subclass
         keys = list(self.expand_prefix(prefix))

src/whoosh/writing.py

             else:
                 self.commit()
 
+    @abstractmethod
+    def clear(self):
+        """
+        Clear all information from the index.
+
+        This is an abstract method: the implementation here always raises
+        ``NotImplementedError``.
+        """
+
+        raise NotImplementedError
+
     def group(self):
         """
         Returns a context manager that calls
         self.optimize = False
 
     @unclosed
+    def clear(self):
+        """
+        Delegates to the ``clear()`` method of this writer's document writer
+        (``self._docwriter``).
+        """
+
+        self._docwriter.clear()
+
+    @unclosed
     def reader(self, **kwargs):
         from whoosh.reading import DBReader
 

tests/test_blueline.py

             cur = w.cursor()
             ls = list(cur.expand_prefix(b"0x"))
             assert ls == sorted(keyset)
+
+
+def test_clear():
+    # Checks that calling clear() on a writer leaves nothing for a later
+    # reader to find
+    with TempDir("blclear") as dirpath:
+        db = bl.Blueline(dirpath)
+        # Populate a newly created database, using the hex() strings of the
+        # multiples of 7 below 700 as keys
+        with db.open(write=True, create=True) as w:
+            for i in xrange(0, 700, 7):
+                w[b(hex(i))] = b"v"
+
+        # Reopen for writing and clear everything
+        with db.open(write=True) as w:
+            w.clear()
+
+        # A reader opened afterwards should see no keys; "0x0" is hex(0), the
+        # first key generated by the loop above
+        with db.open() as r:
+            assert len(r) == 0
+            assert list(r) == []
+            assert r.get(b"0x0") is None