Commits

Matt Chaput committed 8f63c43

Added experimental in-memory pool. Switched b(chr(n)) to pack_byte(n).

  • Participants
  • Parent commits dfe1d87

Comments (0)

Files changed (3)

File src/whoosh/filedb/pools.py

 # policies, either expressed or implied, of Matt Chaput.
 
 from __future__ import with_statement
-import os
-import tempfile
+import os, tempfile
 from array import array
 from collections import defaultdict
 from heapq import heapify, heappop, heapreplace
 
 from whoosh.compat import long_type, iteritems, xrange, text_type, PY3
 from whoosh.filedb.filetables import LengthWriter, LengthReader
-from whoosh.util import length_to_byte, byte_to_length
+from whoosh.util import length_to_byte, byte_to_length, utf8encode
 
 
 try:
         termswriter.add_iter(self.postbuf, lengths.get)
 
 
+class DictPool(PoolBase):
+    """Experimental pool that buffers every posting in memory, in a dict
+    keyed by ``(fieldname, text)``, and feeds the postings to the terms
+    writer in sorted term order when ``finish()`` is called.
+    """
+
+    def __init__(self, schema, **kwargs):
+        # Extra keyword arguments are accepted but not used by this pool.
+        super(DictPool, self).__init__(schema)
+        self.schema = schema
+        # Maps (fieldname, text) -> (docnums, weights, valuestrings): three
+        # parallel sequences holding one entry per buffered posting.
+        self.postbuf = {}
+
+    def add_posting(self, fieldname, text, docnum, weight, valuestring):
+        """Buffers a single posting under the term ``(fieldname, text)``.
+        """
+        term = (fieldname, text)
+        buf = self.postbuf.get(term)
+        if buf is None:
+            # First posting for this term: unsigned-int array for document
+            # numbers, float array for weights, plain list for the values.
+            self.postbuf[term] = buf = (array("I"), array("f"), [])
+
+        docnums, weights, values = buf
+        docnums.append(docnum)
+        weights.append(weight)
+        values.append(valuestring)
+
+    def finish(self, termswriter, doccount, lengthfile):
+        """Writes the lengths via ``_write_lengths()`` and then passes the
+        buffered postings, ordered by term, to ``termswriter.add_iter()``.
+        """
+        try:
+            from itertools import izip
+        except ImportError:
+            # Python 3 has no itertools.izip; the builtin zip is already
+            # lazy there.
+            izip = zip
+
+        pbuf = self.postbuf
+        self._write_lengths(lengthfile, doccount)
+        lengths = LengthReader(None, doccount, self.length_arrays)
+
+        def gen():
+            # Terms are emitted in sorted (fieldname, text) order; postings
+            # within a term keep the order in which they were added.
+            for term in sorted(pbuf):
+                fieldname, text = term
+                for docnum, weight, valuestring in izip(*pbuf[term]):
+                    yield (fieldname, text, docnum, weight, valuestring)
+
+        termswriter.add_iter(gen(), lengths.get)
+
+
 # On-disk unique set of strings
 
 class DiskSet(object):

File src/whoosh/filedb/structfile.py

 from whoosh.compat import load as load_pickle
 from whoosh.compat import integer_types, b
 from whoosh.system import (_INT_SIZE, _SHORT_SIZE, _FLOAT_SIZE, _LONG_SIZE,
-                           pack_sbyte, pack_ushort, pack_int, pack_uint,
-                           pack_long, pack_float,
-                           unpack_sbyte, unpack_ushort, unpack_int,
+                           pack_byte, pack_sbyte, pack_ushort, pack_int,
+                           pack_uint, pack_long, pack_float,
+                           unpack_byte, unpack_sbyte, unpack_ushort, unpack_int,
                            unpack_uint, unpack_long, unpack_float, IS_LITTLE)
 from whoosh.util import (varint, read_varint, signed_varint,
                          decode_signed_varint, float_to_byte, byte_to_float)
         """Writes a single byte to the wrapped file, shortcut for
         ``file.write(chr(n))``.
         """
-        self.file.write(b(chr(n)))
+        self.file.write(pack_byte(n))
 
     def read_byte(self):
         """Reads one byte from the wrapped file and returns its integer
         value.
         """
         raw = self.file.read(1)
         return ord(raw)

File src/whoosh/system.py

 _LONG_SIZE = calcsize("!Q")
 _FLOAT_SIZE = calcsize("!f")
 
+_byte_struct = Struct("!B")
 _sbyte_struct = Struct("!b")
 _ushort_struct = Struct("!H")
 _int_struct = Struct("!i")
 _long_struct = Struct("!q")
 _float_struct = Struct("!f")
 
+pack_byte = _byte_struct.pack
 pack_sbyte = _sbyte_struct.pack
 pack_ushort = _ushort_struct.pack
 pack_int = _int_struct.pack
 pack_long = _long_struct.pack
 pack_float = _float_struct.pack
 
+unpack_byte = _byte_struct.unpack  # ord() might be faster
 unpack_sbyte = _sbyte_struct.unpack
 unpack_ushort = _ushort_struct.unpack
 unpack_int = _int_struct.unpack