1. Matt Chaput
  2. whoosh

Commits

Matt Chaput  committed 03206d5

Fixed a bug where minlength/maxlength were wrongly converted to and from their single-byte encoding when the values were written out and read back.

  • Participants
  • Parent commits 6565466
  • Branches default

Comments (0)

Files changed (2)

File src/whoosh/filedb/filetables.py

View file
  • Ignore whitespace
     
     def to_string(self):
         # Encode the lengths as 0-255 values
-        ml = length_to_byte(self._minlength)
-        xl = length_to_byte(self._maxlength)
+        ml = self._minlength
+        if ml is None:
+            ml = 0
+        xl = self._maxlength
         # Convert None values to the out-of-band NO_ID constant so they can be
         # stored as unsigned ints
         mid = NO_ID if self._minid is None else self._minid
         if hbyte < 2:
             # Freq, Doc freq, min length, max length, max weight, max WOL, min ID, max ID
             f, df, ml, xl, xw, xwol, mid, xid = cls.struct.unpack(s[1:cls.struct.size+1])
-            ml = byte_to_length(ml)
-            xl = byte_to_length(xl)
             mid = None if mid == NO_ID else mid
             xid = None if xid == NO_ID else xid
             # Postings

File tests/test_indexing.py

View file
  • Ignore whitespace
             ls2 = [dr.doc_field_length(i, "f2") for i in xrange(0, len(lengths))]
             assert_equal(ls2, [byte_to_length(length_to_byte(l))for l in lengths])
 
+def test_many_lengths():
+    domain = u("alfa bravo charlie delta echo foxtrot golf hotel").split()
+    schema = fields.Schema(text=fields.TEXT)
+    ix = RamStorage().create_index(schema)
+    w = ix.writer()
+    for i, word in enumerate(domain):
+        length = (i + 1) ** 6
+        w.add_document(text=" ".join(word for _ in xrange(length)))
+    w.commit()
+    
+    s = ix.searcher()
+    for i, word in enumerate(domain):
+        target = byte_to_length(length_to_byte((i + 1) ** 6))
+        ti = s.term_info("text", word)
+        assert_equal(ti.min_length(), target)
+        assert_equal(ti.max_length(), target)
+
 def test_lengths_ram():
     s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                       f2=fields.KEYWORD(stored=True, scorable=True))