Commits

Matt Chaput committed 80ce1a8

Added code for the shift_step=0 case. Renamed NumericRange._or_query() to
NumericRange._compile_query(). Added tests. Minor cleanups.

Comments (0)

Files changed (7)

src/whoosh/fields.py

     
     def index(self, num):
         # word, freq, weight, valuestring
-        return [(txt, 1, 1.0, '') for txt in self._tiers(num)]
+        if self.shift_step:
+            return [(txt, 1, 1.0, '') for txt in self._tiers(num)]
+        else:
+            return [(self.to_text(num), 1, 1.0, '')]
     
     def to_text(self, x, shift=0):
         if self.decimal_places:

src/whoosh/matching.py

         if not b.is_active(): return a.id()
         return min(a.id(), b.id())
     
+    # Using sets is faster in most cases, but could potentially use a lot of
+    # memory
     def all_ids(self):
         return iter(sorted(set(self.a.all_ids()) | set(self.b.all_ids())))
     
     def id(self):
         return self.a.id()
     
-    #def all_ids(self):
-    #    return iter(sorted(set(self.a.all_ids()) & set(self.b.all_ids())))
+    # Using sets is faster in some cases, but could potentially use a lot of
+    # memory
+    def all_ids(self):
+        return iter(sorted(set(self.a.all_ids()) & set(self.b.all_ids())))
     
     def skip_to(self, id):
         if not self.is_active(): raise ReadTooFar

src/whoosh/query.py

                             self.startexcl, self.endexcl, boost=self.boost)
     
     def simplify(self, ixreader):
-        return self._or_query(ixreader).simplify(ixreader)
+        return self._compile_query(ixreader).simplify(ixreader)
     
     def estimate_size(self, ixreader):
-        return self._or_query(ixreader).estimate_size(ixreader)
+        return self._compile_query(ixreader).estimate_size(ixreader)
     
     def docs(self, searcher, exclude_docs=None):
-        q = self._or_query(searcher.reader())
+        q = self._compile_query(searcher.reader())
         return q.docs(searcher, exclude_docs=exclude_docs)
     
-    def _or_query(self, ixreader):
+    def _compile_query(self, ixreader):
         from whoosh.fields import NUMERIC
         from whoosh.support.numeric import tiered_ranges
         
             return NullQuery
         
     def matcher(self, searcher, exclude_docs=None):
-        q = self._or_query(searcher.reader())
+        q = self._compile_query(searcher.reader())
         return q.matcher(searcher, exclude_docs=exclude_docs)
         
 

src/whoosh/support/numeric.py

         shift += step
 
 
-# These functions use hexadecimal strings to encode the numbers, rather than
-# converting them to text using a 7-bit encoding, because while the hex
-# representation uses more space (8 bytes as opposed to 5 bytes for a 32 bit
-# number), it's 5 times faster to encode/decode.
-#
-# The functions for 7 bit encoding are still available (to_7bit and from_7bit)
-# if needed.
-
 _dstruct = struct.Struct("<d")
 _qstruct = struct.Struct("<q")
 _dpack, _dunpack = _dstruct.pack, _dstruct.unpack
     x = sortable_long_to_float(x)
     return x
 
-# Functions for converting sortable representations to and from text
+# Functions for converting sortable representations to and from text.
+#
+# These functions use hexadecimal strings to encode the numbers, rather than
+# converting them to text using a 7-bit encoding, because while the hex
+# representation uses more space (8 bytes as opposed to 5 bytes for a 32 bit
+# number), it's 5-10 times faster to encode/decode in Python.
+#
+# The functions for 7 bit encoding are still available (to_7bit and from_7bit)
+# if needed.
+
 
 def sortable_int_to_text(x, shift=0):
     if shift:
         x >>= shift
-    return chr(shift) + u"%08x" % x #struct.pack(">I", x)#
+    text = chr(shift) + u"%08x" % x
+    assert len(text) == 9
+    return text
+
 def sortable_long_to_text(x, shift=0):
     if shift:
         x >>= shift
-    return chr(shift) + u"%016x" % x #struct.pack(">Q", x)#
+    text = chr(shift) + u"%016x" % x
+    assert len(text) == 17
+    return text
+
 def text_to_sortable_int(text):
-    assert len(text) == 9
-    #return struct.unpack(">I", text[1:])[0]
+    #assert len(text) == 9
     return int(text[1:], 16)
+
 def text_to_sortable_long(text):
-    assert len(text) == 17
-    #return struct.unpack(">Q", text[1:])[0]
+    #assert len(text) == 17
     return long(text[1:], 16)
 
 
             end = float_to_sortable_long(end)
         to_text = sortable_long_to_text
     
+    if not shift_step:
+        yield (to_text(start), to_text(end))
+        return
+    
     # Yield the term ranges for the different resolutions
     for rstart, rend, shift in split_range(valsize, shift_step, start, end):
         starttext = to_text(rstart, shift=shift)
         
         yield (starttext, endtext)
 
+
 # Functions for encoding numeric values as sequences of 7-bit ascii characters
 
 def to_7bit(x, islong):
         shift = 31
         nchars = 5
     else:
-        shift = 62
+        shift = 63
         nchars = 10
 
     buffer = array("c", "\x00" * nchars)
     if len(text) == 5:
         shift = 31
     elif len(text) == 10:
-        shift = 62
+        shift = 63
     else:
         raise ValueError("text is not 5 or 10 bytes")
 

tests/test_fields.py

         self.assertEqual(r[0]["id"], "b")
     
     def test_numeric_range(self):
-        from whoosh.util import now
         def test_type(t, start, end, step, teststart, testend):
             fld = fields.NUMERIC(t)
             schema = fields.Schema(id=fields.STORED, number=fld)
             w.commit()
             
             qp = qparser.QueryParser("number", schema=schema)
-            
-            q = qp.parse("[%s to *]" % teststart)
-            self.assertEqual(q, query.NullQuery)
-            
-            q = qp.parse("[%s to]" % teststart)
-            self.assertEqual(q.__class__, query.NumericRange)
-            self.assertEqual(q.start, teststart)
-            self.assertEqual(q.end, None)
-            
-            q = qp.parse("[to %s]" % testend)
-            self.assertEqual(q.__class__, query.NumericRange)
-            self.assertEqual(q.start, None)
-            self.assertEqual(q.end, testend)
-            
             q = qp.parse("[%s to %s]" % (teststart, testend))
             self.assertEqual(q.__class__, query.NumericRange)
             self.assertEqual(q.start, teststart)
             self.assertEqual(q.end, testend)
             
             s = ix.searcher()
-            self.assertEqual(q._or_query(s.reader()).__class__, query.Or)
+            self.assertEqual(q._compile_query(s.reader()).__class__, query.Or)
             rng = []
             count = teststart
             while count <= testend:
         test_type(int, -500, 500, 5, -350, 280)
         test_type(long, -1000, 1000, 5, -900, 90)
     
+    def test_open_numeric_ranges(self):
+        schema = fields.Schema(id=fields.ID(stored=True),
+                               view_count=fields.NUMERIC(stored=True))
+        ix = RamStorage().create_index(schema)
+        
+        w = ix.writer()
+        for i, letter in enumerate(u"abcdefghijklmno"):
+            w.add_document(id=letter, view_count=(i + 1) * 101)
+        w.commit()
+        
+        s = ix.searcher()
+        #from whoosh.qparser.old import QueryParser
+        #qp = QueryParser("id", schema=schema)
+        qp = qparser.QueryParser("id", schema=schema)
+        
+        def do(qstring, target):
+            q = qp.parse(qstring)
+            results = "".join(sorted([d['id'] for d in s.search(q, limit=None)]))
+            self.assertEqual(results, target, "%r: %s != %s" % (q, results, target))
+        
+        do(u"view_count:[0 TO]", "abcdefghijklmno")
+        do(u"view_count:[1000 TO]", "jklmno")
+        do(u"view_count:[TO 300]", "ab")
+        do(u"view_count:[200 TO 500]", "bcd")
+        do(u"view_count:{202 TO]", "cdefghijklmno")
+        do(u"view_count:[TO 505}", "abcd")
+        do(u"view_count:{202 TO 404}", "c")
+    
+    def test_numeric_steps(self):
+        for step in range(0, 32):
+            schema = fields.Schema(id = fields.STORED,
+                                   num=fields.NUMERIC(int, shift_step=step))
+            ix = RamStorage().create_index(schema)
+            w = ix.writer()
+            for i in xrange(-10, 10):
+                w.add_document(id=i, num=i)
+            w.commit()
+            
+            s = ix.searcher()
+            q = query.NumericRange("num", -9, 9)
+            r = [s.stored_fields(d)["id"] for d in q.docs(s)]
+            self.assertEqual(r, range(-9, 10))
+            
     def test_datetime(self):
         schema = fields.Schema(id=fields.ID(stored=True),
                                date=fields.DATETIME(stored=True))

tests/test_parsing.py

         self.assertEqual(q.start, "d")
         self.assertEqual(q.fieldname, "name")
     
-    def test_empty_numeric_range(self):
+    def test_numeric_range(self):
+        schema = fields.Schema(id=fields.STORED, number=fields.NUMERIC)
+        qp = qparser.QueryParser("number", schema=schema)
+        
+        teststart = 40
+        testend = 100
+        
+        q = qp.parse("[%s to *]" % teststart)
+        self.assertEqual(q, query.NullQuery)
+        
+        q = qp.parse("[%s to]" % teststart)
+        self.assertEqual(q.__class__, query.NumericRange)
+        self.assertEqual(q.start, teststart)
+        self.assertEqual(q.end, None)
+        
+        q = qp.parse("[to %s]" % testend)
+        self.assertEqual(q.__class__, query.NumericRange)
+        self.assertEqual(q.start, None)
+        self.assertEqual(q.end, testend)
+        
+        q = qp.parse("[%s to %s]" % (teststart, testend))
+        self.assertEqual(q.__class__, query.NumericRange)
+        self.assertEqual(q.start, teststart)
+        self.assertEqual(q.end, testend)
+    
+    def test_empty_ranges(self):
         schema = fields.Schema(name=fields.TEXT, num=fields.NUMERIC,
                                date=fields.DATETIME)
         qp = qparser.QueryParser("text", schema=schema)
         
-        for fname in ("num", "name", "date"):
+        for fname in ("name", "date"):
             q = qp.parse("%s:[to]" % fname)
             self.assertEqual(q.__class__, query.TermRange)
             self.assertEqual(q.start, '')
             self.assertEqual(q.end, u'\uffff')
-        
+    
+    def test_empty_numeric_range(self):
+        schema = fields.Schema(id=fields.ID, num=fields.NUMERIC)
+        qp = qparser.QueryParser("num", schema=schema)
+        q = qp.parse("num:[to]")
+        self.assertEqual(q.__class__, query.NumericRange)
+        self.assertEqual(q.start, None)
+        self.assertEqual(q.end, None)
+    
     def test_stopped(self):
         schema = fields.Schema(text = fields.TEXT)
         qp = qparser.QueryParser("text", schema=schema)

tests/test_searching.py

         ids = [fs["id"] for fs in r]
         self.assertEqual(["2", "4", "1", "3"], ids)
         
-    def test_open_numeric_ranges(self):
-        schema = fields.Schema(id=fields.ID(stored=True),
-                               view_count=fields.NUMERIC(stored=True))
-        st = RamStorage()
-        ix = st.create_index(schema)
-        
-        w = ix.writer()
-        for i, letter in enumerate(u"abcdefghijklmno"):
-            w.add_document(id=letter, view_count=(i + 1) * 101)
-        w.commit()
-        
-        s = ix.searcher()
-        #from whoosh.qparser.old import QueryParser
-        #qp = QueryParser("id", schema=schema)
-        qp = qparser.QueryParser("id", schema=schema)
-        
-        def do(qstring, target):
-            q = qp.parse(qstring)
-            results = "".join(sorted([d['id'] for d in s.search(q, limit=None)]))
-            self.assertEqual(results, target, "%r: %s != %s" % (q, results, target))
-        
-        do(u"view_count:[0 TO]", "abcdefghijklmno")
-        do(u"view_count:[1000 TO]", "jklmno")
-        do(u"view_count:[TO 300]", "ab")
-        do(u"view_count:[200 TO 500]", "bcd")
-        do(u"view_count:{202 TO]", "cdefghijklmno")
-        do(u"view_count:[TO 505}", "abcd")
-        do(u"view_count:{202 TO 404}", "c")
-        
     def test_outofdate(self):
         schema = fields.Schema(id=fields.ID(stored=True))
         st = RamStorage()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use the ↑ and ↓ arrow keys to navigate, and press Return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.