Commits

Matt Chaput committed 9a84c1a

Fixed FieldFacet to work with readers that don't support field caches. Fixes issue #177.
Bumped version number to 2.2.2.
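
For context, a minimal way to hit the bug this commit fixes (a hedged sketch, not code from the commit: RamIndex is used because its reader does not support field caches, and the schema and values here are illustrative):

    from whoosh import fields, query, sorting
    from whoosh.compat import u
    from whoosh.ramindex import RamIndex

    schema = fields.Schema(id=fields.STORED, name=fields.ID(stored=True))
    ix = RamIndex(schema)
    with ix.writer() as w:
        w.add_document(id=0, name=u("bravo"))
        w.add_document(id=1, name=u("alfa"))

    with ix.searcher() as s:
        # Before this commit, FieldFacet assumed the reader supported
        # field caches, so sorting failed on cache-less readers like this one
        facet = sorting.FieldFacet("name", reverse=True)
        r = s.search(query.Every(), sortedby=facet)
        print([hit["name"] for hit in r])  # expected: ["bravo", "alfa"]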

Comments (0)

Files changed (3)

src/whoosh/__init__.py

 # those of the authors and should not be interpreted as representing official
 # policies, either expressed or implied, of Matt Chaput.
 
-__version__ = (2, 2, 1)
+__version__ = (2, 2, 2)
 
 
 def versionstring(build=True, extra=True):

src/whoosh/sorting.py

         field = None
         if fieldname in searcher.schema:
             field = searcher.schema[fieldname]
+        hascache = searcher.reader().supports_caches()
 
         if self.allow_overlap:
             return self.OverlappingFieldCategorizer(fieldname)
 
-        elif isinstance(field, DATETIME):
+        elif hascache and isinstance(field, DATETIME):
             # Return a subclass of NumericFieldCategorizer that formats dates
             return self.DateFieldCategorizer(fieldname, self.reverse)
 
-        elif isinstance(field, NUMERIC):
+        elif hascache and isinstance(field, NUMERIC):
             # Numeric fields are naturally reversible
             return self.NumericFieldCategorizer(fieldname, self.reverse)
 
-        elif self.reverse:
-            # If we need to "reverse" a string field, we need to do more work
-            return self.RevFieldCategorizer(searcher, fieldname, self.reverse)
+        elif hascache and not self.reverse:
+            # Straightforward: use the field cache to sort/categorize
+            return self.FieldCategorizer(fieldname)
 
         else:
-            # Straightforward: use the field cache to sort/categorize
-            return self.FieldCategorizer(fieldname)
+            # If the reader does not support field caches or we need to
+            # reverse-sort a string field, we need to do more work
+            return self.NoCacheFieldCategorizer(searcher, fieldname,
+                                                self.reverse)
 
     class FieldCategorizer(Categorizer):
         """Categorizer for regular, unreversed fields. Just uses the
             else:
                 return long_to_datetime(key)
 
-    class RevFieldCategorizer(Categorizer):
-        """Categorizer for reversed fields. Since keys for non-numeric fields
-        are arbitrary data, it's not possible to "negate" them to reverse the
-        sort order. So, this object builds an array caching the order of
-        all documents according to the field, then uses the cached order as a
-        numeric key.
+    class NoCacheFieldCategorizer(Categorizer):
+        """This object builds an array caching the order of all documents
+        according to the field, then uses the cached order as a numeric key.
+        This is useful when a field cache is not available, and also for
+        reversed fields (since field cache keys for non-numeric fields are
+        arbitrary data, it's not possible to "negate" them to reverse the sort
+        order).
         """
 
-        def __init__(self, reader, fieldname, reverse):
+        def __init__(self, searcher, fieldname, reverse):
             # Cache the relative positions of all docs with the given field
             # across the entire index
+            reader = searcher.reader()
             dc = reader.doc_count_all()
             arry = array("i", [dc + 1] * dc)
-            field = self.searcher.schema[fieldname]
+            field = searcher.schema[fieldname]
             values = field.sortable_values(reader, fieldname)
             for i, (t, _) in enumerate(values):
                 if reverse:
             self.array = arry
 
         def set_searcher(self, searcher, docoffset):
-            self.searcher = searcher
             self.docoffset = docoffset
 
         def key_for_id(self, docid):
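
To make the new fallback branch concrete: the order-array idea is to walk the field's terms in sorted order, record each document's rank in an array, and then sort on the rank instead of the raw key, which makes string fields both cache-free and reversible. An illustrative sketch, not the shipped code (reader.postings(...).all_ids() is assumed from the standard reader API, and the reverse handling fills in the body elided from the diff above):

    from array import array

    def build_order_array(searcher, fieldname, reverse=False):
        reader = searcher.reader()
        dc = reader.doc_count_all()
        # Documents missing the field keep a rank past the end, sorting last
        arry = array("i", [dc + 1] * dc)
        field = searcher.schema[fieldname]
        values = field.sortable_values(reader, fieldname)
        for i, (t, _) in enumerate(values):
            if reverse:
                i = 0 - i  # negated ranks sort in reverse term order
            for docid in reader.postings(fieldname, t).all_ids():
                arry[docid] = i
        return arry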

tests/test_ramindex.py

         def _runq(q, result, **kwargs):
             r = s.search(q, **kwargs)
             assert_equal([d["id"] for d in r], result)
-        
+
         _runq(query.Term("text", u("format")), ["format", "vector"])
         _runq(query.Term("text", u("the")), ["fieldtype", "format", "const", "vector", "stored"])
         _runq(query.Prefix("text", u("st")), ["format", "vector", "stored"])
         ix.update_document(id=word[0], text=word)
     for word in u("apple burrito cat dollhouse").split():
         ix.update_document(id=word[0], text=word)
-    
+
     assert ix.has_deletions()
     assert_equal(ix.deleted, set([0, 1, 2, 3]))
     assert_equal(ix.doc_count(), 4)
 
 def test_stored():
     r = make_index().reader()
-    
+
     assert_equal(r.stored_fields(2), {"id": "vector", "subs": 23})
-    
+
     target = [{"id": "fieldtype", "subs": 56},
               {"id": "format", "subs": 100},
               {"id": "vector", "subs": 23},
     assert_equal(r.field_length("text"), 59)
     assert_equal(r.max_field_length("text"), 11)
     assert_equal(r.doc_field_length(3, "text"), 8)
-    
+
     assert_equal(r.field_length("text"), 59)
     assert_equal(r.max_field_length("text"), 11)
     assert_equal(r.doc_field_length(3, "text"), 8)
-    
+
     assert_equal(r.doc_frequency("text", "the"), 5)
     assert_equal(r.frequency("text", "the"), 9)
 
 def test_deleting():
     ix = make_index()
     ix.delete_by_term("id", u("vector"))
-    
+
     assert ix.has_deletions()
-    
+
     with ix.searcher() as s:
         q = query.Term("text", "format")
         r = s.search(q)
 
 def test_iter():
     r = make_index().reader()
-    
+
     everything = [("id", u('const'), 1, 1), ("id", u('fieldtype'), 1, 1), ("id", u('format'), 1, 1),
                   ("id", u('scorable'), 1, 1), ("id", u('stored'), 1, 1), ("id", u('unique'), 1, 1),
-                  ("id", u('vector'), 1, 1),  ("text", u('against'), 1, 1),
+                  ("id", u('vector'), 1, 1), ("text", u('against'), 1, 1),
                   ("text", u('attributes'), 1, 1), ("text", u('base'), 1, 1), ("text", u('be'), 1, 1),
                   ("text", u('constructor'), 1, 1), ("text", u('content'), 1, 1), ("text", u('contents'), 1, 1),
                   ("text", u('document'), 2, 2), ("text", u('each'), 2, 2), ("text", u('field'), 6, 6),
                   ("text", u('supports'), 1, 1), ("text", u('the'), 5, 9), ("text", u('this'), 3, 3),
                   ("text", u('to'), 1, 1), ("text", u('type'), 1, 1), ("text", u('unique'), 1, 1),
                   ("text", u('value'), 1, 1), ("text", u('vectors'), 1, 1), ("text", u('whether'), 3, 3)]
-    
+
     assert_equal([item for item in _stats(r) if item[0] != 'subs'], everything)
     assert_equal(_stats(r.iter_from("text", u("su"))), everything[32:])
     assert_equal(list(r.lexicon("text")), [x[1] for x in everything if x[0] == "text"])
 def test_vectors():
     ix = make_index()
     r = ix.reader()
-    
+
     assert not r.has_vector(0, "id")
     assert r.has_vector(0, "text")
-    
+
     target = [(u('contents'), 1), (u('field'), 1), (u('for'), 1), (u('format'), 1),
               (u('storage'), 1), (u('the'), 2)]
     vec = list(r.vector_as("frequency", 1, "text"))
         w = fix.writer()
         w.add_reader(ix.reader())
         w.commit()
-        
+
 def test_threaded():
     from threading import Thread
-    
+
     class TWriter(Thread):
         def __init__(self, ix):
             Thread.__init__(self)
             self.ix = ix
-            
+
         def run(self):
             ix = self.ix
             for i in xrange(1000):
                 ix.update_document(id=text_type(i), key=u("a"))
-    
+
     class TReader(Thread):
         def __init__(self, ix):
             Thread.__init__(self)
             self.ix = ix
             self.go = True
-        
+
         def run(self):
             s = self.ix.searcher()
             while self.go:
                 r = s.search(query.Term("key", u("a")))
                 assert_equal(len(r), 1)
-    
-    schema = fields.Schema(id=fields.ID(stored=True),key=fields.ID(unique=True, stored=True))
+
+    schema = fields.Schema(id=fields.ID(stored=True), key=fields.ID(unique=True, stored=True))
     ix = RamIndex(schema)
     tw = TWriter(ix)
     tr = TReader(ix)
     tw.join()
     tr.go = False
     tr.join()
-    
+
     assert_equal(ix.doc_count(), 1)
     with ix.searcher() as s:
         assert_equal(len(list(s.documents(key="a"))), 1)
     ix.add_document(id=u("golf"), text=u(""))
     ix.add_document(id=u(""), text=u("hotel"))
     ix.add_document(id=u(""), text=u(""))
-    
+
 def test_missing_term_docfreq():
     schema = fields.Schema(id=fields.ID)
     ix = RamIndex(schema)
     assert_equal(p.block_min_length(), 2)
     assert_equal(p.block_max_length(), 8)
     assert_equal(p.block_max_wol(), 0.5)
+
+def test_sorting():
+    from whoosh import sorting
+
+    schema = fields.Schema(id=fields.STORED, name=fields.ID(stored=True),
+                           size=fields.NUMERIC)
+    ix = RamIndex(schema)
+
+    with ix.writer() as w:
+        w.add_document(id=0, name=u("bravo"), size=10)
+        w.add_document(id=1, name=u("alfa"), size=9)
+        w.add_document(id=2, name=u("delta"), size=8)
+        w.add_document(id=3, name=u("charlie"), size=7)
+
+    with ix.searcher() as s:
+        q = query.Every()
+        r = s.search(q, sortedby="name")
+        assert_equal([hit["id"] for hit in r], [1, 0, 3, 2])
+
+        r = s.search(q, sortedby="size")
+        assert_equal([hit["id"] for hit in r], [3, 2, 1, 0])
+
+        facet = sorting.FieldFacet("size", reverse=True)
+        r = s.search(q, sortedby=facet)
+        assert_equal([hit["id"] for hit in r], [0, 1, 2, 3])
+
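
A usage note on the test above: the same facet type can also drive grouping rather than sorting (a hedged sketch continuing the searcher block from test_sorting; the groups() accessor is assumed from the standard Results API in Whoosh 2.x):

    # Inside the searcher block above, grouping by the facet instead:
    facet = sorting.FieldFacet("name")
    r = s.search(query.Every(), groupedby=facet)
    groups = r.groups("name")  # maps each name value to its document numbers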