Commits

Matt Chaput committed c4b3b94

In SegmentReader, cache unchanging information instead of copying methods.
Added simple test of overriding IndexReader abstract methods.

Comments (0)

Files changed (4)

src/whoosh/filedb/filereading.py

             flf = storage.open_file(segment.fieldlengths_filename)
             self.fieldlengths = LengthReader(flf, segment.doc_count_all())
         
-        # Copy methods from underlying segment
-        self.has_deletions = segment.has_deletions
-        self.is_deleted = segment.is_deleted
-        self.doc_count = segment.doc_count
+        # Copy info from underlying segment
+        self._has_deletions = segment.has_deletions()
+        self._doc_count = segment.doc_count()
         
         # Postings file
         self.postfile = self.storage.open_file(segment.termposts_filename,
         self.is_closed = False
         self._sync_lock = Lock()
 
+    def has_deletions(self):
+        return self._has_deletions
+    
+    def doc_count(self):
+        return self._doc_count
+    
+    def is_deleted(self, docnum):
+        return self.segment.is_deleted(docnum)
+
     def generation(self):
         return self.segment.generation
 

src/whoosh/reading.py

         
         return False
     
-    @abstractmethod
     def word_graph(self, fieldname):
         """Returns the root :class:`whoosh.support.dawg.BaseNode` for the given
         field, if the field has a stored word graph (otherwise raises an
         :meth:`IndexReader.has_word_graph`.
         """
         
-        raise NotImplementedError
+        return None
     
     def corrector(self, fieldname):
         """Returns a :class:`whoosh.spelling.Corrector` object that suggests
     def __iter__(self):
         return iter([])
     
+    def all_terms(self):
+        return iter([])
+    
+    def term_info(self, fieldname, text):
+        raise TermNotFound((fieldname, text))
+    
     def iter_from(self, fieldname, text):
         return iter([])
     

src/whoosh/searching.py

         
         return sorting.Sorter(self, *args, **kwargs)
 
-#    def define_facets(self, name, facet, save=False):
-#        """This is an experimental feature which may change in future versions.
-#        
-#        Adds a field cache for a synthetic field defined by a
-#        :class:`whoosh.sorting.FacetType` object, for example a
-#        :class:`~whoosh.sorting.QueryFacet` or
-#        :class:`~whoosh.sorting.RangeFacet`.
-#        
-#        For example, sorting using a :class:`~whoosh.sorting.QueryFacet`
-#        recomputes the queries at sort time, which may be slow::
-#        
-#            qfacet = sorting.QueryFacet({"a-z": TermRange(...
-#            results = searcher.search(myquery, sortedby=qfacet)
-#            
-#        You can cache the results of the query facet in a field cache and use
-#        the pseudo-field for sorting::
-#        
-#            searcher.define_facets("nameranges", qfacet, save=True)
-#            
-#            results = searcher.search(myquery, sortedby="nameranges")
-#        
-#        See :doc:`/facets`.
-#        
-#        :param name: a name for the pseudo-field to cache the query results in.
-#        :param qs: a :class:`~whoosh.sorting.FacetType` object.
-#        :param save: if True, saves the field cache to disk so it is persistent
-#            across searchers. The default is False, which only creates the
-#            field cache in memory.
-#        """
-#        
-#        
-#        if self.subsearchers:
-#            ss = self.subsearchers
-#        else:
-#            ss = [(self, 0)]
-#        
-#        for s, offset in ss:
-#            doclists = defaultdict(list)
-#            catter = facet.categorizer(self)
-#            catter.set_searcher(s, offset)
-#            for docnum in xrange(s.doc_count_all()):
-#                key = catter.key_for_id(docnum)
-#                doclists[key].append(docnum)
-#            s.reader().define_facets(name, doclists, save=save)
+    def add_facet_field(self, name, facet, save=False):
+        """This is an experimental feature which may change in future versions.
+        
+        Adds a field cache for a computed field defined by a
+        :class:`whoosh.sorting.FacetType` object, for example a
+        :class:`~whoosh.sorting.QueryFacet` or
+        :class:`~whoosh.sorting.RangeFacet`.
+        
+        This creates a field cache from the facet, so once you define the
+        "facet field", sorting/grouping by it will be faster than using the
+        original facet object.
+        
+        For example, sorting using a :class:`~whoosh.sorting.QueryFacet`
+        recomputes the queries at sort time, which may be slow::
+        
+            qfacet = sorting.QueryFacet({"a-z": TermRange(...
+            results = searcher.search(myquery, sortedby=qfacet)
+            
+        You can cache the results of the query facet in a field cache::
+        
+            searcher.add_facet_field("nameranges", qfacet, save=True)
+            
+        ...then use the pseudo-field for sorting::
+        
+            results = searcher.search(myquery, sortedby="nameranges")
+        
+        See :doc:`/facets`.
+        
+        :param name: a name for the pseudo-field to cache the query results in.
+        :param facet: a :class:`~whoosh.sorting.FacetType` object.
+        :param save: if True, saves the field cache to disk so it is persistent
+            across searchers. The default is False, which only creates the
+            field cache in memory.
+        """
+        
+        
+        if self.subsearchers:
+            ss = self.subsearchers
+        else:
+            ss = [(self, 0)]
+        
+        for s, offset in ss:
+            doclists = defaultdict(list)
+            catter = facet.categorizer(self)
+            catter.set_searcher(s, offset)
+            for docnum in xrange(s.doc_count_all()):
+                key = catter.key_for_id(docnum)
+                doclists[key].append(docnum)
+            s.reader().define_facets(name, doclists, save=save)
         
     def docs_for_query(self, q):
         """Returns an iterator of document numbers for documents matching the

tests/test_reading.py

     assert_equal(r.doc_count(), 8)
     assert_equal(r.doc_count_all(), 8)
 
-#def test_reader_subclasses():
-#    def is_abstract(attr):
-#        return hasattr(attr, "__isabstractmethod__") and getattr(attr, "__isabstractmethod__")
-#    def check_methods(base, subclass):
-#        for attrname in dir(base):
-#            if attrname.startswith("_"):
-#                continue
-#            attr = getattr(base, attrname)
-#            if is_abstract(attr):
-#                oattr = getattr(subclass, attrname)
-#                assert not is_abstract(oattr), "%s.%s not overridden" % (subclass.__name__, attrname)
-#    
-#    check_methods(reading.IndexReader, SegmentReader)
-#    check_methods(reading.IndexReader, reading.MultiReader)
-#    check_methods(reading.IndexReader, RamIndex)
-#    check_methods(reading.IndexReader, reading.EmptyReader)
+def test_reader_subclasses():
+    def is_abstract(attr):
+        return hasattr(attr, "__isabstractmethod__") and getattr(attr, "__isabstractmethod__")
+    def check_methods(base, subclass):
+        for attrname in dir(base):
+            if attrname.startswith("_"):
+                continue
+            attr = getattr(base, attrname)
+            if is_abstract(attr):
+                oattr = getattr(subclass, attrname)
+                assert not is_abstract(oattr), "%s.%s not overridden" % (subclass.__name__, attrname)
+    
+    check_methods(reading.IndexReader, SegmentReader)
+    check_methods(reading.IndexReader, reading.MultiReader)
+    check_methods(reading.IndexReader, reading.EmptyReader)
+    check_methods(reading.IndexReader, RamIndex)