Matt Chaput committed fd4e33f

Small changes for simplification and consistency.

Comments (0)

Files changed (3)

src/whoosh/fields.py

         self._by_number.append(fieldtype)
         self._names.append(name)
         self._by_name[name] = fieldtype
-        
+    
+    def to_number(self, id):
+        """Given a field name or number, return the field's number.
+        """
+        if isinstance(id, int): return id
+        return self.name_to_number(id)
+    
     def name_to_number(self, name):
         """
         Given a field name, returns the field's number.

src/whoosh/query.py

         termset.add((self.fieldname, self.text))
     
     def existing_terms(self, searcher, termset, reverse = False):
-        fname, text = self.fieldname, self.text
-        fnum = searcher.fieldname_to_num(fname)
-        contains = (fnum, text) in searcher
+        fieldname, text = self.fieldname, self.text
+        fieldnum = searcher.fieldname_to_num(fieldname)
+        contains = (fieldnum, text) in searcher
         if reverse: contains = not contains
         if contains:
-            termset.add((fname, text))
+            termset.add((fieldname, text))
 
 
 class Term(SimpleQuery):

src/whoosh/reading.py

             self.vector_table.close()
         self.is_closed = True
     
-    def fieldname_to_num(self, fieldname):
-        """Returns the field number corresponding to the given field name.
-        """
-        if fieldname in self.schema:
-            return self.schema.name_to_number(fieldname)
-        else:
-            raise UnknownFieldError(fieldname)
-    
     def doc_count_all(self):
         """Returns the total number of documents, DELETED OR UNDELETED,
         in this reader.
         """
         return self.segment.doc_count()
     
-    def field_length(self, fieldnum):
+    def field_length(self, fieldid):
         """Returns the total number of terms in the given field.
         """
         
-        if isinstance(fieldnum, basestring):
-            fieldnum = self.fieldname_to_num(fieldnum)
-        return self.segment.field_length(fieldnum)
+        fieldid = self.schema.to_number(fieldid)
+        return self.segment.field_length(fieldid)
     
     def vector_format(self, fieldnum):
         """
         #return self._doc_info(docnum)[1]
         return self._doc_info(docnum, -2)
     
-    def doc_field_length(self, docnum, fieldnum):
+    def doc_field_length(self, docnum, fieldid):
         """Returns the number of terms in the given field in the
         given document. This is used by some scoring algorithms.
         """
         
-        if isinstance(fieldnum, basestring):
-            fieldnum = self.fieldname_to_num(fieldnum)
-        
-        if fieldnum not in self._scorable_fields:
-            raise FieldConfigurationError("Field %r does not store lengths" % fieldnum)
+        fieldid = self.schema.to_number(fieldid)
+        if fieldid not in self._scorable_fields:
+            raise FieldConfigurationError("Field %r does not store lengths" % fieldid)
             
-        return self._doc_info(docnum, fieldnum)
+        return self._doc_info(docnum, fieldid)
     
 
 class MultiDocReader(DocReader):
     
     @protected
     def __contains__(self, term):
-        fieldnum = term[0]
-        if isinstance(fieldnum, basestring):
-            term = (self.schema.name_to_number(fieldnum), term[1])
-            
+        fieldid, text = term
+        term = (self.schema.to_number(fieldid), text)
         return term in self.term_table
     
     def close(self):
         self.term_table.close()
         self.is_closed = True
     
-    def fieldname_to_num(self, fieldname):
-        """Returns the field number corresponding to the given field name.
-        """
-        if fieldname in self.schema:
-            return self.schema.name_to_number(fieldname)
-        else:
-            raise UnknownFieldError(fieldname)
-    
     def format(self, fieldname):
         """Returns the Format object corresponding to the given field name.
         """
             raise TermNotFound("%s:%r" % (fieldnum, text))
     
     @protected
-    def doc_frequency(self, fieldnum, text):
+    def doc_frequency(self, fieldid, text):
         """Returns the document frequency of the given term (that is,
         how many documents the term appears in).
         """
         
-        if isinstance(fieldnum, basestring):
-            fieldnum = self.fieldname_to_num(fieldnum)
-            
-        if (fieldnum, text) not in self:
+        fieldid = self.schema.to_number(fieldid)
+        if (fieldid, text) not in self:
             return 0
-        
-        return self.term_table.posting_count((fieldnum, text))
+        return self.term_table.posting_count((fieldid, text))
     
     @protected
-    def term_count(self, fieldnum, text):
+    def term_count(self, fieldid, text):
         """
         Returns the total number of instances of the given term
         in the corpus.
         """
         
-        if isinstance(fieldnum, basestring):
-            fieldnum = self.fieldname_to_num(fieldnum)
-        
-        if (fieldnum, text) not in self:
+        fieldid = self.schema.to_number(fieldid)
+        if (fieldid, text) not in self:
             return 0
-        
-        return self.term_table[(fieldnum, text)]
+        return self.term_table[(fieldid, text)]
     
     def doc_count_all(self):
         """
         """
         return self.segment.doc_count_all()
     
-    def expand_prefix(self, fieldname, prefix):
-        """
-        Yields terms in the given field that start with the given prefix.
+    @protected
+    def iter_from(self, fieldnum, text):
+        """Yields (field_num, text, doc_freq, collection_frequency) tuples
+        for all terms in the reader, starting at the given term.
         """
         
-        fieldnum = self.fieldname_to_num(fieldname)
-        for fn, t, _, _ in self.iter_from(fieldnum, prefix):
-            if fn != fieldnum or not t.startswith(prefix):
-                return
-            yield t
-    
-    @protected
-    def iter_from(self, fieldnum, text):
         tt = self.term_table
         postingcount = tt.posting_count
         for (fn, t), termcount in tt.iter_from((fieldnum, text)):
             yield (fn, t, postingcount((fn, t)), termcount)
     
-    def all_terms(self):
-        """
-        Yields (fieldname, text) tuples for every term in the index.
+    def expand_prefix(self, fieldid, prefix):
+        """Yields terms in the given field that start with the given prefix.
         """
         
+        fieldid = self.schema.to_number(fieldid)
+        for fn, t, _, _ in self.iter_from(fieldid, prefix):
+            if fn != fieldid or not t.startswith(prefix):
+                return
+            yield t
+    
+    def all_terms(self):
+        """Yields (fieldname, text) tuples for every term in the index.
+        """
+        
+        num2name = self.schema.number_to_name
         current_fieldnum = None
         current_fieldname = None
+        
         for fn, t, _, _ in self:
+            # Only call self.schema.number_to_name when the
+            # field number changes.
             if fn != current_fieldnum:
                 current_fieldnum = fn
-                current_fieldname = self.schema.number_to_name(fn)
+                current_fieldname = num2name(fn)
             yield (current_fieldname, t)
     
-    def iter_field(self, fieldnum):
+    def iter_field(self, fieldid):
         """Yields (text, doc_frequency, term_frequency) tuples for
         all terms in the given field.
         """
-        if isinstance(fieldnum, basestring):
-            fieldnum = self.schema.name_to_number(fieldnum)
         
-        for fn, t, docfreq, freq in self.iter_from(fieldnum, ''):
-            if fn != fieldnum:
+        fieldid = self.schema.to_number(fieldid)
+        for fn, t, docfreq, freq in self.iter_from(fieldid, ''):
+            if fn != fieldid:
                 return
             yield t, docfreq, freq
     
-    def lexicon(self, fieldnum):
+    def lexicon(self, fieldid):
         """Yields all terms in the given field."""
         
-        if isinstance(fieldnum, basestring):
-            fieldnum = self.schema.name_to_number(fieldnum)
-        
-        for t, _, _ in self.iter_field(fieldnum):
+        for t, _, _ in self.iter_field(fieldid):
             yield t
     
-    def most_frequent_terms(self, fieldnum, number = 5):
+    def most_frequent_terms(self, fieldid, number = 5):
         """Yields the top 'number' most frequent terms in the given field as
         a series of (frequency, text) tuples.
         """
         return nlargest(number,
                         ((indexfreq, token)
                          for token, _, indexfreq
-                         in self.iter_field(fieldnum)))
+                         in self.iter_field(fieldid)))
     
     # Posting retrieval methods
     
Tip: Filter by directory path, e.g. /media app.js, to search for public/media/app.js.
Tip: Use camelCasing, e.g. ProjME, to search for ProjectModifiedEvent.java.
Tip: Filter by extension type, e.g. /repo .js, to search for all .js files in the /repo directory.
Tip: Separate your search with spaces, e.g. /ssh pom.xml, to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.