Commits

Matt Chaput committed 0a887dd

Added "tagged ints" to StructFile. Removed docstrings from old spell checker.
Minor whitespace cleanups.

Comments (0)

Files changed (4)

src/whoosh/analysis.py

       
   Every tokenizer is a callable that takes a string and returns an iterator of
   tokens.
-      
+
 * Filters take the tokens from the tokenizer and perform various
   transformations on them. For example, the LowercaseFilter converts all tokens
   to lowercase, which is usually necessary when indexing regular English text.
       
   Every filter is a callable that takes a token generator and returns a token
   generator.
-      
+
 * Analyzers are convenience functions/classes that "package up" a tokenizer and
   zero or more filters into a single unit. For example, the StandardAnalyzer
   combines a RegexTokenizer, LowercaseFilter, and StopFilter.
   Every analyzer is a callable that takes a string and returns a token
   iterator. (So Tokenizers can be used as Analyzers if you don't need any
   filtering).
-  
+
 You can compose tokenizers and filters together using the ``|`` character::
 
     my_analyzer = RegexTokenizer() | LowercaseFilter() | StopFilter()
-    
+
 The first item must be a tokenizer and the rest must be filters (you can't put
 a filter first or a tokenizer after the first item).
 """

src/whoosh/filedb/structfile.py

         """
         return decode_signed_varint(read_varint(self.file.read))
 
+    def write_tagint(self, i):
+        """Writes a sometimes-compressed unsigned integer to the wrapped file.
+        This is similar to the varint methods but uses a less compressed but
+        faster format.
+
+        :param i: The non-negative integer to write. No range check is done
+            here; the encoding is chosen purely by the ``<= 253`` and
+            ``<= 65535`` comparisons below.
+        """
+        
+        # Store numbers 0-253 in one byte. Byte 254 means "an unsigned 16-bit
+        # int follows." Byte 255 means "an unsigned 32-bit int follows."
+        if i <= 253:
+            # Small value: the single byte written is the value itself.
+            self.file.write(chr(i))
+        elif i <= 65535:
+            # Tag byte 0xFE (254), then the value as packed by pack_ushort.
+            self.file.write("\xFE" + pack_ushort(i))
+        else:
+            # Tag byte 0xFF (255), then the value as packed by pack_uint.
+            # NOTE(review): values that do not fit in 32 bits are presumably
+            # rejected by pack_uint -- confirm.
+            self.file.write("\xFF" + pack_uint(i))
+    
+    def read_tagint(self):
+        """Reads a sometimes-compressed unsigned integer from the wrapped file.
+        This is similar to the varint methods but uses a less compressed but
+        faster format.
+
+        Returns the value of the first byte read when it is neither 254 nor
+        255; otherwise returns the result of the wider read selected by that
+        tag byte.
+        """
+        
+        # The first byte is either the value itself or a tag (254/255) that
+        # selects which wider read follows.
+        tb = ord(self.file.read(1))
+        if tb == 254:
+            # NOTE(review): this calls read_ushort() on the wrapped file
+            # object (self.file), not on self. If the wrapped object is a
+            # plain file without that method this raises AttributeError;
+            # presumably self.read_ushort() was intended -- confirm.
+            return self.file.read_ushort()
+        elif tb == 255:
+            # NOTE(review): same concern as above for read_uint().
+            return self.file.read_uint()
+        else:
+            return tb
+
     def write_byte(self, n):
         """Writes a single byte to the wrapped file, shortcut for
         ``file.write(chr(n))``.

src/whoosh/scoring.py

         """
         
         raise NotImplementedError(self.__class__.__name__)
-
+    
 
 # WeightScorer
 

src/whoosh/spelling.py

 
         return Correction(q, qstring, corrected_q, corrected_tokens)
 
+
+
 #
 #
 #
                  booststart=2.0, boostend=1.0,
                  mingram=3, maxgram=4,
                  minscore=0.5):
-        """
-        :param storage: The storage object in which to create the
-            spell-checker's dictionary index.
-        :param indexname: The name to use for the spell-checker's dictionary
-            index. You only need to change this if you have multiple spelling
-            indexes in the same storage.
-        :param booststart: How much to boost matches of the first N-gram (the
-            beginning of the word).
-        :param boostend: How much to boost matches of the last N-gram (the end
-            of the word).
-        :param mingram: The minimum gram length to store.
-        :param maxgram: The maximum gram length to store.
-        :param minscore: The minimum score matches must achieve to be returned.
-        """
-
         self.storage = storage
         self.indexname = indexname
 
         self.minscore = minscore
 
     def index(self, create=False):
-        """Returns the backend index of this object (instantiating it if it
-        didn't already exist).
-        """
-
         from whoosh import index
         if create or not self._index:
             create = create or not index.exists(self.storage, indexname=self.indexname)
         return Schema(**dict(fls))
 
     def suggestions_and_scores(self, text, weighting=None):
-        """Returns a list of possible alternative spellings of 'text', as
-        ('word', score, weight) triples, where 'word' is the suggested
-        word, 'score' is the score that was assigned to the word using
-        :meth:`SpellChecker.add_field` or :meth:`SpellChecker.add_scored_words`,
-        and 'weight' is the score the word received in the search for the
-        original word's ngrams.
-        
-        You must add words to the dictionary (using add_field, add_words,
-        and/or add_scored_words) before you can use this.
-        
-        This is a lower-level method, in case an expert user needs access to
-        the raw scores, for example to implement a custom suggestion ranking
-        algorithm. Most people will want to call :meth:`~SpellChecker.suggest`
-        instead, which simply returns the top N valued words.
-        
-        :param text: The word to check.
-        :rtype: list
-        """
-
         if weighting is None:
             weighting = scoring.TF_IDF()
 
             s.close()
 
     def suggest(self, text, number=3, usescores=False):
-        """Returns a list of suggested alternative spellings of 'text'. You
-        must add words to the dictionary (using add_field, add_words, and/or
-        add_scored_words) before you can use this.
-        
-        :param text: The word to check.
-        :param number: The maximum number of suggestions to return.
-        :param usescores: Use the per-word score to influence the suggestions.
-        :rtype: list
-        """
-
         if usescores:
             def keyfn(a):
                 return 0 - (1 / distance(text, a[0])) * a[1]
                 if weight >= self.minscore]
 
     def add_field(self, ix, fieldname):
-        """Adds the terms in a field from another index to the backend
-        dictionary. This method calls add_scored_words() and uses each term's
-        frequency as the score. As a result, more common words will be
-        suggested before rare words. If you want to calculate the scores
-        differently, use add_scored_words() directly.
-        
-        :param ix: The index.Index object from which to add terms.
-        :param fieldname: The field name (or number) of a field in the source
-            index. All the indexed terms from this field will be added to the
-            dictionary.
-        """
-
         r = ix.reader()
         try:
             self.add_scored_words((w, terminfo.weight())
             r.close()
 
     def add_words(self, ws, score=1):
-        """Adds a list of words to the backend dictionary.
-        
-        :param ws: A sequence of words (strings) to add to the dictionary.
-        :param score: An optional score to use for ALL the words in 'ws'.
-        """
         self.add_scored_words((w, score) for w in ws)
 
     def add_scored_words(self, ws):
-        """Adds a list of ("word", score) tuples to the backend dictionary.
-        Associating words with a score lets you use the 'usescores' keyword
-        argument of the suggest() method to order the suggestions using the
-        scores.
-        
-        :param ws: A sequence of ("word", score) tuples.
-        """
-
         writer = self.index().writer()
         for text, score in ws:
             fields = {"word": text, "score": score}