Matt Chaput committed 3c6e44f Merge

Merging back bug fixes from default branch.


Files changed (42)

 *~
 *.DS_Store
 
+.idea
 .settings
 .coverage
 .tox

.idea/.name

-whoosh

.idea/codeStyleSettings.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectCodeStyleSettingsManager">
-    <option name="PER_PROJECT_SETTINGS">
-      <value>
-        <XML>
-          <option name="XML_LEGACY_SETTINGS_IMPORTED" value="true" />
-        </XML>
-      </value>
-    </option>
-  </component>
-</project>
-

.idea/dictionaries/new.xml

-<component name="ProjectDictionaryState">
-  <dictionary name="new" />
-</component>

.idea/encodings.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
-</project>
-

.idea/inspectionProfiles/profiles_settings.xml

-<component name="InspectionProjectProfileManager">
-  <settings>
-    <option name="PROJECT_PROFILE" />
-    <option name="USE_PROJECT_PROFILE" value="false" />
-    <version value="1.0" />
-  </settings>
-</component>

.idea/misc.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectKey">
-    <option name="state" value="project://d335e528-6cc9-49f2-bf34-b9140f4b60c4" />
-  </component>
-  <component name="ProjectResources">
-    <default-html-doctype>http://www.w3.org/1999/xhtml</default-html-doctype>
-  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.2 (C:/Python27/python.exe)" project-jdk-type="Python SDK" />
-</project>
-

.idea/modules.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/whoosh.iml" filepath="$PROJECT_DIR$/.idea/whoosh.iml" />
-    </modules>
-  </component>
-</project>
-

.idea/other.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="PyDocumentationSettings">
-    <option name="myDocStringFormat" value="reStructuredText" />
-  </component>
-</project>
-

.idea/scopes/scope_settings.xml

-<component name="DependencyValidationManager">
-  <state>
-    <option name="SKIP_IMPORT_STATEMENTS" value="false" />
-  </state>
-</component>

.idea/testrunner.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="TestRunnerService">
-    <option name="projectConfiguration" value="py.test" />
-    <option name="PROJECT_TEST_RUNNER" value="py.test" />
-  </component>
-</project>
-

.idea/vcs.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="" vcs="hg4idea" />
-  </component>
-</project>
-

.idea/whoosh.iml

-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$">
-      <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
-      <excludeFolder url="file://$MODULE_DIR$/.settings" />
-      <excludeFolder url="file://$MODULE_DIR$/.tox" />
-      <excludeFolder url="file://$MODULE_DIR$/src/Whoosh.egg-info" />
-    </content>
-    <orderEntry type="jdk" jdkName="Python 2.7.2 (C:/Python27/python.exe)" jdkType="Python SDK" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-  <component name="PackageRequirementsSettings">
-    <option name="requirementsPath" value="" />
-  </component>
-  <component name="TemplatesService">
-    <option name="TEMPLATE_CONFIGURATION" value="Mako" />
-  </component>
-</module>
-

docs/source/api/query.rst

 .. autoclass:: SpanQuery
 .. autoclass:: SpanFirst
 .. autoclass:: SpanNear
+.. autoclass:: SpanNear2
 .. autoclass:: SpanNot
 .. autoclass:: SpanOr
 .. autoclass:: SpanContains

docs/source/indexing.rst

   * If a path is in the set of paths to re-index, we need to index it.
 
   * Otherwise, we can skip indexing the file.
+
+
+Clearing the index
+==================
+
+In some cases you may want to re-index from scratch. To clear the index without
+disrupting any existing readers::
+
+    from whoosh import writing
+
+    with myindex.writer() as mywriter:
+        # You can optionally add documents to the writer here
+        # e.g. mywriter.add_document(...)
+
+        # Using mergetype=CLEAR clears all existing segments so the index
+        # will only contain the documents added with this writer
+        mywriter.mergetype = writing.CLEAR
+
+Or, if you don't use the writer as a context manager and call ``commit()``
+directly, do it like this::
+
+    mywriter = myindex.writer()
+    # ...
+    mywriter.commit(mergetype=writing.CLEAR)
+
+.. note::
+    If you don't need to worry about existing readers, a more efficient method
+    is to simply delete the contents of the index directory and start over.
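
For reference, a minimal sketch of the "start over" alternative mentioned in
the note above, assuming a directory-based index and the standard
``whoosh.index.create_in`` helper::

    from whoosh import index
    from whoosh.fields import Schema, TEXT

    schema = Schema(body=TEXT)
    # "indexdir" is a hypothetical, already-existing directory; create_in()
    # builds a brand-new, empty index there, superseding any previous index.
    ix = index.create_in("indexdir", schema)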

src/whoosh/codec/base.py

     def matcher(self, fieldname, text, format_, scorer=None):
         raise NotImplementedError
 
+    @abstractmethod
+    def indexed_field_names(self):
+        raise NotImplementedError
+
     def close(self):
         pass
 

src/whoosh/codec/memory.py

         ids, weights, values = zip(*items)
         return ListMatcher(ids, weights, values, format_, scorer=scorer)
 
+    def indexed_field_names(self):
+        return self._invindex.keys()
+
     def close(self):
         pass
 

src/whoosh/codec/plaintext.py

         fieldname, btext = term
         return self._find_term(fieldname, btext)
 
+    def indexed_field_names(self):
+        return self._iter_fields()
+
     def terms(self):
         for fieldname in self._iter_fields():
             for btext in self._iter_btexts():

src/whoosh/codec/whoosh2.py

 class W2TermsReader(PostingIndexBase):
     # Implements whoosh.codec.base.TermsReader
 
+    def indexed_field_names(self):
+        return self.fieldmap.keys()
+
     def terms(self):
         return self.keys()
 

src/whoosh/codec/whoosh3.py

     def __contains__(self, term):
         return self._keycoder(*term) in self._tindex
 
+    def indexed_field_names(self):
+        return self._fieldmap.keys()
+
     def terms(self):
         keydecoder = self._keydecoder
         return (keydecoder(keybytes) for keybytes in self._tindex.keys())

src/whoosh/collectors.py

         _allow = self._allow
         _restrict = self._restrict
 
-        if _allow or _restrict:
+        if _allow is not None or _restrict is not None:
             filtered_count = self.filtered_count
             for sub_docnum in child.matches():
                 global_docnum = self.offset + sub_docnum
-                if ((_allow and global_docnum not in _allow)
-                    or (_restrict and global_docnum in _restrict)):
+                if ((_allow is not None and global_docnum not in _allow)
+                    or (_restrict is not None and global_docnum in _restrict)):
                     filtered_count += 1
                     continue
                 child.collect(sub_docnum)
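
As a quick sketch of why the truthiness checks above were replaced with
``is not None`` comparisons: an empty allow set is falsy, so the old test
treated "the filter matched nothing" the same as "no filter at all"::

    # _allow holds the docnums permitted by a filter query; an empty set
    # means the filter matched no documents, so nothing should be collected.
    _allow = set()
    docnums = [0, 1, 2]

    # Old check: "if _allow" is False for an empty set, so filtering is skipped.
    old_kept = [d for d in docnums if not (_allow and d not in _allow)]
    # New check: testing against None keeps an empty filter meaningful.
    new_kept = [d for d in docnums
                if not (_allow is not None and d not in _allow)]

    print(old_kept)  # [0, 1, 2]
    print(new_kept)  # []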

src/whoosh/fields.py

             either ``int``, ``float``. If you use ``Decimal``,
             use the ``decimal_places`` argument to control how many decimal
             places the field will store.
+        :param bits: When ``numtype`` is ``int``, the number of bits to use to
+            store the number: 8, 16, 32, or 64.
         :param stored: Whether the value of this field is stored with the
             document.
         :param unique: Whether the value of this field is unique per-document.
             raise Exception("A float type and decimal_places argument %r are "
                             "incompatible" % decimal_places)
 
+        intsizes = [8, 16, 32, 64]
+        intcodes = ["B", "H", "I", "Q"]
         # Set up field configuration based on type and size
         if numtype is float:
             bits = 64  # Floats are converted to 64 bit ints
-        intsizes = [8, 16, 32, 64]
-        intcodes = ["B", "H", "I", "Q"]
-        if bits not in intsizes:
-            raise Exception("Invalid bits %r, use 8, 16, 32, or 64"
-                            % bits)
+        else:
+            if bits not in intsizes:
+                raise Exception("Invalid bits %r, use 8, 16, 32, or 64"
+                                % bits)
         # Type code for the *sortable* representation
         self.sortable_typecode = intcodes[intsizes.index(bits)]
         self._struct = struct.Struct(">" + self.sortable_typecode)
         self.analyzer = analysis.IDAnalyzer()
         self.format = formats.Existence(field_boost=field_boost)
 
+        # Calculate the minimum and maximum possible values for error checking
+        self.min_value = from_sortable(numtype, bits, signed, 0)
+        self.max_value = from_sortable(numtype, bits, signed, 2 ** bits - 1)
+
         # Column configuration
         if default is None:
             if numtype is int:
         if dc and isinstance(x, (string_type, Decimal)):
             x = Decimal(x) * (10 ** dc)
         x = self.numtype(x)
+
+        if x < self.min_value or x > self.max_value:
+            raise ValueError("Numeric field value %s out of range [%s, %s]"
+                             % (x, self.min_value, self.max_value))
         return x
 
     def unprepare_number(self, x):
         self.format = formats.Existence(field_boost=field_boost)
 
     def _obj_to_bool(self, x):
-        if isinstance(x, string_type):
-            x = x.lower() in self.trues
+        # We special case strings such as "true", "false", "yes", "no", but
+        # otherwise call bool() on the query value. This lets you pass objects
+        # as query values and do the right thing.
+
+        if isinstance(x, string_type) and x.lower() in self.trues:
+            x = True
+        elif isinstance(x, string_type) and x.lower() in self.falses:
+            x = False
         else:
             x = bool(x)
         return x
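
A small sketch of the new range checking for NUMERIC fields (mirroring the
``test_numeric_errors`` test added further down): a 16-bit signed field now
raises ``ValueError`` for out-of-range values::

    from whoosh import fields

    f = fields.NUMERIC(int, bits=16, signed=True)
    try:
        list(f.index(32768))  # one past the 16-bit signed maximum of 32767
    except ValueError as e:
        print(e)  # "Numeric field value 32768 out of range [-32768, 32767]"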

src/whoosh/filedb/compound.py

 
 from whoosh.compat import BytesIO, memoryview_
 from whoosh.filedb.structfile import BufferFile, StructFile
-from whoosh.filedb.filestore import FileStorage
+from whoosh.filedb.filestore import FileStorage, StorageError
 from whoosh.system import emptybytes
 from whoosh.util import random_name
 
     def __init__(self, dbfile, use_mmap=True, basepos=0):
         self._file = dbfile
         self._file.seek(basepos)
+        self.is_closed = False
 
         self._diroffset = self._file.read_long()
         self._dirlength = self._file.read_int()
         return "<%s (%s)>" % (self.__class__.__name__, self._name)
 
     def close(self):
+        if self.is_closed:
+            raise Exception("Already closed")
+        self.is_closed = True
+
         if self._source:
             try:
                 self._source.close()
             fileinfo = self._dir[name]
         except KeyError:
             raise NameError("Unknown file %r" % (name,))
-        return (fileinfo["offset"], fileinfo["length"])
+        return fileinfo["offset"], fileinfo["length"]
 
     def open_file(self, name, *args, **kwargs):
+        if self.is_closed:
+            raise StorageError("Storage was closed")
+
         offset, length = self.range(name)
         if self._source:
             # Create a memoryview/buffer from the mmap

src/whoosh/filedb/filestore.py

 
 # Exceptions
 
-class ReadOnlyError(Exception):
+class StorageError(Exception):
+    pass
+
+
+class ReadOnlyError(StorageError):
     pass
 
 

src/whoosh/lang/stopwords.py

 # They were obtained from:
 # anoncvs.postgresql.org/cvsweb.cgi/pgsql/src/backend/snowball/stopwords/
 
-from whoosh.compat import u
-
 
 # =====
 # This module was generated from the original files using the following script
 #    f = open("stopwords/" + name)
 #    wordls = [line.strip() for line in f]
 #    words = " ".join(wordls)
-#    print '"%s": frozenset(u("""' % name
+#    print '"%s": frozenset(u"""' % name
 #    print textwrap.fill(words, 72)
-#    print '""").split())'
+#    print '""".split())'
 #    print
 
 
 stoplists = {
-    "da": frozenset(u("""
+    "da": frozenset(u"""
     og i jeg det at en den til er som på de med han af for ikke der var mig
     sig men et har om vi min havde ham hun nu over da fra du ud sin dem os
     op man hans hvor eller hvad skal selv her alle vil blev kunne ind når
     være dog noget ville jo deres efter ned skulle denne end dette mit
     også under have dig anden hende mine alt meget sit sine vor mod disse
     hvis din nogle hos blive mange ad bliver hendes været thi jer sådan
-    """).split()),
+    """.split()),
 
-    "nl": frozenset(u("""
+    "nl": frozenset(u"""
     de en van ik te dat die in een hij het niet zijn is was op aan met als
     voor had er maar om hem dan zou of wat mijn men dit zo door over ze zich
     bij ook tot je mij uit der daar haar naar heb hoe heeft hebben deze u
     doen toen moet ben zonder kan hun dus alles onder ja eens hier wie werd
     altijd doch wordt wezen kunnen ons zelf tegen na reeds wil kon niets uw
     iemand geweest andere
-    """).split()),
+    """.split()),
 
-    "en": frozenset(u("""
+    "en": frozenset(u"""
     i me my myself we our ours ourselves you your yours yourself yourselves
     he him his himself she her hers herself it its itself they them their
     theirs themselves what which who whom this that these those am is are
     out on off over under again further then once here there when where why
     how all any both each few more most other some such no nor not only own
     same so than too very s t can will just don should now
-    """).split()),
+    """.split()),
 
-    "fi": frozenset(u("""
+    "fi": frozenset(u"""
     olla olen olet on olemme olette ovat ole oli olisi olisit olisin
     olisimme olisitte olisivat olit olin olimme olitte olivat ollut olleet
     en et ei emme ette eivät minä minun minut minua minussa minusta minuun
     joita joissa joista joihin joilla joilta joille joina joiksi että ja
     jos koska kuin mutta niin sekä sillä tai vaan vai vaikka kanssa mukaan
     noin poikki yli kun niin nyt itse
-    """).split()),
+    """.split()),
 
-    "fr": frozenset(u("""
+    "fr": frozenset(u"""
     au aux avec ce ces dans de des du elle en et eux il je la le leur lui ma
     mais me même mes moi mon ne nos notre nous on ou par pas pour qu que
     qui sa se ses son sur ta te tes toi ton tu un une vos votre vous c d j l
     auront aurais aurait aurions auriez auraient avais avait avions aviez
     avaient eut eûmes eûtes eurent aie aies ait ayons ayez aient eusse
     eusses eût eussions eussiez eussent
-    """).split()),
+    """.split()),
 
-    "de": frozenset(u("""
+    "de": frozenset(u"""
     aber alle allem allen aller alles als also am an ander andere anderem
     anderen anderer anderes anderm andern anderr anders auch auf aus bei bin
     bis bist da damit dann der den des dem die das daß derselbe derselben
     unter viel vom von vor während war waren warst was weg weil weiter
     welche welchem welchen welcher welches wenn werde werden wie wieder will
     wir wird wirst wo wollen wollte würde würden zu zum zur zwar zwischen
-    """).split()),
+    """.split()),
 
-    "hu": frozenset(u("""
+    "hu": frozenset(u"""
     a ahogy ahol aki akik akkor alatt által általában amely amelyek
     amelyekben amelyeket amelyet amelynek ami amit amolyan amíg amikor át
     abban ahhoz annak arra arról az azok azon azt azzal azért aztán
     több úgy ugyanis új újabb újra után utána utolsó vagy vagyis
     valaki valami valamint való vagyok van vannak volt voltam voltak
     voltunk vissza vele viszont volna
-    """).split()),
+    """.split()),
 
-    "it": frozenset(u("""
+    "it": frozenset(u"""
     ad al allo ai agli all agl alla alle con col coi da dal dallo dai dagli
     dall dagl dalla dalle di del dello dei degli dell degl della delle in
     nel nello nei negli nell negl nella nelle su sul sullo sui sugli sull
     staresti starebbe staremmo stareste starebbero stavo stavi stava stavamo
     stavate stavano stetti stesti stette stemmo steste stettero stessi
     stesse stessimo stessero stando
-    """).split()),
+    """.split()),
 
-    "no": frozenset(u("""
+    "no": frozenset(u"""
     og i jeg det at en et den til er som på de med han av ikke ikkje der
     så var meg seg men ett har om vi min mitt ha hadde hun nå over da ved
     fra du ut sin dem oss opp man kan hans hvor eller hva skal selv sjøl
     hennes hoss hossen ikkje ingi inkje korleis korso kva kvar kvarhelst
     kven kvi kvifor me medan mi mine mykje no nokon noka nokor noko nokre si
     sia sidan so somt somme um upp vere vore verte vort varte vart
-    """).split()),
+    """.split()),
 
-    "pt": frozenset(u("""
+    "pt": frozenset(u"""
     de a o que e do da em um para com não uma os no se na por mais as dos
     como mas ao ele das à seu sua ou quando muito nos já eu também só
     pelo pela até isso ela entre depois sem mesmo aos seus quem nas me esse
     tivemos tiveram tivera tivéramos tenha tenhamos tenham tivesse
     tivéssemos tivessem tiver tivermos tiverem terei terá teremos terão
     teria teríamos teriam
-    """).split()),
+    """.split()),
 
-    "ru": frozenset(u("""
+    "ru": frozenset(u"""
     и в во не что он на я с со как а то все она
     так его но да ты к у же вы за бы по только
     ее мне было вот от меня еще нет о из ему
     впрочем хорошо свою этой перед иногда
     лучше чуть том нельзя такой им более
     всегда конечно всю между
-    """).split()),
+    """.split()),
 
-    "es": frozenset(u("""
+    "es": frozenset(u"""
     de la que el en y a los del se las por un para con no una su al lo como
     más pero sus le ya o este sí porque esta entre cuando muy sin sobre
     también me hasta hay donde quien desde todo nos durante todos uno les
     tuvieron tuviera tuvieras tuviéramos tuvierais tuvieran tuviese
     tuvieses tuviésemos tuvieseis tuviesen teniendo tenido tenida tenidos
     tenidas tened
-    """).split()),
+    """.split()),
 
-    "sv": frozenset(u("""
+    "sv": frozenset(u"""
     och det att i en jag hon som han på den med var sig för så till är
     men ett om hade de av icke mig du henne då sin nu har inte hans honom
     skulle hennes där min man ej vid kunde något från ut när efter upp
     mitt ni bli blev oss din dessa några deras blir mina samma vilken er
     sådan vår blivit dess inom mellan sådant varför varje vilka ditt vem
     vilket sitta sådana vart dina vars vårt våra ert era vilkas
-    """).split()),
+    """.split()),
 
-    "tr": frozenset(u("""
+    "tr": frozenset(u"""
     acaba ama aslında az bazı belki biri birkaç birşey biz bu çok
     çünkü da daha de defa diye eğer en gibi hem hep hepsi her hiç için
     ile ise kez ki kim mı mu mü nasıl ne neden nerde nerede nereye niçin
     niye o sanki şey siz şu tüm ve veya ya yani
-    """).split()),
+    """.split()),
 }

src/whoosh/matching/binary.py

                 # quality when added to B
                 sk = a.skip_to_quality(minquality - bq)
                 skipped += sk
-                if not sk:
+                if not sk and a.is_active():
                     # The matcher couldn't skip ahead for some reason, so just
                     # advance and try again
                     a.next()
                 # And vice-versa
                 sk = b.skip_to_quality(minquality - aq)
                 skipped += sk
-                if not sk:
+                if not sk and b.is_active():
                     b.next()
 
             if not a.is_active() or not b.is_active():

src/whoosh/matching/combo.py

 
     def skip_to_quality(self, minquality):
         skipped = 0
-        while self.block_quality() <= minquality:
+        while self.is_active() and self.block_quality() <= minquality:
             skipped += 1
             self._docnum = self._limit
             self._read_part()
-        self._find_next()
+        if self.is_active():
+            self._find_next()
         return skipped
 
     def id(self):

src/whoosh/multiproc.py

 
     # The filename of the single remaining run
     runname = writer.pool.runs[0]
+    # The indexed field names
+    fieldnames = writer.pool.fieldnames
     # The segment object (parent can use this to re-open the files created
     # by the sub-writer)
     segment = writer._partial_segment()
 
-    return runname, segment
+    return runname, fieldnames, segment
 
 
 # Multiprocessing Writer
             if multisegment:
                 # Actually finish the segment and return it with no run
                 runname = None
+                fieldnames = writer.pool.fieldnames
                 segment = writer._finalize_segment()
             else:
                 # Merge all runs in the writer's pool into one run, close the
                 # segment, and return the run name and the segment
                 k = self.kwargs.get("k", 64)
-                runname, segment = finish_subsegment(writer, k)
+                runname, fieldnames, segment = finish_subsegment(writer, k)
 
             # Put the results (the run filename and the segment object) on the
             # result queue
-            resultqueue.put((runname, segment), timeout=5)
+            resultqueue.put((runname, fieldnames, segment), timeout=5)
 
     def _process_file(self, filename, doc_count):
         # This method processes a "job file" written out by the parent task. A
         for task in self.tasks:
             task.join()
 
-        # Pull a (run_file_name, segment) tuple off the result queue for
-        # each sub-task, representing the final results of the task
+        # Pull a (run_file_name, fieldnames, segment) tuple off the result
+        # queue for each sub-task, representing the final results of the task
         results = []
         for task in self.tasks:
             results.append(self.resultqueue.get(timeout=5))
 
         if self.multisegment:
-            finalsegments += [s for _, s in results]
+            # If we're not merging the segments, we don't care about the runname
+            # and fieldnames in the results... just pull out the segments and
+            # add them to the list of final segments
+            finalsegments += [s for _, _, s in results]
             if self._added:
                 finalsegments.append(self._finalize_segment())
             else:
         self._finish()
 
     def _merge_subsegments(self, results, mergetype):
+        schema = self.schema
+        schemanames = set(schema.names())
         storage = self.storage
         codec = self.codec
         sources = []
             sources.append(self.pool.iter_postings())
 
         pdrs = []
-        for runname, segment in results:
+        for runname, fieldnames, segment in results:
+            fieldnames = set(fieldnames) | schemanames
             pdr = codec.per_document_reader(storage, segment)
             pdrs.append(pdr)
             basedoc = self.docnum
-            docmap = self.write_per_doc(pdr)
+            docmap = self.write_per_doc(fieldnames, pdr)
             assert docmap is None
 
             items = self._read_and_renumber_run(runname, basedoc)
 
         try:
             # Merge the iterators into the field writer
-            self.fieldwriter.add_postings(self.schema, mpdr, imerge(sources))
+            self.fieldwriter.add_postings(schema, mpdr, imerge(sources))
         finally:
             mpdr.close()
         self._added = True

src/whoosh/query/nested.py

                 self._find_next_children()
 
         def skip_to(self, docid):
+            if docid <= self._nextchild:
+                return
+
             m = self.child
-
-            m.skip_to(docid)
-            if m.is_active():
+            if not m.is_active() or docid < m.id():
+                # We've already read-ahead past the desired doc, so iterate
+                while self.is_active() and self._nextchild < docid:
+                    self.next()
+            elif m.is_active():
+                # The child is active and hasn't read-ahead to the desired doc
+                # yet, so skip to it and re-find
+                m.skip_to(docid)
                 self._find_next_children()
             else:
                 # Go inactive

src/whoosh/query/positional.py

         return self._and_query().estimate_min_size(ixreader)
 
     def matcher(self, searcher, context=None):
+        from whoosh.query import Term, SpanNear2
+
         fieldname = self.fieldname
-        reader = searcher.reader()
-
         if fieldname not in searcher.schema:
             return matching.NullMatcher()
+
         field = searcher.schema[fieldname]
-
-        words = [field.to_bytes(word) for word in self.words]
-
-        # Shortcut the query if one of the words doesn't exist.
-        for word in words:
-            if (fieldname, word) not in reader:
-                return matching.NullMatcher()
-
         if not field.format or not field.format.supports("positions"):
             raise qcore.QueryError("Phrase search: %r field has no positions"
                                    % self.fieldname)
 
-        # Construct a tree of SpanNear queries representing the words in the
-        # phrase and return its matcher
-        from whoosh.query.spans import SpanNear
+        terms = []
+        # Build a list of Term queries from the words in the phrase
+        reader = searcher.reader()
+        for word in self.words:
+            word = field.to_bytes(word)
+            if (fieldname, word) not in reader:
+                # Shortcut the query if one of the words doesn't exist.
+                return matching.NullMatcher()
+            terms.append(Term(fieldname, word))
 
-        q = SpanNear.phrase(fieldname, words, slop=self.slop)
+        # Create the equivalent SpanNear2 query from the terms
+        q = SpanNear2(terms, slop=self.slop, ordered=True, mindist=1)
+        # Get the matcher
         m = q.matcher(searcher, context)
+
         if self.boost != 1.0:
             m = matching.WrappingMatcher(m, boost=self.boost)
         return m

src/whoosh/query/spans.py

             return self.start - span.end
 
 
+def bisect_spans(spans, start):
+    lo = 0
+    hi = len(spans)
+    while lo < hi:
+        mid = (lo + hi) // 2
+        if spans[mid].start < start:
+            lo = mid + 1
+        else:
+            hi = mid
+    return lo
+
+
 # Base matchers
 
 class SpanWrappingMatcher(wrappers.WrappingMatcher):
     def copy(self):
         return self.__class__(self.a.copy(), self.b.copy())
 
+    def depth(self):
+        return 1 + max(self.a.depth(), self.b.depth())
+
     def replace(self, minquality=0):
         # TODO: fix this
         if not self.is_active():
 
 
 class SpanNear(SpanQuery):
-    """Matches queries that occur near each other. By default, only matches
+    """
+    Note: for new code, use :class:`SpanNear2` instead of this class. SpanNear2
+    takes a list of sub-queries instead of requiring you to create a binary
+    tree of query objects.
+
+    Matches queries that occur near each other. By default, only matches
     queries that occur right next to each other (slop=1) and in order
     (ordered=True).
 
 
                     # Check the distance between the spans
                     dist = aspan.distance_to(bspan)
-                    if dist >= mindist and dist <= slop:
+                    if mindist <= dist <= slop:
                         spans.add(aspan.to(bspan))
 
             return sorted(spans)
 
 
+class SpanNear2(SpanQuery):
+    """
+    Matches queries that occur near each other. By default, only matches
+    queries that occur right next to each other (slop=1) and in order
+    (ordered=True).
+
+    New code should use this query type instead of :class:`SpanNear`.
+
+    (Unlike :class:`SpanNear`, this query takes a list of subqueries instead of
+    requiring you to build a binary tree of query objects. This query should
+    also be slightly faster due to less overhead.)
+
+    For example, to find documents where "whoosh" occurs next to "library"
+    in the "text" field::
+
+        from whoosh import query, spans
+        t1 = query.Term("text", "whoosh")
+        t2 = query.Term("text", "library")
+        q = spans.SpanNear2([t1, t2])
+
+    To find documents where "whoosh" occurs at most 5 positions before
+    "library"::
+
+        q = spans.SpanNear2([t1, t2], slop=5)
+
+    To find documents where "whoosh" occurs at most 5 positions before or after
+    "library"::
+
+        q = spans.SpanNear2([t1, t2], slop=5, ordered=False)
+    """
+
+    def __init__(self, qs, slop=1, ordered=True, mindist=1):
+        """
+        :param qs: a sequence of sub-queries to match.
+        :param slop: the number of positions within which the queries must
+            occur. Default is 1, meaning the queries must occur right next
+            to each other.
+        :param ordered: whether the sub-queries must occur in order. Default
+            is True.
+        :param mindist: the minimum distance allowed between the queries.
+        """
+
+        self.qs = qs
+        self.slop = slop
+        self.ordered = ordered
+        self.mindist = mindist
+
+    def __repr__(self):
+        return ("%s(%r, slop=%d, ordered=%s, mindist=%d)"
+                % (self.__class__.__name__, self.qs, self.slop, self.ordered,
+                   self.mindist))
+
+    def __eq__(self, other):
+        return (other and self.__class__ == other.__class__
+                and self.qs == other.qs and self.slop == other.slop
+                and self.ordered == other.ordered
+                and self.mindist == other.mindist)
+
+    def __hash__(self):
+        h = hash(self.slop) ^ hash(self.ordered) ^ hash(self.mindist)
+        for q in self.qs:
+            h ^= hash(q)
+        return h
+
+    def is_leaf(self):
+        return False
+
+    def children(self):
+        return self.qs
+
+    def apply(self, fn):
+        return self.__class__([fn(q) for q in self.qs], slop=self.slop,
+                              ordered=self.ordered, mindist=self.mindist)
+
+    def matcher(self, searcher, context=None):
+        ms = [q.matcher(searcher, context) for q in self.qs]
+        return self.SpanNear2Matcher(ms, slop=self.slop, ordered=self.ordered,
+                                     mindist=self.mindist)
+
+    class SpanNear2Matcher(SpanWrappingMatcher):
+        def __init__(self, ms, slop=1, ordered=True, mindist=1):
+            self.ms = ms
+            self.slop = slop
+            self.ordered = ordered
+            self.mindist = mindist
+            isect = make_binary_tree(binary.IntersectionMatcher, ms)
+            super(SpanNear2.SpanNear2Matcher, self).__init__(isect)
+
+        def copy(self):
+            return self.__class__([m.copy() for m in self.ms], slop=self.slop,
+                                  ordered=self.ordered, mindist=self.mindist)
+
+        def replace(self, minquality=0):
+            # TODO: fix this
+            if not self.is_active():
+                return mcore.NullMatcher()
+            return self
+
+        def _get_spans(self):
+            slop = self.slop
+            mindist = self.mindist
+            ordered = self.ordered
+            ms = self.ms
+
+            aspans = ms[0].spans()
+            i = 1
+            while i < len(ms) and aspans:
+                bspans = ms[i].spans()
+                spans = set()
+                for aspan in aspans:
+                    # Use a binary search to find the first position we should
+                    # start looking for possible matches
+                    if ordered:
+                        start = aspan.start
+                    else:
+                        start = max(0, aspan.start - slop)
+                    j = bisect_spans(bspans, start)
+
+                    while j < len(bspans):
+                        bspan = bspans[j]
+                        j += 1
+
+                        if (bspan.end < aspan.start - slop
+                            or (ordered and aspan.start > bspan.start)):
+                            # B is too far in front of A, or B is in front of A
+                            # *at all* when ordered is True
+                            continue
+                        if bspan.start > aspan.end + slop:
+                            # B is too far from A. Since spans are listed in
+                            # start position order, we know that all spans after
+                            # this one will also be too far.
+                            break
+
+                        # Check the distance between the spans
+                        dist = aspan.distance_to(bspan)
+                        if mindist <= dist <= slop:
+                            spans.add(aspan.to(bspan))
+                aspans = sorted(spans)
+                i += 1
+
+            if i == len(ms):
+                return aspans
+            else:
+                return []
+
+
 class SpanOr(SpanQuery):
     """Matches documents that match any of a list of sub-queries. Unlike
     query.Or, this class merges together matching spans from the different
 
         def _get_spans(self):
             return self.a.spans()
+
+
+
+
+

src/whoosh/query/wrappers.py

         return self.__class__(child, self.score)
 
     def matcher(self, searcher, context=None):
+        from whoosh.searching import SearchContext
+
+        context = context or SearchContext()
         m = self.child.matcher(searcher, context)
-        if isinstance(m, matching.NullMatcherClass):
+        if context.needs_current or isinstance(m, matching.NullMatcherClass):
             return m
         else:
             ids = array("I", m.all_ids())

src/whoosh/reading.py

 
 # Exceptions
 
+class ReaderClosed(Exception):
+    """Exception raised when you try to do some operation on a closed searcher
+    (or a Results object derived from a searcher that has since been closed).
+    """
+
+    message = "Operation on a closed reader"
+
+
 class TermNotFound(Exception):
     pass
 
         return None
 
     @abstractmethod
+    def indexed_field_names(self):
+        """Returns an iterable of strings representing the names of the indexed
+        fields. This may include additional names not explicitly listed in the
+        Schema if you use "glob" fields.
+        """
+
+        raise NotImplementedError
+
+    @abstractmethod
     def all_terms(self):
         """Yields (fieldname, text) tuples for every term in the index.
         """
         return self._storage
 
     def has_deletions(self):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.has_deletions()
 
     def doc_count(self):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.doc_count()
 
     def doc_count_all(self):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.doc_count_all()
 
     def is_deleted(self, docnum):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.is_deleted(docnum)
 
     def generation(self):
                                self._segment)
 
     def __contains__(self, term):
+        if self.is_closed:
+            raise ReaderClosed
         fieldname, text = term
         if fieldname not in self.schema:
             return False
         return (fieldname, text) in self._terms
 
     def close(self):
+        if self.is_closed:
+            raise ReaderClosed("Reader already closed")
         self._terms.close()
         self._perdoc.close()
         if self._graph:
         self.is_closed = True
 
     def stored_fields(self, docnum):
+        if self.is_closed:
+            raise ReaderClosed
         assert docnum >= 0
         schema = self.schema
         sfs = self._perdoc.stored_fields(docnum)
     # Delegate doc methods to the per-doc reader
 
     def all_doc_ids(self):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.all_doc_ids()
 
     def iter_docs(self):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.iter_docs()
 
     def all_stored_fields(self):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.all_stored_fields()
 
     def field_length(self, fieldname):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.field_length(fieldname)
 
     def min_field_length(self, fieldname):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.min_field_length(fieldname)
 
     def max_field_length(self, fieldname):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.max_field_length(fieldname)
 
     def doc_field_length(self, docnum, fieldname, default=0):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.doc_field_length(docnum, fieldname, default)
 
     def has_vector(self, docnum, fieldname):
+        if self.is_closed:
+            raise ReaderClosed
         return self._perdoc.has_vector(docnum, fieldname)
 
     #
 
     def _test_field(self, fieldname):
+        if self.is_closed:
+            raise ReaderClosed
         if fieldname not in self.schema:
             raise TermNotFound("No field %r" % fieldname)
         if self.schema[fieldname].format is None:
             raise TermNotFound("Field %r is not indexed" % fieldname)
 
+    def indexed_field_names(self):
+        return self._terms.indexed_field_names()
+
     def all_terms(self):
+        if self.is_closed:
+            raise ReaderClosed
         schema = self.schema
         return ((fieldname, text) for fieldname, text in self._terms.terms()
                 if fieldname in schema)
         return IndexReader.lexicon(self, fieldname)
 
     def __iter__(self):
+        if self.is_closed:
+            raise ReaderClosed
         schema = self.schema
         return ((term, terminfo) for term, terminfo in self._terms.items()
                 if term[0] in schema)
 
     def iter_from(self, fieldname, text):
+        self._test_field(fieldname)
         schema = self.schema
-        self._test_field(fieldname)
         text = self._text_to_bytes(fieldname, text)
         for term, terminfo in self._terms.items_from(fieldname, text):
             if term[0] not in schema:
     def postings(self, fieldname, text, scorer=None):
         from whoosh.matching.wrappers import FilterMatcher
 
+        if self.is_closed:
+            raise ReaderClosed
         if fieldname not in self.schema:
             raise TermNotFound("No  field %r" % fieldname)
         text = self._text_to_bytes(fieldname, text)
         return matcher
 
     def vector(self, docnum, fieldname, format_=None):
+        if self.is_closed:
+            raise ReaderClosed
         if fieldname not in self.schema:
             raise TermNotFound("No  field %r" % fieldname)
         vformat = format_ or self.schema[fieldname].vector
     # Graph methods
 
     def has_word_graph(self, fieldname):
+        if self.is_closed:
+            raise ReaderClosed
         if fieldname not in self.schema:
             return False
         if not self.schema[fieldname].spelling:
         return gr.has_root(fieldname)
 
     def word_graph(self, fieldname):
+        if self.is_closed:
+            raise ReaderClosed
         if not self.has_word_graph(fieldname):
             raise KeyError("No word graph for field %r" % fieldname)
         gr = self._get_graph()
         return fst.Node(gr, gr.root(fieldname))
 
     def terms_within(self, fieldname, text, maxdist, prefix=0):
+        if self.is_closed:
+            raise ReaderClosed
         if not self.has_word_graph(fieldname):
             # This reader doesn't have a graph stored, use the slow method
             return IndexReader.terms_within(self, fieldname, text, maxdist,
     # Column methods
 
     def has_column(self, fieldname):
+        if self.is_closed:
+            raise ReaderClosed
         coltype = self.schema[fieldname].column_type
         return coltype and self._perdoc.has_column(fieldname)
 
     def column_reader(self, fieldname, column=None, translate=True):
+        if self.is_closed:
+            raise ReaderClosed
         fieldobj = self.schema[fieldname]
         if not self.has_column(fieldname):
             raise Exception("No column for field %r" % fieldname)
     def __iter__(self):
         return iter([])
 
+    def indexed_field_names(self):
+        return []
+
     def all_terms(self):
         return iter([])
 
             # Yield the term
             yield term
 
+    def indexed_field_names(self):
+        names = set()
+        for r in self.reader():
+            names.update(r.indexed_field_names())
+        return iter(names)
+
     def all_terms(self):
         return self._merge_terms([r.all_terms() for r in self.readers])
 

src/whoosh/sorting.py

         elif self._use_column:
             return self._creader[docid]
         else:
-            return self._lists[docid] or None
+            return self._lists[docid] or [None]
 
     def key_for(self, matcher, docid):
         if self._use_vectors:

src/whoosh/writing.py

     return []
 
 
+def CLEAR(writer, segments):
+    """This policy DELETES all existing segments and only writes the new
+    segment.
+    """
+
+    return []
+
+
 # Customized sorting pool for postings
 
 class PostingPool(SortingPool):
         self.segment = segment
         self.limit = limitmb * 1024 * 1024
         self.currentsize = 0
+        self.fieldnames = set()
 
     def _new_run(self):
         path = "%s.run" % random_name()
         assert isinstance(item[1], bytes_type), "tbytes=%r" % item[1]
         if item[4] is not None:
             assert isinstance(item[4], bytes_type), "vbytes=%r" % item[4]
+        self.fieldnames.add(item[0])
         size = (28 + 4 * 5  # tuple = 28 + 4 * length
                 + 21 + len(item[0])  # fieldname = str = 21 + length
                 + 26 + len(item[1]) * 2  # text = unicode = 26 + 2 * length
 
         self.merge = True
         self.optimize = False
+        self.mergetype = None
 
     def __repr__(self):
         return "<%s %r>" % (self.__class__.__name__, self.newsegment)
         items = self._process_posts(items, startdoc, docmap)
         self.fieldwriter.add_postings(self.schema, lengths, items)
 
-    def write_per_doc(self, reader):
+    def write_per_doc(self, fieldnames, reader):
+        # Very bad hack: reader should be an IndexReader, but may be a
+        # PerDocumentReader if this is called from multiproc, where the code
+        # tries to be efficient by merging per-doc and terms separately.
+        # TODO: fix this!
+
         schema = self.schema
-
         if reader.has_deletions():
             docmap = {}
         else:
             docmap = None
 
         pdw = self.perdocwriter
-
         # Open all column readers
         cols = {}
-        for fieldname, fieldobj in schema.items():
+        for fieldname in fieldnames:
+            fieldobj = schema[fieldname]
             coltype = fieldobj.column_type
             if coltype and reader.has_column(fieldname):
                 creader = reader.column_reader(fieldname, coltype)
                 docmap[docnum] = self.docnum
 
             pdw.start_doc(self.docnum)
-            for fieldname, fieldobj in schema.items():
+            for fieldname in fieldnames:
+                fieldobj = schema[fieldname]
                 length = reader.doc_field_length(docnum, fieldname)
                 pdw.add_field(fieldname, fieldobj,
                               stored.get(fieldname), length)
     def add_reader(self, reader):
         self._check_state()
         basedoc = self.docnum
-        docmap = self.write_per_doc(reader)
+        ndxnames = set(fname for fname in reader.indexed_field_names()
+                       if fname in self.schema)
+        fieldnames = set(self.schema.names()) | ndxnames
+
+        docmap = self.write_per_doc(fieldnames, reader)
         self.add_postings_to_pool(reader, basedoc, docmap)
         self._added = True
 
     # pieces to allow MpWriter to call them individually
 
     def _merge_segments(self, mergetype, optimize, merge):
+        # The writer supports two ways of setting mergetype/optimize/merge:
+        # as attributes or as keyword arguments to commit(). Originally there
+        # were just the keyword arguments, but then I added the ability to use
+        # the writer as a context manager using "with", so the user no longer
+        # explicitly called commit(), hence the attributes
+        mergetype = mergetype if mergetype is not None else self.mergetype
         optimize = optimize if optimize is not None else self.optimize
         merge = merge if merge is not None else self.merge
 

tests/test_collector.py

 from __future__ import with_statement
 
-from whoosh import fields, query
-from whoosh.compat import u
+from whoosh import fields, qparser, query
+from whoosh.compat import b, u
 from whoosh.filedb.filestore import RamStorage
 
 
         r = s.search(query.Term("text", u("charlie")))
         assert [hit["id"] for hit in r] == [1, 3]
         assert len(r) == 2
+
+
+def test_filter_that_matches_no_document():
+    schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
+    ix = RamStorage().create_index(schema)
+    w = ix.writer()
+    w.add_document(id=1, text=u("alfa bravo charlie"))
+    w.add_document(id=2, text=u("alfa bravo delta"))
+    w.commit()
+
+    with ix.searcher() as s:
+        r = s.search(
+            query.Every(),
+            filter=query.Term("text", u("echo")))
+        assert [hit["id"] for hit in r] == []
+        assert len(r) == 0
+
+
+def test_daterange_matched_terms():
+    from whoosh.qparser import GtLtPlugin
+    from datetime import datetime
+
+    schema = fields.Schema(id=fields.KEYWORD(stored=True),
+                           body=fields.TEXT,
+                           num=fields.NUMERIC(stored=True, unique=True),
+                           created=fields.DATETIME(stored=True))
+    ix = RamStorage().create_index(schema)
+
+    with ix.writer() as w:
+        w.add_document(id=u"one", body=u"this and this", num='5',
+                       created=datetime.now())
+        w.add_document(id=u"three", body=u"that and that", num='7',
+                       created=datetime.now())
+        w.add_document(id=u"two", body=u"this and that", num='6',
+                       created=datetime.now())
+
+    with ix.searcher() as s:
+        parser = qparser.QueryParser("body", ix.schema)
+        parser.add_plugin(GtLtPlugin())
+        q = parser.parse(u"created:>='2013-07-01'")
+        r = s.search(q, terms=True)
+
+        assert r.has_matched_terms()
+        termlist = r[0].matched_terms()
+        assert len(termlist) == 1
+        pair = termlist[0]
+        assert pair[0] == "created"
+        assert pair[1] == b("(\x00\x00\x00\x00\x00\x80\xe1\xa3")
+

tests/test_fields.py

         check("{10.2 to 80.8}", "10.4", "80.6")
 
 
+def test_numeric_errors():
+    f = fields.NUMERIC(int, bits=16, signed=True)
+    schema = fields.Schema(f=f)
+
+    with pytest.raises(ValueError):
+        list(f.index(-32769))
+    with pytest.raises(ValueError):
+        list(f.index(32768))
+
+
 def test_nontext_document():
     schema = fields.Schema(id=fields.STORED, num=fields.NUMERIC,
                            date=fields.DATETIME, even=fields.BOOLEAN)
         assert not any(reader.is_deleted(hit.docnum) for hit in r)
 
 
+def test_boolean_multifield():
+    schema = fields.Schema(name=fields.TEXT(stored=True),
+                           bit=fields.BOOLEAN(stored=True))
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        w.add_document(name=u('audi'), bit=True)
+        w.add_document(name=u('vw'), bit=False)
+        w.add_document(name=u('porsche'), bit=False)
+        w.add_document(name=u('ferrari'), bit=True)
+        w.add_document(name=u('citroen'), bit=False)
+
+    with ix.searcher() as s:
+        qp = qparser.MultifieldParser(["name", "bit"], schema)
+        q = qp.parse(u("boop"))
+
+        r = s.search(q)
+        assert sorted(hit["name"] for hit in r) == ["audi", "ferrari"]
+        assert len(r) == 2
+
+
 def test_missing_field():
     schema = fields.Schema()
     ix = RamStorage().create_index(schema)
                        (b('FF'), 1, 0.5, b('\x00\x00\x00\x01')),
                        (b('SPRS'), 1, 1.0, b('\x00\x00\x00\x01')),
                        ]
+

tests/test_indexing.py

         assert [hit["id"] for hit in r] == [1, 0, 3, 2]
 
 
+def test_globfield_length_merge():
+    # Issue 343
 
+    schema = fields.Schema(title=fields.TEXT(stored=True),
+                           path=fields.ID(stored=True))
+    schema.add("*_text", fields.TEXT, glob=True)
 
+    with TempIndex(schema, "globlenmerge") as ix:
+        with ix.writer() as w:
+            w.add_document(title=u"First document", path=u"/a",
+                           content_text=u"This is the first document we've added!")
 
+        with ix.writer() as w:
+            w.add_document(title=u"Second document", path=u"/b",
+                           content_text=u"The second document is even more interesting!")
+
+        with ix.searcher() as s:
+            docnum = s.document_number(path="/a")
+            assert s.doc_field_length(docnum, "content_text") is not None
+
+            qp = qparser.QueryParser("content", schema)
+            q = qp.parse("content_text:document")
+            r = s.search(q)
+            paths = sorted(hit["path"] for hit in r)
+            assert paths == ["/a", "/b"]
+
+

tests/test_results.py

 from whoosh.codec.whoosh3 import W3Codec
 from whoosh.compat import u, xrange, text_type, permutations
 from whoosh.filedb.filestore import RamStorage
+from whoosh.util.testing import TempStorage
 
 
 def test_score_retrieval():
         assert len(r) == 1
         hit = r[0]
         assert hit["text"] == u("alfa bravo charlie")
+
+
+def test_closed_searcher():
+    from whoosh.reading import ReaderClosed
+
+    schema = fields.Schema(key=fields.KEYWORD(stored=True, sortable=True,
+                                              spelling=True))
+
+    with TempStorage() as st:
+        ix = st.create_index(schema)
+        with ix.writer() as w:
+            w.add_document(key=u("alfa"))
+            w.add_document(key=u("bravo"))
+            w.add_document(key=u("charlie"))
+            w.add_document(key=u("delta"))
+            w.add_document(key=u("echo"))
+
+        s = ix.searcher()
+        r = s.search(query.TermRange("key", "b", "d"))
+        s.close()
+        assert s.is_closed
+        with pytest.raises(ReaderClosed):
+            assert r[0]["key"] == "bravo"
+        with pytest.raises(ReaderClosed):
+            s.reader().column_reader("key")
+        with pytest.raises(ReaderClosed):
+            s.reader().has_word_graph("key")
+        with pytest.raises(ReaderClosed):
+            s.suggest("key", "brovo")
+
+        s = ix.searcher()
+        r = s.search(query.TermRange("key", "b", "d"))
+        assert r[0]
+        assert r[0]["key"] == "bravo"
+        c = s.reader().column_reader("key")
+        assert c[1] == "bravo"
+        assert s.reader().has_word_graph("key")
+        assert s.suggest("key", "brovo") == ["bravo"]

tests/test_searching.py

         q = query.Phrase("value", [u("little"), u("miss"), u("muffet"),
                                    u("sat"), u("tuffet")])
         m = q.matcher(s)
-        assert m.__class__.__name__ == "SpanNearMatcher"
+        assert m.__class__.__name__ == "SpanNear2Matcher"
 
         r = s.search(q)
         assert names(r) == ["A"]
         assert [hit.docnum for hit in r] == [2, 3]
 
 
+def test_terms_to_bytes():
+    schema = fields.Schema(a=fields.TEXT, b=fields.NUMERIC, id=fields.STORED)
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        w.add_document(id=0, a=u("alfa bravo"), b=100)
+        w.add_document(id=1, a=u("bravo charlie"), b=200)
+        w.add_document(id=2, a=u("charlie delta"), b=100)
+        w.add_document(id=3, a=u("delta echo"), b=200)
 
+    with ix.searcher() as s:
+        t1 = query.Term("b", 200)
+        t2 = query.Term("a", "bravo")
+        q = query.And([t1, t2])
+        r = s.search(q)
+        assert [hit["id"] for hit in r] == [1]
 
 
+def test_issue_334():
+    schema = fields.Schema(
+        kind=fields.ID(stored=True),
+        name=fields.ID(stored=True),
+        returns=fields.ID(stored=True),
+    )
+    ix = RamStorage().create_index(schema)
 
+    with ix.writer() as w:
 
+        with w.group():
+            w.add_document(kind=u('class'), name=u('Index'))
+            w.add_document(kind=u('method'), name=u('add document'),
+                           returns=u('void'))
+            w.add_document(kind=u('method'), name=u('add reader'),
+                           returns=u('void'))
+            w.add_document(kind=u('method'), name=u('close'),
+                           returns=u('void'))
+        with w.group():
+            w.add_document(kind=u('class'), name=u('Accumulator'))
+            w.add_document(kind=u('method'), name=u('add'),
+                           returns=u('void'))
+            w.add_document(kind=u('method'), name=u('get result'),
+                           returns=u('number'))
+        with w.group():
+            w.add_document(kind=u('class'), name=u('Calculator'))
+            w.add_document(kind=u('method'), name=u('add'),
+                           returns=u('number'))
+            w.add_document(kind=u('method'), name=u('add all'),
+                           returns=u('number'))
+            w.add_document(kind=u('method'), name=u('add some'),
+                           returns=u('number'))
+            w.add_document(kind=u('method'), name=u('multiply'),
+                           returns=u('number'))
+            w.add_document(kind=u('method'), name=u('close'),
+                           returns=u('void'))
+        with w.group():
+            w.add_document(kind=u('class'), name=u('Deleter'))
+            w.add_document(kind=u('method'), name=u('add'),
+                           returns=u('void'))
+            w.add_document(kind=u('method'), name=u('delete'),
+                           returns=u('void'))
+
+    with ix.searcher() as s:
+        pq = query.Term('kind', 'class')
+        cq = query.Term('name', 'Calculator')
+
+        q = query.NestedChildren(pq, cq) & query.Term('returns', 'void')
+        r = s.search(q)
+        assert len(r) == 1
+        assert r[0]["name"] == u("close")
+
+

tests/test_sorting.py

         assert r.groups("tag") == {None: [2, 4], 0: [3], 1: [0, 1]}
 
 
+def test_missing_overlap():
+    schema = fields.Schema(a=fields.NUMERIC(stored=True),
+                           b=fields.KEYWORD(stored=True))
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        w.add_document(a=0, b=u("one two"))
+        w.add_document(a=1)
+        w.add_document(a=2, b=u("two three"))
+        w.add_document(a=3)
+        w.add_document(a=4, b=u("three four"))
+
+    with ix.searcher() as s:
+        facet = sorting.FieldFacet("b", allow_overlap=True)
+        r = s.search(query.Every(), groupedby=facet)
+        target = {"one": [0], "two": [0, 2], "three": [2, 4],"four": [4],
+                  None: [1, 3]}
+        assert r.groups() == target
+
+
 def test_date_facet():
     from whoosh import columns
 

tests/test_writing.py

 import pytest
 
 from whoosh import analysis, fields, query, writing
-from whoosh.compat import u, xrange, text_type
+from whoosh.compat import b, u, xrange, text_type
 from whoosh.filedb.filestore import RamStorage
 from whoosh.util.testing import TempIndex
 
                                                   "kilo", "lima"]
 
 
-class test_add_reader_spelling():
+def test_add_reader_spelling():
     # Test whether add_spell_word() items get copied over in a merge
 
     # Because b is stemming and spelled, it will use add_spell_word()
                                                       "modeling opening polling pressing quitting "
                                                       "rendering ripping rolling timing tying undoing "
                                                       "writing yelling")
+
+def test_clear():
+    schema = fields.Schema(a=fields.KEYWORD)
+    ix = RamStorage().create_index(schema)
+
+    # Add some segments
+    with ix.writer() as w:
+        w.add_document(a=u("one two three"))
+        w.merge = False
+    with ix.writer() as w:
+        w.add_document(a=u("two three four"))
+        w.merge = False
+    with ix.writer() as w:
+        w.add_document(a=u("three four five"))
+        w.merge = False
+
+    # Clear
+    with ix.writer() as w:
+        w.add_document(a=u("foo bar baz"))
+        w.mergetype = writing.CLEAR
+
+    with ix.searcher() as s:
+        assert s.doc_count_all() == 1
+        assert list(s.reader().lexicon("a")) == [b("bar"), b("baz"), b("foo")]
+