Matt Chaput avatar Matt Chaput committed cfbe657

Fixed a bug in MultiReader._merge_terms() with empty iterators. Fixes issue #267.

This took a long time to realize, but I was calling next() without catching
StopIteration when getting the first terms from each iterator. If an iterator was
empty, next() would raise StopIteration. Since it wasn't caught, the exception propagated
upwards, and then since the method itself is a generator, the StopIteration would be
silently swallowed and the generator would exit, making it seem like the method would
just return for no reason.

Comments (0)

Files changed (2)

src/whoosh/reading.py

 
         current = []
         for it in iterlist:
-            term = next(it)
+            try:
+                term = next(it)
+            except StopIteration:
+                continue
             current.append((term, id(it)))
-        heapify(current)
-
         # Number of active iterators
         active = len(current)
+
+        # If only one iterator is active, just yield from it and return
+        if active == 1:
+            term, itid = current[0]
+            it = itermap[itid]
+            yield term
+            for term in it:
+                yield term
+            return
+
+        # Otherwise, do a streaming heap sort of the terms from the iterators
+        heapify(current)
         while active:
             # Peek at the first term in the sorted list
             term = current[0][0]

tests/test_sorting.py

 from __future__ import with_statement
-from datetime import datetime, timedelta
+from datetime import datetime as dt
+from datetime import timedelta
 import random
 import gc
 
     schema = fields.Schema(id=fields.STORED, date=fields.DATETIME)
     ix = RamStorage().create_index(schema)
     w = ix.writer()
-    d1 = datetime(2011, 7, 13)
-    d2 = datetime(1984, 3, 29)
+    d1 = dt(2011, 7, 13)
+    d2 = dt(1984, 3, 29)
     w.add_document(id=0, date=d1)
     w.add_document(id=1, date=d1)
     w.add_document(id=2)
     schema = fields.Schema(id=fields.STORED, date=fields.DATETIME)
     ix = RamStorage().create_index(schema)
     w = ix.writer()
-    w.add_document(id=0, date=datetime(2001, 1, 15))
-    w.add_document(id=1, date=datetime(2001, 1, 10))
+    w.add_document(id=0, date=dt(2001, 1, 15))
+    w.add_document(id=1, date=dt(2001, 1, 10))
     w.add_document(id=2)
-    w.add_document(id=3, date=datetime(2001, 1, 3))
-    w.add_document(id=4, date=datetime(2001, 1, 8))
-    w.add_document(id=5, date=datetime(2001, 1, 6))
+    w.add_document(id=3, date=dt(2001, 1, 3))
+    w.add_document(id=4, date=dt(2001, 1, 8))
+    w.add_document(id=5, date=dt(2001, 1, 6))
     w.commit()
 
     with ix.searcher() as s:
-        rf = sorting.DateRangeFacet("date", datetime(2001, 1, 1),
-                                    datetime(2001, 1, 20), timedelta(days=5))
+        rf = sorting.DateRangeFacet("date", dt(2001, 1, 1),
+                                    dt(2001, 1, 20), timedelta(days=5))
         r = s.search(query.Every(), groupedby={"date": rf})
-        dt = datetime
         assert_equal(r.groups("date"),
                      {(dt(2001, 1, 1, 0, 0), dt(2001, 1, 6, 0, 0)): [3],
                       (dt(2001, 1, 6, 0, 0), dt(2001, 1, 11, 0, 0)): [1, 4, 5],
 
 def test_relative_daterange():
     from whoosh.support.relativedelta import relativedelta
-    dt = datetime
 
     schema = fields.Schema(id=fields.STORED, date=fields.DATETIME)
     ix = RamStorage().create_index(schema)
-    basedate = datetime(2001, 1, 1)
+    basedate = dt(2001, 1, 1)
     count = 0
     with ix.writer() as w:
-        while basedate < datetime(2001, 12, 1):
+        while basedate < dt(2001, 12, 1):
             w.add_document(id=count, date=basedate)
             basedate += timedelta(days=14, hours=16)
             count += 1
         assert_equal(keys, ["London", "Paris", "San Francisco", "Tel Aviv"])
 
 
+def test_issue_267():
+    count = 5
 
+    # Sort ID
+    schema = fields.Schema(id=fields.ID(stored=True),
+                           t1=fields.ID(stored=True),
+                           t2=fields.ID(stored=True))
+    ix = RamStorage().create_index(schema)
 
+    with ix.writer() as w:
+        for i in range(count):
+            w.add_document(id=unicode(i), t1=u'foo')
+    with ix.writer() as w:
+        w.add_document(id=u('100'), t1=u('bar'), t2=u('bbb'))
+        w.add_document(id=u('101'), t1=u('bar'), t2=u('aaa'))
+        w.add_document(id=u('102'), t1=u('bar'), t2=u('ccc'))
+        w.merge = False
 
+    with ix.searcher() as s:
+        assert not s.is_atomic()
+        results = s.search(query.Every(), sortedby=['t2'], limit=None)
+        for r in results:
+            print r, r.score
+        assert_equal(len(results), s.doc_count_all())
+        assert_equal(results[0].get('t2'), u'aaa')
+        assert_equal(results[1].get('t2'), u'bbb')
+        assert_equal(results[2].get('t2'), u'ccc')
 
+    # Sort datetime
+    schema = fields.Schema(id=fields.ID(stored=True),
+                           t1=fields.DATETIME(stored=True),
+                           t2=fields.DATETIME(stored=True))
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        for i in range(count):
+            w.add_document(id=unicode(i), t1=dt(1971, 1, 1))
+    with ix.writer() as w:
+        w.add_document(id=u'100', t1=dt(1973, 1, 1), t2=dt(1976, 1, 1))
+        w.add_document(id=u'101', t1=dt(1973, 1, 1), t2=dt(1975, 1, 1))
+        w.add_document(id=u'102', t1=dt(1973, 1, 1), t2=dt(1974, 1, 1))
+        w.merge = False
 
+    with ix.searcher() as s:
+        assert not s.is_atomic()
+        results = s.search(query.Every(), sortedby=['t2'], limit=None)
+        #for r in results:
+        #    print r
+        assert_equal(len(results), count + 3)
+        assert_equal(results[0].get('t2'), dt(1974, 1, 1))
+        assert_equal(results[1].get('t2'), dt(1975, 1, 1))
+        assert_equal(results[2].get('t2'), dt(1976, 1, 1))
 
 
 
 
 
 
+
+
+
+
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.