Commits

Matt Chaput committed cfbe657

Fixed a bug in MultiReader._merge_terms() with empty iterators. Fixes issue #267.

This took a long time to realize, but I was calling next() without catching
StopIteration when getting the first terms from each iterator. If the iterator was
empty, it would raise StopIteration. Since it wasn't caught, the exception propagated
upwards, and then, since the method itself is a generator, the StopIteration would be
silently swallowed and the generator would exit, making it seem like the method
just returned for no reason.

  • Participants
  • Parent commits 46a4059
  • Branches 2.4x

Comments (0)

Files changed (2)

File src/whoosh/reading.py

 
         current = []
         for it in iterlist:
-            term = next(it)
+            try:
+                term = next(it)
+            except StopIteration:
+                continue
             current.append((term, id(it)))
-        heapify(current)
-
         # Number of active iterators
         active = len(current)
+
+        # If only one iterator is active, just yield from it and return
+        if active == 1:
+            term, itid = current[0]
+            it = itermap[itid]
+            yield term
+            for term in it:
+                yield term
+            return
+
+        # Otherwise, do a streaming heap sort of the terms from the iterators
+        heapify(current)
         while active:
             # Peek at the first term in the sorted list
             term = current[0][0]

File tests/test_sorting.py

 from __future__ import with_statement
-from datetime import datetime, timedelta
+from datetime import datetime as dt
+from datetime import timedelta
 import random
 import gc
 
     schema = fields.Schema(id=fields.STORED, date=fields.DATETIME)
     ix = RamStorage().create_index(schema)
     w = ix.writer()
-    d1 = datetime(2011, 7, 13)
-    d2 = datetime(1984, 3, 29)
+    d1 = dt(2011, 7, 13)
+    d2 = dt(1984, 3, 29)
     w.add_document(id=0, date=d1)
     w.add_document(id=1, date=d1)
     w.add_document(id=2)
     schema = fields.Schema(id=fields.STORED, date=fields.DATETIME)
     ix = RamStorage().create_index(schema)
     w = ix.writer()
-    w.add_document(id=0, date=datetime(2001, 1, 15))
-    w.add_document(id=1, date=datetime(2001, 1, 10))
+    w.add_document(id=0, date=dt(2001, 1, 15))
+    w.add_document(id=1, date=dt(2001, 1, 10))
     w.add_document(id=2)
-    w.add_document(id=3, date=datetime(2001, 1, 3))
-    w.add_document(id=4, date=datetime(2001, 1, 8))
-    w.add_document(id=5, date=datetime(2001, 1, 6))
+    w.add_document(id=3, date=dt(2001, 1, 3))
+    w.add_document(id=4, date=dt(2001, 1, 8))
+    w.add_document(id=5, date=dt(2001, 1, 6))
     w.commit()
 
     with ix.searcher() as s:
-        rf = sorting.DateRangeFacet("date", datetime(2001, 1, 1),
-                                    datetime(2001, 1, 20), timedelta(days=5))
+        rf = sorting.DateRangeFacet("date", dt(2001, 1, 1),
+                                    dt(2001, 1, 20), timedelta(days=5))
         r = s.search(query.Every(), groupedby={"date": rf})
-        dt = datetime
         assert_equal(r.groups("date"),
                      {(dt(2001, 1, 1, 0, 0), dt(2001, 1, 6, 0, 0)): [3],
                       (dt(2001, 1, 6, 0, 0), dt(2001, 1, 11, 0, 0)): [1, 4, 5],
 
 def test_relative_daterange():
     from whoosh.support.relativedelta import relativedelta
-    dt = datetime
 
     schema = fields.Schema(id=fields.STORED, date=fields.DATETIME)
     ix = RamStorage().create_index(schema)
-    basedate = datetime(2001, 1, 1)
+    basedate = dt(2001, 1, 1)
     count = 0
     with ix.writer() as w:
-        while basedate < datetime(2001, 12, 1):
+        while basedate < dt(2001, 12, 1):
             w.add_document(id=count, date=basedate)
             basedate += timedelta(days=14, hours=16)
             count += 1
         assert_equal(keys, ["London", "Paris", "San Francisco", "Tel Aviv"])
 
 
+def test_issue_267():
+    count = 5
 
+    # Sort ID
+    schema = fields.Schema(id=fields.ID(stored=True),
+                           t1=fields.ID(stored=True),
+                           t2=fields.ID(stored=True))
+    ix = RamStorage().create_index(schema)
 
+    with ix.writer() as w:
+        for i in range(count):
+            w.add_document(id=unicode(i), t1=u'foo')
+    with ix.writer() as w:
+        w.add_document(id=u('100'), t1=u('bar'), t2=u('bbb'))
+        w.add_document(id=u('101'), t1=u('bar'), t2=u('aaa'))
+        w.add_document(id=u('102'), t1=u('bar'), t2=u('ccc'))
+        w.merge = False
 
+    with ix.searcher() as s:
+        assert not s.is_atomic()
+        results = s.search(query.Every(), sortedby=['t2'], limit=None)
+        for r in results:
+            print r, r.score
+        assert_equal(len(results), s.doc_count_all())
+        assert_equal(results[0].get('t2'), u'aaa')
+        assert_equal(results[1].get('t2'), u'bbb')
+        assert_equal(results[2].get('t2'), u'ccc')
 
+    # Sort datetime
+    schema = fields.Schema(id=fields.ID(stored=True),
+                           t1=fields.DATETIME(stored=True),
+                           t2=fields.DATETIME(stored=True))
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        for i in range(count):
+            w.add_document(id=unicode(i), t1=dt(1971, 1, 1))
+    with ix.writer() as w:
+        w.add_document(id=u'100', t1=dt(1973, 1, 1), t2=dt(1976, 1, 1))
+        w.add_document(id=u'101', t1=dt(1973, 1, 1), t2=dt(1975, 1, 1))
+        w.add_document(id=u'102', t1=dt(1973, 1, 1), t2=dt(1974, 1, 1))
+        w.merge = False
 
+    with ix.searcher() as s:
+        assert not s.is_atomic()
+        results = s.search(query.Every(), sortedby=['t2'], limit=None)
+        #for r in results:
+        #    print r
+        assert_equal(len(results), count + 3)
+        assert_equal(results[0].get('t2'), dt(1974, 1, 1))
+        assert_equal(results[1].get('t2'), dt(1975, 1, 1))
+        assert_equal(results[2].get('t2'), dt(1976, 1, 1))
 
 
 
 
 
 
+
+
+
+