Commits

Matt Chaput  committed 856591e

TimeLimitCollector uses signals on UNIX instead of just setting a flag.

  • Participants
  • Parent commits 9f5e774

Comments (0)

Files changed (4)

File src/whoosh/collectors.py

 generally a good idea to create a new collector for each search.
 """
 
+import os
+import signal
 import threading
 from array import array
 from bisect import insort
 
         # We can still get partial results from the collector
         print(tlc.results())
+
+    IMPORTANT: On Unix systems (systems where signal.SIGALRM is defined), the
+    code uses signals to stop searching immediately when the time limit is
+    reached. On Windows, the OS does not support this functionality, so the
+    search only checks the time between found documents; if a matcher is
+    slow, the search could exceed the time limit.
     """
 
     def __init__(self, child, timelimit, greedy=False):
         self.child = child  # wrapped collector; prepare() is forwarded to it
         self.timelimit = timelimit  # interval handed to threading.Timer in prepare()
         self.greedy = greedy  # read by collect_matches(); its effect is not visible in this hunk
+        self.use_alarm = hasattr(signal, "SIGALRM")  # True only where the signal module defines SIGALRM
 
     def prepare(self, top_searcher, q, context):
         self.child.prepare(top_searcher, q, context)
 
+        self.timedout = False  # reset the flag before the timer is started
+        if self.use_alarm:  # NOTE(review): signal.signal() only works in the main thread - confirm callers
+            signal.signal(signal.SIGALRM, self._was_signaled)  # replaces any existing SIGALRM handler; the old one is not saved here
+
         # Start a timer thread. If the timer fires, it will call this object's
         # _timestop() method
-        self.timedout = False
         self.timer = threading.Timer(self.timelimit, self._timestop)
         self.timer.start()
 
     def _timestop(self):
+        # Called when the timer expires
         self.timer = None
         # Set an attribute that will be noticed in the collect_matches() loop
         self.timedout = True
 
+        if self.use_alarm:
+            os.kill(os.getpid(), signal.SIGALRM)  # signal our own process so the handler set in prepare() runs
+
+    def _was_signaled(self, signum, frame):
+        raise TimeLimit  # SIGALRM handler installed by prepare(); signum and frame are ignored
+
     def collect_matches(self):
         child = self.child
         greedy = self.greedy

File src/whoosh/searching.py

             # Wrap it with a TimeLimitCollector with a time limit of
             # 10.5 seconds
             from whoosh.collectors import TimeLimitCollector
-            c = TimeLimitedCollector(c, 10.5)
+            c = TimeLimitCollector(c, 10.5)
 
             # Search using the custom collector
             results = mysearcher.search_with_collector(myquery, c)

File tests/test_collector.py

 from __future__ import with_statement
 
-from whoosh import fields, qparser, query
-from whoosh.compat import b, u
+import pytest
+
+from whoosh import collectors, fields, query, searching
+from whoosh.compat import b, u, xrange
 from whoosh.filedb.filestore import RamStorage
+from whoosh.util.testing import TempIndex
 
 
 def test_add():
         assert len(r) == 0
 
 
+def test_timelimit():
+    schema = fields.Schema(text=fields.TEXT)
+    ix = RamStorage().create_index(schema)
+    w = ix.writer()
+    for _ in xrange(50):  # 50 documents, each containing the single term "alfa"
+        w.add_document(text=u("alfa"))
+    w.commit()

+    import time
+    from whoosh import collectors, matching  # collectors is also imported at module level

+    class SlowMatcher(matching.WrappingMatcher):
+        def next(self):
+            time.sleep(0.02)  # 20 ms delay before every advance of the wrapped matcher
+            self.child.next()
+
+    class SlowQuery(query.WrappingQuery):
+        def matcher(self, searcher, context=None):
+            return SlowMatcher(self.child.matcher(searcher, context))  # wrap the child's matcher in the slow one
+
+    with ix.searcher() as s:
+        oq = query.Term("text", u("alfa"))  # plain query
+        sq = SlowQuery(oq)  # same query, but matched through SlowMatcher
+
+        col = collectors.TimeLimitCollector(s.collector(limit=None),
+                                            timelimit=0.1)
+        with pytest.raises(searching.TimeLimit):  # exception looked up via the searching module
+            s.search_with_collector(sq, col)
+
+        col = collectors.TimeLimitCollector(s.collector(limit=40),
+                                            timelimit=0.1)
+        with pytest.raises(collectors.TimeLimit):  # exception looked up via the collectors module
+            s.search_with_collector(sq, col)
+
+        col = collectors.TimeLimitCollector(s.collector(limit=None),
+                                            timelimit=0.25)
+        try:
+            s.search_with_collector(sq, col)
+            assert False  # Shouldn't get here
+        except collectors.TimeLimit:
+            r = col.results()  # partial results are still requested after the timeout
+            assert r.scored_length() > 0
+
+        col = collectors.TimeLimitCollector(s.collector(limit=None),
+                                            timelimit=0.5)
+        s.search_with_collector(oq, col)  # plain query: no TimeLimit is expected here
+        assert col.results().runtime < 0.5
+
+
+@pytest.mark.skipif("not hasattr(__import__('signal'), 'SIGALRM')")  # skip where SIGALRM is not defined
+def test_timelimit_alarm():
+    import time
+    from whoosh import matching
+
+    class SlowMatcher(matching.Matcher):
+        def __init__(self):
+            self._id = 0
+
+        def id(self):
+            return self._id
+
+        def is_active(self):
+            return self._id == 0  # active only until next() has completed once
+
+        def next(self):
+            time.sleep(10)  # far longer than the 0.2 time limit used below
+            self._id = 1
+
+        def score(self):
+            return 1.0
+
+    class SlowQuery(query.Query):
+        def matcher(self, searcher, context=None):
+            return SlowMatcher()  # ignores searcher and context
+
+    schema = fields.Schema(text=fields.TEXT)
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        w.add_document(text=u("Hello"))
+
+    with ix.searcher() as s:
+        q = SlowQuery()
+
+        c = s.collector()
+        c = collectors.TimeLimitCollector(c, 0.2)  # wrap the default collector with a 0.2 time limit
+        with pytest.raises(searching.TimeLimit):  # the search is expected to end with TimeLimit
+            _ = s.search_with_collector(q, c)
+
+
+
+

File tests/test_searching.py

         r = s.search(query.Term("text", "bravo"), filter=fq)
         assert [d["id"] for d in r] == [1, 2, 5, 7, ]
 
-def test_timelimit():
-    schema = fields.Schema(text=fields.TEXT)
-    ix = RamStorage().create_index(schema)
-    w = ix.writer()
-    for _ in xrange(50):
-        w.add_document(text=u("alfa"))
-    w.commit()
-
-    import time
-    from whoosh import collectors, matching
-
-    class SlowMatcher(matching.WrappingMatcher):
-        def next(self):
-            time.sleep(0.02)
-            self.child.next()
-
-    class SlowQuery(query.WrappingQuery):
-        def matcher(self, searcher, context=None):
-            return SlowMatcher(self.child.matcher(searcher, context))
-
-    with ix.searcher() as s:
-        oq = query.Term("text", u("alfa"))
-        sq = SlowQuery(oq)
-
-        col = collectors.TimeLimitCollector(s.collector(limit=None),
-                                            timelimit=0.1)
-        with pytest.raises(searching.TimeLimit):
-            s.search_with_collector(sq, col)
-
-        col = collectors.TimeLimitCollector(s.collector(limit=40),
-                                            timelimit=0.1)
-        with pytest.raises(collectors.TimeLimit):
-            s.search_with_collector(sq, col)
-
-        col = collectors.TimeLimitCollector(s.collector(limit=None),
-                                            timelimit=0.25)
-        try:
-            s.search_with_collector(sq, col)
-            assert False  # Shouldn't get here
-        except collectors.TimeLimit:
-            r = col.results()
-            assert r.scored_length() > 0
-
-        col = collectors.TimeLimitCollector(s.collector(limit=None),
-                                            timelimit=0.5)
-        s.search_with_collector(oq, col)
-        assert col.results().runtime < 0.5
-
 
 def test_fieldboost():
     schema = fields.Schema(id=fields.STORED, a=fields.TEXT, b=fields.TEXT)