Commits

Matt Chaput committed 743b184

Several Matcher subclasses weren't overriding replace() properly. See issue #59.
Bumped version number.

  • Participants
  • Parent commits baf32d4

Comments (0)

Files changed (4)

File src/whoosh/__init__.py

 # limitations under the License.
 #===============================================================================
 
-__version__ = (1, 2, 2)
+__version__ = (1, 2, 3)
 
 
 def versionstring(build=True, extra=True):

File src/whoosh/matching.py

     
     def all_ids(self):
         return []
-
+    
+    def copy(self):
+        return self
+    
 
 class ListMatcher(Matcher):
     """Synthetic matcher backed by a list of IDs.
     def depth(self):
         return 1 + self.child.depth()
     
+    def _replacement(self, newchild):
+        return self.__class__(newchild, boost=self.boost)
+    
     def replace(self):
         r = self.child.replace()
-        if not r.is_active(): return NullMatcher()
-        if r is not self.child: return self.__class__(r)
-        return self
+        if not r.is_active():
+            return NullMatcher()
+        if r is not self.child:
+            try:
+                return self._replacement(r)
+            except TypeError, e:
+                raise TypeError("Class %s got exception %s trying "
+                                "to replace itself" % (self.__class__, e))
+        else:
+            return self
     
     def id(self):
         return self.child.id()
     def copy(self):
         return self.__class__(self.child.copy(), self.excluded, boost=self.boost)
     
+    def _replacement(self, newchild):
+        return self.__class__(newchild, self.excluded, boost=self.boost)
+    
     def _find_next(self):
         child = self.child
         excluded = self.excluded
         return self.__class__(self.child.copy(), self.limit,
                               weight=self._weight, missing=self.missing)
     
+    def _replacement(self, newchild):
+        """Return a new matcher of the same class wrapping ``newchild``.
+
+        Carries over this matcher's ``limit``, ``missing`` and weight so the
+        replacement is configured like the original.
+        """
+        # Pass the stored ``_weight`` value, matching what copy() passes;
+        # ``self.weight`` is not the attribute this class's copy() reads.
+        return self.__class__(newchild, self.limit, missing=self.missing,
+                              weight=self._weight)
+    
     def is_active(self):
         return self._id < self.limit
     
         super(ConstantScoreMatcher, self).__init__(child)
         self._score = score
     
+    def copy(self):
+        return self.__class__(self.child.copy(), score=self._score)
+    
+    def _replacement(self, newchild):
+        return self.__class__(newchild, score=self._score)
+    
     def quality(self):
         return self._score
     

File src/whoosh/spans.py

             self._find_next()
     
     def copy(self):
-        m = self.__class__(self.child.copy(), self.fn)
+        m = self.__class__(self.child.copy())
         m._spans = self._spans
         return m
     
+    def _replacement(self, newchild):
+        return self.__class__(newchild)
+    
     def _find_next(self):
         if not self.is_active():
             return
             self.limit = limit
             super(SpanFirst.SpanFirstMatcher, self).__init__(child)
         
+        def copy(self):
+            return self.__class__(self.child.copy(), limit=self.limit)
+        
+        def _replacement(self, newchild):
+            return self.__class__(newchild, limit=self.limit)
+        
         def _get_spans(self):
             return [span for span in self.child.spans()
                     if span.end <= self.limit]
             isect = IntersectionMatcher(a, b)
             super(SpanNear.SpanNearMatcher, self).__init__(isect)
         
+        def copy(self):
+            return self.__class__(self.a.copy(), self.b.copy(), slop=self.slop,
+                                  ordered=self.ordered, mindist=self.mindist)
+        
+        def replace(self):
+            if not self.is_active():
+                return NullMatcher()
+            return self
+        
         def _get_spans(self):
             slop = self.slop
             mindist = self.mindist
             return sorted(spans)
 
 
-class SpanProximity(SpanQuery):
-    def __init__(self, subqueries, window=10, minmatch=1):
-        self.subqueries = subqueries
-        self.window = window
-        self.minmatch = minmatch
+class SpanBiMatcher(SpanWrappingMatcher):
+    def copy(self):
+        return self.__class__(self.a.copy(), self.b.copy())
     
-    def matcher(self, searcher, exclude_docs=None):
-        matchers = [q.matcher(searcher, exclude_docs=exclude_docs)
-                    for q in self.subqueries]
-        matchers = [m for m in matchers if m.is_active()]
-        
-        if not matchers:
+    def replace(self):
+        if not self.is_active():
             return NullMatcher()
-        elif len(matchers) == 1:
-            return matchers[0]
-        else:
-            return SpanProximity.ProxMatcher(matchers, self.window, self.minmatch)
-    
-    class ProxMatcher(SpanWrappingMatcher):
-        def __init__(self, matchers, window, minmatch):
-            union = make_binary_tree(UnionMatcher, matchers)
-            self.matchers = matchers
-            self.window = window
-            self.minmatch = minmatch
-            super(SpanProximity.ProxMatcher, self).__init__(union)
-            
-        def _get_spans(self):
-            window = self.window
-            
-            id = self.child.id()
-            matching = [m for m in self.matchers if m.id() == id]
-            if len(matching) < self.minmatch:
-                return []
-            all_spans = [m.spans() for m in matching]
-            
-            mindists = {}
-            for i, alist in enumerate(all_spans):
-                for j in xrange(i+1, len(all_spans)):
-                    for a in alist:
-                        pre = a.start - window
-                        post = a.end + window
-                        
-                        blist = all_spans[j]
-                        for b in blist:
-                            if b.end < pre: continue
-                            if b.start > post: break
-                        
-                            dist = a.distance_to(b)
-                            coords = (i, j)
-                            if coords not in mindists or dist < mindists[coords]:
-                                mindists[coords] = dist
-                                if b.start > a.end:
-                                    break
-            print "mindists=", mindists
-            return []
-            
-                    
+        return self
 
 
 class SpanNot(SpanQuery):
         mb = self.b.matcher(searcher, exclude_docs=exclude_docs)
         return SpanNot.SpanNotMatcher(ma, mb)
     
-    class SpanNotMatcher(SpanWrappingMatcher):
+    class SpanNotMatcher(SpanBiMatcher):
         def __init__(self, a, b):
             self.a = a
             self.b = b
                     for q in self.subqs]
         return make_binary_tree(SpanOr.SpanOrMatcher, matchers)
     
-    class SpanOrMatcher(SpanWrappingMatcher):
+    class SpanOrMatcher(SpanBiMatcher):
         def __init__(self, a, b):
             self.a = a
             self.b = b
         mb = self.b.matcher(searcher, exclude_docs=exclude_docs)
         return SpanContains.SpanContainsMatcher(ma, mb)
     
-    class SpanContainsMatcher(SpanWrappingMatcher):
+    class SpanContainsMatcher(SpanBiMatcher):
         def __init__(self, a, b):
             self.a = a
             self.b = b
         mb = self.b.matcher(searcher, exclude_docs=exclude_docs)
         return SpanBefore.SpanBeforeMatcher(ma, mb)
         
-    class SpanBeforeMatcher(SpanWrappingMatcher):
+    class SpanBeforeMatcher(SpanBiMatcher):
         def __init__(self, a, b):
             self.a = a
             self.b = b
         mb = self.b.matcher(searcher, exclude_docs=exclude_docs)
         return SpanCondition.SpanConditionMatcher(ma, mb)
     
-    class SpanConditionMatcher(SpanWrappingMatcher):
+    class SpanConditionMatcher(SpanBiMatcher):
         def __init__(self, a, b):
             self.a = a
             isect = IntersectionMatcher(a, b)

File tests/test_searching.py

                                date=fields.DATETIME)
         ix = RamStorage().create_index(schema)
         w = ix.writer()
-        domain = u"abcdefghijk"
+        from string import ascii_letters
+        domain = unicode(ascii_letters)
+        
         dt = datetime.now()
         for i, letter in enumerate(domain):
             w.add_document(id=letter, num=i, date=dt + timedelta(days=i))
         self.assertEqual(nq.__class__, query.Not)
         q = nq.query
         self.assertEqual(q.__class__, query.Every)
-        self.assertEqual("".join(s.stored_fields(d)["id"] for d in q.docs(s)),
+        self.assertEqual("".join(h["id"] for h in s.search(q, limit=None)),
                          domain)
         self.assertEqual(list(nq.docs(s)), [])
         
         self.assertEqual(q.__class__, query.NumericRange)
         self.assertEqual(q.start, None)
         self.assertEqual(q.end, None)
-        self.assertEqual("".join(s.stored_fields(d)["id"] for d in q.docs(s)),
+        self.assertEqual("".join(h["id"] for h in s.search(q, limit=None)),
                          domain)
         self.assertEqual(list(nq.docs(s)), [])
         
         self.assertEqual(nq.__class__, query.Not)
         q = nq.query
         self.assertEqual(q.__class__, query.Every)
-        self.assertEqual("".join(s.stored_fields(d)["id"] for d in q.docs(s)),
+        self.assertEqual("".join(h["id"] for h in s.search(q, limit=None)),
                          domain)
         self.assertEqual(list(nq.docs(s)), [])