Commits

Matt Chaput committed 16e341c

Replaced NullMatcher with a singleton.
Although instantiating NullMatchers isn't exactly on a hot path (it can only happen
during replace()), I always meant to get around to this because it makes sense, and
it will add some tiny speed improvement, so why not?
Fixes issue #181.

Fixed braindead test in test_results.
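
For reference, the pattern used here is a callable singleton: NullMatcherClass.__call__
returns the instance itself, so existing call sites that "instantiate" NullMatcher()
keep working unchanged. A minimal self-contained sketch of the idea, mirroring the
diff below (the Matcher base class is stubbed out here purely for illustration):

    class Matcher(object):
        """Stub standing in for whoosh.matching.Matcher."""

    class NullMatcherClass(Matcher):
        """Matcher with no postings which is never active."""

        def __call__(self):
            # Calling the singleton as if it were the old class returns
            # the singleton itself, so NullMatcher() call sites still work.
            return self

    # Module-level singleton bound to the old class name
    NullMatcher = NullMatcherClass()

    # "Instantiating" and referencing now yield the same object:
    assert NullMatcher() is NullMatcher
    assert isinstance(NullMatcher, NullMatcherClass)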


Files changed (4)

src/whoosh/matching.py

         return self.__eq__(other) or self.__gt__(other)
 
 
-class NullMatcher(Matcher):
+class NullMatcherClass(Matcher):
     """Matcher with no postings which is never active.
     """
 
+    def __call__(self):
+        return self
+
     def supports_block_quality(self):
         return True
 
         return 0
 
 
+# Singleton instance
+NullMatcher = NullMatcherClass()
+
+
 class ListMatcher(Matcher):
     """Synthetic matcher backed by a list of IDs.
     """

src/whoosh/query.py

 from whoosh.matching import (AndMaybeMatcher, DisjunctionMaxMatcher,
                              ListMatcher, IntersectionMatcher, InverseMatcher,
                              NullMatcher, RequireMatcher, UnionMatcher,
-                             WrappingMatcher, AndNotMatcher)
+                             WrappingMatcher, AndNotMatcher, NullMatcherClass)
 from whoosh.reading import TermNotFound
 from whoosh.support.times import datetime_to_long
 from whoosh.util import make_binary_tree, make_weighted_tree, methodcaller
 
     def matcher(self, searcher):
         m = self.child.matcher(searcher)
-        if isinstance(m, NullMatcher):
+        if isinstance(m, NullMatcherClass):
             return m
         else:
             ids = array("I", m.all_ids())
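
The isinstance() target has to change because isinstance() requires a class (or a
tuple of classes) as its second argument, and NullMatcher is now an instance. A small
sketch of the equivalent checks, assuming the imports above (the identity-check
variant is an alternative the commit does not use):

    from whoosh.matching import NullMatcher, NullMatcherClass

    m = NullMatcher  # e.g. what a child query's matcher() might return

    # Type checks must now name the class:
    assert isinstance(m, NullMatcherClass)

    # Since the class has exactly one instance, an identity check against
    # the singleton is equivalent:
    assert m is NullMatcher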

tests/test_quality.py

 def test_block():
     st = RamStorage()
     f = st.create_file("postfile")
-    
+
     b = current(f, 0)
     b.append(0, 1.0, '', 1)
     b.append(1, 2.0, '', 2)
     b.append(2, 12.0, '', 6)
     b.append(5, 6.5, '', 420)
     assert b
-    
+
     assert_equal(len(b), 4)
     assert_equal(list(b.ids), [0, 1, 2, 5])
     assert_equal(list(b.weights), [1.0, 2.0, 12.0, 6.5])
     assert_equal(b.max_length(), byte_to_length(length_to_byte(420)))
     assert_equal(b.max_weight(), 12.0)
     assert_equal(b.max_wol(), 2.0)
-    
+
     ti = FileTermInfo()
     ti.add_block(b)
     assert_equal(ti.weight(), 21.5)
     assert_equal(ti.max_length(), byte_to_length(length_to_byte(420)))
     assert_equal(ti.max_weight(), 12.0)
     assert_equal(ti.max_wol(), 2.0)
-    
+
     b.write(compression=3)
     f.close()
     f = st.open_file("postfile")
     bb = current.from_file(f, 0)
-    
+
     bb.read_ids()
     assert_equal(list(bb.ids), [0, 1, 2, 5])
     bb.read_weights()
     fpw.write(1, 2.0, fmt.encode(2.0), 2)
     fpw.write(2, 12.0, fmt.encode(12.0), 6)
     fpw.write(5, 6.5, fmt.encode(6.5), 420)
-    
+
     fpw.write(11, 1.5, fmt.encode(1.5), 1)
     fpw.write(12, 2.5, fmt.encode(2.5), 2)
     fpw.write(26, 100.5, fmt.encode(100.5), 21)
     fpw.write(50, 8.0, fmt.encode(8.0), 1020)
     ti = fpw.finish()
-    
+
     assert_equal(ti.weight(), 134.0)
     assert_equal(ti.doc_frequency(), 8)
     assert_equal(ti.min_length(), 1)
     w = ix.writer()
     w.add_document(t=u("alfa bravo charlie delta alfa bravo alfa"))
     w.commit()
-    
+
     with ix.reader() as r:
         ti = r.termsindex["t", u("alfa")]
         assert_equal(ti.weight(), 3.0)
         assert_equal(ti.max_length(), 7)
         assert_equal(ti.max_weight(), 3.0)
         assert_almost_equal(ti.max_wol(), 3.0 / 7)
-        assert_equal(ti.postings, ((0, ), (3.0, ), (b('\x00\x00\x00\x03'), )))
+        assert_equal(ti.postings, ((0,), (3.0,), (b('\x00\x00\x00\x03'),)))
 
     w = ix.writer()
     w.add_document(t=u("alfa charlie alfa"))
     w.commit()
-    
+
     with ix.reader() as r:
         ti = r.termsindex["t", u("alfa")]
         assert_equal(ti.weight(), 5.0)
         w = ix.writer()
         w.add_document(t=u(" ").join(["word"] * i))
         w.commit()
-        
+
         with ix.reader() as r:
             assert_equal(r.max_field_length("t"), _discreet(i))
-    
+
 def test_minmax_field_length():
     st = RamStorage()
     schema = fields.Schema(t=fields.TEXT)
         most = max(count, most)
         w.add_document(t=u(" ").join(["word"] * count))
         w.commit()
-        
+
         with ix.reader() as r:
             assert_equal(r.min_field_length("t"), _discreet(least))
             assert_equal(r.max_field_length("t"), _discreet(most))
         assert_equal(ti.max_length(), 5)
         assert_equal(ti.max_weight(), 3.0)
         assert_equal(ti.max_wol(), 3.0 / 4.0)
-        
+
         assert_equal(r.term_info("t", u("echo")).min_length(), 3)
-        
+
         assert_equal(r.doc_field_length(3, "t"), 3)
         assert_equal(r.min_field_length("t"), 3)
         assert_equal(r.max_field_length("t"), 6)
-        
+
     w = ix.writer()
     w.add_document(t=u("alfa"))
     w.add_document(t=u("bravo charlie"))
         assert_equal(ti.max_length(), 7)
         assert_equal(ti.max_weight(), 3.0)
         assert_equal(ti.max_wol(), 1.0)
-        
+
         assert_equal(r.term_info("t", u("echo")).min_length(), 3)
-        
+
         assert_equal(r.min_field_length("t"), 1)
         assert_equal(r.max_field_length("t"), 7)
 
     w.add_document(id=3, t=u("delta echo foxtrot"))
     w.add_document(id=4, t=u("echo foxtrot golf"))
     w.commit()
-    
+
     with ix.reader() as r:
         ti = r.term_info("t", u("delta"))
         assert_equal(ti.min_id(), 1)
         assert_equal(ti.max_id(), 3)
-        
+
         ti = r.term_info("t", u("alfa"))
         assert_equal(ti.min_id(), 0)
         assert_equal(ti.max_id(), 0)
-        
+
         ti = r.term_info("t", u("foxtrot"))
         assert_equal(ti.min_id(), 3)
         assert_equal(ti.max_id(), 4)
-        
+
     w = ix.writer()
     w.add_document(id=5, t=u("foxtrot golf hotel"))
     w.add_document(id=6, t=u("golf hotel alfa"))
     w.add_document(id=7, t=u("hotel alfa bravo"))
     w.add_document(id=8, t=u("alfa bravo charlie"))
     w.commit(merge=False)
-    
+
     with ix.reader() as r:
         ti = r.term_info("t", u("delta"))
         assert_equal(ti.min_id(), 1)
         assert_equal(ti.max_id(), 3)
-        
+
         ti = r.term_info("t", u("alfa"))
         assert_equal(ti.min_id(), 0)
         assert_equal(ti.max_id(), 8)
-        
+
         ti = r.term_info("t", u("foxtrot"))
         assert_equal(ti.min_id(), 3)
         assert_equal(ti.max_id(), 5)
     a = matching.ListMatcher([1, 2, 3], [0.25, 0.25, 0.25], scorer=sc)
     b = matching.ListMatcher([1, 2, 3], [0.25, 0.25, 0.25], scorer=sc)
     um = matching.UnionMatcher(a, b)
-    
+
     a2 = a.replace(0.5)
-    assert_equal(a2.__class__, matching.NullMatcher)
-    
+    assert_equal(a2.__class__, matching.NullMatcherClass)
+
     um2 = um.replace(0.5)
     assert_equal(um2.__class__, matching.IntersectionMatcher)
     um2 = um.replace(0.6)
-    assert_equal(um2.__class__, matching.NullMatcher)
+    assert_equal(um2.__class__, matching.NullMatcherClass)
 
     wm = matching.WrappingMatcher(um, boost=2.0)
     wm = wm.replace(0.5)

tests/test_results.py

     writer.add_document(title=u("Snow White"),
                         content=u("Snow white lived in the forest with seven dwarfs"))
     writer.commit()
-    
+
     with ix.searcher() as s:
         results = s.search(query.Term("content", "white"))
         assert_equal(len(results), 2)
-        assert_equal(results[0]['title'], u("Snow White"))
-        assert_equal(results[1]['title'], u("Miss Mary"))
+        assert_equal(results[0]['title'], u("Miss Mary"))
+        assert_equal(results[1]['title'], u("Snow White"))
         assert_not_equal(results.score(0), None)
         assert_not_equal(results.score(0), 0)
         assert_not_equal(results.score(0), 1)
     schema = fields.Schema(a=fields.TEXT(stored=True))
     st = RamStorage()
     ix = st.create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(a=u("alfa bravo charlie"))
     w.add_document(a=u("bravo charlie delta"))
     w.add_document(a=u("charlie delta echo"))
     w.add_document(a=u("delta echo foxtrot"))
     w.commit()
-    
+
     with ix.searcher() as s:
         r = s.search(qparser.QueryParser("a", None).parse(u("charlie")))
         assert_equal(len(r), 3)
         rcopy = r.copy()
         assert_equal(r.top_n, rcopy.top_n)
-    
+
 def test_resultslength():
     schema = fields.Schema(id=fields.ID(stored=True),
                            value=fields.TEXT)
     ix = RamStorage().create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(id=u("1"), value=u("alfa alfa alfa alfa alfa"))
     w.add_document(id=u("2"), value=u("alfa alfa alfa alfa"))
     w.add_document(id=u("5"), value=u("alfa"))
     w.add_document(id=u("6"), value=u("bravo"))
     w.commit()
-    
+
     with ix.searcher() as s:
         r = s.search(query.Term("value", u("alfa")), limit=3)
         assert_equal(len(r), 5)
     w.add_document(id=u("7"), value=u("juliet alfa bravo all"))
     w.add_document(id=u("8"), value=u("charlie charlie charlie all"))
     w.commit()
-    
+
     with ix.searcher() as s:
         def idsof(r):
             return "".join(hit["id"] for hit in r)
-        
+
         def check(r1, methodname, r2, ids):
             getattr(r1, methodname)(r2)
             assert_equal(idsof(r1), ids)
-        
+
         def rfor(t):
             return s.search(query.Term("value", t))
-        
+
         assert_equal(idsof(rfor(u("foxtrot"))), "345")
         check(rfor(u("foxtrot")), "extend", rfor("charlie"), "345812")
         check(rfor(u("foxtrot")), "filter", rfor("juliet"), "5")
     w.add_document(id="6", words=u("charlie bottom"))
     w.add_document(id="7", words=u("charlie bottom"))
     w.commit()
-    
+
     with ix.searcher() as s:
         def check(r, target):
             result = "".join(s.stored_fields(d)["id"] for d in r.docs())
             assert_equal(result, target)
-        
+
         r = s.search(query.Term("words", u("alfa")))
         r.filter(s.search(query.Term("words", u("bottom"))))
         check(r, "4")
     w.add_document(id=4, words=u("delta echo foxtrot"))
     w.add_document(id=5, words=u("echo foxtrot golf"))
     w.commit()
-    
+
     with ix.searcher() as s:
         # Get an empty results object
         r1 = s.search(query.Term("words", u("hotel")))
     w.add_document(id=4, text=u("delta bravo alfa"))
     w.add_document(id=5, text=u("foxtrot sierra tango"))
     w.commit()
-    
+
     hits = lambda result: [hit["id"] for hit in result]
-    
+
     with ix.searcher() as s:
         r1 = s.search(query.Term("text", u("alfa")), filter=set([1, 4]))
         assert_equal(r1._filter, set([1, 4]))
         assert_equal(len(r1.top_n), 0)
-        
+
         r2 = s.search(query.Term("text", u("bravo")))
         assert_equal(len(r2.top_n), 3)
         assert_equal(hits(r2), [1, 2, 4])
-        
+
         r3 = r1.copy()
         assert_equal(r3._filter, set([1, 4]))
         assert_equal(len(r3.top_n), 0)
 
 def test_pages():
     from whoosh.scoring import Frequency
-    
+
     schema = fields.Schema(id=fields.ID(stored=True), c=fields.TEXT)
     ix = RamStorage().create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(id=u("1"), c=u("alfa alfa alfa alfa alfa alfa"))
     w.add_document(id=u("2"), c=u("alfa alfa alfa alfa alfa"))
     w.add_document(id=u("5"), c=u("alfa alfa"))
     w.add_document(id=u("6"), c=u("alfa"))
     w.commit()
-    
+
     with ix.searcher(weighting=Frequency) as s:
         q = query.Term("c", u("alfa"))
         r = s.search(q)
         assert_equal([d["id"] for d in r], ["1", "2", "3", "4", "5", "6"])
         r = s.search_page(q, 2, pagelen=2)
         assert_equal([d["id"] for d in r], ["3", "4"])
-        
+
         r = s.search_page(q, 2, pagelen=4)
         assert_equal(r.total, 6)
         assert_equal(r.pagenum, 2)
     for char in u("abcdefghijklmnopqrstuvwxyz"):
         w.add_document(key=char)
     w.commit()
-    
+
     with ix.searcher() as s:
         r = s.search(query.Every(), limit=5)
         assert_equal(r[6:7], [])
 
 def test_page_counts():
     from whoosh.scoring import Frequency
-    
+
     schema = fields.Schema(id=fields.ID(stored=True))
     st = RamStorage()
     ix = st.create_index(schema)
-    
+
     w = ix.writer()
     for i in xrange(10):
         w.add_document(id=text_type(i))
     w.commit()
-    
+
     with ix.searcher(weighting=Frequency) as s:
         q = query.Every("id")
-        
+
         r = s.search(q)
         assert_equal(len(r), 10)
-        
+
         assert_raises(ValueError, s.search_page, q, 0)
-        
+
         r = s.search_page(q, 1, 5)
         assert_equal(len(r), 10)
         assert_equal(r.pagecount, 2)
-        
+
         r = s.search_page(q, 1, 5)
         assert_equal(len(r), 10)
         assert_equal(r.pagecount, 2)
-        
+
         r = s.search_page(q, 2, 5)
         assert_equal(len(r), 10)
         assert_equal(r.pagecount, 2)
         assert_equal(r.pagenum, 2)
-        
+
         r = s.search_page(q, 1, 10)
         assert_equal(len(r), 10)
         assert_equal(r.pagecount, 1)
 def test_resultspage():
     schema = fields.Schema(id=fields.STORED, content=fields.TEXT)
     ix = RamStorage().create_index(schema)
-    
+
     domain = ("alfa", "bravo", "bravo", "charlie", "delta")
     w = ix.writer()
     for i, lst in enumerate(permutations(domain, 3)):
         w.add_document(id=text_type(i), content=u(" ").join(lst))
     w.commit()
-    
+
     with ix.searcher() as s:
         q = query.Term("content", u("bravo"))
         r = s.search(q, limit=10)
         tops = list(r)
-        
+
         rp = s.search_page(q, 1, pagelen=5)
         assert_equal(rp.scored_length(), 5)
         assert_equal(list(rp), tops[0:5])
         assert_equal(rp[10:], [])
-        
+
         rp = s.search_page(q, 2, pagelen=5)
         assert_equal(list(rp), tops[5:10])
-        
+
         rp = s.search_page(q, 1, pagelen=10)
         assert_equal(len(rp), 54)
         assert_equal(rp.pagecount, 6)
         rp = s.search_page(q, 6, pagelen=10)
         assert_equal(len(list(rp)), 4)
         assert rp.is_last_page()
-        
+
         assert_raises(ValueError, s.search_page, q, 0)
         assert_raises(ValueError, s.search_page, q, 7)
-        
+
         rp = s.search_page(query.Term("content", "glonk"), 1)
         assert_equal(len(rp), 0)
         assert rp.is_last_page()
     w = ix.writer()
     w.add_document(text=u("Hello"))
     w.commit()
-    
+
     r = ix.searcher().search(query.Term("text", "hello"))
     hl = highlight.Highlighter()
     ucf = highlight.UppercaseFormatter()
     r.formatter = ucf
     print r.formatter
     assert hl.formatter is ucf
-    
+
 def test_snippets():
     ana = analysis.StemmingAnalyzer()
     schema = fields.Schema(text=fields.TEXT(stored=True, analyzer=ana))
     w.add_document(text=u("Keying everything gives quick, immediate results. But it can become difficult to tweak the animation later, especially for complex characters."))
     w.add_document(text=u("Copy the current pose to create the next one: pose the character, key everything, then copy the keyframe in the playbar to another frame, and key everything at that frame."))
     w.commit()
-    
+
     target = ["Set KEY frames on everything that's KEY-able",
               "Copy the current pose to create the next one: pose the character, KEY everything, then copy the keyframe in the playbar to another frame, and KEY everything at that frame",
               "KEYING everything gives quick, immediate results"]
-    
+
     with ix.searcher() as s:
         qp = qparser.QueryParser("text", ix.schema)
         q = qp.parse(u("key"))
         r = s.search(q, terms=True)
         r.fragmenter = highlight.SentenceFragmenter()
         r.formatter = highlight.UppercaseFormatter()
-        
+
         assert_equal(sorted([hit.highlights("text", top=1) for hit in r]), sorted(target))
 
 def test_keyterms():
     st = RamStorage()
     ix = st.create_index(schema)
     w = ix.writer()
-    w.add_document(path=u("a"),content=u("This is some generic content"))
-    w.add_document(path=u("b"),content=u("This is some distinctive content"))
+    w.add_document(path=u("a"), content=u("This is some generic content"))
+    w.add_document(path=u("b"), content=u("This is some distinctive content"))
     w.commit()
-    
+
     with ix.searcher() as s:
         docnum = s.document_number(path=u("b"))
         keyterms = list(s.key_terms([docnum], "content"))
         assert len(keyterms) > 0
         assert_equal(keyterms[0][0], "distinctive")
-        
+
         r = s.search(query.Term("path", u("b")))
         keyterms2 = list(r.key_terms("content"))
         assert len(keyterms2) > 0
         assert_equal(keyterms2[0][0], "distinctive")
-    
+
 def test_lengths():
     schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
     ix = RamStorage().create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(id=1, text=u("alfa bravo charlie delta echo"))
     w.add_document(id=2, text=u("bravo charlie delta echo foxtrot"))
     w.add_document(id=7, text=u("golf needle india juliet kilo"))
     w.add_document(id=8, text=u("hotel india juliet needle lima"))
     w.commit()
-    
+
     with ix.searcher() as s:
         q = query.Or([query.Term("text", u("needle")), query.Term("text", u("charlie"))])
         r = s.search(q, limit=2)
                 count += 1
             w.add_document(text=u(" ").join(ls))
         w.commit(merge=False)
-    
+
     with ix.searcher() as s:
         q = query.Or([query.Term("text", u("bravo")), query.Term("text", u("charlie"))])
         r = s.search(q, limit=None)
         assert_equal(len(r), count)
-        
+
         r = s.search(q, limit=3)
         assert_equal(len(r), count)
 
     for ls in permutations(domain, 3):
         w.add_document(text=u(" ").join(ls))
     w.commit()
-    
+
     with ix.searcher() as s:
         q = query.Term("text", u("bravo"))
         last = []
     w.add_document(text=u("charlie delta echo"))
     w.add_document(text=u("delta echo foxtrot"))
     w.commit()
-    
+
     q = query.Or([query.Term("text", "bravo"), query.Term("text", "charlie")])
     r = ix.searcher().search(q, terms=True)
     for hit in r:
     w.add_document(text=u("charlie delta echo"))
     w.add_document(text=u("delta echo foxtrot"))
     w.commit()
-    
+
     qp = qparser.QueryParser("text", ix.schema)
     q = qp.parse(u("(bravo AND charlie) OR foxtrot OR missing"))
     r = ix.searcher().search(q, terms=True)
-    
+
     def txts(tset):
         return sorted(t[1] for t in tset)
-    
+
     assert_equal(txts(r.matched_terms()), ["bravo", "charlie", "foxtrot"])
     for hit in r:
         value = hit["text"]