Matt Chaput committed 3e8786b

Added mode keyword argument to highlight.top_fragments(). Fixes issue #156.
Made the mode an argument because there are theoretical situations where it might be useful to change it.

Comments (0)

Files changed (3)

src/whoosh/highlight.py

 # Highlighting
 
 def top_fragments(text, terms, analyzer, fragmenter, top=3,
-                  scorer=None, minscore=1):
+                  scorer=None, minscore=1, mode="query"):
     if scorer is None:
         scorer = BasicFragmentScorer()
     
     termset = frozenset(terms)
-    tokens = copyandmatchfilter(termset, analyzer(text, chars=True,
-                                                  keeporiginal=True))
+    tokens = analyzer(text, chars=True, keeporiginal=True, mode=mode)
+    tokens = copyandmatchfilter(termset, tokens)
     scored_frags = nlargest(top, ((scorer(f), f)
                                   for f in fragmenter(text, tokens)))
     return [sf for score, sf in scored_frags if score > minscore]
 
 
 def highlight(text, terms, analyzer, fragmenter, formatter, top=3,
-              scorer=None, minscore=1, order=FIRST):
+              scorer=None, minscore=1, order=FIRST, mode="query"):
     
     if scorer is None:
         scorer = BasicFragmentScorer()
     if type(scorer) is type:
         scorer = scorer()
     
-    fragments = top_fragments(text, terms, analyzer, fragmenter,
-                              top=top, scorer=scorer, minscore=minscore)
+    fragments = top_fragments(text, terms, analyzer, fragmenter, top=top,
+                              scorer=scorer, minscore=minscore, mode=mode)
     fragments.sort(key=order)
     return formatter(text, fragments)
     

src/whoosh/searching.py

         return set(self._termlists.keys())
 
     def highlights(self, n, fieldname, text=None, top=3, fragmenter=None,
-                   formatter=None, order=highlight.FIRST, force=True):
+                   formatter=None, order=highlight.FIRST, force=True,
+                   mode="query"):
         """Returns highlighted snippets for the document in the Nth position
         in the results. It is usually more convenient to call this method on a
         Hit object instead of the Results.
         
         return highlight.highlight(text, terms, analyzer, fragmenter,
                                    formatter, top=top, order=order,
-                                   scorer=self.fragment_scorer)
+                                   scorer=self.fragment_scorer, mode=mode)
 
     def key_terms(self, fieldname, docs=10, numterms=5,
                   model=classify.Bo1Model, normalize=True):
         return s
     
     def highlights(self, fieldname, text=None, top=3, fragmenter=None,
-                   formatter=None, order=highlight.FIRST, force=True):
+                   formatter=None, order=highlight.FIRST, force=True,
+                   mode="query"):
         """Returns highlighted snippets from the given field::
         
             r = searcher.search(myquery)
             None instead of highlights when the document does not contain any
             matching terms. This can save time by avoiding retokenizing large
             amounts of text.
+        :param mode: EXPERT: the mode argument to pass to the analyzer. The
+            default is "query". You should not need to change this unless you
+            want to get different analyzer behavior in highlights for some
+            reason.
         """
         
         return self.results.highlights(self.rank, fieldname, text=text,
                                        top=top, fragmenter=fragmenter,
                                        formatter=formatter, order=order,
-                                       force=force)
+                                       force=force, mode=mode)
     
     def more_like_this(self, fieldname, text=None, top=10, numterms=5,
                        model=classify.Bo1Model, normalize=True, filter=None):

tests/test_highlighting.py

     hit = ix.searcher().search(query.Term("text", "bravo"))[0]
     assert_raises(KeyError, hit.highlights, "tags")
 
+def test_multifilter():
+    iwf_for_index = analysis.IntraWordFilter(mergewords=True, mergenums=False)
+    iwf_for_query = analysis.IntraWordFilter(mergewords=False, mergenums=False)
+    mf = analysis.MultiFilter(index=iwf_for_index, query=iwf_for_query)
+    ana = analysis.RegexTokenizer() | mf | analysis.LowercaseFilter()
+    schema = fields.Schema(text=fields.TEXT(analyzer=ana, stored=True))
+    ix = RamStorage().create_index(schema)
+    w = ix.writer()
+    w.add_document(text=u("Our BabbleTron5000 is great"))
+    w.commit()
+    
+    with ix.searcher() as s:
+        hit = s.search(query.Term("text", "5000"))[0]
+        assert_equal(hit.highlights("text"), '<b class="match term0">BabbleTron5000</b> is great')
+    
 
 
 
 
-
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use the ↑ and ↓ arrow keys to navigate, and press Return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.