Commits

sharky93  committed 74d2eaf

Adds n-gram search and result highlighting, and tells the user which terms in the search query were skipped

  • Participants
  • Parent commits 7ec50a7

Comments (0)

Files changed (4)

File MoinMoin/apps/frontend/views.py

 from babel import Locale
 
 from whoosh.query import Term, Prefix, And, Or, DateRange, Every
+from whoosh.analysis import StandardAnalyzer
 
 from MoinMoin import log
 logging = log.getLogger(__name__)
         query = search_form['q'].value
         history = bool(request.values.get('history'))
     if valid or ajax:
+        # Most fields in the schema use a StandardAnalyzer, which drops very common words (stop words).
+        # Collect the stop words found in the query so they can be reported to the user.
+        analyzer = StandardAnalyzer()
+        omitted_words = [token.text for token in analyzer(query, removestops=False) if token.stop]
+
         idx_name = ALL_REVS if history else LATEST_REVS
-        qp = flaskg.storage.query_parser([NAME_EXACT, NAME, SUMMARY, CONTENT], idx_name=idx_name)
+        qp = flaskg.storage.query_parser([NAME_EXACT, NAME, SUMMARY, CONTENT, CONTENTNGRAM], idx_name=idx_name)
         q = qp.parse(query)
 
         _filter = None
             _filter = Or(terms)
 
         with flaskg.storage.indexer.ix[idx_name].searcher() as searcher:
+             # terms=True makes the search record which terms matched, so the search template can highlight them.
             flaskg.clock.start('search')
-            results = searcher.search(q, filter=_filter, limit=100)
+            results = searcher.search(q, filter=_filter, limit=100, terms=True)
             flaskg.clock.stop('search')
             flaskg.clock.start('search suggestions')
             name_suggestions = [word for word, score in results.key_terms(NAME, docs=20, numterms=10)]
                                    word_suggestions=u', '.join(word_suggestions),
                                    name_suggestions=u', '.join(name_suggestions),
                                    content_suggestions=u', '.join(content_suggestions),
+                                   omitted_words=u', '.join(omitted_words),
                 )
             else:
                 html = render_template('search.html',
                                    query=query,
                                    medium_search_form=search_form,
                                    item_name=item_name,
+                                   omitted_words=u', '.join(omitted_words),
                 )
             flaskg.clock.stop('search render')
     else:

File MoinMoin/constants/keys.py

 ITEMLINKS = u"itemlinks"
 ITEMTRANSCLUSIONS = u"itemtransclusions"
 TAGS = u"tags"
+CONTENTNGRAM = u"contentngram"
 
 ACTION = u"action"
 ADDRESS = u"address"

File MoinMoin/storage/middleware/indexing.py

 from flask import g as flaskg
 from flask import current_app as app
 
-from whoosh.fields import Schema, TEXT, ID, IDLIST, NUMERIC, DATETIME, KEYWORD, BOOLEAN
+from whoosh.fields import Schema, TEXT, ID, IDLIST, NUMERIC, DATETIME, KEYWORD, BOOLEAN, NGRAMWORDS
 from whoosh.writing import AsyncWriter
 from whoosh.qparser import QueryParser, MultifieldParser, RegexPlugin, PseudoFieldPlugin
 from whoosh.qparser import WordNode
     doc[WIKINAME] = wikiname
     doc[CONTENT] = content
     doc[BACKENDNAME] = backend_name
+    if CONTENTNGRAM in schema:
+        doc[CONTENTNGRAM] = content
     return doc
 
 
             ITEMTRANSCLUSIONS: ID(stored=True),
             # tokenized ACL from metadata
             ACL: TEXT(analyzer=AclTokenizer(acl_rights_contents), multitoken_query="and", stored=True),
+            # ngram words, index ngrams of words from main content
+            CONTENTNGRAM: NGRAMWORDS(minsize=3, maxsize=6),
         }
         latest_revs_fields.update(**common_fields)
 

File MoinMoin/templates/ajaxsearch.html

+<br/>
+{% if omitted_words %}
+  <p>{{ _("common words in query: %(termlist)s", termlist=omitted_words) }}</p>
+{% endif %}
+<br/> 
 {% if results is defined %}
     <p class="searchstats">
         {% if results %}