Georg Brandl avatar Georg Brandl committed e5bd5a8

Closes #1067: implement pluggable search scorer and tweak scoring to give good results. Patch by Hernan Grecco.

Comments (0)

Files changed (6)

 Release 1.2 (in development)
 ============================
 
+* #1067: Improve the ordering of the JavaScript search results: matches in titles
+  come before matches in full text, and object results are better categorized.
+  Also implement a pluggable search scorer.
+
 * PR#72: #975: Fix gettext does not extract definition terms before docutils 0.10.0
 
 * PR#25: In inheritance diagrams, the first line of the class docstring
 
    .. versionadded:: 1.1
 
+.. confval:: html_search_scorer
+
+   The name of a JavaScript file (relative to the configuration directory) that
+   implements a search results scorer.  If empty, the default will be used.
+
+   .. XXX describe interface for scorer here
+
+   .. versionadded:: 1.2
+
 .. confval:: htmlhelp_basename
 
    Output file base name for HTML help builder.  Default is ``'pydoc'``.

sphinx/builders/html.py

         if not lang or lang not in languages:
             lang = 'en'
         self.indexer = IndexBuilder(self.env, lang,
-                                    self.config.html_search_options)
+                                    self.config.html_search_options,
+                                    self.config.html_search_scorer)
         self.load_indexer(docnames)
 
         self.docwriter = HTMLWriter(self)
         html_secnumber_suffix = ('. ', 'html'),
         html_search_language = (None, 'html'),
         html_search_options = ({}, 'html'),
+        html_search_scorer = ('', None),
 
         # HTML help only options
         htmlhelp_basename = (lambda self: make_filename(self.project), None),

sphinx/search/__init__.py

         'pickle':   pickle
     }
 
-    def __init__(self, env, lang, options):
+    def __init__(self, env, lang, options, scoring):
         self.env = env
         # filename -> title
         self._titles = {}
         # add language-specific SearchLanguage instance
         self.lang = languages[lang](options)
 
+        if scoring:
+            with open(scoring, 'rb') as fp:
+                self.js_scorer_code = fp.read().decode('utf-8')
+        else:
+            self.js_scorer_code = u''
+
     def load(self, stream, format):
         """Reconstruct from frozen data."""
         if isinstance(format, basestring):
         return dict(
             search_language_stemming_code = self.lang.js_stemmer_code,
             search_language_stop_words = jsdump.dumps(sorted(self.lang.stopwords)),
+            search_scorer_tool = self.js_scorer_code,
         )

sphinx/themes/basic/static/searchtools.js_t

  *
  */
 
+{{ search_language_stemming_code|safe }}
+
+{% if search_scorer_tool %}
+{{ search_scorer_tool|safe }}
+{% else %}
 /**
- * helper function to return a node containing the
- * search summary for a given text. keywords is a list
- * of stemmed words, hlwords is the list of normal, unstemmed
- * words. the first one is used to find the occurance, the
- * latter for highlighting it.
+ * Simple result scoring code.
  */
+var Scorer = {
+  // Implement the following function to further tweak the score for each result
+  // The function takes a result array [filename, title, anchor, descr, score]
+  // and returns the new score.
+  /*
+  score: function(result) {
+    return result[4];
+  },
+  */
 
-jQuery.makeSearchSummary = function(text, keywords, hlwords) {
-  var textLower = text.toLowerCase();
-  var start = 0;
-  $.each(keywords, function() {
-    var i = textLower.indexOf(this.toLowerCase());
-    if (i > -1)
-      start = i;
-  });
-  start = Math.max(start - 120, 0);
-  var excerpt = ((start > 0) ? '...' : '') +
-  $.trim(text.substr(start, 240)) +
-  ((start + 240 - text.length) ? '...' : '');
-  var rv = $('<div class="context"></div>').text(excerpt);
-  $.each(hlwords, function() {
-    rv = rv.highlightText(this, 'highlighted');
-  });
-  return rv;
+  // query matches the full name of an object
+  objNameMatch: 11,
+  // or matches in the last dotted part of the object name
+  objPartialMatch: 6,
+  // Additive scores depending on the priority of the object
+  objPrio: {0:  15,   // used to be importantResults
+            1:  5,   // used to be objectResults
+            2: -5},  // used to be unimportantResults
+  //  Used when the priority is not in the mapping.
+  objPrioDefault: 0,
+
+  // query found in title
+  title: 15,
+  // query found in terms
+  term: 5
 };
-
-{{ search_language_stemming_code|safe }}
+{% endif %}
 
 /**
  * Search Module
     }
 
     // lookup as search terms in fulltext
-    results = results.concat(this.performTermsSearch(searchterms, excluded, terms, 0))
-                     .concat(this.performTermsSearch(searchterms, excluded, titleterms, 20));
+    results = results.concat(this.performTermsSearch(searchterms, excluded, terms, Scorer.term))
+                     .concat(this.performTermsSearch(searchterms, excluded, titleterms, Scorer.title));
 
     // delete unused variables in order to not waste memory until list is
     // retrieved completely
     delete filenames, titles, terms, titleterms;
 
-    // now sort the regular results by score (in opposite order of appearance,
-    // since the display function below uses pop() to retrieve items)
+    // let the scorer override scores with a custom scoring function
+    if (Scorer.score) {
+      for (i = 0; i < results.length; i++)
+        results[i][4] = Scorer.score(results[i]);
+    }
+
+    // now sort the results by score (in opposite order of appearance, since the
+    // display function below uses pop() to retrieve items) and then
+    // alphabetically
     results.sort(function(a, b) {
       var left = a[4];
       var right = b[4];
-      return (left > right) ? 1 : ((left < right) ? -1 : 0);
+      if (left > right) {
+        return 1;
+      } else if (left < right) {
+        return -1;
+      } else {
+        // same score: sort alphabetically
+        left = a[1].toLowerCase();
+        right = b[1].toLowerCase();
+        return (left > right) ? -1 : ((left < right) ? 1 : 0);
+      }
     });
 
-    console.info('search results:', results);
-    Search.lastresults = results.slice();  // a copy
+    // for debugging
+    //Search.lastresults = results.slice();  // a copy
+    //console.info('search results:', Search.lastresults);
 
     // print the results
     var resultCount = results.length;
           $.get(DOCUMENTATION_OPTIONS.URL_ROOT + '_sources/' +
                 item[0] + '.txt', function(data) {
             if (data != '') {
-              listItem.append($.makeSearchSummary(data, searchterms, hlterms));
+              listItem.append(Search.makeSearchSummary(data, searchterms, hlterms));
               Search.output.append(listItem);
             }
             listItem.slideDown(5, function() {
       for (var name in objects[prefix]) {
         var fullname = (prefix ? prefix + '.' : '') + name;
         if (fullname.toLowerCase().indexOf(object) > -1) {
+          var score = 0;
+          var parts = fullname.split('.');
+          // check for different match types: exact matches of full name or
+          // "last name" (i.e. last dotted part)
+          if (fullname == object || parts[parts.length - 1] == object) {
+            score += Scorer.objNameMatch;
+          // matches in last name
+          } else if (parts[parts.length - 1].indexOf(object) > -1) {
+            score += Scorer.objPartialMatch;
+          }
           var match = objects[prefix][name];
           var objname = objnames[match[1]][2];
           var title = titles[match[0]];
             }
           }
           var descr = objname + _(', in ') + title;
+
           anchor = match[3];
           if (anchor == '')
             anchor = fullname;
           else if (anchor == '-')
             anchor = objnames[match[1]][1] + '-' + fullname;
-          result = [filenames[match[0]], fullname, '#'+anchor, descr, 0];
-          var score;
-          switch (match[2]) {
-          case 1: // normal results -- display between important and fulltext
-            score = 5; break;
-          case 0: // "important" results -- show directly after title results
-            score = 10; break;
-          case 2: // "unimportant" results -- show after fulltext results
-            score = -10; break;
+          // add custom score for some objects according to scorer
+          if (Scorer.objPrio.hasOwnProperty(match[2])) {
+            score += Scorer.objPrio[match[2]];
+          } else {
+            score += Scorer.objPrioDefault;
           }
           results.push([filenames[match[0]], fullname, '#'+anchor, descr, score]);
         }
       }
 
       // if we have still a valid result we can add it to the result list
-      if (valid)
+      if (valid) {
         results.push([filenames[file], titles[file], '', null, score]);
+      }
     }
     return results;
   },
+
+  /**
+   * helper function to return a node containing the
+   * search summary for a given text. keywords is a list
+   * of stemmed words, hlwords is the list of normal, unstemmed
+   * words. the first one is used to find the occurrence, the
+   * latter for highlighting it.
+   */
+  makeSearchSummary : function(text, keywords, hlwords) {
+    var textLower = text.toLowerCase();
+    var start = 0;
+    $.each(keywords, function() {
+      var i = textLower.indexOf(this.toLowerCase());
+      if (i > -1)
+        start = i;
+    });
+    start = Math.max(start - 120, 0);
+    var excerpt = ((start > 0) ? '...' : '') +
+      $.trim(text.substr(start, 240)) +
+      ((start + 240 - text.length) ? '...' : '');
+    var rv = $('<div class="context"></div>').text(excerpt);
+    $.each(hlwords, function() {
+      rv = rv.highlightText(this, 'highlighted');
+    });
+    return rv;
+  }
 };
 
 $(document).ready(function() {
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.