Commits

mitsuhiko committed 2252807

More changes on the search.

  • Parent commits 5d29a61


Files changed (1)

File solace/search.py

 """
 from __future__ import with_statement
 import re
+from calendar import timegm
 from threading import Lock
 from itertools import chain
 from werkzeug import import_string
 
 def update_search(changes):
     """Updates the search index."""
-    engine = get_engine()
-    for model, operation in reversed(changes):
-        if isinstance(model, Topic):
-            if operation == 'insert':
-                engine.add_topic(model)
-            elif operation == 'delete':
-                engine.remove_topic(model)
-            elif operation == 'update':
-                engine.update_topic(model)
-        elif isinstance(model, Post) and not model.is_question:
-            if operation == 'insert':
-                engine.add_post(model)
-            elif operation == 'delete':
+    engine = None
+    for model, operation in changes:
+        if isinstance(model, Post):
+            if engine is None:
+                engine = get_engine()
+            if model.is_deleted or operation == 'delete':
                 engine.remove_post(model)
-            elif operation == 'update':
+            else:
                 engine.update_post(model)
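
A quick sketch of how this function is fed, for orientation; the change list
itself is assembled by the database signal handlers at the bottom of the
file, so the tuples below are illustrative only:

    # hypothetical input: (model, operation) pairs collected by the
    # ORM signal handlers; only Post instances reach the engine.
    changes = [
        (new_post, 'insert'),      # indexed via engine.update_post()
        (edited_post, 'update'),   # re-indexed the same way
        (removed_post, 'delete'),  # dropped via engine.remove_post()
    ]
    update_search(changes)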
 
 
 class SearchEngine(object):
     """Baseclass for all search engines."""
 
-    def add_topic(self, topic):
-        pass
-
-    def remove_topic(self, topic):
-        pass
-
-    def update_topic(self, topic):
-        pass
-
-    def add_post(self, post):
-        pass
-
     def remove_post(self, post):
+        """Removes a post from the search index."""
         pass
 
     def update_post(self, post):
+        """Adds or updates a post in the search index."""
         pass
 
-    def query(self, query, locale, page=1, per_page=20):
+    def query(self, query, locale, page=1, per_page=20, order_by='relevance'):
+        """Queries the search index for the given query and returns the
+        result as a list.  The query can either be a search string or a
+        parsed query.  If it's a string, the search engine should not
+        forward it directly to the internal parser but attempt to parse
+        it with `parse_query` first and then translate the query object.
+
+        Searches are language-bound; the engine may only return results
+        from the given locale.
+
+        `page` and `per_page` are used for slicing.  Page will never be
+        negative.
+
+        `order_by` can be one of the following: `relevance`, `date`,
+        `votes` or `replies`.
+        """
         pass
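
The contract above is small enough to show in one hedged example;
`get_engine` is referenced elsewhere in this module, everything else here
is made up for illustration:

    # hypothetical caller: search the English index, second page,
    # ordered by votes rather than relevance.
    engine = get_engine()
    results = engine.query(u'unicode decode error', 'en',
                           page=2, per_page=20, order_by='votes')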
 
 
         if query.type == 'empty':
             return self._xap.Query()
         if query.type == 'term':
-            return self._xap.Query(stemmer(query.s), 1, query.pos)
+            return self._xap.Query(stemmer(query.s.lower()), 1, query.pos)
         if query.type == 'phrase':
             nodes = []
             for pos, value in enumerate(query.s.split()):
-                nodes.append(self._xap.Query(stemmer(value), 1, pos + 1))
+                nodes.append(self._xap.Query(stemmer(value.lower()), 1, pos + 1))
             return self._xap.Query(self._xap.Query.OP_PHRASE, nodes)
         if query.type in ('or', 'and', 'concat', 'andnot'):
             op = {'or': enum.OP_OR, 'and': enum.OP_AND, 'andnot': enum.OP_AND_NOT,
                                               self._xap.DB_CREATE_OR_OPEN)
         return self._xap.Database(settings.XAPIAN_DATABASE)
 
-    def _index_topic(self, topic, doc):
+    def _index_post(self, post, doc):
         indexer = self._xap.TermGenerator()
-        indexer.set_stemmer(self.get_stemmer(topic.locale))
+        stemmer = self.get_stemmer(post.topic.locale)
+        indexer.set_stemmer(stemmer)
         indexer.set_document(doc)
-        indexer.index_text(topic.title)
-        indexer.index_text(topic.question.text)
+        if post.is_question:
+            indexer.index_text(post.topic.title)
+            doc.add_value(3, self._xap.sortable_serialise(post.votes))
+            doc.add_value(4, self._xap.sortable_serialise(post.topic.reply_count))
+            for tag in post.topic.tags:
+                doc.add_term(stemmer(tag.name))
+        indexer.index_text(post.text)
+        doc.add_term('CP%d' % post.id)
+        doc.add_term('L%s' % post.topic.locale)
+        doc.add_term('T%d' % post.topic.id)
+        doc.add_term('U%d' % post.author.id)
+        doc.add_value(0, 'post:%d' % post.id)
+        doc.add_value(1, 'topic:%d' % post.topic.id)
+        time = self._xap.sortable_serialise(timegm(post.created.timetuple()))
+        doc.add_value(2, time)
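
For reference, the term prefixes and value slots written by `_index_post`
above, summarized as comments (all taken from this diff; the question-only
slots are filled in the `is_question` branch):

    # term prefixes                   value slots
    # CP<post id>   unique post term   0: 'post:<id>'
    # L<locale>     language filter    1: 'topic:<id>' (collapse key)
    # T<topic id>   topic filter       2: creation time, sortable
    # U<author id>  author filter      3: votes        (questions only)
    #                                  4: reply count  (questions only)
    #
    # CP<id> doubles as the unique document id passed to
    # replace_document() and delete_document() below.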
 
-    def _topic_term(self, topic):
-        return '_TOPIC_%d' % topic.id
-
-    def _find_object_document(self, topic, con=None):
-        if con is None:
-            con = self._get_connection()
-        enq = self._xap.Enquire(con)
-        q = self._xap.Query(self._object_term(topic))
-        enq.set_query(q)
-        rv = list(enq.get_mset(0, 1))
-        if rv:
-            return con.get_document(rv[0].get_docid())
-
-    def _object_term(self, obj):
-        return '_TYPE_%s:%d' % (type(obj).__name__, obj.id)
-
-    def _start_document(self, obj, language):
-        cls = type(obj)
-        typename = '%s.%s' % (cls.__module__, cls.__name__)
+    def update_post(self, post):
+        con = self._get_connection(writable=True)
         doc = self._xap.Document()
-        doc.set_data(dumps({'type': typename, 'id': obj.id}, 2))
-        doc.add_term('_LOCALE_%s' % language)
-        doc.add_term(self._object_term(obj))
-        return doc
-
-    def add_topic(self, topic):
-        doc = self._start_document(topic, topic.locale)
-        self._index_topic(topic, doc)
-        con = self._get_connection(writable=True)
-        con.add_document(doc)
+        self._index_post(post, doc)
+        con.replace_document('CP%d' % post.id, doc)
         con.flush()
 
-    def update_topic(self, topic):
+    def remove_post(self, post):
         con = self._get_connection(writable=True)
-        doc = self._find_object_document(topic, con)
-        # let's just say that's intentional
-        if doc is None:
-            return
-
-        self._index_topic(topic, doc)
-        con.replace_document(doc.get_docid(), doc)
+        con.delete_document('CP%d' % post.id)
         con.flush()
 
-    def remove_topic(self, topic):
-        con = self._get_connection(writable=True)
-        doc = self._find_object_document(topic, con)
-        if doc is not None:
-            con.delete_document(con)
-            con.flush()
-
-    def query(self, query, locale, page=1, per_page=20):
+    def query(self, query, locale, page=1, per_page=20, order_by='relevance'):
         stemmer = self.get_stemmer(locale)
         if isinstance(query, basestring):
             query = parse_query(query)
-        xap_query = self._make_xapian_query(query, stemmer)
         enq = self._xap.Enquire(self._get_connection())
-        enq.set_query(xap_query)
+        enq.set_query(self._xap.Query(self._xap.Query.OP_AND,
+            self._xap.Query('L%s' % locale),
+            self._make_xapian_query(query, stemmer)
+        ))
+        enq.set_collapse_key(1)
+        # the sort order must be configured before the match set is
+        # fetched, otherwise it has no effect on the results
+        if order_by == 'relevance':
+            enq.set_sort_by_relevance_then_value(2, False)
+        else:
+            key = {'date': 2, 'votes': 3, 'replies': 4}[order_by]
+            enq.set_sort_by_value_then_relevance(key, False)
+
         offset = (page - 1) * per_page
         mset = enq.get_mset(offset, per_page, per_page * 3)
-        return mset
+
+        topic_ids = []
+        for match in mset:
+            doc = match.get_document()
+            assert doc.get_value(0).startswith('post:')
+            topic_ids.append(int(doc.get_value(1)[6:]))
+        topics = dict((x.id, x) for x in
+            Topic.query.filter(Topic.id.in_(topic_ids)))
+        return [topics[x] for x in topic_ids if x in topics]
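
End to end, a hedged example of what the Xapian implementation returns;
note that `set_collapse_key(1)` keeps at most one matching post per topic,
so the result is a list of distinct `Topic` objects:

    # hypothetical usage of the Xapian-backed engine
    topics = engine.query(u'decorators', 'en', order_by='date')
    for topic in topics:
        print topic.id, topic.title   # Python 2, matching this codebase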
 
 
 # database signal handlers