Anonymous avatar Anonymous committed b85f718

More search updates; work around an SA (SQLAlchemy) bug?

Comments (0)

Files changed (2)

solace/database.py

 class SignalEmittingSessionExtension(SessionExtension):
     """Emits signals the mapper extension accumulated."""
 
+    def _attr_modified(self, model, attr):
+        """Tells whether `attr` has pending changes on `model`.
+
+        The attribute history is requested with ``passive=True``.  If
+        the lookup raises an `AttributeError` the attribute is reported
+        as unmodified.
+        """
+        try:
+            history = attr.get_history(model, passive=True)
+            modified = history.has_changes()
+        except AttributeError:
+            # not every proxy implements get_history internally; such
+            # attributes are simply treated as unchanged.
+            return False
+        return modified
+
     def after_flush(self, session, flush_context):
         d = session._model_changes
         if d:
                 if operation != 'update':
                     continue
                 state = orm.attributes.instance_state(model)
-                changed_columns = []
-                for attr in state.manager.attributes:
-                    if attr.get_history(model, passive=True).has_changes():
-                        changed_columns.append(attr.key)
+                changed_columns = [attr.key for attr in state.manager.attributes
+                                   if self._attr_modified(model, attr)]
                 d[key] = (model, operation, frozenset(changed_columns))
         return EXT_CONTINUE
 
 
 def update_search(changes):
     """Updates the search index."""
-    engine = None
-    for model, operation in changes:
-        if isinstance(model, Post):
-            if engine is None:
-                engine = get_engine()
-            if model.is_deleted or operation == 'delete':
+    # the engine is looked up once, even if none of the changes ends up
+    # being relevant for the search index.
+    engine = get_engine()
+    # every entry of `changes` is unpacked as a
+    # (model, operation, changed_columns) triple.
+    for model, operation, changed_columns in changes:
+        # topics are only forwarded when they were updated; posts are
+        # either removed from the index or handed over for (re)indexing.
+        if isinstance(model, Topic) and operation == 'update':
+            engine.update_topic(model, changed_columns)
+        elif isinstance(model, Post):
+            if operation == 'delete':
                 engine.remove_post(model)
             else:
-                engine.update_post(model)
+                engine.update_post(model, changed_columns)
 
 
 class SearchEngine(object):
     """Baseclass for all search engines."""
 
     def remove_post(self, post):
-        """Removes a post from the search index."""
+        """Removes a post from the search index.  This is called when the
+        model commit hook detects that a post was deleted from the
+        database.  The implementation in this class does nothing.
+        """
         pass
 
-    def update_post(self, post):
-        """Adds or updates a post in the search index."""
+    def update_post(self, post, changed_columns):
+        """Called when the model commit hook detects a change on a post.
+        This affects both questions and replies.  If the post was added to
+        the database this is called as well, but `changed_columns` will be
+        `None`.  Otherwise it is a set of the names of the columns that
+        were changed.  The implementation in this class does nothing.
+        """
         pass
 
-    def query(self, query, locale, page=1, per_page=20, order_by='relevance'):
+    def update_topic(self, topic, changed_columns):
+        """Called when an already existing topic was updated.  Topics are
+        not indexed separately; the question post is responsible for
+        indexing the topic.  However, because the title of the topic and
+        the number of replies change on the topic itself and not on the
+        question post, this callback exists to help the search engine keep
+        the index up to date.  `changed_columns` holds the names of the
+        topic columns that were changed.  The implementation in this class
+        does nothing.
+        """
+        pass
+
+    def query(self, query, locale, page=1, per_page=20, order_by='relevance',
+              user=None):
         """Queries the search index for the given query and returns the
         result as list.  The query can either be a search string or a
         parsed query.  If it's a string, the search engine should not
 
         `order_by` can be one of the following: `relevance`, `date`,
         `votes` and `replies`.
+
+        If a user is given the search should only return results the user
+        can see.
         """
         pass
 
                                               self._xap.DB_CREATE_OR_OPEN)
         return self._xap.Database(settings.XAPIAN_DATABASE)
 
-    def _index_post(self, post, doc):
+    def _find_post(self, post, con=None):
+        """Returns the document stored for a post in the search database
+        or `None` if there is no such document.  If no connection is
+        passed, one is requested from `get_connection`.
+        """
+        connection = self.get_connection() if con is None else con
+        enquire = self._xap.Enquire(connection)
+        enquire.set_query(self._xap.Query('CP%d' % post.id))
+        # the ``CP<id>`` term identifies the post, one match is enough.
+        matches = list(enquire.get_mset(0, 1))
+        if not matches:
+            return None
+        return matches[0].get_document()
+
+    def _change_answer_state(self, post, doc):
+        """Syncs the answer marker of `doc` with `post.is_answer`.  The
+        ``A1`` term is present exactly when the post is an answer and
+        value slot 2 is set to 1 or 0 accordingly.
+        """
+        if post.is_answer:
+            doc.add_term('A1')
+        else:
+            # the document may have been loaded from the index and still
+            # carry the term from the time the post was an answer.
+            # Xapian's remove_term raises InvalidArgumentError if the
+            # term is not on the document, which just means there is
+            # nothing to remove.
+            # NOTE(review): assumes self._xap is the xapian module.
+            try:
+                doc.remove_term('A1')
+            except self._xap.InvalidArgumentError:
+                pass
+        doc.set_value(2, 1 if post.is_answer else 0)
+
+    def _change_delete_flag(self, post, doc):
+        """Stores in value slot 3 whether the post or its topic is
+        deleted (1) or not (0).
+        """
+        if post.is_deleted or post.topic.is_deleted:
+            flag = 1
+        else:
+            flag = 0
+        doc.set_value(3, flag)
+
+    def _change_reply_count(self, post, doc):
+        """Writes the serialised reply count of the post's topic into
+        value slot 4.
+        """
+        serialise = self._xap.sortable_serialise
+        doc.set_value(4, serialise(post.topic.reply_count))
+
+    def _change_votes(self, post, doc):
+        """Writes the serialised vote count of the post's topic into
+        value slot 5.
+        """
+        serialise = self._xap.sortable_serialise
+        doc.set_value(5, serialise(post.topic.votes))
+
+    def _reindex_post(self, post, con):
+        """Builds a completely new search document for a post and returns
+        it.  The document is not written to the database; that is up to
+        the caller.  `con` is accepted but not used in here.
+
+        The topic title and the post text are indexed with the stemmer for
+        the locale of the topic.  The value slots are filled as follows:
+        0 and 1 hold the post and topic identifiers, 2 the answer state,
+        3 the delete flag, 4 the reply count, 5 the votes and 6 the
+        creation time.
+        """
+        doc = self._xap.Document()
+        doc.add_term('CP%d' % post.id)
+        # this value is unused, but should make it possible to
+        # migrate to more complex indices in the future.
+        doc.set_value(0, 'post:%d' % post.id)
         indexer = self._xap.TermGenerator()
         indexer.set_stemmer(self.get_stemmer(post.topic.locale))
         indexer.set_document(doc)
-        if post.is_question:
-            indexer.index_text(post.topic.title)
-            doc.add_value(3, self._xap.sortable_serialise(post.votes))
-            doc.add_value(4, self._xap.sortable_serialise(post.topic.reply_count))
-            for tag in post.topic.tags:
-                doc.add_term(stemmer(tag.name).lower())
+        indexer.index_text(post.topic.title)
         indexer.index_text(post.text)
-        doc.add_term('CP%d' % post.id)
-        doc.add_term('L%s' % post.topic.locale)
-        doc.add_term('T%d' % post.topic.id)
-        doc.add_term('U%d' % post.author.id)
-        doc.add_value(0, 'post:%d' % post.id)
-        doc.add_value(1, 'topic:%d' % post.topic.id)
+        doc.set_value(1, 'topic:%d' % post.topic.id)
+        self._change_answer_state(post, doc)
+        # the document is brand new, so the delete flag of a previous
+        # document would be lost if it was not set again here.
+        self._change_delete_flag(post, doc)
         time = self._xap.sortable_serialise(timegm(post.created.timetuple()))
-        doc.add_value(2, time)
-
-    def update_post(self, post):
-        con = self._get_connection(writable=True)
-        doc = self._xap.Document()
-        self._index_post(post, doc)
-        con.replace_document('CP%d' % post.id, doc)
-        con.flush()
+        doc.set_value(6, time)
+        self._change_reply_count(post, doc)
+        self._change_votes(post, doc)
+        return doc
 
     def remove_post(self, post):
+        """Deletes the document identified by the ``CP<id>`` term of the
+        post from the search database and flushes the connection.
+        """
-        con = self._get_connection(writable=True)
+        con = self.get_connection(writable=True)
         con.delete_document('CP%d' % post.id)
         con.flush()
 
-    def query(self, query, locale, page=1, per_page=20, order_by='relevance'):
+    def update_post(self, post, changed_columns=None):
+        """Brings the search document of a post up to date.
+
+        If the post has no document yet, if `changed_columns` is `None`
+        or if the text was changed, the document is rebuilt from scratch.
+        Otherwise the answer state and the delete flag are patched on the
+        existing document, each of them independently.  The database is
+        only written to if something was changed.
+        """
+        con = self.get_connection(writable=True)
+        doc = self._find_post(post, con)
+        new = doc is None
+
+        def modified(col):
+            return new or changed_columns is None or col in changed_columns
+
+        if modified('text'):
+            doc = self._reindex_post(post, con)
+            # the rebuilt document replaces the old one entirely, so
+            # make sure the delete flag is carried over as well.
+            self._change_delete_flag(post, doc)
+            update = True
+        else:
+            # more than one column can change in a single flush, so
+            # these checks must not exclude each other.
+            update = False
+            if modified('is_answer'):
+                self._change_answer_state(post, doc)
+                update = True
+            if modified('is_deleted'):
+                self._change_delete_flag(post, doc)
+                update = True
+
+        if update:
+            con.replace_document('CP%d' % post.id, doc)
+            con.flush()
+
+    def update_topic(self, topic, changed_columns):
+        """Updates the search documents of all posts of a topic.
+
+        A post without a document, or any post if the title of the topic
+        was changed, gets its document rebuilt from scratch.  Otherwise
+        only the values for the changed topic columns (`is_deleted`,
+        `reply_count` and `votes`) are patched on the existing document.
+        All lookups and writes share one writable connection which is
+        flushed once at the end.
+        """
+        con = self.get_connection(writable=True)
+
+        for post in topic.posts:
+            doc = self._find_post(post, con)
+            changed = False
+            if doc is None or 'title' in changed_columns:
+                doc = self._reindex_post(post, con)
+                # the rebuilt document replaces the old one entirely, so
+                # make sure the delete flag is carried over as well.
+                self._change_delete_flag(post, doc)
+                changed = True
+            else:
+                if 'is_deleted' in changed_columns:
+                    self._change_delete_flag(post, doc)
+                    changed = True
+                if 'reply_count' in changed_columns:
+                    self._change_reply_count(post, doc)
+                    changed = True
+                if 'votes' in changed_columns:
+                    self._change_votes(post, doc)
+                    changed = True
+            if changed:
+                con.replace_document('CP%d' % post.id, doc)
+        con.flush()
+
+    def query(self, query, locale, page=1, per_page=20, order_by='relevance',
+              user=None):
         stemmer = self.get_stemmer(locale)
         if isinstance(query, basestring):
             query = parse_query(query)
         mset = enq.get_mset(offset, per_page, per_page * 3)
 
         if order_by == 'relevance':
-            enq.set_sort_by_relevance_then_value(2, False)
+            enq.set_sort_by_relevance_then_value(6, False)
         else:
-            key = {'date': 2, 'votes': 3, 'replies': 4}[order_by]
+            key = {'date': 6, 'votes': 5, 'replies': 4}[order_by]
             enq.set_sort_by_value_then_relevance(key, False)
 
         topic_ids = []
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.