1. Armin Ronacher
  2. solace-search-support

Commits

mitsuhiko  committed 5d29a61

Search does something now.

  • Participants
  • Parent commits 1397a32
  • Branches default

Comments (0)

Files changed (5)

File setup.py

View file
  • Ignore whitespace
         'SQLAlchemy>=0.5.5',
         'creoleparser',
         'simplejson',
-        'webdepcompress'
+        'webdepcompress',
+        'translitcodec'
     ],
     tests_require=[
         'lxml',

File solace/application.py

View file
  • Ignore whitespace
 
 # important because of initialization code (such as signal subscriptions)
 import solace.badges
+import solace.search

File solace/database.py

View file
  • Ignore whitespace
         return self._record(instance, 'update')
 
     def _record(self, model, operation):
-        pk = tuple(orm.object_mapper(model).primary_key_from_instance(model))
+        pk = (orm.object_mapper(model).primary_key_from_instance(model))
+        pk.append(type(model))
+        pk = tuple(pk)
         orm.object_session(model)._model_changes[pk] = (model, operation)
         return EXT_CONTINUE
 

File solace/search.py

View file
  • Ignore whitespace
 from werkzeug import import_string
 from solace.i18n import _
 from solace import settings
+from cPickle import dumps, loads
 
 
 _engine = None
 
 
 def get_engine():
-    """Creates or returns the engine."""
+    """Creates or returns the engine.  Currently this returns an instance
+    of the Xapian engine only.  In the future we might support different
+    engines.
+    """
+    global _engine
     with _engine_lock:
         if _engine is None:
-            _engine = import_string(settings.SEARCH_ENGINE)()
+            _engine = XapianEngine()
         return _engine
 
 
     def update_post(self, post):
         """Does nothing in this implementation."""
 
-    def get_stemmer(self, locale=None):
-        return lambda x: x
-
-    def stem(self, word, locale=None):
-        return self.get_stemmer(locale)()
-
-    def iter_stemmed_words(self, text, locale=None):
-        stemmer = self.get_stemmer(locale)
-        for word in _word_re.findmatch(text):
-            yield stemmer(word.lower())
+    def query(self, query, locale, page=1, per_page=20):
+        pass
 
 
 class XapianEngine(SearchEngine):
         """Use the xapian stemmers."""
         if locale is not None:
             locale = str(locale)
+            if '_' in locale:
+                locale = locale.split('_', 1)[0]
         stemmer = self._stemmers.get(locale)
         if stemmer is not None:
             return stemmer
         self._stemmers[locale] = stemmer
         return stemmer
 
+    def _make_xapian_query(self, query, stemmer):
+        if query.type == 'empty':
+            return self._xap.Query()
+        if query.type == 'term':
+            return self._xap.Query(stemmer(query.s), 1, query.pos)
+        if query.type == 'phrase':
+            nodes = []
+            for pos, value in enumerate(query.s.split()):
+                nodes.append(self._xap.Query(stemmer(value), 1, pos + 1))
+            return self._xap.Query(self._xap.Query.OP_PHRASE, nodes)
+        if query.type in ('or', 'and', 'concat', 'andnot'):
+            op = {'or': enum.OP_OR, 'and': enum.OP_AND, 'andnot': enum.OP_AND_NOT,
+                  'concat': enum.OP_AND}[query.type]
+            return self._xap.Query(op,
+                                   self._make_xapian_query(query.left),
+                                   self._make_xapian_query(query.right))
+        # should not happen
+        return self._xap.Query()
+
     def _get_connection(self, writable=False):
         """Return a connection to the Xapian database."""
         if writable:
             return self._xap.WritableDatabase(settings.XAPIAN_DATABASE,
-                                              self._xapian.DB_CREATE_OR_OPEN)
+                                              self._xap.DB_CREATE_OR_OPEN)
         return self._xap.Database(settings.XAPIAN_DATABASE)
 
     def _index_topic(self, topic, doc):
-        word_iter = chain(
-            self.iter_stemmed_words(topic.title, topic.locale),
-            self.iter_stemmed_words(topic.question.text, topic.locale)
-        )
-        doc.add_term(
-        for idx, word in enumerate(word_iter):
-            doc.add_posting(word, idx, topic.locale)
+        indexer = self._xap.TermGenerator()
+        indexer.set_stemmer(self.get_stemmer(topic.locale))
+        indexer.set_document(doc)
+        indexer.index_text(topic.title)
+        indexer.index_text(topic.question.text)
 
     def _topic_term(self, topic):
         """Return the index term for `topic`, built from its `id`."""
         topic_id = topic.id
         return '_TOPIC_%d' % topic_id
 
-    def _find_topic(self, topic, con=None):
+    def _find_object_document(self, topic, con=None):
         if con is None:
             con = self._get_connection()
         enq = self._xap.Enquire(con)
-        q = self._xap.Query(self._topic_term(topic))
+        q = self._xap.Query(self._object_term(topic))
         enq.set_query(q)
         rv = list(enq.get_mset(0, 1))
         if rv:
             return con.get_document(rv[0].get_docid())
 
+    def _object_term(self, obj):
+        return '_TYPE_%s:%d' % (type(obj).__name__, obj.id)
+
+    def _start_document(self, obj, language):
+        cls = type(obj)
+        typename = '%s.%s' % (cls.__module__, cls.__name__)
+        doc = self._xap.Document()
+        doc.set_data(dumps({'type': typename, 'id': obj.id}, 2))
+        doc.add_term('_LOCALE_%s' % language)
+        doc.add_term(self._object_term(obj))
+        return doc
+
     def add_topic(self, topic):
-        doc = self._xap.Document()
-        doc.add_term(self._topic_term(topic))
+        doc = self._start_document(topic, topic.locale)
         self._index_topic(topic, doc)
         con = self._get_connection(writable=True)
         con.add_document(doc)
 
     def update_topic(self, topic):
         con = self._get_connection(writable=True)
-        doc = self._find_topic(topic, con)
+        doc = self._find_object_document(topic, con)
         # let's just say that's intentional
         if doc is None:
             return
 
         self._index_topic(topic, doc)
-        con.replace_document(doc)
+        con.replace_document(doc.get_docid(), doc)
         con.flush()
 
     def remove_topic(self, topic):
         con = self._get_connection(writable=True)
-        doc = self._find_topic(topic, con)
+        doc = self._find_object_document(topic, con)
         if doc is not None:
             con.delete_document(con)
             con.flush()
 
+    def query(self, query, locale, page=1, per_page=20):
+        stemmer = self.get_stemmer(locale)
+        if isinstance(query, basestring):
+            query = parse_query(query)
+        xap_query = self._make_xapian_query(query, stemmer)
+        enq = self._xap.Enquire(self._get_connection())
+        enq.set_query(xap_query)
+        offset = (page - 1) * per_page
+        mset = enq.get_mset(offset, per_page, per_page * 3)
+        return mset
+
 
 # database signal handlers
 from solace.models import Post, Topic

File solace/settings.py

View file
  • Ignore whitespace
 del with_statement
 
 # temporary imports, delete at end of file
-import os, sys, solace
+import os, sys, solace, tempfile
 
 # propagate early.  That way we can import "from solace import settings"
 # when the settings is not yet set up.  This is needed because during
 PLATFORM = os.name
 
 #: the database URI
-if PLATFORM == 'nt':
-    DATABASE_URI = 'sqlite:///C:/Temp/solace.db'
-else:
-    DATABASE_URI = 'sqlite:////tmp/solace.db'
+DATABASE_URI = 'sqlite:///%s/solace.db' % tempfile.gettempdir()
 
 #: the title of the website
 WEBSITE_TITLE = _(u'Plurk Solace')
 #: use TLS for SMTP?
 SMTP_USE_TLS = False
 
-#: the search engine to use.
-SEARCH_ENGINE = 'solace.search.XapianEngine'
-
 #: if xapian is used as search engine, this is the database it
 #: will use.
-XAPIAN_DATABASE = '/tmp/solace.xapdb'
+XAPIAN_DATABASE = '%s/solace.xapdb' % tempfile.gettempdir()
 
 #: the default language that is assumed if the client does not send
 #: a language information etc.  This language also has to be listed