Commits

Thomas Waldmann committed e1a68b3

use the right tokenizers/analyzers in the schema, use name_exact for 1:1 matching

Comments (0)

Files changed (3)

     if app.cfg.create_storage:
         app.router.create()
     app.router.open()
-    app.storage = indexing.IndexingMiddleware(app.cfg.index_dir, app.router, wiki_name=app.cfg.interwikiname) # XXX give user name etc.
+    app.storage = indexing.IndexingMiddleware(app.cfg.index_dir, app.router,
+                                              wiki_name=app.cfg.interwikiname,
+                                              acl_rights_contents=app.cfg.acl_rights_contents)
     if app.cfg.create_index:
         app.storage.create()
     app.storage.open()

MoinMoin/search/analyzers.py

 class AclTokenizer(Tokenizer):
     """ Access control list tokenizer """
 
-    def __init__(self, cfg):
+    def __init__(self, acl_rights_contents):
         """
         :param acl_rights_contents: the acl_rights_contents setting from the wiki config
         """
-        self._acl_rights_contents = cfg.acl_rights_contents
+        self._acl_rights_contents = acl_rights_contents
 
     def __call__(self, value, start_pos=0, positions=False, mode=u'', **kwargs):
         """

MoinMoin/storage/middleware/indexing.py

                             CONTENT, ITEMLINKS, ITEMTRANSCLUSIONS, ACL, EMAIL, OPENID, \
                             ITEMID, REVID, CURRENT
 
+from MoinMoin.search.analyzers import item_name_analyzer, MimeTokenizer, AclTokenizer
 from MoinMoin.util.crypto import make_uuid
 
 LATEST_REVS = 'latest_revs'
 
 
 class IndexingMiddleware(object):
-    def __init__(self, index_dir, backend, wiki_name=None, **kw):
+    def __init__(self, index_dir, backend, wiki_name=None, acl_rights_contents=[], **kw):
         """
         Store params, create schemas.
         """
             # wikiname so we can have a shared index in a wiki farm, always check this!
             WIKINAME: ID(stored=True),
             # tokenized NAME from metadata - use this for manual searching from UI
-            # TODO was: NAME: TEXT(stored=True, multitoken_query="and", analyzer=item_name_analyzer(), field_boost=2.0),
-            NAME: ID(stored=True, field_boost=2.0),
+            NAME: TEXT(stored=True, multitoken_query="and", analyzer=item_name_analyzer(), field_boost=2.0),
             # unmodified NAME from metadata - use this for precise lookup by the code.
             # also needed for wildcard search, so the original string as well as the query
             # (with the wildcard) is not cut into pieces.
             # MTIME from revision metadata (converted to UTC datetime)
             MTIME: DATETIME(stored=True),
             # tokenized CONTENTTYPE from metadata
-            # TODO was: CONTENTTYPE: TEXT(stored=True, multitoken_query="and", analyzer=MimeTokenizer()),
-            CONTENTTYPE: ID(stored=True),
+            CONTENTTYPE: TEXT(stored=True, multitoken_query="and", analyzer=MimeTokenizer()),
             # unmodified list of TAGS from metadata
             TAGS: ID(stored=True),
             LANGUAGE: ID(stored=True),
             # unmodified list of ITEMTRANSCLUSIONS from metadata
             ITEMTRANSCLUSIONS: ID(stored=True),
             # tokenized ACL from metadata
-            # TODO was: ACL: TEXT(analyzer=AclTokenizer(self._cfg), multitoken_query="and", stored=True),
-            ACL: ID(stored=True),
+            ACL: TEXT(analyzer=AclTokenizer(acl_rights_contents), multitoken_query="and", stored=True),
         }
         latest_revs_fields.update(**common_fields)
 
         """
         Return item with <name> (may be a new or existing item).
         """
-        return Item(self, name=name)
+        return Item(self, name_exact=name)
 
     def get_item(self, **query):
         """
         Return item identified by the query (may be a new or existing item).
 
-        :kwargs **query: e.g. name=u"Foo" or itemid="..." or ...
+        :kwargs **query: e.g. name_exact=u"Foo" or itemid="..." or ...
                          (must be a unique fieldname=value for the latest-revs index)
         """
         return Item(self, **query)
         """
         Return item identified by the query (must be a new item).
 
-        :kwargs **query: e.g. name=u"Foo" or itemid="..." or ...
+        :kwargs **query: e.g. name_exact=u"Foo" or itemid="..." or ...
                          (must be a unique fieldname=value for the latest-revs index)
         """
         return Item.create(self, **query)
         """
         Return item identified by query (must be an existing item).
 
-        :kwargs **query: e.g. name=u"Foo" or itemid="..." or ...
+        :kwargs **query: e.g. name_exact=u"Foo" or itemid="..." or ...
                          (must be a unique fieldname=value for the latest-revs index)
         """
         return Item.existing(self, **query)
                            it can be given there, to avoid us fetching same doc again
                            from the index
         :kwargs **query: any unique fieldname=value for the latest-revs index, e.g.:
-                         name="foo" or itemid="....." to fetch the item's current
+                         name_exact="foo" or itemid="....." to fetch the item's current
                          doc from the index (if not given via latest_doc).
         """
         self.indexer = indexer