Thomas Waldmann committed 7cfec13

remove indexing + tests

Comments (0)

Files changed (2)

MoinMoin/search/_tests/test_indexing.py

-# Copyright: 2011 MoinMoin:MichaelMayorov
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-MoinMoin - MoinMoin.search.indexing Tests
-"""
-
-
-import py
-
-import shutil, tempfile
-from datetime import datetime
-
-from whoosh.qparser import QueryParser
-
-from MoinMoin import log
-from MoinMoin.search.indexing import WhooshIndex
-
-# Documents that will be added to the index
-docs = {
-        u"Document One": [
-                         {
-                          "wikiname": u"Test",
-                          "name": u"Document One",
-                          "uuid": u"68054804bd7141609b7c441143adf83d",
-                          "rev_no": 0,
-                          "mtime":  datetime(2011, 6, 10, 2, 17, 5),
-                          "content": u"Wi-Fi",
-                          "contenttype": u"text/plain;charset=utf-8",
-                          "tags": [u"Rest", u"in", u"peace"],
-                          "itemlinks": [u"Home", u"Find"],
-                          "itemtransclusions": [u"Another", u"Stuff"],
-                          "acl": u"JoeDoe:read,write",
-                          "language": u"en",
-                          "userid": u"1307875904.23.55111",
-                          "address": u"127.0.0.1",
-                          "hostname": u"localhost",
-                        },
-                        {
-                          "wikiname": u"Test",
-                          "name": u"Document One",
-                          "uuid": u"68054804bd7141609b7c441143adf83d",
-                          "rev_no": 1,
-                          "mtime":  datetime(2011, 6, 12, 2, 17, 5),
-                          "content": u"Mo in Moin",
-                          "contenttype": u"text/plain;charset=utf-8",
-                          "tags": [u"first_tag", u"second_tag"],
-                          "itemlinks": [u"Home", u"Find"],
-                          "itemtransclusions": [u"Another", u"Stuff"],
-                          "acl": u"JoeDoe:read,write",
-                          "language": u"en",
-                          "address": u"195.54.14.254",
-                          "hostname": u"kb.csu.ru",
-                        },
-                       ],
-        u"Document Two": [
-                         {
-                          "wikiname": u"Test",
-                          "name": u"Document Two",
-                          "uuid": u"12354804bd7141609b7c441143adf83d",
-                          "rev_no": 0,
-                          "mtime":  datetime(2011, 6, 10, 1, 17, 5),
-                          "content": u"Hello document one",
-                          "contenttype": u"text/plain;charset=utf-8",
-                          "tags": [u"first_tag", u"tag"],
-                          "itemlinks": [u"Home", u"Find"],
-                          "itemtransclusions": [u"Another"],
-                          "acl": u"User:-write",
-                          "language": u"en",
-                          "userid": u"1307875904.23.55111",
-                          "address": u"123.213.132.231",
-                         },
-                         {
-                          "wikiname": u"Test",
-                          "name": u"Document Two",
-                          "uuid": u"12354804bd7141609b7c441143adf83d",
-                          "rev_no": 1,
-                          "mtime":  datetime(2011, 6, 12, 2, 20, 5),
-                          "content": u"Hello document two",
-                          "contenttype": u"text/plain;charset=utf-8",
-                          "tags": [u"tag", u"second_tag"],
-                          "itemlinks": [u"Home", u"Find"],
-                          "itemtransclusions": [u"Another"],
-                          "acl": u"User:read,write,admin",
-                          "language": u"en",
-                          "address": u"123.213.132.231",
-                         },
-                        ]
-       }
-
-# (field_name, search_string, expected_result_count_for_latest, expected_result_count_for_all)
-queries = [
-           (u"wikiname", u"Test", 2, 4),
-           (u"name", u"Document", 2, 4),
-           (u"uuid", u"68054804bd7141609b7c441143adf83d", 1, 2),
-           (u"rev_no", u"1", 2, 2),
-           (u"content", u"moin", 1, 1),
-           (u"contenttype", u"text/plain", 2, 4),
-           (u"tags", u"first_tag", 1, 2),
-           (u"itemlinks", u"Home", 2, None),
-           (u"itemtransclusions", u"Stuff", 1, None),
-           (u"acl", u"JoeDoe:+read", 1, None),
-           (u"acl", u"JoeDoe:+write", 1, None),
-           (u"language", u"en", 2, 4),
-           (u"userid", u"1307875904.23.55111", 0, 2),
-           (u"address", u"127.0.0.1", 0, 1),
-           (u"hostname", u"kb.csu.ru", 1, 1),
-          ]
-
-
-class TestWhooshIndex(object):
-
-    queries = []
-
-    def setup_method(self, method):
-        """ indexing: create temporary directory with indexes """
-
-        self.index_dir = tempfile.mkdtemp('', 'moin-')
-
-    def teardown_method(self, method):
-        """ indexing: delete temporary directory """
-
-        shutil.rmtree(self.index_dir)
-
-    def testIndexSchema(self):
-        """
-        indexing: create temporary directory with indexes, add documents from
-        "docs" to indexes, and check results using "queries"
-        """
-
-        index_object = WhooshIndex(index_dir=self.index_dir)
-        latest_revs_index = index_object.latest_revisions_index
-        all_revs_index = index_object.all_revisions_index
-
-        # Add docs to indexes
-        with all_revs_index.writer() as all_revs_writer:
-            for item_name, documents in docs.items():
-                for document in documents:
-                    with latest_revs_index.writer() as latest_revs_writer:
-                        latest_revs_writer.update_document(**document)
-                    all_revs_names = all_revs_index.schema.names()
-                    all_revs_doc = dict([(key, value)
-                                         for key, value in document.items()
-                                         if key in all_revs_names])
-
-                    all_revs_writer.add_document(**all_revs_doc)
-
-       # Check that all docs were added successfully
-        with latest_revs_index.searcher() as latest_revs_searcher:
-            with all_revs_index.searcher() as all_revs_searcher:
-                for field_name, query, latest_res_len, all_res_len in queries:
-                    query = QueryParser(field_name, latest_revs_index.schema).parse(query)
-                    assert len(latest_revs_searcher.search(query)) == latest_res_len
-                    if field_name in all_revs_index.schema.names():
-                        assert len(all_revs_searcher.search(query)) == all_res_len

MoinMoin/search/indexing.py

-# Copyright: 2011 MoinMoin:MichaelMayorov
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-MoinMoin - Whoosh index schemas / index management
-"""
-
-import os
-import datetime
-
-from flask import current_app as app
-
-from whoosh.fields import Schema, TEXT, ID, IDLIST, NUMERIC, DATETIME, KEYWORD, BOOLEAN
-from whoosh.index import open_dir, create_in, EmptyIndexError
-
-from MoinMoin.config import WIKINAME, NAME, NAME_EXACT, REV_NO, MTIME, CONTENTTYPE, TAGS, \
-                            LANGUAGE, USERID, ADDRESS, HOSTNAME, SIZE, ACTION, COMMENT, \
-                            CONTENT, UUID, ITEMLINKS, ITEMTRANSCLUSIONS, ACL, EMAIL, OPENID
-from MoinMoin.search.analyzers import *
-from MoinMoin.error import FatalError
-
-from MoinMoin import log
-logging = log.getLogger(__name__)
-
-
-def backend_to_index(backend_rev, rev_no, schema, content, wikiname=u''):
-    """
-    Convert fields from backend format to whoosh schema
-
-    :param backend_rev: MoinMoin backend revision
-    :param rev_no: Revision number
-    :param schema: list with whoosh schema fields
-    :returns: document to put into whoosh index
-    """
-
-    doc = dict([(str(key), value)
-                for key, value in backend_rev.items()
-                if key in schema])
-    doc[MTIME] = datetime.datetime.utcfromtimestamp(backend_rev[MTIME])
-    doc[NAME_EXACT] = backend_rev[NAME]
-    doc[REV_NO] = rev_no
-    doc[WIKINAME] = wikiname
-    doc[CONTENT] = content
-    return doc
-
-
-class WhooshIndex(object):
-    """
-    Managing whoosh indexes
-    """
-
-    # Index names, schemas
-    _indexes = {'latest_revisions_index': 'latest_revisions_schema',
-                'all_revisions_index': 'all_revisions_schema',
-               }
-
-    def __init__(self, index_dir=None, cfg=None, force_create=False):
-        """
-        Create and open indexes in index_dir
-
-        :param force_create: Create empty index in index_dir even if index exists
-        :param index_dir: Directory where whoosh indexes will be created, default None
-        :param cfg: Application config (app.cfg), default None
-        """
-        self._cfg = cfg or app.cfg
-        self._index_dir = index_dir or self._cfg.index_dir
-
-        common_fields = {
-            # wikiname so we can have a shared index in a wiki farm, always check this!
-            # taken from app.cfg.interwikiname
-            WIKINAME: ID(stored=True),
-            # tokenized NAME from metadata - use this for manual searching from UI
-            NAME: TEXT(stored=True, multitoken_query="and", analyzer=item_name_analyzer(), field_boost=2.0),
-            # unmodified NAME from metadata - use this for precise lookup by the code.
-            # also needed for wildcard search, so the original string as well as the query
-            # (with the wildcard) is not cut into pieces.
-            NAME_EXACT: ID(field_boost=3.0),
-            # revision number, integer 0..n
-            REV_NO: NUMERIC(stored=True),
-            # MTIME from revision metadata (converted to UTC datetime)
-            MTIME: DATETIME(stored=True),
-            # tokenized CONTENTTYPE from metadata
-            CONTENTTYPE: TEXT(stored=True, multitoken_query="and", analyzer=MimeTokenizer()),
-            # unmodified list of TAGS from metadata
-            TAGS: ID(stored=True),
-            # LANGUAGE from metadata
-            LANGUAGE: ID(stored=True),
-            # USERID from metadata
-            USERID: ID(stored=True),
-            # ADDRESS from metadata
-            ADDRESS: ID(stored=True),
-            # HOSTNAME from metadata
-            HOSTNAME: ID(stored=True),
-            # SIZE from metadata
-            SIZE: NUMERIC(stored=True),
-            # ACTION from metadata
-            ACTION: ID(stored=True),
-            # tokenized COMMENT from metadata
-            COMMENT: TEXT(stored=True, multitoken_query="and"),
-            # data (content), converted to text/plain and tokenized
-            CONTENT: TEXT(stored=True, multitoken_query="and"),
-        }
-
-        latest_revs_fields = {
-            # UUID from metadata - as there is only latest rev of same item here, it is unique
-            UUID: ID(unique=True, stored=True),
-            # unmodified list of ITEMLINKS from metadata
-            ITEMLINKS: ID(stored=True),
-            # unmodified list of ITEMTRANSCLUSIONS from metadata
-            ITEMTRANSCLUSIONS: ID(stored=True),
-            # tokenized ACL from metadata
-            ACL: TEXT(analyzer=AclTokenizer(self._cfg), multitoken_query="and", stored=True),
-        }
-        latest_revs_fields.update(**common_fields)
-
-        userprofile_fields = {
-            # EMAIL from user profile metadata
-            EMAIL: ID(unique=True, stored=True),
-            # OPENID from user profile metadata
-            OPENID: ID(unique=True, stored=True),
-        }
-        latest_revs_fields.update(**userprofile_fields)
-
-        all_revs_fields = {
-            # UUID from metadata
-            UUID: ID(stored=True),
-        }
-        all_revs_fields.update(**common_fields)
-
-        self.latest_revisions_schema = Schema(**latest_revs_fields)
-        self.all_revisions_schema = Schema(**all_revs_fields)
-
-        # Define dynamic fields
-        dynamic_fields = [("*_id", ID(stored=True)),
-                          ("*_text", TEXT(stored=True)),
-                          ("*_keyword", KEYWORD(stored=True)),
-                          ("*_numeric", NUMERIC(stored=True)),
-                          ("*_datetime", DATETIME(stored=True)),
-                          ("*_boolean", BOOLEAN(stored=True)),
-                         ]
-
-        # Adding dynamic fields to schemas
-        for glob, field_type in dynamic_fields:
-            self.latest_revisions_schema.add(glob, field_type, glob=True)
-            self.all_revisions_schema.add(glob, field_type, glob=True)
-
-        for index_name, index_schema in self._indexes.items():
-            self.open_index(index_name, index_schema, create=True, force_create=force_create,
-                            index_dir=self._index_dir
-                           )
-
-    def open_index(self, indexname, schema, create=False, force_create=False, index_dir=None):
-        """
-        Open index <indexname> in <index_dir>. If opening fails and <create>
-        is True, try creating the index and retry opening it afterwards.
-        The opened index is stored as attribute <indexname> on this object.
-
-
-        :param indexname: Name of created index
-        :param schema: which schema applies
-        :param create: create index if index doesn't exist
-        :param force_create: force create new empty index in index_dir
-        :param index_dir: Directory where whoosh indexes will be created
-        """
-        index_dir = index_dir or self._cfg.index_dir
-        if force_create:
-            self.create_index(index_dir, indexname, schema)
-        try:
-            index = open_dir(index_dir, indexname=indexname)
-            setattr(self, indexname, index)
-        except (IOError, OSError, EmptyIndexError) as err:
-            if create:
-                self.create_index(index_dir, indexname, schema)
-                try:
-                    index = open_dir(index_dir, indexname=indexname)
-                    setattr(self, indexname, index)
-                except:
-                    # if we get here, it failed without recovery
-                    raise FatalError("can't open whoosh index")
-            else:
-                raise FatalError("can't open whoosh index")
-
-    def create_index(self, index_dir, indexname, schema):
-        """
-        Create <indexname> in <index_dir>
-
-        :param index_dir: Directory where whoosh indexes will be created
-        :param indexname: Name of created index
-        :param schema: which schema applies
-        """
-        try:
-            os.mkdir(index_dir)
-        except:
-            # ignore exception, we'll get another exception below
-            # in case there are problems with the index_dir
-            pass
-        try:
-            create_in(index_dir, getattr(self, schema), indexname=indexname)
-        except (IOError, OSError) as err:
-            logging.error(u"%s [while trying to create index '%s' in '%s']" % (str(err), indexname, index_dir))
-
-    def remove_index(self):
-        """
-        Create empty indexes in index_dir, removing the old ones
-        """
-        for index_name, index_schema in self._indexes.items():
-            self.create_index(indexname=index_name, schema=index_schema, index_dir=self._index_dir)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.