Commits

Thomas Waldmann committed 226499a

split storage into backends (really storing stuff) and middleware (and mixins also in there)

moved acl and router middleware there, also indexing and serialization mixins

  • Participants
  • Parent commits a52b0d3

Comments (0)

Files changed (19)

MoinMoin/_tests/test_test_environ.py

 from MoinMoin.conftest import init_test_app, deinit_test_app
 from MoinMoin.config import NAME, CONTENTTYPE, IS_SYSITEM, SYSITEM_VERSION
 from MoinMoin.storage.error import NoSuchItemError
-from MoinMoin.storage.serialization import serialize, unserialize
+from MoinMoin.storage.middleware.serialization import serialize, unserialize
 
 from MoinMoin._tests import wikiconfig
 

MoinMoin/_tests/test_wikiutil.py

 
 from MoinMoin import config, wikiutil
 from MoinMoin._tests import wikiconfig
-from MoinMoin.storage.serialization import serialize, unserialize
+from MoinMoin.storage.middleware.serialization import serialize, unserialize
 
 from werkzeug import MultiDict
 
 
 
 from MoinMoin.storage.error import StorageError
-from MoinMoin.storage.serialization import serialize, unserialize
-from MoinMoin.storage.backends import router, acl, memory
+from MoinMoin.storage.backends import memory
+from MoinMoin.storage.middleware.serialization import serialize, unserialize
+from MoinMoin.storage.middleware import router, acl
 from MoinMoin import auth, config, user
 
 

MoinMoin/script/maint/index.py

 from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError
 from MoinMoin.util.mime import Type
 from MoinMoin.search.indexing import backend_to_index
-from MoinMoin.storage.backends.indexing import convert_to_indexable
+from MoinMoin.storage.middleware.indexing import convert_to_indexable
 
 from MoinMoin import log
 logging = log.getLogger(__name__)

MoinMoin/script/maint/xml.py

 
 from MoinMoin.script import fatal
 
-from MoinMoin.storage.serialization import unserialize, serialize, \
-                                           NLastRevs, SinceTime
+from MoinMoin.storage.middleware.serialization import unserialize, serialize, NLastRevs, SinceTime
 
 class XML(Command):
     description = "This command can be used to save items to a file or to create items by loading from a file"
 
         if moin19data:
             # this is for backend migration scenario from moin 1.9
-            from MoinMoin.storage.backends import create_simple_mapping, router
+            from MoinMoin.storage.backends import create_simple_mapping
+            from MoinMoin.storage.middleware import router
             namespace_mapping = create_simple_mapping(backend_uri='fs19:%s' % moin19data)
             storage = router.RouterBackend(
                     [(ns, be) for ns, be, acls in namespace_mapping], cfg=app.cfg)

MoinMoin/storage/_tests/test_backends_fs2.py

 
 from MoinMoin.storage._tests.test_backends import BackendTest
 from MoinMoin.storage.backends.fs2 import FS2Backend
-from MoinMoin.storage.backends.router import RouterBackend
 
 class TestFS2Backend(BackendTest):
 

MoinMoin/storage/_tests/test_backends_router.py

 from MoinMoin.error import ConfigurationError
 from MoinMoin.storage._tests.test_backends import BackendTest
 from MoinMoin.storage.backends.memory import MemoryBackend
-from MoinMoin.storage.backends.router import RouterBackend
+from MoinMoin.storage.middleware.router import RouterBackend
 from MoinMoin.search.indexing import WhooshIndex
 
 class TestRouterBackend(BackendTest):

MoinMoin/storage/_tests/test_indexing.py

 
 from MoinMoin._tests import update_item, nuke_item
 from MoinMoin._tests.wikiconfig import Config
-from MoinMoin.storage.backends.indexing import ItemIndex
+from MoinMoin.storage.middleware.indexing import ItemIndex
 from MoinMoin.config import NAME
 
 # Revisions for tests

MoinMoin/storage/_tests/test_serialization.py

 from flask import g as flaskg
 
 from MoinMoin._tests import become_trusted, update_item
-from MoinMoin.storage.serialization import Entry, create_value_object, serialize, unserialize
+from MoinMoin.storage.middleware.serialization import Entry, create_value_object, serialize, unserialize
 
 XML_DECL = '<?xml version="1.0" encoding="UTF-8"?>\n'
 

MoinMoin/storage/backends/__init__.py

 # Copyright: 2007 MoinMoin:HeinrichWendel
 # Copyright: 2008 MoinMoin:PawelPacana
 # Copyright: 2009 MoinMoin:ChristopherDenter
-# Copyright: 2009-2010 MoinMoin:ThomasWaldmann
+# Copyright: 2009-2011 MoinMoin:ThomasWaldmann
 # License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
 
 """
-    MoinMoin - Backends
-
-    This package contains code for the backends of the new storage layer.
+MoinMoin - Storage Backends
 """
 
 
 from flask import current_app as app
 from flask import g as flaskg
 
-from MoinMoin.storage.serialization import unserialize
 from MoinMoin.storage.error import NoSuchItemError, RevisionAlreadyExistsError
 from MoinMoin.error import ConfigurationError
-from MoinMoin.storage.backends import router, fs, fs2, fs19, memory
+from MoinMoin.storage.backends import fs, fs2, fs19, memory
+from MoinMoin.storage.middleware import router
+from MoinMoin.storage.middleware.serialization import unserialize
 
 CONTENT = 'content'
 USERPROFILES = 'userprofiles'

MoinMoin/storage/backends/acl.py

-# Copyright: 2003-2011 MoinMoin:ThomasWaldmann
-# Copyright: 2000-2004 Juergen Hermann <jh@web.de>
-# Copyright: 2003 Gustavo Niemeyer
-# Copyright: 2005 Oliver Graf
-# Copyright: 2007 Alexander Schremmer
-# Copyright: 2009 Christopher Denter
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-MoinMoin - ACL Middleware
-
-This backend is a middleware implementing access control using ACLs (access
-control lists) and is referred to as AMW (ACL MiddleWare) hereafter.
-It does not store any data, but uses a given backend for this.
-This middleware is injected between the user of the storage API and the actual
-backend used for storage. It is independent of the backend being used.
-Instances of the AMW are bound to individual request objects. The user whose
-permissions the AMW checks is hence obtained by a lookup on the request object.
-The backend itself (and the objects it returns) need to be wrapped in order
-to make sure that no object of the real backend is (directly or indirectly)
-made accessible to the user of the API.
-The real backend is still available as an attribute of the request and can
-be used by conversion utilities or for similar tasks (flaskg.unprotected_storage).
-Regular users of the storage API, such as the views that modify an item,
-*MUST NOT*, in any way, use the real backend unless the author knows *exactly*
-what he's doing (as this may introduce security bugs without the code actually
-being broken).
-
-The classes wrapped are:
-    * AclWrapperBackend (wraps MoinMoin.storage.Backend)
-    * AclWrapperItem (wraps MoinMoin.storage.Item)
-    * AclWrapperRevision (wraps MoinMoin.storage.Revision)
-
-When an attribute is 'wrapped' it means that, in this context, the user's
-permissions are checked prior to attribute usage. If the user may not perform
-the action he intended to perform, an AccessDeniedError is raised.
-Otherwise the action is performed on the respective attribute of the real backend.
-It is important to note here that the outcome of such an action may need to
-be wrapped itself, as is the case when items or revisions are returned.
-
-All wrapped classes must, of course, adhere to the normal storage API.
-"""
-
-
-from UserDict import DictMixin
-
-from flask import current_app as app
-from flask import g as flaskg
-
-from MoinMoin.security import AccessControlList
-
-from MoinMoin.storage import Item, NewRevision, StoredRevision
-from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError, AccessDeniedError
-
-from MoinMoin.config import ACL, ADMIN, READ, WRITE, CREATE, DESTROY
-
-
-class AclWrapperBackend(object):
-    """
-    The AMW is bound to a specific request. The actual backend is retrieved
-    from the config upon request initialization. Any method that is in some
-    way relevant to security needs to be wrapped in order to ensure the user
-    has the permissions necessary to perform the desired action.
-    Note: This may *not* inherit from MoinMoin.storage.Backend because that would
-    break our __getattr__ attribute 'redirects' (which are necessary because a backend
-    implementor may decide to use his own helper functions which the items and revisions
-    will still try to call).
-    """
-    def __init__(self, cfg, backend, hierarchic=False, before=u"", default=u"", after=u"", valid=None):
-        """
-        :type backend: Some object that implements the storage API.
-        :param backend: The unprotected backend that we want to protect.
-        :type hierarchic: bool
-        :param hierarchic: Indicate whether we want to process ACLs in hierarchic mode.
-        :type before: unicode
-        :param before: ACL to be applied before all the other ACLs.
-        :type default: unicode
-        :param default: If no ACL information is given on the item in question, use this default.
-        :type after: unicode
-        :param after: ACL to be applied after all the other ACLs.
-        :type valid: list of strings or None
-        :param valid: If a list is given, only strings in the list are treated as valid acl privilege descriptors.
-                      If None is given, the global wiki default is used.
-        """
-        self.cfg = cfg
-        self.backend = backend
-        self.hierarchic = hierarchic
-        self.valid = valid if valid is not None else cfg.acl_rights_contents
-        self.before = AccessControlList([before], default=default, valid=self.valid)
-        self.default = AccessControlList([default], default=default, valid=self.valid)
-        self.after = AccessControlList([after], default=default, valid=self.valid)
-
-    def __getattr__(self, attr):
-        # Attributes that this backend does not define itself are just looked
-        # up on the real backend.
-        return getattr(self.backend, attr)
-
-    def get_item(self, itemname):
-        """
-        @see: Backend.get_item.__doc__
-        """
-        if not self._may(itemname, READ):
-            raise AccessDeniedError(flaskg.user.name, READ, itemname)
-        real_item = self.backend.get_item(itemname)
-        # Wrap the item here as well.
-        wrapped_item = AclWrapperItem(real_item, self)
-        return wrapped_item
-
-    def has_item(self, itemname):
-        """
-        @see: Backend.has_item.__doc__
-        """
-        # We do not hide the sheer existence of items. When trying
-        # to create an item with the same name, the user would notice anyway.
-        return self.backend.has_item(itemname)
-
-    def create_item(self, itemname):
-        """
-        @see: Backend.create_item.__doc__
-        """
-        if not self._may(itemname, CREATE):
-            raise AccessDeniedError(flaskg.user.name, CREATE, itemname)
-        real_item = self.backend.create_item(itemname)
-        # Wrap item.
-        wrapped_item = AclWrapperItem(real_item, self)
-        return wrapped_item
-
-    def iter_items_noindex(self):
-        """
-        @see: Backend.iter_items_noindex.__doc__
-        """
-        for item in self.backend.iteritems():
-            if self._may(item.name, READ):
-                yield AclWrapperItem(item, self)
-
-    iteritems = iter_items_noindex
-
-    def _get_acl(self, itemname):
-        """
-        Get ACL strings from the last revision's metadata and return ACL object.
-        """
-        try:
-            item = self.backend.get_item(itemname)
-            # we always use the ACLs set on the latest revision:
-            current_rev = item.get_revision(-1)
-            acl = current_rev[ACL]
-            if not isinstance(acl, unicode):
-                raise TypeError("%s metadata has unsupported type: %r" % (ACL, acl))
-            acls = [acl, ]
-        except (NoSuchItemError, NoSuchRevisionError, KeyError):
-            # do not use default acl here
-            acls = []
-        default = self.default.default
-        return AccessControlList(tuple(acls), default=default, valid=self.valid)
-
-    def _may(self, itemname, right, username=None):
-        """ Check if username may have <right> access on item <itemname>.
-
-        For hierarchic=False we just check the item in question.
-
-        For hierarchic=True, we check each item in the hierarchy. We
-        start with the deepest item and recurse to the top of the tree.
-        If one of those permits, True is returned.
-        This is done *only* if there is *no ACL at all* (not even an empty one)
-        on the items we 'recurse over'.
-
-        For both configurations, we check `before` before the item/default
-        acl and `after` after the item/default acl, of course.
-
-        `default` is only used if there is no ACL on the item (and none on
-        any of the item's parents when using hierarchic.)
-
-        :param itemname: item to get permissions from
-        :param right: the right to check
-        :param username: username to use for permissions check (default is to
-                         use the username doing the current request)
-        :rtype: bool
-        :returns: True if you have permission or False
-        """
-        if username is None:
-            username = flaskg.user.name
-
-        allowed = self.before.may(username, right)
-        if allowed is not None:
-            return allowed
-
-        if self.hierarchic:
-            items = itemname.split('/') # create item hierarchy list
-            some_acl = False
-            for i in range(len(items), 0, -1):
-                # Create the next pagename in the hierarchy
-                # starting at the leaf, going to the root
-                name = '/'.join(items[:i])
-                acl = self._get_acl(name)
-                if acl.has_acl():
-                    some_acl = True
-                    allowed = acl.may(username, right)
-                    if allowed is not None:
-                        return allowed
-                    # If the item has an acl (even one that doesn't match) we *do not*
-                    # check the parents. We only check the parents if there's no acl on
-                    # the item at all.
-                    break
-            if not some_acl:
-                allowed = self.default.may(username, right)
-                if allowed is not None:
-                    return allowed
-        else:
-            acl = self._get_acl(itemname)
-            if acl.has_acl():
-                allowed = acl.may(username, right)
-                if allowed is not None:
-                    return allowed
-            else:
-                allowed = self.default.may(username, right)
-                if allowed is not None:
-                    return allowed
-
-        allowed = self.after.may(username, right)
-        if allowed is not None:
-            return allowed
-
-        return False
-
-
-class AclWrapperItem(Item):
-    """
-    Similar to AclWrapperBackend. Wrap a storage item and protect its
-    attributes by performing permission checks prior to performing the
-    action and raising AccessDeniedErrors if appropriate.
-    """
-    def __init__(self, item, aclbackend):
-        """
-        :type item: Object adhering to the storage item API.
-        :param item: The unprotected item we want to wrap.
-        :type aclbackend: Instance of AclWrapperBackend.
-        :param aclbackend: The AMW this item belongs to.
-        """
-        self._backend = aclbackend
-        self._item = item
-        self._may = aclbackend._may
-
-    @property
-    def name(self):
-        """
-        @see: Item.name.__doc__
-        """
-        return self._item.name
-
-    # needed by storage.serialization:
-    @property
-    def element_name(self):
-        return self._item.element_name
-    @property
-    def element_attrs(self):
-        return self._item.element_attrs
-
-    def require_privilege(*privileges):
-        """
-        This decorator is used in order to avoid code duplication
-        when checking a user's permissions. It allows providing arguments
-        that represent the permissions to check, such as READ and WRITE
-        (see module level constants; don't pass strings, please).
-
-        :type privileges: List of strings.
-        :param privileges: Represent the privileges to check.
-        """
-        def wrap(f):
-            def wrapped_f(self, *args, **kwargs):
-                for privilege in privileges:
-                    if not self._may(self.name, privilege):
-                        username = flaskg.user.name
-                        raise AccessDeniedError(username, privilege, self.name)
-                return f(self, *args, **kwargs)
-            return wrapped_f
-        return wrap
-
-
-    @require_privilege(WRITE)
-    def __setitem__(self, key, value):
-        """
-        @see: Item.__setitem__.__doc__
-        """
-        return self._item.__setitem__(key, value)
-
-    @require_privilege(WRITE)
-    def __delitem__(self, key):
-        """
-        @see: Item.__delitem__.__doc__
-        """
-        return self._item.__delitem__(key)
-
-    @require_privilege(READ)
-    def __getitem__(self, key):
-        """
-        @see: Item.__getitem__.__doc__
-        """
-        return self._item.__getitem__(key)
-
-    @require_privilege(READ)
-    def keys(self):
-        """
-        @see: Item.keys.__doc__
-        """
-        return self._item.keys()
-
-    @require_privilege(WRITE)
-    def change_metadata(self):
-        """
-        @see: Item.change_metadata.__doc__
-        """
-        return self._item.change_metadata()
-
-    @require_privilege(WRITE)
-    def publish_metadata(self):
-        """
-        @see: Item.publish_metadata.__doc__
-        """
-        return self._item.publish_metadata()
-
-    @require_privilege(READ)
-    def get_revision(self, revno):
-        """
-        @see: Item.get_revision.__doc__
-        """
-        return AclWrapperRevision(self._item.get_revision(revno), self)
-
-    @require_privilege(READ)
-    def list_revisions(self):
-        """
-        @see: Item.list_revisions.__doc__
-        """
-        return self._item.list_revisions()
-
-    @require_privilege(READ, WRITE)
-    def rename(self, newname):
-        """
-        Rename item from name (src) to newname (dst).
-        Note that there is no special rename privilege. By taking other
-        privileges into account, we implicitly perform the permission check here.
-        This checks R/W at src and W/C at dst. This combination was chosen for
-        the following reasons:
-         * It is the most intuitive of the possible solutions.
-         * If we'd only check for R at src, everybody would be able to rename even
-           ImmutablePages if there is a writable/creatable name somewhere else
-           (e.g., Trash/).
-         * 'delete' aka 'rename to trashbin' can be controlled with 'create':
-           Just don't provide create for the trash namespace.
-         * Someone without create in the target namespace cannot rename.
-
-        @see: Item.rename.__doc__
-        """
-        # Special case since we need to check newname as well. Easier to special-case than
-        # adjusting the decorator.
-        username = flaskg.user.name
-        if not self._may(newname, CREATE):
-            raise AccessDeniedError(username, CREATE, newname)
-        if not self._may(newname, WRITE):
-            raise AccessDeniedError(username, WRITE, newname)
-        return self._item.rename(newname)
-
-    @require_privilege(WRITE)
-    def commit(self):
-        """
-        @see: Item.commit.__doc__
-        """
-        return self._item.commit()
-
-    # This does not require a privilege as the item must have been obtained
-    # by either get_item or create_item already, which already check permissions.
-    def rollback(self):
-        """
-        @see: Item.rollback.__doc__
-        """
-        return self._item.rollback()
-
-    @require_privilege(DESTROY)
-    def destroy(self):
-        """
-        USE WITH GREAT CARE!
-
-        @see: Item.destroy.__doc__
-        """
-        return self._item.destroy()
-
-    @require_privilege(WRITE)
-    def create_revision(self, revno):
-        """
-        @see: Item.create_revision.__doc__
-        """
-        wrapped_revision = AclWrapperRevision(self._item.create_revision(revno), self)
-        return wrapped_revision
-
-
-class AclWrapperRevision(object, DictMixin):
-    """
-    Wrapper for revision classes. We need to wrap NewRevisions because they allow altering data.
-    We need to wrap StoredRevisions since they offer a destroy() method and access to their item.
-    The caller should know what kind of revision he gets. Hence, we just implement the methods of
-    both, StoredRevision and NewRevision. If a method is invoked that is not defined on the
-    kind of revision we wrap, we will see an AttributeError one level deeper anyway, so this is ok.
-    """
-    def __init__(self, revision, item):
-        """
-        :type revision: Object adhering to the storage revision API.
-        :param revision: The revision we want to protect.
-        :type item: Object adhering to the storage item API.
-        :param item: The item this revision belongs to
-        """
-        self._revision = revision
-        self._item = item
-        self._may = item._may
-
-    def __getattr__(self, attr):
-        # Pass through any call that is not subject to ACL protection (e.g. serialize)
-        return getattr(self._revision, attr)
-
-    @property
-    def item(self):
-        """
-        @see: Revision.item.__doc__
-        """
-        return self._item
-
-    @property
-    def timestamp(self):
-        """This property accesses the creation timestamp of the revision"""
-        return self._revision.timestamp
-
-    def __setitem__(self, key, value):
-        """
-        In order to change an ACL on an item you must have the ADMIN privilege.
-        We must allow the (unchanged) preceding revision's ACL being stored
-        into the new revision, though.
-
-        TODO: the ACL specialcasing done here (requiring admin privilege for
-              changing ACLs) is only one case of a more generic problem:
-              Access (read,write,change) to some metadata must be checked.
-              ACL - changing needs ADMIN privilege
-              userid, ip, hostname, etc. - writing them should be from system only
-              content hash - writing it should be from system only
-              For the metadata editing offered to the wiki user on the UI,
-              we should only offer metadata for which the wiki user has change
-              permissions. On save, we have to check the permissions.
-              Idea: have metadata key prefixes, classifying metadata entries:
-              security.* - security related
-                      .acl - content acl
-                      .insecure - allow insecure rendering (e.g. raw html)
-              system.* - internal stuff, only system may process this
-              user.* - user defined entries
-              (... needs more thinking ...)
-
-        @see: NewRevision.__setitem__.__doc__
-        """
-        if key == ACL:
-            try:
-                # This rev is not yet committed
-                last_rev = self._item.get_revision(-1)
-                last_acl = last_rev[ACL]
-            except (NoSuchRevisionError, KeyError):
-                last_acl = u''
-
-            acl_changed = value != last_acl
-
-            if acl_changed and not self._may(self._item.name, ADMIN):
-                username = flaskg.user.name
-                raise AccessDeniedError(username, ADMIN, self._item.name)
-        return self._revision.__setitem__(key, value)
-
-    def __getitem__(self, key):
-        """
-        @see: NewRevision.__getitem__.__doc__
-        """
-        return self._revision[key]
-
-    def __delitem__(self, key):
-        """
-        @see: NewRevision.__delitem__.__doc__
-        """
-        del self._revision[key]
-
-    def read(self, chunksize=-1):
-        """
-        @see: Backend._read_revision_data.__doc__
-        """
-        return self._revision.read(chunksize)
-
-    def seek(self, position, mode=0):
-        """
-        @see: StringIO.StringIO().seek.__doc__
-        """
-        return self._revision.seek(position, mode)
-
-    def destroy(self):
-        """
-        @see: Backend._destroy_revision.__doc__
-        """
-        if not self._may(self._item.name, DESTROY):
-            username = flaskg.user.name
-            raise AccessDeniedError(username, DESTROY + " revisions of", self._item.name)
-        return self._revision.destroy()
-
-    def write(self, data):
-        """
-        @see: Backend._write_revision_data.__doc__
-        """
-        return self._revision.write(data)
-

MoinMoin/storage/backends/indexing.py

-# Copyright: 2010-2011 MoinMoin:ThomasWaldmann
-# Copyright: 2011 MoinMoin:MichaelMayorov
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - Indexing Mixin Classes
-
-    Other backends mix in the Indexing*Mixin classes into their Backend,
-    Item, Revision classes to support flexible metadata indexing and querying
-    for wiki items / revisions
-
-    Wiki items and revisions of same item are identified by same UUID.
-    The wiki item name is contained in the item revision's metadata.
-    If you rename an item, this is done by creating a new revision with a different
-    (new) name in its revision metadata.
-"""
-
-
-import os
-import time, datetime
-
-from uuid import uuid4
-make_uuid = lambda: unicode(uuid4().hex)
-
-from flask import current_app as app
-from flask import g as flaskg
-from flask import request
-
-from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError, \
-                                   AccessDeniedError
-from MoinMoin.config import ACL, CONTENTTYPE, UUID, NAME, NAME_OLD, MTIME, TAGS, \
-                            ADDRESS, HOSTNAME, USERID, ITEMLINKS, ITEMTRANSCLUSIONS, \
-                            REV_NO
-from MoinMoin.search.indexing import backend_to_index
-from MoinMoin.converter import default_registry
-from MoinMoin.util.iri import Iri
-from MoinMoin.util.mime import Type, type_moin_document
-from MoinMoin.util.tree import moin_page
-from MoinMoin import wikiutil
-
-from MoinMoin import log
-logging = log.getLogger(__name__)
-
-
-def convert_to_indexable(rev, new_rev=False):
-    """
-    convert a revision to an indexable document
-
-    :param rev: item revision - please make sure that the content file is
-                ready to read all indexable content from it. if you have just
-                written that content or already read from it, you need to call
-                rev.seek(0) before calling convert_to_indexable(rev).
-    """
-    try:
-        # TODO use different converter mode?
-        # Maybe we want some special mode for the input converters (so they emit
-        # different output than for normal rendering), esp. for the non-markup
-        # content types (images, etc.).
-        input_contenttype = rev[CONTENTTYPE]
-        output_contenttype = 'text/plain'
-        type_input_contenttype = Type(input_contenttype)
-        type_output_contenttype = Type(output_contenttype)
-        reg = default_registry
-        # first try a direct conversion (this could be useful for extraction
-        # of (meta)data from binary types, like from images or audio):
-        conv = reg.get(type_input_contenttype, type_output_contenttype)
-        if conv:
-            doc = conv(rev, input_contenttype)
-            return doc
-        # otherwise try via DOM as intermediate format (this is useful if
-        # input type is markup, to get rid of the markup):
-        input_conv = reg.get(type_input_contenttype, type_moin_document)
-        refs_conv = reg.get(type_moin_document, type_moin_document, items='refs')
-        output_conv = reg.get(type_moin_document, type_output_contenttype)
-        if input_conv and output_conv:
-            doc = input_conv(rev, input_contenttype)
-            # We do not convert smileys, includes, macros, links, because
-            # it does not improve search results or even makes results worse.
-            # We do run the referenced converter, though, to extract links and
-            # transclusions.
-            if new_rev:
-                # we only can modify new, uncommitted revisions, not stored revs
-                i = Iri(scheme='wiki', authority='', path='/' + rev[NAME])
-                doc.set(moin_page.page_href, unicode(i))
-                refs_conv(doc)
-                # side effect: we update some metadata:
-                rev[ITEMLINKS] = refs_conv.get_links()
-                rev[ITEMTRANSCLUSIONS] = refs_conv.get_transclusions()
-            doc = output_conv(doc)
-            return doc
-        # no way
-        raise TypeError("No converter for %s --> %s" % (input_contenttype, output_contenttype))
-    except Exception as e: # catch all exceptions, we don't want to break an indexing run
-        logging.exception("Exception happened in conversion of item %r rev %d contenttype %s:" % (rev[NAME], rev.revno, rev[CONTENTTYPE]))
-        doc = u'ERROR [%s]' % str(e)
-        return doc
-
-
-class IndexingBackendMixin(object):
-    """
-    Backend indexing support / functionality using the index.
-    """
-    def __init__(self, *args, **kw):
-        cfg = kw.pop('cfg')
-        super(IndexingBackendMixin, self).__init__(*args, **kw)
-        self._index = ItemIndex(cfg)
-
-    def close(self):
-        self._index.close()
-        super(IndexingBackendMixin, self).close()
-
-    def create_item(self, itemname):
-        """
-        intercept new item creation and make sure there is NAME / UUID in the item
-        """
-        item = super(IndexingBackendMixin, self).create_item(itemname)
-        item.change_metadata()
-        if NAME not in item:
-            item[NAME] = itemname
-        if UUID not in item:
-            item[UUID] = make_uuid()
-        item.publish_metadata()
-        return item
-
-    def query_parser(self, default_fields, all_revs=False):
-        return self._index.query_parser(default_fields, all_revs=all_revs)
-
-    def searcher(self, all_revs=False):
-        return self._index.searcher(all_revs=all_revs)
-
-    def search(self, q, all_revs=False, **kw):
-        return self._index.search(q, all_revs=all_revs, **kw)
-
-    def search_page(self, q, all_revs=False, pagenum=1, pagelen=10, **kw):
-        return self._index.search_page(q, all_revs=all_revs, pagenum=pagenum, pagelen=pagelen, **kw)
-
-    def documents(self, all_revs=False, **kw):
-        return self._index.documents(all_revs=all_revs, **kw)
-
-
-class IndexingItemMixin(object):
-    """
-    Item indexing support
-    """
-    def __init__(self, backend, *args, **kw):
-        super(IndexingItemMixin, self).__init__(backend, *args, **kw)
-        self._index = backend._index
-        self.__unindexed_revision = None
-
-    def create_revision(self, revno):
-        self.__unindexed_revision = super(IndexingItemMixin, self).create_revision(revno)
-        return self.__unindexed_revision
-
-    def commit(self):
-        self.__unindexed_revision.update_index()
-        self.__unindexed_revision = None
-        return super(IndexingItemMixin, self).commit()
-
-    def rollback(self):
-        self.__unindexed_revision = None
-        return super(IndexingItemMixin, self).rollback()
-
-    def publish_metadata(self):
-        self.update_index()
-        return super(IndexingItemMixin, self).publish_metadata()
-
-    def destroy(self):
-        self.remove_index()
-        return super(IndexingItemMixin, self).destroy()
-
-    def update_index(self):
-        """
-        update the index with metadata of this item
-
-        this is automatically called by item.publish_metadata() and can be used by an indexer script also.
-        """
-        logging.debug("item %r update index:" % (self.name, ))
-        for k, v in self.items():
-            logging.debug(" * item meta %r: %r" % (k, v))
-        self._index.update_item(metas=self)
-
-    def remove_index(self):
-        """
-        update the index, removing everything related to this item
-        """
-        uuid = self[UUID]
-        logging.debug("item %r %r remove index!" % (self.name, uuid))
-        self._index.remove_item(uuid)
-
-
-class IndexingRevisionMixin(object):
-    """
-    Revision indexing support
-    """
-    def __init__(self, item, *args, **kw):
-        super(IndexingRevisionMixin, self).__init__(item, *args, **kw)
-        self._index = item._index
-
-    def destroy(self):
-        self.remove_index()
-        return super(IndexingRevisionMixin, self).destroy()
-
-    def update_index(self):
-        """
-        update the index with metadata of this revision
-
-        this is automatically called by item.commit() and can be used by an indexer script also.
-        """
-        name = self.item.name
-        uuid = self.item[UUID]
-        revno = self.revno
-        logging.debug("Processing: name %s revno %s" % (name, revno))
-        if MTIME not in self:
-            self[MTIME] = int(time.time())
-        if NAME not in self:
-            self[NAME] = name
-        if UUID not in self:
-            self[UUID] = uuid # do we want the item's uuid in the rev's metadata?
-        if CONTENTTYPE not in self:
-            self[CONTENTTYPE] = u'application/octet-stream'
-
-        if app.cfg.log_remote_addr:
-            remote_addr = request.remote_addr
-            if remote_addr:
-                self[ADDRESS] = unicode(remote_addr)
-                hostname = wikiutil.get_hostname(remote_addr)
-                if hostname:
-                    self[HOSTNAME] = hostname
-        try:
-            if flaskg.user.valid:
-                self[USERID] = unicode(flaskg.user.uuid)
-        except:
-            # when loading xml via script, we have no flaskg.user
-            pass
-
-        self.seek(0) # for a new revision, file pointer points to EOF, rewind first
-        rev_content = convert_to_indexable(self, new_rev=True)
-
-        logging.debug("item %r revno %d update index:" % (name, revno))
-        for k, v in self.items():
-            logging.debug(" * rev meta %r: %r" % (k, v))
-        logging.debug("Indexable content: %r" % (rev_content[:250], ))
-        self._index.add_rev(uuid, revno, self, rev_content)
-
-    def remove_index(self):
-        """
-        update the index, removing everything related to this revision
-        """
-        name = self.item.name
-        uuid = self.item[UUID]
-        revno = self.revno
-        metas = self
-        logging.debug("item %r revno %d remove index!" % (name, revno))
-        self._index.remove_rev(metas[UUID], revno)
-
-    # TODO maybe use this class later for data indexing also,
-    # TODO by intercepting write() to index data written to a revision
-
-from whoosh.writing import AsyncWriter
-from whoosh.qparser import QueryParser, MultifieldParser
-
-from MoinMoin.search.indexing import WhooshIndex
-
-class ItemIndex(object):
-    """
-    Index for Items/Revisions
-    """
-    def __init__(self, cfg, force_create=False):
-        self.wikiname = cfg.interwikiname
-        self.index_object = WhooshIndex(force_create=force_create, cfg=cfg)
-
-    def close(self):
-        self.index_object.all_revisions_index.close()
-        self.index_object.latest_revisions_index.close()
-
-    def remove_index(self):
-        self.index_object.remove_index()
-
-    def update_item(self, metas):
-        """
-        update item (not revision!) metadata
-        """
-        # XXX we do not have an index for item metadata yet!
-
-    def remove_item(self, uuid):
-        """
-        remove all data related to this item and all its revisions from the index
-        """
-        with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
-            doc_number = latest_revs_searcher.document_number(uuid=uuid,
-                                                              wikiname=self.wikiname
-                                                             )
-        if doc_number is not None:
-            with AsyncWriter(self.index_object.latest_revisions_index) as async_writer:
-                async_writer.delete_document(doc_number)
-
-        with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
-            doc_numbers = list(all_revs_searcher.document_numbers(uuid=uuid,
-                                                                  wikiname=self.wikiname
-                                                                 ))
-        if doc_numbers:
-            with AsyncWriter(self.index_object.all_revisions_index) as async_writer:
-                for doc_number in doc_numbers:
-                    async_writer.delete_document(doc_number)
-
-    def add_rev(self, uuid, revno, rev, rev_content):
-        """
-        add a new revision <revno> for item <uuid> with metadata <rev> and content <rev_content>
-        """
-        with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
-            all_found_document = all_revs_searcher.document(uuid=rev[UUID],
-                                                            rev_no=revno,
-                                                            wikiname=self.wikiname
-                                                           )
-        with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
-            latest_found_document = latest_revs_searcher.document(uuid=rev[UUID],
-                                                                  wikiname=self.wikiname
-                                                                 )
-        if not all_found_document:
-            schema = self.index_object.all_revisions_index.schema
-            with AsyncWriter(self.index_object.all_revisions_index) as async_writer:
-                converted_rev = backend_to_index(rev, revno, schema, rev_content, self.wikiname)
-                logging.debug("All revisions: adding %s %s", converted_rev[NAME], converted_rev[REV_NO])
-                async_writer.add_document(**converted_rev)
-        if not latest_found_document or int(revno) > latest_found_document[REV_NO]:
-            schema = self.index_object.latest_revisions_index.schema
-            with AsyncWriter(self.index_object.latest_revisions_index) as async_writer:
-                converted_rev = backend_to_index(rev, revno, schema, rev_content, self.wikiname)
-                logging.debug("Latest revisions: updating %s %s", converted_rev[NAME], converted_rev[REV_NO])
-                async_writer.update_document(**converted_rev)
-
-    def remove_rev(self, uuid, revno):
-        """
-        remove a revision <revno> of item <uuid>
-        """
-        with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
-            latest_doc_number = latest_revs_searcher.document_number(uuid=uuid,
-                                                                     rev_no=revno,
-                                                                     wikiname=self.wikiname
-                                                                    )
-        if latest_doc_number is not None:
-            with AsyncWriter(self.index_object.latest_revisions_index) as async_writer:
-                logging.debug("Latest revisions: removing %d", latest_doc_number)
-                async_writer.delete_document(latest_doc_number)
-
-        with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
-            doc_number = all_revs_searcher.document_number(uuid=uuid,
-                                                           rev_no=revno,
-                                                           wikiname=self.wikiname
-                                                          )
-        if doc_number is not None:
-            with AsyncWriter(self.index_object.all_revisions_index) as async_writer:
-                logging.debug("All revisions: removing %d", doc_number)
-                async_writer.delete_document(doc_number)
-
-    def query_parser(self, default_fields, all_revs=False):
-        if all_revs:
-            schema = self.index_object.all_revisions_schema
-        else:
-            schema = self.index_object.latest_revisions_schema
-        if len(default_fields) > 1:
-            qp = MultifieldParser(default_fields, schema=schema)
-        elif len(default_fields) == 1:
-            qp = QueryParser(default_fields[0], schema=schema)
-        else:
-            raise ValueError("default_fields list must at least contain one field name")
-        return qp
-
-    def searcher(self, all_revs=False):
-        """
-        Get a searcher for the right index. Always use this with "with":
-
-        with storage.searcher(all_revs) as searcher:
-            # your code
-
-        If you do not need the searcher itself or the Result object, but rather
-        the found documents, better use search() or search_page(), see below.
-        """
-        if all_revs:
-            ix = self.index_object.all_revisions_index
-        else:
-            ix = self.index_object.latest_revisions_index
-        return ix.searcher()
-
-    def search(self, q, all_revs=False, **kw):
-        with self.searcher(all_revs) as searcher:
-            # Note: callers must consume everything we yield, so the for loop
-            # ends and the "with" is left to close the index files.
-            for hit in searcher.search(q, **kw):
-                yield hit.fields()
-
-    def search_page(self, q, all_revs=False, pagenum=1, pagelen=10, **kw):
-        with self.searcher(all_revs) as searcher:
-            # Note: callers must consume everything we yield, so the for loop
-            # ends and the "with" is left to close the index files.
-            for hit in searcher.search_page(q, pagenum, pagelen=pagelen, **kw):
-                yield hit.fields()
-
-    def documents(self, all_revs=False, **kw):
-        if all_revs:
-            ix = self.index_object.all_revisions_index
-        else:
-            ix = self.index_object.latest_revisions_index
-        with ix.searcher() as searcher:
-            # Note: callers must consume everything we yield, so the for loop
-            # ends and the "with" is left to close the index files.
-            for doc in searcher.documents(**kw):
-                yield doc
-

MoinMoin/storage/backends/router.py

-# Copyright: 2008-2010 MoinMoin:ThomasWaldmann
-# Copyright: 2009 MoinMoin:ChristopherDenter
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - routing backend
-
-    You can use this backend to route requests to different backends
-    depending on the item name. I.e., you can specify mountpoints and
-    map them to different backends. E.g. you could route all your items
-    to an FSBackend and only items below hg/<youritemnamehere> go into
-    a MercurialBackend and similarly tmp/<youritemnamehere> is for
-    temporary items in a MemoryBackend() that are discarded when the
-    process terminates.
-"""
-
-
-import re
-
-from MoinMoin import log
-logging = log.getLogger(__name__)
-
-from MoinMoin.error import ConfigurationError
-from MoinMoin.storage.error import AccessDeniedError
-
-from MoinMoin.storage import Backend as BackendBase
-from MoinMoin.storage import Item as ItemBase
-from MoinMoin.storage import NewRevision as NewRevisionBase
-from MoinMoin.storage import StoredRevision as StoredRevisionBase
-
-from MoinMoin.storage.backends.indexing import IndexingBackendMixin, IndexingItemMixin, IndexingRevisionMixin
-
-from MoinMoin.storage.serialization import SerializableRevisionMixin, SerializableItemMixin, SerializableBackendMixin
-
-
-class BareRouterBackend(BackendBase):
-    """
-    Router Backend - routes requests to different backends depending
-    on the item name.
-
-    For method docstrings, please see the "Backend" base class.
-    """
-    def __init__(self, mapping, *args, **kw):
-        """
-        Initialize router backend.
-
-        The mapping given must satisfy the following criteria:
-            * Order matters.
-            * Mountpoints are just item names, including the special '' (empty)
-              root item name. A trailing '/' of a mountpoint will be ignored.
-            * There *must* be a backend with mountpoint '' (or '/') at the very
-              end of the mapping. That backend is then used as root, which means
-              that all items that don't lie in the namespace of any other
-              backend are stored there.
-
-        :type mapping: list of tuples of mountpoint -> backend mappings
-        :param mapping: [(mountpoint, backend), ...]
-        """
-        super(BareRouterBackend, self).__init__(*args, **kw)
-        self.mapping = [(mountpoint.rstrip('/'), backend) for mountpoint, backend in mapping]
-
-    def close(self):
-        super(BareRouterBackend, self).close()
-        for mountpoint, backend in self.mapping:
-            backend.close()
-        self.mapping = []
-
-    def _get_backend(self, itemname):
-        """
-        For a given fully-qualified itemname (i.e. something like Company/Bosses/Mr_Joe)
-        find the backend it belongs to (given by this instance's mapping), the local
-        itemname inside that backend and the mountpoint of the backend.
-
-        Note: Internally (i.e. in all Router* classes) we always use the normalized
-              item name for consistency reasons.
-
-        :type itemname: str
-        :param itemname: fully-qualified itemname
-        :returns: tuple of (backend, itemname, mountpoint)
-        """
-        if not isinstance(itemname, (str, unicode)):
-            raise TypeError("Item names must have string type, not %s" % (type(itemname)))
-
-        for mountpoint, backend in self.mapping:
-            if itemname == mountpoint or itemname.startswith(mountpoint and mountpoint + '/' or ''):
-                lstrip = mountpoint and len(mountpoint)+1 or 0
-                return backend, itemname[lstrip:], mountpoint
-        raise AssertionError("No backend found for %r. Available backends: %r" % (itemname, self.mapping))
-
-    def get_backend(self, namespace):
-        """
-        Given a namespace, return the backend mounted there.
-
-        :type namespace: basestring
-        :param namespace: The namespace whose backend we look up.
-        """
-        return self._get_backend(namespace)[0]
-
-    def iter_items_noindex(self):
-        """
-        Iterate over all items.
-
-        Must not use the index as this method is used to *build* the index.
-
-        @see: Backend.iter_items_noindex.__doc__
-        """
-        for mountpoint, backend in self.mapping:
-            for item in backend.iter_items_noindex():
-                yield RouterItem(self, item.name, item, mountpoint)
-
-    # TODO: implement a faster iteritems using the index
-    iteritems = iter_items_noindex
-
-    def has_item(self, itemname):
-        """
-        @see: Backend.has_item.__doc__
-        """
-        # While we could use the inherited, generic implementation
-        # it is generally advised to override this method.
-        # Thus, we pass the call down.
-        logging.debug("has_item: %r" % itemname)
-        backend, itemname, mountpoint = self._get_backend(itemname)
-        return backend.has_item(itemname)
-
-    def get_item(self, itemname):
-        """
-        @see: Backend.get_item.__doc__
-        """
-        logging.debug("get_item: %r" % itemname)
-        backend, itemname, mountpoint = self._get_backend(itemname)
-        return RouterItem(self, itemname, backend.get_item(itemname), mountpoint)
-
-    def create_item(self, itemname):
-        """
-        @see: Backend.create_item.__doc__
-        """
-        logging.debug("create_item: %r" % itemname)
-        backend, itemname, mountpoint = self._get_backend(itemname)
-        return RouterItem(self, itemname, backend.create_item(itemname), mountpoint)
-
-
-class RouterBackend(SerializableBackendMixin, IndexingBackendMixin, BareRouterBackend):
-    pass
-
-
-class BareRouterItem(ItemBase):
-    """
-    Router Item - Wraps 'real' storage items to make them aware of their full name.
-
-    Items stored in the backends managed by the RouterBackend do not know their full
-    name since the backend they belong to is looked up from a list for a given
-    mountpoint and only the itemname itself (without leading mountpoint) is given to
-    the specific backend.
-    This is done so as to allow mounting a given backend at a different mountpoint.
-    The problem with that is, of course, that items do not know their full name if they
-    are retrieved via the specific backends directly. Thus, it is necessary to wrap the
-    items returned from those specific backends in an instance of this RouterItem class.
-    This makes sure that an item in a specific backend only knows its local name (as it
-    should be; this allows mounting at a different place without renaming all items) but
-    items that the RouterBackend creates or gets know their fully qualified name.
-
-    In order to achieve this, we must mimic the Item interface here. In addition to that,
-    a backend implementor may have decided to provide additional methods on his Item class.
-    We can not know that here, ahead of time. We must redirect any attribute lookup to the
-    encapsulated item, hence, and only intercept calls that are related to the item name.
-    To do this, we store the wrapped item and redirect all calls via this class's __getattr__
-    method. For this to work, RouterItem *must not* inherit from Item, because otherwise
-    the attribute would be looked up on the abstract base class, which certainly is not what
-    we want.
-    Furthermore there's a problem with __getattr__ and new-style classes' special methods
-    which can be looked up here:
-    http://docs.python.org/reference/datamodel.html#special-method-lookup-for-new-style-classes
-    """
-    def __init__(self, backend, item_name, item, mountpoint, *args, **kw):
-        """
-        :type backend: Object adhering to the storage API.
-        :param backend: The backend this item belongs to.
-        :type item_name: basestring.
-        :param item_name: The name of the item (not the FQIN).
-        :type item: Object adhering to the storage item API.
-        :param item: The item we want to wrap.
-        :type mountpoint: basestring.
-        :param mountpoint: The mountpoint where this item is located.
-        """
-        self._get_backend = backend._get_backend
-        self._itemname = item_name
-        self._item = item
-        self._mountpoint = mountpoint
-        super(BareRouterItem, self).__init__(backend, item_name, *args, **kw)
-
-    def __getattr__(self, attr):
-        """
-        Redirect all attribute lookups to the item that is proxied by this instance.
-
-        Note: __getattr__ only deals with stuff that is not found in instance,
-              this class and base classes, so be careful!
-        """
-        return getattr(self._item, attr)
-
-    @property
-    def name(self):
-        """
-        :rtype: str
-        :returns: the item's fully-qualified name
-        """
-        mountpoint = self._mountpoint
-        if mountpoint:
-            mountpoint += '/'
-        return mountpoint + self._itemname
-
-    def __setitem__(self, key, value):
-        """
-        @see: Item.__setitem__.__doc__
-        """
-        return self._item.__setitem__(key, value)
-
-    def __delitem__(self, key):
-        """
-        @see: Item.__delitem__.__doc__
-        """
-        return self._item.__delitem__(key)
-
-    def __getitem__(self, key):
-        """
-        @see: Item.__getitem__.__doc__
-        """
-        return self._item.__getitem__(key)
-
-    def keys(self):
-        return self._item.keys()
-
-    def change_metadata(self):
-        return self._item.change_metadata()
-
-    def publish_metadata(self):
-        return self._item.publish_metadata()
-
-    def rollback(self):
-        return self._item.rollback()
-
-    def commit(self):
-        return self._item.commit()
-
-    def rename(self, newname):
-        """
-        For intra-backend renames, this is the same as the normal Item.rename
-        method.
-        For inter-backend renames, this *moves* the complete item over to the
-        new backend, possibly with a new item name.
-        In order to avoid content duplication, the old item is destroyed after
-        having been copied (in inter-backend scenarios only, of course).
-
-        @see: Item.rename.__doc__
-        """
-        old_name = self._item.name
-        backend, itemname, mountpoint = self._get_backend(newname)
-        if mountpoint != self._mountpoint:
-            # Mountpoint changed! That means we have to copy the item over.
-            converts, skips, fails = backend.copy_item(self._item, verbose=False, name=itemname)
-            assert len(converts) == 1
-
-            new_item = backend.get_item(itemname)
-            old_item = self._item
-            self._item = new_item
-            self._mountpoint = mountpoint
-            self._itemname = itemname
-            # We destroy the old item in order not to duplicate data.
-            # It may be the case that the item we want to destroy is ACL protected. In that case,
-            # the destroy() below doesn't irreversibly kill the item because at this point it is already
-            # guaranteed that it lives on at another place and we do not require 'destroy' hence.
-            try:
-                # Perhaps we don't deal with acl protected items anyway.
-                old_item.destroy()
-            except AccessDeniedError:
-                # OK, we're indeed routing to an ACL protected backend. Use unprotected item.
-                old_item._item.destroy()
-
-        else:
-            # Mountpoint didn't change
-            self._item.rename(itemname)
-            self._itemname = itemname
-
-    def list_revisions(self):
-        return self._item.list_revisions()
-
-    def create_revision(self, revno):
-        """
-        In order to make item name lookups via revision.item.name work, we need
-        to wrap the revision here.
-
-        @see: Item.create_revision.__doc__
-        """
-        rev = self._item.create_revision(revno)
-        return NewRouterRevision(self, revno, rev)
-
-    def get_revision(self, revno):
-        """
-        In order to make item name lookups via revision.item.name work, we need
-        to wrap the revision here.
-
-        @see: Item.get_revision.__doc__
-        """
-        rev = self._item.get_revision(revno)
-        return StoredRouterRevision(self, revno, rev)
-
-    def destroy(self):
-        """
-        ATTENTION!
-        This method performs an irreversible operation and deletes potentially important
-        data. Use with great care.
-
-        @see: Item.destroy.__doc__
-        """
-        return self._item.destroy()
-
-
-class RouterItem(SerializableItemMixin, IndexingItemMixin, BareRouterItem):
-    pass
-
-
-class BareNewRouterRevision(NewRevisionBase):
-    """
-    """
-    def __init__(self, item, revno, revision, *args, **kw):
-        self._item = item
-        self._revision = revision
-        super(BareNewRouterRevision, self).__init__(item, revno, *args, **kw)
-
-    def __getattr__(self, attr):
-        """
-        Redirect all attribute lookups to the revision that is proxied by this instance.
-
-        Note: __getattr__ only deals with stuff that is not found in instance,
-              this class and base classes, so be careful!
-        """
-        return getattr(self._revision, attr)
-
-    @property
-    def item(self):
-        """
-        Here we have to return the RouterItem, which in turn wraps the real item
-        and provides it with its full name that we need for the rev.item.name lookup.
-
-        @see: Revision.item.__doc__
-        """
-        assert isinstance(self._item, RouterItem)
-        return self._item
-
-    @property
-    def revno(self):
-        return self._revision.revno
-
-    @property
-    def timestamp(self):
-        return self._revision.timestamp
-
-    def __setitem__(self, key, value):
-        """
-        We only need to redirect this manually here because python doesn't do that
-        in combination with __getattr__. See BareRouterItem.__doc__ for an explanation.
-
-        As this class wraps generic Revisions, this may very well result in an exception
-        being raised if the wrapped revision is a StoredRevision.
-        """
-        return self._revision.__setitem__(key, value)
-
-    def __delitem__(self, key):
-        """
-        @see: RouterRevision.__setitem__.__doc__
-        """
-        return self._revision.__delitem__(key)
-
-    def __getitem__(self, key):
-        """
-        @see: RouterRevision.__setitem__.__doc__
-        """
-        return self._revision.__getitem__(key)
-
-    def keys(self):
-        return self._revision.keys()
-
-    def read(self, chunksize=-1):
-        return self._revision.read(chunksize)
-
-    def seek(self, position, mode=0):
-        return self._revision.seek(position, mode)
-
-    def tell(self):
-        return self._revision.tell()
-
-    def write(self, data):
-        self._revision.write(data)
-
-    def destroy(self):
-        return self._revision.destroy()
-
-
-class NewRouterRevision(SerializableRevisionMixin, IndexingRevisionMixin, BareNewRouterRevision):
-    pass
-
-class BareStoredRouterRevision(StoredRevisionBase):
-    """
-    """
-    def __init__(self, item, revno, revision, *args, **kw):
-        self._item = item
-        self._revision = revision
-        super(BareStoredRouterRevision, self).__init__(item, revno, *args, **kw)
-
-    def __getattr__(self, attr):
-        """
-        Redirect all attribute lookups to the revision that is proxied by this instance.
-
-        Note: __getattr__ only deals with stuff that is not found in instance,
-              this class and base classes, so be careful!
-        """
-        return getattr(self._revision, attr)
-
-    @property
-    def item(self):
-        """
-        Here we have to return the RouterItem, which in turn wraps the real item
-        and provides it with its full name that we need for the rev.item.name lookup.
-
-        @see: Revision.item.__doc__
-        """
-        assert isinstance(self._item, RouterItem)
-        return self._item
-
-    @property
-    def revno(self):
-        return self._revision.revno
-
-    @property
-    def timestamp(self):
-        return self._revision.timestamp
-
-    def __getitem__(self, key):
-        return self._revision.__getitem__(key)
-
-    def keys(self):
-        return self._revision.keys()
-
-    def read(self, chunksize=-1):
-        return self._revision.read(chunksize)
-
-    def seek(self, position, mode=0):
-        return self._revision.seek(position, mode)
-
-    def tell(self):
-        return self._revision.tell()
-
-    def destroy(self):
-        return self._revision.destroy()
-
-
-class StoredRouterRevision(SerializableRevisionMixin, IndexingRevisionMixin, BareStoredRouterRevision):
-    pass
-

MoinMoin/storage/middleware/__init__.py

+# Copyright: 2011 MoinMoin:ThomasWaldmann
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - Storage Middleware / Mixins
+"""
+

MoinMoin/storage/middleware/acl.py

+# Copyright: 2003-2011 MoinMoin:ThomasWaldmann
+# Copyright: 2000-2004 Juergen Hermann <jh@web.de>
+# Copyright: 2003 Gustavo Niemeyer
+# Copyright: 2005 Oliver Graf
+# Copyright: 2007 Alexander Schremmer
+# Copyright: 2009 Christopher Denter
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - ACL Middleware
+
+This backend is a middleware implementing access control using ACLs (access
+control lists) and is referred to as AMW (ACL MiddleWare) hereafter.
+It does not store any data, but uses a given backend for this.
+This middleware is injected between the user of the storage API and the actual
+backend used for storage. It is independent of the backend being used.
+Instances of the AMW are bound to individual request objects. The user whose
+permissions the AMW checks is hence obtained by a lookup on the request object.
+The backend itself (and the objects it returns) need to be wrapped in order
+to make sure that no object of the real backend is (directly or indirectly)
+made accessible to the user of the API.
+The real backend is still available as an attribute of the request and can
+be used by conversion utilities or for similar tasks (flaskg.unprotected_storage).
+Regular users of the storage API, such as the views that modify an item,
+*MUST NOT*, in any way, use the real backend unless the author knows *exactly*
+what he's doing (as this may introduce security bugs without the code actually
+being broken).
+
+The classes wrapped are:
+    * AclWrapperBackend (wraps MoinMoin.storage.Backend)
+    * AclWrapperItem (wraps MoinMoin.storage.Item)
+    * AclWrapperRevision (wraps MoinMoin.storage.Revision)
+
+When an attribute is 'wrapped' it means that, in this context, the user's
+permissions are checked prior to attribute usage. If the user may not perform
+the action he intended to perform, an AccessDeniedError is raised.
+Otherwise the action is performed on the respective attribute of the real backend.
+It is important to note here that the outcome of such an action may need to
+be wrapped itself, as is the case when items or revisions are returned.
+
+All wrapped classes must, of course, adhere to the normal storage API.
+"""
+
+
+from UserDict import DictMixin
+
+from flask import current_app as app
+from flask import g as flaskg
+
+from MoinMoin.security import AccessControlList
+
+from MoinMoin.storage import Item, NewRevision, StoredRevision
+from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError, AccessDeniedError
+
+from MoinMoin.config import ACL, ADMIN, READ, WRITE, CREATE, DESTROY
+
+
+class AclWrapperBackend(object):
+    """
+    The AMW is bound to a specific request. The actual backend is retrieved
+    from the config upon request initialization. Any method that is in some
+    way relevant to security needs to be wrapped in order to ensure the user
+    has the permissions necessary to perform the desired action.
+    Note: This may *not* inherit from MoinMoin.storage.Backend because that would
+    break our __getattr__ attribute 'redirects' (which are necessary because a backend
+    implementor may decide to use his own helper functions which the items and revisions
+    will still try to call).
+    """
+    def __init__(self, cfg, backend, hierarchic=False, before=u"", default=u"", after=u"", valid=None):
+        """
+        :type backend: Some object that implements the storage API.
+        :param backend: The unprotected backend that we want to protect.
+        :type hierarchic: bool
+        :param hierarchic: Indicate whether we want to process ACLs in hierarchic mode.
+        :type before: unicode
+        :param before: ACL to be applied before all the other ACLs.
+        :type default: unicode
+        :param default: If no ACL information is given on the item in question, use this default.
+        :type after: unicode
+        :param after: ACL to be applied after all the other ACLs.
+        :type valid: list of strings or None
+        :param valid: If a list is given, only strings in the list are treated as valid acl privilege descriptors.
+                      If None is give, the global wiki default is used.
+        """
+        self.cfg = cfg
+        self.backend = backend
+        self.hierarchic = hierarchic
+        self.valid = valid if valid is not None else cfg.acl_rights_contents
+        self.before = AccessControlList([before], default=default, valid=self.valid)
+        self.default = AccessControlList([default], default=default, valid=self.valid)
+        self.after = AccessControlList([after], default=default, valid=self.valid)
+
+    def __getattr__(self, attr):
+        # Attributes that this backend does not define itself are just looked
+        # up on the real backend.
+        return getattr(self.backend, attr)
+
+    def get_item(self, itemname):
+        """
+        @see: Backend.get_item.__doc__
+        """
+        if not self._may(itemname, READ):
+            raise AccessDeniedError(flaskg.user.name, READ, itemname)
+        real_item = self.backend.get_item(itemname)
+        # Wrap the item here as well.
+        wrapped_item = AclWrapperItem(real_item, self)
+        return wrapped_item
+
+    def has_item(self, itemname):
+        """
+        @see: Backend.has_item.__doc__
+        """
+        # We do not hide the sheer existance of items. When trying
+        # to create an item with the same name, the user would notice anyway.
+        return self.backend.has_item(itemname)
+
+    def create_item(self, itemname):
+        """
+        @see: Backend.create_item.__doc__
+        """
+        if not self._may(itemname, CREATE):
+            raise AccessDeniedError(flaskg.user.name, CREATE, itemname)
+        real_item = self.backend.create_item(itemname)
+        # Wrap item.
+        wrapped_item = AclWrapperItem(real_item, self)
+        return wrapped_item
+
+    def iter_items_noindex(self):
+        """
+        @see: Backend.iter_items_noindex.__doc__
+        """
+        for item in self.backend.iteritems():
+            if self._may(item.name, READ):
+                yield AclWrapperItem(item, self)
+
+    iteritems = iter_items_noindex
+
+    def _get_acl(self, itemname):
+        """
+        Get ACL strings from the last revision's metadata and return ACL object.
+        """
+        try:
+            item = self.backend.get_item(itemname)
+            # we always use the ACLs set on the latest revision:
+            current_rev = item.get_revision(-1)
+            acl = current_rev[ACL]
+            if not isinstance(acl, unicode):
+                raise TypeError("%s metadata has unsupported type: %r" % (ACL, acl))
+            acls = [acl, ]
+        except (NoSuchItemError, NoSuchRevisionError, KeyError):
+            # do not use default acl here
+            acls = []
+        default = self.default.default
+        return AccessControlList(tuple(acls), default=default, valid=self.valid)
+
+    def _may(self, itemname, right, username=None):
+        """ Check if username may have <right> access on item <itemname>.
+
+        For hierarchic=False we just check the item in question.
+
+        For hierarchic=True, we check each item in the hierarchy. We
+        start with the deepest item and recurse to the top of the tree.
+        If one of those permits, True is returned.
+        This is done *only* if there is *no ACL at all* (not even an empty one)
+        on the items we 'recurse over'.
+
+        For both configurations, we check `before` before the item/default
+        acl and `after` after the item/default acl, of course.
+
+        `default` is only used if there is no ACL on the item (and none on
+        any of the item's parents when using hierarchic.)
+
+        :param itemname: item to get permissions from
+        :param right: the right to check
+        :param username: username to use for permissions check (default is to
+                         use the username doing the current request)
+        :rtype: bool
+        :returns: True if you have permission or False
+        """
+        if username is None:
+            username = flaskg.user.name
+
+        allowed = self.before.may(username, right)
+        if allowed is not None:
+            return allowed
+
+        if self.hierarchic:
+            items = itemname.split('/') # create item hierarchy list
+            some_acl = False
+            for i in range(len(items), 0, -1):
+                # Create the next pagename in the hierarchy
+                # starting at the leaf, going to the root
+                name = '/'.join(items[:i])
+                acl = self._get_acl(name)
+                if acl.has_acl():
+                    some_acl = True
+                    allowed = acl.may(username, right)
+                    if allowed is not None:
+                        return allowed
+                    # If the item has an acl (even one that doesn't match) we *do not*
+                    # check the parents. We only check the parents if there's no acl on
+                    # the item at all.
+                    break
+            if not some_acl:
+                allowed = self.default.may(username, right)
+                if allowed is not None:
+                    return allowed
+        else:
+            acl = self._get_acl(itemname)
+            if acl.has_acl():
+                allowed = acl.may(username, right)
+                if allowed is not None:
+                    return allowed
+            else:
+                allowed = self.default.may(username, right)
+                if allowed is not None:
+                    return allowed
+
+        allowed = self.after.may(username, right)
+        if allowed is not None:
+            return allowed
+
+        return False
+
+
+class AclWrapperItem(Item):
+    """
+    Similar to AclWrapperBackend. Wrap a storage item and protect its
+    attributes by performing permission checks prior to performing the
+    action and raising AccessDeniedErrors if appropriate.
+    """
+    def __init__(self, item, aclbackend):
+        """
+        :type item: Object adhering to the storage item API.
+        :param item: The unprotected item we want to wrap.
+        :type aclbackend: Instance of AclWrapperBackend.
+        :param aclbackend: The AMW this item belongs to.
+        """
+        self._backend = aclbackend
+        self._item = item
+        self._may = aclbackend._may
+
+    @property
+    def name(self):
+        """
+        @see: Item.name.__doc__
+        """
+        return self._item.name
+
+    # needed by storage.serialization:
+    @property
+    def element_name(self):
+        return self._item.element_name
+    @property
+    def element_attrs(self):
+        return self._item.element_attrs
+
+    def require_privilege(*privileges):
+        """
+        This decorator is used in order to avoid code duplication
+        when checking a user's permissions. It allows providing arguments
+        that represent the permissions to check, such as READ and WRITE
+        (see module level constants; don't pass strings, please).
+
+        :type privileges: List of strings.
+        :param privileges: Represent the privileges to check.
+        """
+        def wrap(f):
+            def wrapped_f(self, *args, **kwargs):
+                for privilege in privileges:
+                    if not self._may(self.name, privilege):
+                        username = flaskg.user.name
+                        raise AccessDeniedError(username, privilege, self.name)
+                return f(self, *args, **kwargs)
+            return wrapped_f
+        return wrap
+
+
+    @require_privilege(WRITE)
+    def __setitem__(self, key, value):
+        """
+        @see: Item.__setitem__.__doc__
+        """
+        return self._item.__setitem__(key, value)
+
+    @require_privilege(WRITE)
+    def __delitem__(self, key):
+        """
+        @see: Item.__delitem__.__doc__
+        """
+        return self._item.__delitem__(key)
+
+    @require_privilege(READ)
+    def __getitem__(self, key):
+        """
+        @see: Item.__getitem__.__doc__
+        """
+        return self._item.__getitem__(key)
+
+    @require_privilege(READ)
+    def keys(self):
+        """
+        @see: Item.keys.__doc__
+        """
+        return self._item.keys()
+
+    @require_privilege(WRITE)
+    def change_metadata(self):
+        """
+        @see: Item.change_metadata.__doc__
+        """
+        return self._item.change_metadata()
+
+    @require_privilege(WRITE)
+    def publish_metadata(self):
+        """
+        @see: Item.publish_metadata.__doc__
+        """
+        return self._item.publish_metadata()
+
+    @require_privilege(READ)
+    def get_revision(self, revno):
+        """
+        @see: Item.get_revision.__doc__
+        """
+        return AclWrapperRevision(self._item.get_revision(revno), self)
+
+    @require_privilege(READ)
+    def list_revisions(self):
+        """
+        @see: Item.list_revisions.__doc__
+        """
+        return self._item.list_revisions()
+
+    @require_privilege(READ, WRITE)
+    def rename(self, newname):
+        """
+        Rename item from name (src) to newname (dst).
+        Note that there is no special rename privilege. By taking other
+        privileges into account, we implicitly perform the permission check here.
+        This checks R/W at src and W/C at dst. This combination was chosen for
+        the following reasons:
+         * It is the most intuitive of the possible solutions.
+         * If we'd only check for R at src, everybody would be able to rename even
+           ImmutablePages if there is a writable/creatable name somewhere else
+           (e.g., Trash/).
+         * 'delete' aka 'rename to trashbin' can be controlled with 'create':
+           Just don't provide create for the trash namespace.
+         * Someone without create in the target namespace cannot rename.
+
+        @see: Item.rename.__doc__
+        """
+        # Special case since we need to check newname as well. Easier to special-case than
+        # adjusting the decorator.
+        username = flaskg.user.name
+        if not self._may(newname, CREATE):
+            raise AccessDeniedError(username, CREATE, newname)
+        if not self._may(newname, WRITE):
+            raise AccessDeniedError(username, WRITE, newname)
+        return self._item.rename(newname)
+
+    @require_privilege(WRITE)
+    def commit(self):
+        """
+        @see: Item.commit.__doc__
+        """
+        return self._item.commit()
+
+    # This does not require a privilege as the item must have been obtained
+    # by either get_item or create_item already, which already check permissions.
+    def rollback(self):
+        """
+        @see: Item.rollback.__doc__
+        """
+        return self._item.rollback()
+
+    @require_privilege(DESTROY)
+    def destroy(self):
+        """
+        USE WITH GREAT CARE!
+
+        @see: Item.destroy.__doc__
+        """
+        return self._item.destroy()
+
+    @require_privilege(WRITE)
+    def create_revision(self, revno):
+        """
+        @see: Item.create_revision.__doc__
+        """
+        wrapped_revision = AclWrapperRevision(self._item.create_revision(revno), self)
+        return wrapped_revision
+
+
+class AclWrapperRevision(object, DictMixin):
+    """
+    Wrapper for revision classes. We need to wrap NewRevisions because they allow altering data.
+    We need to wrap StoredRevisions since they offer a destroy() method and access to their item.
+    The caller should know what kind of revision he gets. Hence, we just implement the methods of
+    both, StoredRevision and NewRevision. If a method is invoked that is not defined on the
+    kind of revision we wrap, we will see an AttributeError one level deeper anyway, so this is ok.
+    """
+    def __init__(self, revision, item):
+        """
+        :type revision: Object adhering to the storage revision API.
+        :param revision: The revision we want to protect.
+        :type item: Object adhering to the storage item API.
+        :param item: The item this revision belongs to
+        """
+        self._revision = revision
+        self._item = item
+        self._may = item._may
+
+    def __getattr__(self, attr):
+        # Pass through any call that is not subject to ACL protection (e.g. serialize)
+        return getattr(self._revision, attr)
+
+    @property
+    def item(self):
+        """
+        @see: Revision.item.__doc__
+        """
+        return self._item
+
+    @property
+    def timestamp(self):
+        """This property accesses the creation timestamp of the revision"""
+        return self._revision.timestamp
+
+    def __setitem__(self, key, value):
+        """
+        In order to change an ACL on an item you must have the ADMIN privilege.
+        We must allow the (unchanged) preceeding revision's ACL being stored
+        into the new revision, though.
+
+        TODO: the ACL specialcasing done here (requiring admin privilege for
+              changing ACLs) is only one case of a more generic problem:
+              Access (read,write,change) to some metadata must be checked.