Commits

Thomas Waldmann  committed 1c66f39

implement validation for revision metadata storage using flatland

Note: currently, invalid metadata is not rejected; moin2 will just print out
a validation dump if validation fails.

  • Participants
  • Parent commits cb2f379

Comments (0)

Files changed (7)

File MoinMoin/constants/contenttypes.py

                        'cplusplus', 'java', 'pascal', 'diff', 'gettext', 'xslt', 'creole', )
 
 CONTENTTYPE_USER = u'application/x.moin.userprofile'
+CONTENTTYPE_DEFAULT = u'application/octet-stream'
 
 # structure for contenttype groups
 CONTENTTYPE_GROUPS = [

File MoinMoin/constants/keys.py

 # data. meta[HASH_ALGORITHM] = hash(rev_data, HASH_ALGORITHM)
 # some backends may use this also for other purposes.
 HASH_ALGORITHM = 'sha1'
+HASH_LEN = 40 # length of hex str representation of hash value
 
 # some field names for whoosh index schema / documents in index:
 NAME_EXACT = "name_exact"

File MoinMoin/items/__init__.py

         if comment:
             meta[COMMENT] = unicode(comment)
 
-        if CONTENTTYPE not in meta:
-            # make sure we have CONTENTTYPE
-            meta[CONTENTTYPE] = unicode(contenttype_current or contenttype_guessed or 'application/octet-stream')
-
-        if ADDRESS not in meta:
-            meta[ADDRESS] = u'0.0.0.0' # TODO
-
-        if USERID not in meta and flaskg.user.valid:
-            meta[USERID] = flaskg.user.itemid
-
-        meta[ACTION] = unicode(action)
-
         if not overwrite and REVID in meta:
             # we usually want to create a new revision, thus we must remove the existing REVID
             del meta[REVID]
                 data = ''
 
         if isinstance(data, unicode):
-            data = data.encode(config.charset)
+            data = data.encode(config.charset) # XXX wrong! if contenttype gives a coding, we MUST use THAT.
 
         if isinstance(data, str):
             data = StringIO(data)
 
-        newrev = storage_item.store_revision(meta, data, overwrite=overwrite)
+        newrev = storage_item.store_revision(meta, data, overwrite=overwrite,
+                                             action=unicode(action),
+                                             contenttype_current=contenttype_current,
+                                             contenttype_guessed=contenttype_guessed,
+                                             )
         item_modified.send(app._get_current_object(), item_name=name)
         return newrev.revid, newrev.meta[SIZE]
 

File MoinMoin/storage/middleware/_tests/test_indexing.py

     def test_destroy_revision(self):
         item_name = u'foo'
         item = self.imw[item_name]
-        rev = item.store_revision(dict(name=item_name, mtime=1), StringIO('bar'))
+        rev = item.store_revision(dict(name=item_name, mtime=1),
+                                  StringIO('bar'), trusted=True)
         revid0 = rev.revid
-        rev = item.store_revision(dict(name=item_name, mtime=2), StringIO('baz'))
+        rev = item.store_revision(dict(name=item_name, mtime=2),
+                                  StringIO('baz'), trusted=True)
         revid1 = rev.revid
-        rev = item.store_revision(dict(name=item_name, mtime=3), StringIO('...'))
+        rev = item.store_revision(dict(name=item_name, mtime=3),
+                                  StringIO('...'), trusted=True)
         revid2 = rev.revid
         print "revids:", revid0, revid1, revid2
         # destroy a non-current revision:
         revids = []
         item_name = u'foo'
         item = self.imw[item_name]
-        rev = item.store_revision(dict(name=item_name, mtime=1), StringIO('bar'))
+        rev = item.store_revision(dict(name=item_name, mtime=1),
+                                  StringIO('bar'), trusted=True)
         revids.append(rev.revid)
-        rev = item.store_revision(dict(name=item_name, mtime=2), StringIO('baz'))
+        rev = item.store_revision(dict(name=item_name, mtime=2),
+                                  StringIO('baz'), trusted=True)
         revids.append(rev.revid)
         # destroy item:
         item.destroy_all_revisions()
         expected_latest_revids = []
         item_name = u'foo'
         item = self.imw[item_name]
-        r = item.store_revision(dict(name=item_name, mtime=1), StringIO('does not count, different name'))
+        r = item.store_revision(dict(name=item_name, mtime=1),
+                                StringIO('does not count, different name'), trusted=True)
         expected_latest_revids.append(r.revid)
         item_name = u'bar'
         item = self.imw[item_name]
-        item.store_revision(dict(name=item_name, mtime=1), StringIO('1st'))
-        r = item.store_revision(dict(name=item_name, mtime=2), StringIO('2nd'))
+        item.store_revision(dict(name=item_name, mtime=1),
+                            StringIO('1st'), trusted=True)
+        r = item.store_revision(dict(name=item_name, mtime=2),
+                                StringIO('2nd'), trusted=True)
         expected_latest_revids.append(r.revid)
 
         # now we remember the index contents built that way:
         missing_revids = []
         item_name = u'updated'
         item = self.imw[item_name]
-        r = item.store_revision(dict(name=item_name, mtime=1), StringIO('updated 1st'))
+        r = item.store_revision(dict(name=item_name, mtime=1),
+                                StringIO('updated 1st'), trusted=True)
         expected_all_revids.append(r.revid)
         # we update this item below, so we don't add it to expected_latest_revids
         item_name = u'destroyed'
         item = self.imw[item_name]
-        r = item.store_revision(dict(name=item_name, mtime=1), StringIO('destroyed 1st'))
+        r = item.store_revision(dict(name=item_name, mtime=1),
+                                StringIO('destroyed 1st'), trusted=True)
         destroy_revid = r.revid
         # we destroy this item below, so we don't add it to expected_all_revids
         # we destroy this item below, so we don't add it to expected_latest_revids
         item_name = u'stayssame'
         item = self.imw[item_name]
-        r = item.store_revision(dict(name=item_name, mtime=1), StringIO('stayssame 1st'))
+        r = item.store_revision(dict(name=item_name, mtime=1),
+                                StringIO('stayssame 1st'), trusted=True)
         expected_all_revids.append(r.revid)
         # we update this item below, so we don't add it to expected_latest_revids
-        r = item.store_revision(dict(name=item_name, mtime=2), StringIO('stayssame 2nd'))
+        r = item.store_revision(dict(name=item_name, mtime=2),
+                                StringIO('stayssame 2nd'), trusted=True)
         expected_all_revids.append(r.revid)
         expected_latest_revids.append(r.revid)
 
         # this will not change the fresh index, but the old index we are still using.
         item_name = u'updated'
         item = self.imw[item_name]
-        r = item.store_revision(dict(name=item_name, mtime=2), StringIO('updated 2nd'))
+        r = item.store_revision(dict(name=item_name, mtime=2),
+                                StringIO('updated 2nd'), trusted=True)
         expected_all_revids.append(r.revid)
         expected_latest_revids.append(r.revid)
         missing_revids.append(r.revid)
         item_name = u'added'
         item = self.imw[item_name]
-        r = item.store_revision(dict(name=item_name, mtime=1), StringIO('added 1st'))
+        r = item.store_revision(dict(name=item_name, mtime=1),
+                                StringIO('added 1st'), trusted=True)
         expected_all_revids.append(r.revid)
         expected_latest_revids.append(r.revid)
         missing_revids.append(r.revid)

File MoinMoin/storage/middleware/indexing.py

 
 import logging
 
+from flask import request
+from flask import g as flaskg
+from flask import current_app as app
+
 from whoosh.fields import Schema, TEXT, ID, IDLIST, NUMERIC, DATETIME, KEYWORD, BOOLEAN
 from whoosh.index import open_dir, create_in, EmptyIndexError
 from whoosh.writing import AsyncWriter
                             LANGUAGE, USERID, ADDRESS, HOSTNAME, SIZE, ACTION, COMMENT, \
                             CONTENT, ITEMLINKS, ITEMTRANSCLUSIONS, ACL, EMAIL, OPENID, \
                             ITEMID, REVID, CURRENT, PARENTID, \
-                            LATEST_REVS, ALL_REVS
+                            LATEST_REVS, ALL_REVS, \
+                            CONTENTTYPE_USER
+from MoinMoin.constants import keys
+
 from MoinMoin import user
 from MoinMoin.search.analyzers import item_name_analyzer, MimeTokenizer, AclTokenizer
 from MoinMoin.themes import utctimestamp
 from MoinMoin.util.crypto import make_uuid
+from MoinMoin.storage.middleware.validation import ContentMetaSchema, UserMetaSchema
+
 
 INDEXES = [LATEST_REVS, ALL_REVS, ]
 
         """
         preprocess a revision before it gets stored and put into index.
         """
-        meta[ITEMID] = self.itemid
-        if MTIME not in meta:
-            meta[MTIME] = int(time.time())
-        #if CONTENTTYPE not in meta:
-        #    meta[CONTENTTYPE] = u'application/octet-stream'
         content = convert_to_indexable(meta, data, is_new=True)
         return meta, data, content
 
-    def store_revision(self, meta, data, overwrite=False):
+    def store_revision(self, meta, data, overwrite=False,
+                       trusted=False, # True for loading a serialized representation or other trusted sources
+                       name=None, # TODO name we decoded from URL path
+                       action=u'SAVE',
+                       remote_addr=None,
+                       userid=None,
+                       wikiname=None,
+                       contenttype_current=None,
+                       contenttype_guessed=None,
+                       acl_parent=None,
+                       ):
         """
         Store a revision into the backend, write metadata and data to it.
 
         :param overwrite: if True, allow overwriting of existing revs.
         :returns: a Revision instance of the just created revision
         """
+        if remote_addr is None:
+            try:
+                # if we get here outside a request, this won't work:
+                remote_addr = unicode(request.remote_addr)
+            except:
+                pass
+        if userid is None:
+            try:
+                # if we get here outside a request, this won't work:
+                userid = flaskg.user.valid and flaskg.user.itemid or None
+            except:
+                pass
+        if wikiname is None:
+            wikiname = app.cfg.interwikiname
+        state = {'trusted': trusted,
+                 keys.NAME: name,
+                 keys.ACTION: action,
+                 keys.ADDRESS: remote_addr,
+                 keys.USERID: userid,
+                 keys.WIKINAME: wikiname,
+                 keys.ITEMID: self.itemid, # real itemid or None
+                 'contenttype_current': contenttype_current,
+                 'contenttype_guessed': contenttype_guessed,
+                 'acl_parent': acl_parent,
+                }
+        ct = meta.get(keys.CONTENTTYPE)
+        if ct == CONTENTTYPE_USER:
+            Schema = UserMetaSchema
+        else:
+            Schema = ContentMetaSchema
+        m = Schema(meta)
+        valid = m.validate(state)
+        # TODO: currently we just print validation results. in the end we should
+        # reject invalid stuff in some comfortable way.
+        if not valid:
+            for e in m.children:
+                print e.valid, e
+
+        # we do not have anything in m that is not defined in the schema,
+        # e.g. userdefined meta keys or stuff we do not validate. thus, we
+        # just update the meta dict with the validated stuff:
+        meta.update(dict(m.value.items()))
+        # we do not want None / empty values:
+        meta = dict([(k, v) for k, v in meta.items() if v not in [None, []]])
+
         if self.itemid is None:
-            self.itemid = make_uuid()
+            self.itemid = meta[ITEMID]
         backend = self.backend
         if not overwrite:
             revid = meta.get(REVID)

File MoinMoin/storage/middleware/protecting.py

     def get_revision(self, revid):
         return self[revid]
 
-    def store_revision(self, meta, data, overwrite=False):
+    def store_revision(self, meta, data, overwrite=False, **kw):
         self.require(WRITE)
         if not self:
             self.require(CREATE)
         if overwrite:
             self.require(DESTROY)
-        rev = self.item.store_revision(meta, data, overwrite=overwrite)
+        rev = self.item.store_revision(meta, data, overwrite=overwrite, **kw)
         return ProtectedRevision(self.protector, rev, p_item=self)
 
     def store_all_revisions(self, meta, data):

File MoinMoin/user.py

                 value = tuple(value)
             meta[key] = value
         meta[CONTENTTYPE] = CONTENTTYPE_USER
-        meta[ACTION] = u'SAVE'
         item.store_revision(meta, StringIO(''), overwrite=True)
 
         if not self.disabled: