Commits

Thomas Waldmann committed fc85c7c

fix fileserver backend + related tests, see below

create the revids (and also the itemids) as urlquote(relpath + "." + mtime), where mtime is the file's modification time truncated to an integer

this has the following properties:
* the revid in the backend changes whenever a file is modified -> the change is discoverable for an index update
* urlquote escapes the slashes, so our URL routing does not get confused

added a fake (empty) HASH_ALGORITHM value and some other metadata (NAME, REVID, ITEMID), so the code on higher layers is happy

moved some code around, so similar stuff is nearby

changed some str values to unicode, so whoosh is happy

Comments (0)

Files changed (2)

MoinMoin/storage/backends/_tests/test_fileserver.py

 
 import pytest
 
-from MoinMoin.config import MTIME
+from MoinMoin.config import NAME, MTIME, REVID, ITEMID, HASH_ALGORITHM
 from ..fileserver import Backend
 from . import BackendTestBase
 
                 pass
             with open(fn, 'wb') as f:
                 f.write(data)
+            meta[NAME] = name
             meta = tuple(sorted(meta.items()))
             expected_result.add((meta, data))
         return expected_result
 
+    def test_iter(self):
+        # for the fileserver store, even if the directory is empty,
+        # we will get a revid for the root directory:
+        contents = list(self.be)
+        assert len(contents) == 1
+        root_revid = contents[0]
+        # revids are like relpath.mtime
+        relpath, mtime = root_revid.split('.')
+        assert relpath == ''
+
     def test_files(self):
         # note: as we can only store the data into the file system, meta can
         # only have items that are generated by the fileserver backend:
         items = [#name,  meta,   data
-                 ('foo.png', dict(size=11, contenttype='image/png'), 'png content'),
-                 ('bar.txt', dict(size=12, contenttype='text/plain'), 'text content'),
+                 (u'foo.png', dict(size=11, contenttype=u'image/png'), 'png content'),
+                 (u'bar.txt', dict(size=12, contenttype=u'text/plain'), 'text content'),
                 ]
         expected_result = self._prepare(items)
-        result = set()
-        for i in self.be:
-            meta, data = self.be.retrieve(i)
-            # we don't want to check mtime
-            del meta[MTIME]
-            meta = tuple(sorted(meta.items()))
-            data = data.read()
-            result.add((meta, data))
-        assert result == expected_result
-
-    def test_dir(self):
-        # note: as we can only store the data into the file system, meta can
-        # only have items that are generated by the fileserver backend:
-        items = [#name,  meta,   data
-                 ('dir/foo.png', dict(size=11, contenttype='image/png'), 'png content'),
-                 ('dir/bar.txt', dict(size=12, contenttype='text/plain'), 'text content'),
-                ]
-        expected_result = self._prepare(items)
-        dir_meta = tuple(sorted(dict(size=0, contenttype='text/x.moin.wiki;charset=utf-8').items()))
+        dir_meta = tuple(sorted(dict(name=u'', size=0, contenttype=u'text/x.moin.wiki;charset=utf-8').items()))
         dir_data = """\
 = Directory contents =
  * [[../]]
         result = set()
         for i in self.be:
             meta, data = self.be.retrieve(i)
-            # we don't want to check mtime
+            # we don't want to check some meta values
             del meta[MTIME]
+            del meta[HASH_ALGORITHM]
+            del meta[ITEMID]
+            del meta[REVID]
             meta = tuple(sorted(meta.items()))
             data = data.read()
             result.add((meta, data))
         assert result == expected_result
 
+    def test_dir(self):
+        # note: as we can only store the data into the file system, meta can
+        # only have items that are generated by the fileserver backend:
+        items = [#name,  meta,   data
+                 (u'dir/foo.png', dict(size=11, contenttype=u'image/png'), 'png content'),
+                 (u'dir/bar.txt', dict(size=12, contenttype=u'text/plain'), 'text content'),
+                ]
+        expected_result = self._prepare(items)
+        dir_meta = tuple(sorted(dict(name=u'', size=0, contenttype=u'text/x.moin.wiki;charset=utf-8').items()))
+        dir_data = """\
+= Directory contents =
+ * [[../]]
+ * [[/dir|dir/]]
+""".replace('\n', '\r\n')
+        expected_result.add((dir_meta, dir_data))
+        dir_meta = tuple(sorted(dict(name=u'dir', size=0, contenttype=u'text/x.moin.wiki;charset=utf-8').items()))
+        dir_data = """\
+= Directory contents =
+ * [[../]]
+ * [[/bar.txt|bar.txt]]
+ * [[/foo.png|foo.png]]
+""".replace('\n', '\r\n')
+        expected_result.add((dir_meta, dir_data))
+        result = set()
+        for i in self.be:
+            meta, data = self.be.retrieve(i)
+            # we don't want to check some meta values
+            del meta[MTIME]
+            del meta[HASH_ALGORITHM]
+            del meta[ITEMID]
+            del meta[REVID]
+            meta = tuple(sorted(meta.items()))
+            data = data.read()
+            result.add((meta, data))
+        assert result == expected_result
 

MoinMoin/storage/backends/fileserver.py

 import errno
 import stat
 from StringIO import StringIO
+from werkzeug import url_quote, url_unquote
 
-from MoinMoin.config import MTIME, SIZE, CONTENTTYPE
+from MoinMoin.config import NAME, ITEMID, REVID, MTIME, SIZE, CONTENTTYPE, HASH_ALGORITHM
 from . import BackendBase
 
 from MoinMoin.util.mimetype import MimeType
         pass
 
     def _mkpath(self, key):
+        """
+        key -> rel path, absolute path (strip mtime)
+        """
         # XXX unsafe keys?
-        return os.path.join(self.path, key)
+        try:
+            relpath, mtime = key.rsplit('.', 1)
+        except ValueError:
+            # we only generate revids that look like path.mtime,
+            # so if the split does not work, the revid is invalid
+            # and we raise KeyError like if the rev is not there
+            raise KeyError(key)
+        return relpath, os.path.join(self.path, relpath)
 
     def _mkkey(self, path):
+        """
+        absolute path -> relpath, mtime
+        """
+        st = os.stat(path)
         root = self.path
         assert path.startswith(root)
-        key = path[len(root)+1:]
-        return key
+        return path[len(root)+1:], int(st.st_mtime)
 
-    def __iter__(self):
-        # note: instead of just yielding the relative <path>, yield <path>/<mtime>,
-        # so if the file is updated, the revid will change (and the indexer's
-        # update() method can efficiently update the index).
-        for dirpath, dirnames, filenames in os.walk(self.path):
-            key = self._mkkey(dirpath)
-            if key:
-                yield key
-            for filename in filenames:
-                yield self._mkkey(os.path.join(dirpath, filename))
+    def _encode(self, key):
+        """
+        we need to get rid of slashes in revids because we put them into URLs
+        and it would confuse the URL routing.
+        """
+        return url_quote(key, safe='')
 
-    def _get_meta(self, fn):
-        path = self._mkpath(fn)
+    def _decode(self, qkey):
+        return url_unquote(qkey)
+
+    def _get_meta(self, fn, path):
         try:
             st = os.stat(path)
         except OSError as e:
                 raise KeyError(fn)
             raise
         meta = {}
+        meta[NAME] = fn
         meta[MTIME] = int(st.st_mtime) # use int, not float
+        meta[REVID] = unicode(self._encode('%s.%d' % (meta[NAME], meta[MTIME])))
+        meta[ITEMID] = meta[REVID]
+        meta[HASH_ALGORITHM] = u'' # XXX crap, but sendfile needs it for etag
         if stat.S_ISDIR(st.st_mode):
             # directory
             # we create a virtual wiki page listing links to subitems:
-            ct = 'text/x.moin.wiki;charset=utf-8'
+            ct = u'text/x.moin.wiki;charset=utf-8'
             size = 0
         elif stat.S_ISREG(st.st_mode):
             # normal file
-            ct = MimeType(filename=fn).content_type()
+            ct = unicode(MimeType(filename=fn).content_type())
             size = int(st.st_size) # use int instead of long
         else:
             # symlink, device file, etc.
-            ct = 'application/octet-stream'
+            ct = u'application/octet-stream'
             size = 0
         meta[CONTENTTYPE] = ct
         meta[SIZE] = size
             content = unicode(err)
         return content
 
-    def _get_data(self, fn):
-        path = self._mkpath(fn)
+    def _get_data(self, fn, path):
         try:
             st = os.stat(path)
             if stat.S_ISDIR(st.st_mode):
                 raise KeyError(fn)
             raise
 
-    def retrieve(self, fn):
-        meta = self._get_meta(fn)
-        data = self._get_data(fn)
+    def __iter__(self):
+        # note: instead of just yielding the relative <path>, yield <path>.<mtime>,
+        # so if the file is updated, the revid will change (and the indexer's
+        # update() method can efficiently update the index).
+        for dirpath, dirnames, filenames in os.walk(self.path):
+            key, mtime = self._mkkey(dirpath)
+            if 1: # key:
+                yield self._encode('%s.%d' % (key, mtime))
+            for filename in filenames:
+                yield self._encode('%s.%d' % self._mkkey(os.path.join(dirpath, filename)))
+
+    def retrieve(self, key):
+        key = self._decode(key)
+        fn, path = self._mkpath(key)
+        meta = self._get_meta(fn, path)
+        data = self._get_data(fn, path)
         return meta, data