Commits

Thomas Waldmann committed 21a6765

killed backend.history(), replaced it with .search() and appropriate query/reverse/sortedby/limit params

killed the .history() tests, added a few tests for .search().
Note: whoosh has its own tests, so we may not need that many tests there.

global history view got a major speedup when the user uses a bookmark (because
we can then add a DateRange term to massively reduce the search results in
big wikis).

atom feed is limited to 100 results now.

some minor refactorings.

Comments (0)

Files changed (6)

MoinMoin/_tests/test_test_environ.py

         storage = flaskg.storage
         assert storage
         assert hasattr(storage, 'get_item')
-        assert hasattr(storage, 'history')
         assert not list(storage.iteritems())
-        assert not list(storage.history())
         itemname = u"this item shouldn't exist yet"
         assert pytest.raises(NoSuchItemError, storage.get_item, itemname)
         item = storage.create_item(itemname)

MoinMoin/apps/feed/views.py

 
 from werkzeug.contrib.atom import AtomFeed
 
+from whoosh.query import Term, And
+
 from MoinMoin import log
 logging = log.getLogger(__name__)
 
     if content is None:
         title = app.cfg.sitename
         feed = AtomFeed(title=title, feed_url=request.url, url=request.host_url)
-        for doc in flaskg.storage.history(item_name=item_name):
+        query = Term("wikiname", app.cfg.interwikiname)
+        if item_name:
+            query = And([query, Term("name_exact", item_name), ])
+        history = flaskg.storage.search(query, all_revs=True, sortedby=[MTIME, "rev_no"], reverse=True, limit=100)
+        for doc in history:
             name = doc[NAME]
             this_revno = doc["rev_no"]
             item = flaskg.storage.get_item(name)

MoinMoin/apps/frontend/views.py

 import pytz
 from babel import Locale
 
+from whoosh.query import Term, And, DateRange
+
 from MoinMoin import log
 logging = log.getLogger(__name__)
 
 
 @frontend.route('/+history/<itemname:item_name>')
 def history(item_name):
-    history = flaskg.storage.history(item_name=item_name)
     offset = request.values.get('offset', 0)
     offset = max(int(offset), 0)
-
-    results_per_page = int(app.cfg.results_per_page)
     if flaskg.user.valid:
         results_per_page = flaskg.user.results_per_page
+    else:
+        results_per_page = app.cfg.results_per_page
+    query = And([Term("wikiname", app.cfg.interwikiname), Term("name_exact", item_name), ])
+    # TODO: due to how getPageContent and the template work, we need to use limit=None -
+    # it would be better to use search_page (and an appropriate limit, if needed)
+    history = flaskg.storage.search(query, all_revs=True, sortedby="rev_no", reverse=True, limit=None)
     history_page = util.getPageContent(history, offset, results_per_page)
-
     return render_template('history.html',
                            item_name=item_name, # XXX no item here
                            history_page=history_page,
 
 @frontend.route('/+history')
 def global_history():
-    history = flaskg.storage.history(item_name='')
     bookmark_time = None
-    results_per_page = int(app.cfg.results_per_page)
     if flaskg.user.valid:
         bm = flaskg.user.getBookmark()
         if bm is not None:
             bookmark_time = datetime.utcfromtimestamp(bm)
-        results_per_page = flaskg.user.results_per_page # if it is 0, means no paging
+    if flaskg.user.valid:
+        results_per_page = flaskg.user.results_per_page
+    else:
+        results_per_page = app.cfg.results_per_page
+    query = Term("wikiname", app.cfg.interwikiname)
+    if bookmark_time is not None:
+        query = And([query, DateRange(MTIME, start=bookmark_time, end=None)])
+    # TODO: we need to use limit=None to simulate the previous implementation's behaviour -
+    # it would be better to use search_page (and an appropriate limit, if needed)
+    history = flaskg.storage.search(query, all_revs=True, sortedby=[MTIME, "rev_no"], reverse=True, limit=None)
     item_groups = OrderedDict()
     for doc in history:
         current_item_name = doc[NAME]

MoinMoin/storage/__init__.py

         a wiki item, as such a deletion does not really delete anything from disk but
         just hides the former existence of the item. Such a deletion is undoable, while
         having destroyed an item is not.
-        This also destroys all history related to the item. In particular, this also
-        deletes all the item's revisions and they won't turn up in history any longer.
 
         In case the item has already been destroyed by someone else (e.g. another process)
         this method should just pass silently as the job is already done.
     that defaults to None for newly created revisions in which case it will be
     assigned at commit() time. It is writable for use by converter backends, but
     care must be taken in that case to create monotone timestamps!
-    This timestamp is also retrieved via the backend's history() method.
     """
     def __init__(self, item, revno):
         """

MoinMoin/storage/_tests/test_backends_router.py

 """
 
 import os
+import time
 
 import pytest
 
 from flask import current_app as app
 
-from MoinMoin.config import NAME
+from whoosh.query import Term, And, Every
+
+from MoinMoin.config import NAME, MTIME
 from MoinMoin.error import ConfigurationError
 from MoinMoin.storage._tests.test_backends import BackendTest
 from MoinMoin.storage.backends.memory import MemoryBackend
         assert name == ''
         assert mountpoint == 'child'
 
+    def test_search_item_history_order(self):
+        item_name = u'some item'
+        item = self.backend.create_item(item_name)
+        for rev_no in range(3):
+            rev = item.create_revision(rev_no)
+            item.commit()
+        query = Term("name_exact", item_name)
+        results = list(self.backend.search(query, all_revs=True, sortedby="rev_no"))
+        print results
+        assert results[0].get("rev_no") == 0
+        assert results[1].get("rev_no") == 1
+        assert results[2].get("rev_no") == 2
+        results = list(self.backend.search(query, all_revs=True, sortedby="rev_no", reverse=True))
+        print results
+        assert results[0].get("rev_no") == 2
+        assert results[1].get("rev_no") == 1
+        assert results[2].get("rev_no") == 0
 
-    def test_history(self):
-        order = [(u'first', 0, ), (u'second', 0, ), (u'first', 1, ), (u'a', 0), (u'child/my_subitem', 0) ]
-        for name, revno in order:
-            if revno == 0:
-                item = self.backend.create_item(name)
-            else:
-                item = self.backend.get_item(name)
-            item.create_revision(revno)
+    def test_search_global_history_order(self):
+        names = [u'foo', u'bar', u'baz', ]
+        for item_name in names:
+            item = self.backend.create_item(item_name)
+            rev = item.create_revision(0)
             item.commit()
+            time.sleep(1) # make sure we have different MTIME
+        query = Every()
+        results = list(self.backend.search(query, all_revs=True, sortedby=[MTIME, "rev_no"]))
+        print results
+        assert results[0].get(NAME) == names[0]
+        assert results[1].get(NAME) == names[1]
+        assert results[2].get(NAME) == names[2]
+        results = list(self.backend.search(query, all_revs=True, sortedby=[MTIME, "rev_no"], reverse=True))
+        print results
+        assert results[0].get(NAME) == names[2]
+        assert results[1].get(NAME) == names[1]
+        assert results[2].get(NAME) == names[0]
 
-            # Revisions are created too fast for the rev's timestamp's granularity.
-            # This only affects the RouterBackend because there several different
-            # backends are used and no means for storing simultaneously created revs
-            # in the correct order exists between backends. It affects AclWrapperBackend
-            # tests as well because those use a RouterBackend internally for real-world-likeness.
 
-            # XXX XXX
-            # You may have realized that all the items above belong to the same backend so this shouldn't actually matter.
-            # It does matter, however, once you consider that the RouterBackend uses the generic, slow history implementation.
-            # This one uses iteritems and then sorts all the revisions itself, hence discarding any information of ordering
-            # for simultaneously created revisions. If we just call history of that single backend directly, it works without
-            # time.sleep. For n backends, however, you'd have to somehow merge the revisions into one generator again, thus
-            # discarding that information again. Besides, that would be a costly operation. The ordering for simultaneosly
-            # created revisions remains the same since it's based on tuple ordering. Better proposals welcome.
-            import time
-            time.sleep(1)
-
-        for num, doc in enumerate(self.backend.history(reverse=False)):
-            name, revno = order[num]
-            assert doc[NAME] == name
-            assert doc["rev_no"] == revno
-
-        order.reverse()
-        for num, doc in enumerate(self.backend.history(reverse=True)):
-            name, revno = order[num]
-            assert doc[NAME] == name
-            assert doc["rev_no"] == revno
-
-    def test_history_size_after_rename(self):
-        item = self.backend.create_item(u'first')
-        item.create_revision(0)
-        item.commit()
-        item.rename(u'second')
-        item.create_revision(1)
-        item.commit()
-        assert len(list(self.backend.history())) == 2
-
-    def test_history_after_destroy_item(self):
-        itemname = u"I will be completely destroyed"
-        rev_data = "I will be completely destroyed, too, hopefully"
-        item = self.backend.create_item(itemname)
-        rev = item.create_revision(0)
-        rev.write(rev_data)
-        item.commit()
-
-        item.destroy()
-
-        itemnames_history = [doc[NAME] for doc in self.backend.history()]
-        assert itemname not in itemnames_history
-
-    def test_history_after_destroy_revision(self):
-        itemname = u"I will see my children die"
-        rev_data = "I will die!"
-        persistent_rev = "I will see my sibling die :-("
-        item = self.backend.create_item(itemname)
-        rev = item.create_revision(0)
-        rev.write(rev_data)
-        item.commit()
-        rev = item.create_revision(1)
-        rev.write(persistent_rev)
-        item.commit()
-
-        rev = item.get_revision(0)
-        rev.destroy()
-
-        itemnames_revs_history = [(doc[NAME], doc["rev_no"]) for doc in self.backend.history()]
-        assert (itemname, 0) not in itemnames_revs_history
-
-    def test_history_item_names(self):
-        item = self.backend.create_item(u'first')
-        item.create_revision(0)
-        item.commit()
-        item.rename(u'second')
-        item.create_revision(1)
-        item.commit()
-        docs_history = list(self.backend.history(reverse=False))
-        assert docs_history[0]["rev_no"] == 0
-        assert docs_history[0][NAME] == u'first'
-        assert docs_history[1]["rev_no"] == 1
-        assert docs_history[1][NAME] == u'second'
-

MoinMoin/storage/backends/indexing.py

         item.publish_metadata()
         return item
 
-    def history(self, reverse=True, item_name=u'', start=None, end=None):
-        """
-        History implementation using the index.
-        """
-        for doc in self._index.history(reverse=reverse, item_name=item_name, start=start, end=end):
-            logging.debug("HISTORY: name %s revno %s" % (doc[NAME], doc["rev_no"]))
-            # XXX ACL checks?
-            yield doc
+    def search(self, q, all_revs=False, **kw):
+        return self._index.search(q, all_revs=all_revs, **kw)
+
+    def search_page(self, q, all_revs=False, pagenum=1, pagelen=10, **kw):
+        return self._index.search_page(q, all_revs=all_revs, pagenum=pagenum, pagelen=pagelen, **kw)
 
     def all_tags(self):
         """
                 logging.debug("Latest revisions: removing %d", latest_doc_number)
                 async_writer.delete_document(latest_doc_number)
 
-    def history(self, item_name=u'', reverse=True, start=None, end=None):
-        with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
-            if item_name:
-                docs = all_revs_searcher.documents(name_exact=item_name,
-                                                   wikiname=self.wikiname
-                                                  )
-            else:
-                docs = all_revs_searcher.documents(wikiname=self.wikiname)
-            from operator import itemgetter
-            # sort by mtime and rev_no do deal better with mtime granularity for fast item rev updates
-            for doc in sorted(docs, key=itemgetter("mtime", "rev_no"), reverse=reverse)[start:end]:
-                yield doc
+    def search(self, q, all_revs=False, **kw):
+        if all_revs:
+            ix = self.index_object.all_revisions_index
+        else:
+            ix = self.index_object.latest_revisions_index
+        with ix.searcher() as searcher:
+            # Note: callers must consume everything we yield, so the for loop
+            # ends and the "with" is left to close the index files.
+            for hit in searcher.search(q, **kw):
+                yield hit.fields()
+
+    def search_page(self, q, all_revs=False, pagenum=1, pagelen=10, **kw):
+        if all_revs:
+            ix = self.index_object.all_revisions_index
+        else:
+            ix = self.index_object.latest_revisions_index
+        with ix.searcher() as searcher:
+            # Note: callers must consume everything we yield, so the for loop
+            # ends and the "with" is left to close the index files.
+            for hit in searcher.search_page(q, pagenum, pagelen=pagelen, **kw):
+                yield hit.fields()
 
     def all_tags(self):
         with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.