1. marchael
  2. moin-2.0

Commits

Thomas Waldmann  committed a4ec2c6

history: do not yield Revision objects, but directly result documents from whoosh

we have all that global and local history view (and also the atom feed) needs
in the whoosh index. the whoosh documents yielded are dicts with all stored fields.

by just using that, we do not need to access the backend storage any more for
displaying history (which was one of the reasons for the slow global history
for wikis with many revisions).

one small difference is rev.timestamp (UNIX timestamp) vs. doc[MTIME] (datetime
object).

added size, action, comment fields to whoosh indexes (needed for history
display and also useful for other reasons).

adjusted the tests (pytest 2)

TODO: ACL checks for history? (this is a general thing we need to be careful
with: not to expose index data that should not be exposed).

removed unused "mountpoint" history() param.

add utctimestamp() == inverse of datetime.utcfromtimestamp()

  • Participants
  • Parent commits 813802d
  • Branches pytest2

Comments (0)

Files changed (8)

File MoinMoin/apps/feed/views.py

View file
  • Ignore whitespace
 from MoinMoin import wikiutil
 from MoinMoin.i18n import _, L_, N_
 from MoinMoin.apps.feed import feed
-from MoinMoin.config import NAME, ACL, ACTION, ADDRESS, HOSTNAME, USERID, COMMENT
+from MoinMoin.config import NAME, ACL, ACTION, ADDRESS, HOSTNAME, USERID, COMMENT, MTIME
 from MoinMoin.themes import get_editor_info
 from MoinMoin.items import Item
 from MoinMoin.util.crypto import cache_key
     if content is None:
         title = app.cfg.sitename
         feed = AtomFeed(title=title, feed_url=request.url, url=request.host_url)
-        for rev in flaskg.storage.history(item_name=item_name):
-            this_rev = rev
-            this_revno = rev.revno
-            item = rev.item
-            name = rev[NAME]
+        for doc in flaskg.storage.history(item_name=item_name):
+            name = doc[NAME]
+            this_revno = doc["rev_no"]
+            item = flaskg.storage.get_item(name)
+            this_rev = item.get_revision(this_revno)
             try:
                 hl_item = Item.create(name, rev_no=this_revno)
                 previous_revno = this_revno - 1
                 content = _(u'MoinMoin feels unhappy.')
                 content_type = 'text'
             feed.add(title=name, title_type='text',
-                     summary=rev.get(COMMENT, ''), summary_type='text',
+                     summary=doc.get(COMMENT, ''), summary_type='text',
                      content=content, content_type=content_type,
-                     author=get_editor_info(rev, external=True),
+                     author=get_editor_info(doc, external=True),
                      url=url_for_item(name, rev=this_revno, _external=True),
-                     updated=datetime.utcfromtimestamp(rev.timestamp),
+                     updated=doc[MTIME],
                     )
         content = feed.to_string()
         app.cache.set(cid, content)

File MoinMoin/apps/frontend/views.py

View file
  • Ignore whitespace
 from MoinMoin.items import Item, NonExistent
 from MoinMoin.items import ROWS_META, COLS, ROWS_DATA
 from MoinMoin import config, user, util, wikiutil
-from MoinMoin.config import ACTION, COMMENT, CONTENTTYPE, ITEMLINKS, ITEMTRANSCLUSIONS, NAME, CONTENTTYPE_GROUPS
+from MoinMoin.config import ACTION, COMMENT, CONTENTTYPE, ITEMLINKS, ITEMTRANSCLUSIONS, NAME, CONTENTTYPE_GROUPS, MTIME
 from MoinMoin.util import crypto
 from MoinMoin.util.interwiki import url_for_item
 from MoinMoin.security.textcha import TextCha, TextChaizedForm, TextChaValid
 @frontend.route('/+history/<itemname:item_name>')
 def history(item_name):
     history = flaskg.storage.history(item_name=item_name)
-
     offset = request.values.get('offset', 0)
     offset = max(int(offset), 0)
 
                            history_page=history_page,
                           )
 
+
 @frontend.route('/+history')
 def global_history():
     history = flaskg.storage.history(item_name='')
             bookmark_time = datetime.utcfromtimestamp(bm)
         results_per_page = flaskg.user.results_per_page # if it is 0, means no paging
     item_groups = OrderedDict()
-    for rev in history:
-        current_item_name = rev.item.name
-        if bookmark_time and rev.timestamp <= bookmark_time:
+    for doc in history:
+        current_item_name = doc[NAME]
+        if bookmark_time and doc[MTIME] <= bookmark_time:
             break
         elif current_item_name in item_groups:
-            latest_rev = item_groups[current_item_name][0]
-            tm_latest = datetime.utcfromtimestamp(latest_rev.timestamp)
-            tm_current = datetime.utcfromtimestamp(rev.timestamp)
+            latest_doc = item_groups[current_item_name][0]
+            tm_latest = latest_doc[MTIME]
+            tm_current = doc[MTIME]
             if format_date(tm_latest) == format_date(tm_current): # this change took place on the same day
-                item_groups[current_item_name].append(rev)
+                item_groups[current_item_name].append(doc)
         else:
-            item_groups[current_item_name] = [rev]
+            item_groups[current_item_name] = [doc]
 
     # Got the item dict, now doing grouping inside them
     editor_info = namedtuple('editor_info', ['editor', 'editor_revnos'])
-    for item_name, revs in item_groups.items():
+    for item_name, docs in item_groups.items():
         item_info = {}
         editors_info = OrderedDict()
         editors = []
         revnos = []
         comments = []
-        current_rev = revs[0]
+        current_doc = docs[0]
         item_info["item_name"] = item_name
-        item_info["timestamp"] = current_rev.timestamp
-        item_info["contenttype"] = current_rev.get(CONTENTTYPE)
-        item_info["action"] = current_rev.get(ACTION)
-        item_info["name"] = current_rev.get(NAME)
+        item_info["name"] = current_doc[NAME]
+        item_info["timestamp"] = current_doc[MTIME]
+        item_info["contenttype"] = current_doc[CONTENTTYPE]
+        item_info["action"] = current_doc[ACTION]
 
         # Aggregating comments, authors and revno
-        for rev in revs:
-            revnos.append(rev.revno)
-            comment = rev.get(COMMENT)
+        for doc in docs:
+            rev_no = doc["rev_no"]
+            revnos.append(rev_no)
+            comment = doc.get(COMMENT)
             if comment:
                 comment = "#%(revno)d %(comment)s" % {
-                          'revno': rev.revno,
+                          'revno': rev_no,
                           'comment': comment
                           }
                 comments.append(comment)
-            editor = get_editor_info(rev)
+            editor = get_editor_info(doc)
             editor_name = editor["name"]
             if editor_name in editors_info:
-                editors_info[editor_name].editor_revnos.append(rev.revno)
+                editors_info[editor_name].editor_revnos.append(rev_no)
             else:
-                editors_info[editor_name] = editor_info(editor, [rev.revno])
+                editors_info[editor_name] = editor_info(editor, [rev_no])
 
         if len(revnos) == 1:
             # there is only one change for this item in the history considered
     rev_tuple = namedtuple('rev_tuple', ['rev_date', 'item_revs'])
     rev_tuples = rev_tuple(prev_date, [])
     for item_group in item_groups.values():
-        tm = datetime.utcfromtimestamp(item_group["timestamp"])
+        tm = item_group["timestamp"]
         rev_date = format_date(tm)
         if revcount < offset:
             revcount += len(item_group["revnos"])

File MoinMoin/search/indexing.py

View file
  • Ignore whitespace
             userid=ID(stored=True),
             address=ID(stored=True),
             hostname=ID(stored=True),
+            size=NUMERIC(stored=True),
+            action=ID(stored=True),
+            comment=TEXT(stored=True, multitoken_query="and"),
             content=TEXT(stored=True, multitoken_query="and"),
         )
 

File MoinMoin/storage/_tests/test_backends_router.py

View file
  • Ignore whitespace
 
 from flask import current_app as app
 
+from MoinMoin.config import NAME
 from MoinMoin.error import ConfigurationError
 from MoinMoin.storage._tests.test_backends import BackendTest
 from MoinMoin.storage.backends.memory import MemoryBackend
             import time
             time.sleep(1)
 
-        for num, rev in enumerate(self.backend.history(reverse=False)):
+        for num, doc in enumerate(self.backend.history(reverse=False)):
             name, revno = order[num]
-            assert rev.item.name == name
-            assert rev.revno == revno
+            assert doc[NAME] == name
+            assert doc["rev_no"] == revno
 
         order.reverse()
-        for num, rev in enumerate(self.backend.history(reverse=True)):
+        for num, doc in enumerate(self.backend.history(reverse=True)):
             name, revno = order[num]
-            assert rev.item.name == name
-            assert rev.revno == revno
+            assert doc[NAME] == name
+            assert doc["rev_no"] == revno
 
-    # See history function in indexing.py for comments on why this test fails.
-    @pytest.mark.xfail
     def test_history_size_after_rename(self):
         item = self.backend.create_item(u'first')
         item.create_revision(0)
         item.rename(u'second')
         item.create_revision(1)
         item.commit()
-        assert len([rev for rev in self.backend.history()]) == 2
+        assert len(list(self.backend.history())) == 2
 
     def test_history_after_destroy_item(self):
         itemname = u"I will be completely destroyed"
 
         item.destroy()
 
-        all_rev_data = [rev.read() for rev in self.backend.history()]
-        assert not rev_data in all_rev_data
-
-        for rev in self.backend.history():
-            assert not rev.item.name == itemname
-        for rev in self.backend.history(reverse=False):
-            assert not rev.item.name == itemname
+        itemnames_history = [doc[NAME] for doc in self.backend.history()]
+        assert itemname not in itemnames_history
 
     def test_history_after_destroy_revision(self):
-        itemname = u"I will see my children die"    # removed the smiley ':-(' temporarily as it slows the test in addition with a failure
+        itemname = u"I will see my children die"
         rev_data = "I will die!"
         persistent_rev = "I will see my sibling die :-("
         item = self.backend.create_item(itemname)
         rev = item.get_revision(0)
         rev.destroy()
 
-        for rev in self.backend.history():
-            assert not (rev.item.name == itemname and rev.revno == 0)
+        itemnames_revs_history = [(doc[NAME], doc["rev_no"]) for doc in self.backend.history()]
+        assert (itemname, 0) not in itemnames_revs_history
 
     def test_history_item_names(self):
         item = self.backend.create_item(u'first')
         item.rename(u'second')
         item.create_revision(1)
         item.commit()
-        revs_in_create_order = [rev for rev in self.backend.history(reverse=False)]
-        assert revs_in_create_order[0].revno == 0
-        assert revs_in_create_order[0].item.name == u'second'
-        assert revs_in_create_order[1].revno == 1
-        assert revs_in_create_order[1].item.name == u'second'
+        docs_history = list(self.backend.history(reverse=False))
+        assert docs_history[0]["rev_no"] == 0
+        assert docs_history[0][NAME] == u'first'
+        assert docs_history[1]["rev_no"] == 1
+        assert docs_history[1][NAME] == u'second'
 

File MoinMoin/storage/backends/indexing.py

View file
  • Ignore whitespace
         """
         History implementation using the index.
         """
-        for result in self._index.history(reverse=reverse, item_name=item_name, start=start, end=end):
-            # we currently create the item, the revision and yield it to stay
-            # compatible with storage api definition, but this could be changed to
-            # just return the data we get from the index (without accessing backend)
-            # TODO: A problem exists at item = self.get_item(name).
-            # In the history_size_after_rename test in test_backends.py,
-            # an item was created with the name "first" and then renamed to "second."
-            # When it runs through this history function and runs item = self.get_item("first"),
-            # it can't find it because it was already renamed to "second."
-            # Some suggested solutions are: using some neverchanging uuid to identify some specific item
-            # or continuing to use the name, but tracking name changes within the item's history.
-            rev_datetime, name, rev_no = result
-            try:
-                logging.debug("HISTORY: name %s revno %s" % (name, rev_no))
-                item = self.get_item(name)
-                yield item.get_revision(rev_no)
-            except AccessDeniedError as e:
-                # just skip items we may not access
-                pass
-            except (NoSuchItemError, NoSuchRevisionError) as e:
-                logging.exception("history processing catched exception")
+        for doc in self._index.history(reverse=reverse, item_name=item_name, start=start, end=end):
+            logging.debug("HISTORY: name %s revno %s" % (doc[NAME], doc["rev_no"]))
+            # XXX ACL checks?
+            yield doc
 
     def all_tags(self):
         """
                 logging.debug("Latest revisions: removing %d", latest_doc_number)
                 async_writer.delete_document(latest_doc_number)
 
-    def history(self, mountpoint=u'', item_name=u'', reverse=True, start=None, end=None):
-        if mountpoint:
-            mountpoint += '/'
+    def history(self, item_name=u'', reverse=True, start=None, end=None):
         with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
             if item_name:
                 docs = all_revs_searcher.documents(name_exact=item_name,
             from operator import itemgetter
             # sort by mtime and rev_no do deal better with mtime granularity for fast item rev updates
             for doc in sorted(docs, key=itemgetter("mtime", "rev_no"), reverse=reverse)[start:end]:
-                yield (doc[MTIME], mountpoint + doc[NAME], doc["rev_no"])
+                yield doc
 
     def all_tags(self):
         with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:

File MoinMoin/templates/global_history.html

View file
  • Ignore whitespace
                     <span>
                         <h2>{{ rev_date }}</h2>
                         {% if user.valid %}
-                        <a class="bookmark-link" href="{{ url_for('frontend.bookmark', time=latest_timestamp) }}">{{ _("Set bookmark") }}</a>
+                        <a class="bookmark-link" href="{{ url_for('frontend.bookmark', time=utctimestamp(latest_timestamp)) }}">{{ _("Set bookmark") }}</a>
                         {% endif %}
                    </span>
                 </div>

File MoinMoin/templates/history.html

View file
  • Ignore whitespace
                 <th>{{ _("Comment") }}</th>
                 <th colspan="6">{{ _("Actions") }}</th>
             </tr>
-            {% for rev in history %}
+            {% for doc in history %}
             <tr>
-                <td class="moin-wordbreak">{{ rev.name }}</td>
-                <td class="moin-integer">{{ rev.revno }}</td>
-                <td>{{ rev.timestamp|datetimeformat }}</td>
-                <td class="moin-integer">{{ rev.size }}</td>
+                <td class="moin-wordbreak">{{ doc.name }}</td>
+                <td class="moin-integer">{{ doc.rev_no }}</td>
+                <td>{{ doc.mtime|datetimeformat }}</td>
+                <td class="moin-integer">{{ doc.size }}</td>
                 <td>
                     <div class="moin-hist-rev">
-                        <input type="radio" name="rev1" value="{{ rev.revno }}" />
-                        <input type="radio" name="rev2" value="{{ rev.revno }}" />
+                        <input type="radio" name="rev1" value="{{ doc.rev_no }}" />
+                        <input type="radio" name="rev2" value="{{ doc.rev_no }}" />
                     </div>
                 </td>
-                <td class="moin-wordbreak">{{ utils.editor_info(rev) }}</td>
-                <td class="moin-wordbreak">{{ rev.contenttype }}</td>
-                <td class="moin-wordbreak">{{ rev.comment }}</td>
-                <td><a href="{{ url_for('frontend.show_item', item_name=rev.item.name, rev=rev.revno) }}">{{ _('show') }}</a></td>
-                <td><a href="{{ url_for('frontend.show_item_meta', item_name=rev.item.name, rev=rev.revno) }}">{{ _('meta') }}</a></td>
-                <td><a href="{{ url_for('frontend.download_item', item_name=rev.item.name, rev=rev.revno) }}">{{ _('download') }}</a></td>
-                <td><a href="{{ url_for('frontend.highlight_item', item_name=rev.item.name, rev=rev.revno) }}">{{ _('highlight') }}</a></td>
-                <td><a href="{{ url_for('frontend.revert_item', item_name=rev.item.name, rev=rev.revno) }}">{{ _('revert') }}</a></td>
-                <td><a href="{{ url_for('frontend.destroy_item', item_name=rev.item.name, rev=rev.revno) }}">{{ _('destroy') }}</a></td>
+                <td class="moin-wordbreak">{{ utils.editor_info(doc) }}</td>
+                <td class="moin-wordbreak">{{ doc.contenttype }}</td>
+                <td class="moin-wordbreak">{{ doc.comment }}</td>
+                <td><a href="{{ url_for('frontend.show_item', item_name=doc.name, rev=doc.rev_no) }}">{{ _('show') }}</a></td>
+                <td><a href="{{ url_for('frontend.show_item_meta', item_name=doc.name, rev=doc.rev_no) }}">{{ _('meta') }}</a></td>
+                <td><a href="{{ url_for('frontend.download_item', item_name=doc.name, rev=doc.rev_no) }}">{{ _('download') }}</a></td>
+                <td><a href="{{ url_for('frontend.highlight_item', item_name=doc.name, rev=doc.rev_no) }}">{{ _('highlight') }}</a></td>
+                <td><a href="{{ url_for('frontend.revert_item', item_name=doc.name, rev=doc.rev_no) }}">{{ _('revert') }}</a></td>
+                <td><a href="{{ url_for('frontend.destroy_item', item_name=doc.name, rev=doc.rev_no) }}">{{ _('destroy') }}</a></td>
             </tr>
             {% endfor %}
         </table>

File MoinMoin/themes/__init__.py

View file
  • Ignore whitespace
     return 'moin-mime-%s' % cls
 
 
+def utctimestamp(dt):
+    """
+    convert a datetime object (UTC) to a UNIX timestamp (UTC)
+
+    Note: time library writers seem to have a distorted relationship to inverse
+          functions and also to UTC (see time.gmtime, see datetime.utcfromtimestamp).
+    """
+    from calendar import timegm
+    return timegm(dt.timetuple())
+
+
 def setup_jinja_env():
     app.jinja_env.filters['shorten_item_name'] = shorten_item_name
     app.jinja_env.filters['contenttype_to_class'] = contenttype_to_class
                             'item_name': 'handlers need to give it',
                             'url_for_item': url_for_item,
                             'get_editor_info': lambda rev: get_editor_info(rev),
+                            'utctimestamp': lambda dt: utctimestamp(dt),
                             'gen': make_generator(),
                             })