Commits

Thomas Waldmann committed c62f12d

1.9 migration: make an 'import19' script command, make a migration package

moin import19 --data_dir your_1.9_data_dir

you may need the -s and -i options to initialize moin2 storage and index,
if you did not do that before.
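
A possible full sequence (a sketch only; the initialization command name and
its flags are assumptions here, check moin --help for your tree):

    moin index-create -s -i                      # create moin2 storage (-s) and index (-i)
    moin import19 --data_dir your_1.9_data_dir   # then import the 1.9 data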

Comments (0)

Files changed (9)

MoinMoin/script/__init__.py

     manager.add_command("item-put", modify_item.PutItem())
     from MoinMoin.script.maint.modified_systemitems import Modified_SystemItems
     manager.add_command("maint_modified_systemitems", Modified_SystemItems())
+    from MoinMoin.script.migration.import19 import ImportMoin19
+    manager.add_command("import19", ImportMoin19())
 
     return manager.run(default_command=default_command)
 

MoinMoin/script/migration/1.9/__init__.py

+# Copyright: 2011 MoinMoin:ThomasWaldmann
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - migration (upgrading) code for upgrades 1.9 -> 2.0
+"""
+

MoinMoin/script/migration/1.9/_logfile19.py

+# Copyright: 2005-2007 MoinMoin:ThomasWaldmann
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+    MoinMoin - LogFile package
+
+    This module supports buffered log reads, iterating forward and backward line-by-line, etc.
+"""
+
+
+import os
+import codecs
+import errno
+
+from MoinMoin import log
+logging = log.getLogger(__name__)
+
+CHARSET = 'utf-8'
+
+
+class LogError(Exception):
+    """ Base class for log errors """
+
+class LogMissing(LogError):
+    """ Raised when the log is missing """
+
+
+class LineBuffer:
+    """
+    Reads lines from a file
+
+    :ivar len: number of lines in self.lines
+    :ivar lines: list of lines (unicode)
+    :ivar offsets: list of file offsets for each line. additionally the position
+                   after the last read line is stored into self.offsets[-1]
+    """
+    def __init__(self, file, offset, size, forward=True):
+        """
+
+        TODO: when this gets refactored, don't use "file" (is a builtin)
+
+        :param file: open file object
+        :param offset: position in file to start from
+        :param size: approximate number of bytes to read
+        :param forward: if True, read from offset forward, else read from offset-size up to offset
+        :type forward: boolean
+        """
+        self.loglevel = logging.NOTSET
+        if forward:
+            begin = offset
+            logging.log(self.loglevel, "LineBuffer.init: forward seek %d read %d" % (begin, size))
+            file.seek(begin)
+            lines = file.readlines(size)
+        else:
+            if offset < 2 * size:
+                begin = 0
+                size = offset
+            else:
+                begin = offset - size
+            logging.log(self.loglevel, "LineBuffer.init: backward seek %d read %d" % (begin, size))
+            file.seek(begin)
+            lines = file.read(size).splitlines(True)
+            if begin != 0:
+                # remove potentially incomplete first line
+                begin += len(lines[0])
+                lines = lines[1:]
+                # XXX check for min one line read
+
+        linecount = len(lines)
+
+        # now calculate the file offsets of all read lines
+        offsets = [len(line) for line in lines]
+        offsets.append(0) # later this element will have the file offset after the last read line
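+        # e.g. for lines ['ab\n', 'c\n'] read at begin=10, the loop below turns
+        # the length list [3, 2, 0] into absolute offsets [10, 13, 15]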
+
+        lengthpreviousline = 0
+        offset = begin
+        for i in xrange(linecount+1):
+            offset += lengthpreviousline
+            lengthpreviousline = offsets[i]
+            offsets[i] = offset
+
+        self.offsets = offsets
+        self.len = linecount
+        # Decode lines after offset in file is calculated
+        self.lines = [unicode(line, CHARSET) for line in lines]
+
+
+class LogFile:
+    """
+    .filter: function that gets the values from .parser.
+             must return True to keep it or False to remove it
+
+    Overwrite .parser() and .add() to customize this class to special log files
+    """
+
+    def __init__(self, filename, buffer_size=4096):
+        """
+        :param filename: name of the log file
+        :param buffer_size: approx. size of one buffer in bytes
+        """
+        self.loglevel = logging.NOTSET
+        self.__filename = filename
+        self.__buffer = None # currently used buffer, points to one of the following:
+        self.__buffer1 = None
+        self.__buffer2 = None
+        self.buffer_size = buffer_size
+        self.__lineno = 0
+        self.filter = None
+
+    def __iter__(self):
+        return self
+
+    def reverse(self):
+        """ yield log entries in reverse direction starting from last one
+
+        :rtype: iterator
+        """
+        self.to_end()
+        while True:
+            try:
+                logging.log(self.loglevel, "LogFile.reverse %s" % self.__filename)
+                result = self.previous()
+            except StopIteration:
+                return
+            yield result
+
+    def sanityCheck(self):
+        """ Check for log file write access.
+
+        :rtype: string (error message) or None
+        """
+        if not os.access(self.__filename, os.W_OK):
+            return "The log '%s' is not writable!" % (self.__filename, )
+        return None
+
+    def __getattr__(self, name):
+        """
+        generate some attributes when needed
+        """
+        if name == "_LogFile__rel_index": # Python black magic: this is the real name of the __rel_index attribute
+            # starting iteration from begin
+            self.__buffer1 = LineBuffer(self._input, 0, self.buffer_size)
+            self.__buffer2 = LineBuffer(self._input,
+                                        self.__buffer1.offsets[-1],
+                                        self.buffer_size)
+            self.__buffer = self.__buffer1
+            self.__rel_index = 0
+            return 0
+        elif name == "_input":
+            try:
+                # Open the file (NOT using codecs.open, it breaks our offset calculation. We decode it later.).
+                # Use binary mode in order to retain \r - otherwise the offset calculation would fail.
+                self._input = file(self.__filename, "rb", )
+            except IOError as err:
+                if err.errno == errno.ENOENT: # "file not found"
+                    # XXX workaround if edit-log does not exist: just create it empty
+                    # if this workaround raises another error, we don't catch
+                    # it, so the admin will see it.
+                    f = file(self.__filename, "ab")
+                    f.write('')
+                    f.close()
+                    self._input = file(self.__filename, "rb", )
+                else:
+                    logging.error("logfile: %r IOERROR errno %d (%s)" % (self.__filename, err.errno, os.strerror(err.errno)))
+                    raise
+            return self._input
+        elif name == "_output":
+            self._output = codecs.open(self.__filename, 'a', CHARSET)
+            return self._output
+        else:
+            raise AttributeError(name)
+
+    def size(self):
+        """ Return log size in bytes
+
+        Return 0 if the file does not exist; other OSErrors are raised.
+
+        :returns: size of log file in bytes
+        :rtype: Int
+        """
+        try:
+            return os.path.getsize(self.__filename)
+        except OSError as err:
+            if err.errno == errno.ENOENT:
+                return 0
+            raise
+
+    def lines(self):
+        """ Return number of lines in the log file
+
+        Return 0 if the file does not exist; other OSErrors are raised.
+
+        Expensive for big log files - O(n)
+
+        :returns: size of log file in lines
+        :rtype: Int
+        """
+        try:
+            f = file(self.__filename, 'r')
+            try:
+                count = 0
+                for line in f:
+                    count += 1
+                return count
+            finally:
+                f.close()
+        except (OSError, IOError) as err:
+            if err.errno == errno.ENOENT:
+                return 0
+            raise
+
+    def peek(self, lines):
+        """ Move position in file forward or backwards by "lines" count
+
+        It adjusts .__lineno if set.
+        This function is not aware of filters!
+
+        :param lines: number of lines, may be negative to move backward
+        :rtype: boolean
+        :returns: True if the move would go before the beginning of the file,
+                  or to the end of the file or beyond
+        """
+        logging.log(self.loglevel, "LogFile.peek %s" % self.__filename)
+        self.__rel_index += lines
+        while self.__rel_index < 0:
+            if self.__buffer is self.__buffer2:
+                if self.__buffer.offsets[0] == 0:
+                    # already at the beginning of the file
+                    self.__rel_index = 0
+                    self.__lineno = 0
+                    return True
+                else:
+                    # change to buffer 1
+                    self.__buffer = self.__buffer1
+                    self.__rel_index += self.__buffer.len
+            else: # self.__buffer is self.__buffer1
+                if self.__buffer.offsets[0] == 0:
+                    # already at the beginning of the file
+                    self.__rel_index = 0
+                    self.__lineno = 0
+                    return True
+                else:
+                    # load previous lines
+                    self.__buffer2 = self.__buffer1
+                    self.__buffer1 = LineBuffer(self._input,
+                                                self.__buffer.offsets[0],
+                                                self.buffer_size,
+                                                forward=False)
+                    self.__buffer = self.__buffer1
+                    self.__rel_index += self.__buffer.len
+
+        while self.__rel_index >= self.__buffer.len:
+            if self.__buffer is self.__buffer1:
+                # change to buffer 2
+                self.__rel_index -= self.__buffer.len
+                self.__buffer = self.__buffer2
+            else: # self.__buffer is self.__buffer2
+                # try to load next buffer
+                tmpbuff = LineBuffer(self._input,
+                                     self.__buffer.offsets[-1],
+                                     self.buffer_size)
+                if tmpbuff.len == 0:
+                    # end of file
+                    if self.__lineno is not None:
+                        self.__lineno += (lines -
+                                         (self.__rel_index - self.__buffer.len))
+                    self.__rel_index = self.__buffer.len # point to after last read line
+                    return True
+                # shift buffers
+                self.__rel_index -= self.__buffer.len
+                self.__buffer1 = self.__buffer2
+                self.__buffer2 = tmpbuff
+                self.__buffer = self.__buffer2
+
+        if self.__lineno is not None:
+            self.__lineno += lines
+        return False
+
+    def __next(self):
+        """get next line already parsed"""
+        if self.peek(0):
+            raise StopIteration
+        result = self.parser(self.__buffer.lines[self.__rel_index])
+        self.peek(1)
+        return result
+
+    def next(self):
+        """get next line that passes through the filter
+        :returns: next entry
+        raises StopIteration at file end
+        """
+        result = None
+        while result is None:
+            while result is None:
+                logging.log(self.loglevel, "LogFile.next %s" % self.__filename)
+                result = self.__next()
+            if self.filter and not self.filter(result):
+                result = None
+        return result
+
+    def __previous(self):
+        """get previous line already parsed"""
+        if self.peek(-1):
+            raise StopIteration
+        return self.parser(self.__buffer.lines[self.__rel_index])
+
+    def previous(self):
+        """get previous line that passes through the filter
+        :returns: previous entry
+        raises StopIteration at file begin
+        """
+        result = None
+        while result is None:
+            while result is None:
+                logging.log(self.loglevel, "LogFile.previous %s" % self.__filename)
+                result = self.__previous()
+            if self.filter and not self.filter(result):
+                result = None
+        return result
+
+    def to_begin(self):
+        """moves file position to the begin"""
+        logging.log(self.loglevel, "LogFile.to_begin %s" % self.__filename)
+        if self.__buffer1 is None or self.__buffer1.offsets[0] != 0:
+            self.__buffer1 = LineBuffer(self._input,
+                                        0,
+                                        self.buffer_size)
+            self.__buffer2 = LineBuffer(self._input,
+                                        self.__buffer1.offsets[-1],
+                                        self.buffer_size)
+        self.__buffer = self.__buffer1
+        self.__rel_index = 0
+        self.__lineno = 0
+
+    def to_end(self):
+        """moves file position to the end"""
+        logging.log(self.loglevel, "LogFile.to_end %s" % self.__filename)
+        self._input.seek(0, 2) # to end of file
+        size = self._input.tell()
+        if self.__buffer2 is None or size > self.__buffer2.offsets[-1]:
+            self.__buffer2 = LineBuffer(self._input,
+                                        size,
+                                        self.buffer_size,
+                                        forward=False)
+
+            self.__buffer1 = LineBuffer(self._input,
+                                        self.__buffer2.offsets[0],
+                                        self.buffer_size,
+                                        forward=False)
+        self.__buffer = self.__buffer2
+        self.__rel_index = self.__buffer2.len
+        self.__lineno = None
+
+    def position(self):
+        """ Return the current file position
+
+        This can be converted into a string (e.g. via repr()) and be rebuilt from it later.
+        For this plain file implementation, the position is an integer.
+        """
+        return self.__buffer.offsets[self.__rel_index]
+
+    def seek(self, position, line_no=None):
+        """ moves file position to an value formerly gotten from .position().
+        To enable line counting line_no must be provided.
+        .seek is much more efficient for moving long distances than .peek.
+        raises ValueError if position is invalid
+        """
+        logging.log(self.loglevel, "LogFile.seek %s pos %d" % (self.__filename, position))
+        if self.__buffer1:
+            logging.log(self.loglevel, "b1 %r %r" % (self.__buffer1.offsets[0], self.__buffer1.offsets[-1]))
+        if self.__buffer2:
+            logging.log(self.loglevel, "b2 %r %r" % (self.__buffer2.offsets[0], self.__buffer2.offsets[-1]))
+        if self.__buffer1 and self.__buffer1.offsets[0] <= position < self.__buffer1.offsets[-1]:
+            # position is in .__buffer1
+            self.__rel_index = self.__buffer1.offsets.index(position)
+            self.__buffer = self.__buffer1
+        elif self.__buffer2 and self.__buffer2.offsets[0] <= position < self.__buffer2.offsets[-1]:
+            # position is in .__buffer2
+            self.__rel_index = self.__buffer2.offsets.index(position)
+            self.__buffer = self.__buffer2
+        elif self.__buffer1 and self.__buffer1.offsets[-1] == position:
+            # we already have one buffer directly before where we want to go
+            self.__buffer2 = LineBuffer(self._input,
+                                        position,
+                                        self.buffer_size)
+            self.__buffer = self.__buffer2
+            self.__rel_index = 0
+        elif self.__buffer2 and self.__buffer2.offsets[-1] == position:
+            # we already have one buffer directly before where we want to go
+            self.__buffer1 = self.__buffer2
+            self.__buffer2 = LineBuffer(self._input,
+                                        position,
+                                        self.buffer_size)
+            self.__buffer = self.__buffer2
+            self.__rel_index = 0
+        else:
+            # load buffers around position
+            self.__buffer1 = LineBuffer(self._input,
+                                        position,
+                                        self.buffer_size,
+                                        forward=False)
+            self.__buffer2 = LineBuffer(self._input,
+                                        position,
+                                        self.buffer_size)
+            self.__buffer = self.__buffer2
+            self.__rel_index = 0
+            # XXX test for valid position
+        self.__lineno = line_no
+
+    def line_no(self):
+        """:returns: the current line number or None if line number is unknown"""
+        return self.__lineno
+
+    def calculate_line_no(self):
+        """ Calculate the current line number from buffer offsets
+
+        If line number is unknown it is calculated by parsing the whole file.
+        This may be expensive.
+        """
+        self._input.seek(0, 0)
+        lines = self._input.read(self.__buffer.offsets[self.__rel_index])
+        self.__lineno = len(lines.splitlines())
+        return self.__lineno
+
+    def parser(self, line):
+        """
+        Converts the line from file to program representation.
+        This implementation uses TAB separated strings.
+        This method should be overwritten by subclasses.
+
+        :param line: line as read from file
+        :returns: parsed line or None on error
+        """
+        return line.split("\t")
+
+    def add(self, *data):
+        """
+        add line to log file
+        This implementation save the values as TAB separated strings.
+        This method should be overwritten by the sub classes.
+        """
+        line = "\t".join(data)
+        self._add(line)
+
+    def _add(self, line):
+        """
+        :param line: flat line
+        :type line: String
+        write on entry in the log file
+        """
+        if line is not None:
+            if line[-1] != '\n':
+                line += '\n'
+            self._output.write(line)
+            self._output.close() # does this maybe help against the sporadic fedora wikis 160 \0 bytes in the edit-log?
+            del self._output # re-open the output file automagically
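
A minimal usage sketch (illustration, not part of the commit; it assumes the
module is importable, e.g. when run from inside its directory):

    from _logfile19 import LogFile

    class TabLog(LogFile):
        def parser(self, line):
            # customize parsing: split a TAB separated line into fields
            return line.rstrip(u'\r\n').split(u'\t')

    log = TabLog('edit-log')
    log.filter = lambda fields: len(fields) >= 9   # optional: keep complete entries only
    for fields in log:             # iterate forward, entry by entry
        print fields
    for fields in log.reverse():   # iterate backward, starting at the last entry
        print fields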

MoinMoin/script/migration/1.9/_utils19.py

+# Copyright: 2010 MoinMoin:ThomasWaldmann
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - helpers for 1.9 migration
+"""
+
+import re
+
+from MoinMoin.config import NAME, ACL, CONTENTTYPE, MTIME, LANGUAGE
+
+CHARSET = 'utf-8'
+
+# Precompiled patterns for file name [un]quoting
+UNSAFE = re.compile(r'[^a-zA-Z0-9_]+')
+QUOTED = re.compile(r'\(([a-fA-F0-9]+)\)')
+
+
+def split_body(body):
+    """
+    Extract the processing instructions / acl / etc. at the beginning of a page's body.
+
+    Hint: if you have a Page object p, you already have the result of this function in
+          p.meta and (even better) parsed/processed stuff in p.pi.
+
+    Returns a dict of processing instructions and a string with the rest of the body.
+    """
+    pi = {}
+    while body.startswith('#'):
+        try:
+            line, body = body.split('\n', 1) # extract first line
+            line = line.rstrip('\r')
+        except ValueError:
+            line = body
+            body = ''
+
+        # end parsing on empty (invalid) PI
+        if line == "#":
+            body = line + '\n' + body
+            break
+
+        if line[1] == '#': # two hash marks are a comment
+            comment = line[2:]
+            if not comment.startswith(' '):
+                # we don't require a blank after the ##, so we put one there
+                comment = ' ' + comment
+                line = '##%s' % comment
+
+        verb, args = (line[1:] + ' ').split(' ', 1) # split at the first blank
+        pi.setdefault(verb.lower(), []).append(args.strip())
+
+    for key, value in pi.iteritems():
+        if key in ['#', ]:
+            # transform the lists to tuples:
+            pi[key] = tuple(value)
+        elif key in ['acl', ]:
+            # join the list of values to a single value
+            pi[key] = u' '.join(value)
+        else:
+            # for keys that can't occur multiple times, don't use a list:
+            pi[key] = value[-1] # use the last value to copy 1.9 parsing behaviour
+
+    return pi, body
+
+
+def add_metadata_to_body(metadata, data):
+    """
+    Adds the processing instructions to the data.
+    """
+    meta_keys = [NAME, ACL, CONTENTTYPE, MTIME, LANGUAGE, ]
+
+    metadata_data = ""
+    for key, value in metadata.iteritems():
+        if key not in meta_keys:
+            continue
+        # special handling for list metadata
+        if isinstance(value, (list, tuple)):
+            for line in value:
+                metadata_data += "#%s %s\n" % (key, line)
+        else:
+            metadata_data += "#%s %s\n" % (key, value)
+    return metadata_data + data
+
+
+def quoteWikinameFS(wikiname, charset=CHARSET):
+    """
+    Return file system representation of a Unicode WikiName.
+
+    Warning: will raise UnicodeError if wikiname cannot be encoded using
+    charset. The default value 'utf-8' can encode any character.
+
+    :param wikiname: wiki name [unicode]
+    :param charset: charset to encode string (before quoting)
+    :rtype: string
+    :returns: quoted name, safe for any file system
+    """
+    filename = wikiname.encode(charset)
+
+    quoted = []
+    location = 0
+    for needle in UNSAFE.finditer(filename):
+        # append leading safe stuff
+        quoted.append(filename[location:needle.start()])
+        location = needle.end()
+        # Quote and append unsafe stuff
+        quoted.append('(')
+        for character in needle.group():
+            quoted.append('%02x' % ord(character))
+        quoted.append(')')
+
+    # append rest of string
+    quoted.append(filename[location:])
+    return ''.join(quoted)
+
+
+class InvalidFileNameError(Exception):
+    """ Called when we find an invalid file name """
+    pass
+
+
+def unquoteWikiname(filename, charset=CHARSET):
+    """
+    Return Unicode WikiName from quoted file name.
+
+    raises an InvalidFileNameError in case of unquoting problems.
+
+    :param filename: quoted wiki name
+    :param charset: charset to use for decoding (after unquoting)
+    :rtype: unicode
+    :returns: WikiName
+    """
+    # From some places we get called with Unicode strings
+    if isinstance(filename, unicode):
+        filename = filename.encode(CHARSET)
+
+    parts = []
+    start = 0
+    for needle in QUOTED.finditer(filename):
+        # append leading unquoted stuff
+        parts.append(filename[start:needle.start()])
+        start = needle.end()
+        # Append quoted stuff
+        group = needle.group(1)
+        # Filter invalid filenames
+        if (len(group) % 2 != 0):
+            raise InvalidFileNameError(filename)
+        try:
+            for i in range(0, len(group), 2):
+                byte = group[i:i+2]
+                character = chr(int(byte, 16))
+                parts.append(character)
+        except ValueError:
+            # byte not in hex, e.g. 'xy'
+            raise InvalidFileNameError(filename)
+
+    # append rest of string
+    if start == 0:
+        wikiname = filename
+    else:
+        parts.append(filename[start:len(filename)])
+        wikiname = ''.join(parts)
+
+    return wikiname.decode(charset)
+
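
A hypothetical round trip through these helpers (illustration, not part of the
commit; the values are made up):

    quoted = quoteWikinameFS(u'Caf\xe9/SubPage')   # -> 'Caf(c3a92f)SubPage'
    assert unquoteWikiname(quoted) == u'Caf\xe9/SubPage'

    pi, body = split_body(u'#format wiki\n#acl All:read\nFirst line\n')
    # pi == {u'format': u'wiki', u'acl': u'All:read'}, body == u'First line\n'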

MoinMoin/script/migration/1.9/import19.py

+# Copyright: 2008 MoinMoin:JohannesBerg
+# Copyright: 2008-2011 MoinMoin:ThomasWaldmann
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - import content and user data from a moin 1.9 compatible storage
+           into the moin2 storage.
+
+TODO
+----
+
+* translate revno numbering into revid parents
+* ACLs for attachments
+"""
+
+
+import sys
+import os
+import re
+import codecs
+import hashlib
+from StringIO import StringIO
+
+from flask import current_app as app
+from flaskext.script import Command, Option
+
+from MoinMoin import log
+logging = log.getLogger(__name__)
+
+from ._utils19 import quoteWikinameFS, unquoteWikiname, split_body
+from ._logfile19 import LogFile
+
+from MoinMoin.config import ACL, CONTENTTYPE, NAME, NAME_OLD, REVERTED_TO, \
+                            ACTION, ADDRESS, HOSTNAME, USERID, MTIME, EXTRA, COMMENT, \
+                            IS_SYSITEM, SYSITEM_VERSION, \
+                            TAGS, SIZE, HASH_ALGORITHM, \
+                            ITEMID, REVID, DATAID
+
+UID_OLD = 'old_user_id' # dynamic field *_id, so we don't have to change schema
+
+from MoinMoin.storage.error import NoSuchRevisionError
+from MoinMoin.util.mimetype import MimeType
+from MoinMoin.util.crypto import make_uuid
+from MoinMoin.storage.middleware.serialization import serialize_rev
+from MoinMoin import security
+
+
+CHARSET = 'utf-8'
+
+ACL_RIGHTS_CONTENTS = ['read', 'write', 'create', 'destroy', 'admin', ]
+
+DELETED_MODE_KEEP = 'keep'
+DELETED_MODE_KILL = 'kill'
+
+CONTENTTYPE_DEFAULT = u'text/plain;charset=utf-8'
+CONTENTTYPE_USER = u'text/x.moin.userprofile'
+CONTENTTYPE_MOINWIKI = u'text/x.moin.wiki;charset=utf-8'
+FORMAT_TO_CONTENTTYPE = {
+    'wiki': CONTENTTYPE_MOINWIKI,
+    'text/wiki': CONTENTTYPE_MOINWIKI,
+    'text/moin-wiki': CONTENTTYPE_MOINWIKI,
+    'creole': u'text/x.moin.creole;charset=utf-8',
+    'text/creole': u'text/x.moin.creole;charset=utf-8',
+    'rst': u'text/rst;charset=utf-8',
+    'text/rst': u'text/rst;charset=utf-8',
+    'plain': u'text/plain;charset=utf-8',
+    'text/plain': u'text/plain;charset=utf-8',
+}
+
+
+class ImportMoin19(Command):
+    description = 'Import data from a moin 1.9 wiki.'
+
+    option_list = [
+        Option('--data_dir', '-d', dest='data_dir', type=unicode, required=True,
+               help='moin 1.9 data_dir (contains pages and users subdirectories).'),
+    ]
+
+    def run(self, data_dir=None):
+        indexer = app.storage
+        backend = indexer.backend # backend without indexing
+        print "Users..."
+        for rev in UserBackend(os.path.join(data_dir, 'user')): # assumes user/ below data_dir
+            backend.store(rev.meta, rev.data)
+
+        print "Pages/Attachments..."
+        for rev in PageBackend(data_dir, deleted_mode=DELETED_MODE_KILL, default_markup=u'wiki'):
+            backend.store(rev.meta, rev.data)
+
+        print "Building the index..."
+        indexer.rebuild()
+
+        print "Fix userids..."
+        userid_map = dict([(rev.meta[UID_OLD], rev.meta[ITEMID]) for rev in indexer.documents(all_revs=False, contenttype=CONTENTTYPE_USER)])
+        for revid in backend:
+            meta, data = backend.retrieve(revid)
+            if USERID in meta:
+                try:
+                    meta[USERID] = userid_map[meta[USERID]]
+                except KeyError:
+                    # user profile lost, but userid referred by revision
+                    print "lost %r" % meta[USERID]
+                    del meta[USERID]
+                backend.store(meta, data)
+            elif meta.get(CONTENTTYPE) == CONTENTTYPE_USER:
+                meta.pop(UID_OLD, None) # not needed any more
+                backend.store(meta, data)
+
+        print "Rebuilding the index..."
+        indexer.close()
+        indexer.destroy()
+        indexer.create()
+        indexer.rebuild()
+        indexer.open()
+
+
+class KillRequested(Exception):
+    """raised if item killing is requested by DELETED_MODE"""
+
+
+class PageBackend(object):
+    """
+    moin 1.9 page directory
+    """
+    def __init__(self, path, deleted_mode=DELETED_MODE_KEEP,
+                 default_markup=u'wiki',
+                 item_category_regex=ur'(?P<all>Category(?P<key>(?!Template)\S+))'):
+        """
+        :param path: storage path (data_dir)
+        :param deleted_mode: 'kill' - just ignore deleted pages (pages with
+                                      non-existing current revision) and their attachments
+                                      as if they were not there.
+                                      Non-deleted pages (pages with an existing current
+                                      revision) that have non-current deleted revisions
+                                      will be treated as for 'keep'.
+                             'keep' - keep deleted pages as items with empty revisions,
+                                      keep their attachments. (default)
+        :param default_markup: used if a page has no #format line; moin 1.9's
+                               default is 'wiki' and we use the same default here.
+        """
+        self._path = path
+        assert deleted_mode in (DELETED_MODE_KILL, DELETED_MODE_KEEP, )
+        self.deleted_mode = deleted_mode
+        self.format_default = default_markup
+        self.item_category_regex = re.compile(item_category_regex, re.UNICODE)
+
+    def __iter__(self):
+        pages_dir = os.path.join(self._path, 'pages')
+        for f in os.listdir(pages_dir):
+            itemname = unquoteWikiname(f)
+            try:
+                item = PageItem(self, os.path.join(pages_dir, f), itemname)
+            except Exception as err:
+                logging.exception("PageItem %r raised exception:" % itemname)
+            else:
+                for rev in item.iter_revisions():
+                    yield rev
+                for rev in item.iter_attachments():
+                    yield rev
+
+
+class PageItem(object):
+    """
+    moin 1.9 page
+    """
+    def __init__(self, backend, path, itemname):
+        self.backend = backend
+        self.name = itemname
+        self.path = path
+        currentpath = os.path.join(self.path, 'current')
+        with open(currentpath, 'r') as f:
+            self.current = int(f.read().strip())
+        editlogpath = os.path.join(self.path, 'edit-log')
+        self.editlog = EditLog(editlogpath)
+        self.acl = None # TODO
+        self.itemid = make_uuid()
+        if backend.deleted_mode == DELETED_MODE_KILL:
+            revpath = os.path.join(self.path, 'revisions', '%08d' % self.current)
+            PageRevision(self, self.current, revpath) # will raise exception if killing is requested
+
+    def iter_revisions(self):
+        revisionspath = os.path.join(self.path, 'revisions')
+        try:
+            # rather use this or a range(1, self.current+1)?
+            fnames = os.listdir(revisionspath)
+        except OSError:
+            fnames = []
+        for fname in fnames:
+            try:
+                revno = int(fname)
+                yield PageRevision(self, revno, os.path.join(revisionspath, fname))
+            except Exception as err:
+                logging.exception("PageRevision %r %r raised exception:" % (self.name, fname))
+
+    def iter_attachments(self):
+        attachmentspath = os.path.join(self.path, 'attachments')
+        try:
+            fnames = os.listdir(attachmentspath)
+        except OSError:
+            fnames = []
+        for fname in fnames:
+            attachname = fname.decode('utf-8')
+            try:
+                yield AttachmentRevision(self.name, attachname, os.path.join(attachmentspath, fname), self.editlog, self.acl)
+            except Exception as err:
+                logging.exception("AttachmentRevision %r/%r raised exception:" % (self.name, attachname))
+
+
+class PageRevision(object):
+    """
+    moin 1.9 page revision
+    """
+    def __init__(self, item, revno, path):
+        item_name = item.name
+        itemid = item.itemid
+        editlog = item.editlog
+        self.backend = item.backend
+        # we just read the page and parse it here, makes the rest of the code simpler:
+        try:
+            with codecs.open(path, 'r', CHARSET) as f:
+                content = f.read()
+        except (IOError, OSError):
+            if revno == item.current and self.backend.deleted_mode == DELETED_MODE_KILL:
+                raise KillRequested('deleted_mode wants killing/ignoring')
+            # handle deleted revisions (for all revnos with 0<=revno<=current) here
+            # we prepare some values for the case we don't find a better value in edit-log:
+            meta = {MTIME: -1, # fake, will get 0 in the end
+                    NAME: item_name, # will get overwritten with name from edit-log
+                                     # if we have an entry there
+                   }
+            try:
+                # note: revision 0 never exists on disk, so don't recurse below 1
+                if revno <= 1:
+                    raise NoSuchRevisionError('no previous revision')
+                prev_path = os.path.join(item.path, 'revisions', '%08d' % (revno - 1))
+                previous_meta = PageRevision(item, revno - 1, prev_path).meta
+                # if this page revision is deleted, we have no on-page metadata.
+                # but some metadata is required, thus we have to copy it from the
+                # (non-deleted) revision revno-1:
+                for key in [ACL, NAME, CONTENTTYPE, MTIME, ]:
+                    if key in previous_meta:
+                        meta[key] = previous_meta[key]
+            except NoSuchRevisionError:
+                pass # should not happen
+            meta[MTIME] += 1 # it is now either 0 or prev rev mtime + 1
+            data = u''
+            try:
+                editlog_data = editlog.find_rev(revno)
+            except KeyError:
+                if 0 <= revno <= item.current:
+                    editlog_data = { # make something up
+                        ACTION: u'SAVE/DELETE',
+                    }
+                else:
+                    raise NoSuchRevisionError('Item %r has no revision %d (not even a deleted one)!' %
+                            (item.name, revno))
+        else:
+            try:
+                editlog_data = editlog.find_rev(revno)
+            except KeyError:
+                if 1 <= revno <= item.current:
+                    editlog_data = { # make something up
+                        NAME: item.name,
+                        MTIME: int(os.path.getmtime(path)),
+                        ACTION: u'SAVE',
+                    }
+            meta, data = split_body(content)
+        meta.update(editlog_data)
+        format = meta.pop('format', self.backend.format_default)
+        meta[CONTENTTYPE] = FORMAT_TO_CONTENTTYPE.get(format, CONTENTTYPE_DEFAULT)
+        data = self._process_data(meta, data)
+        data = data.encode(CHARSET)
+        size, hash_name, hash_digest = hash_hexdigest(data)
+        meta[hash_name] = hash_digest
+        meta[SIZE] = size
+        meta[ITEMID] = itemid
+        meta[REVID] = make_uuid()
+        self.meta = {}
+        for k, v in meta.iteritems():
+            if isinstance(v, list):
+                v = tuple(v)
+            self.meta[k] = v
+        self.data = StringIO(data)
+
+        acl_line = self.meta.get(ACL)
+        if acl_line is not None:
+            self.meta[ACL] = regenerate_acl(acl_line)
+
+    def _process_data(self, meta, data):
+        """ In moin 1.x markup, not all metadata is stored in the page's header.
+            E.g. categories are stored in the footer of the page content. For
+            moin2, we extract that stuff from content and put it into metadata.
+        """
+        if meta[CONTENTTYPE] == CONTENTTYPE_MOINWIKI:
+            data = process_categories(meta, data, self.backend.item_category_regex)
+        return data
+
+
+def process_categories(meta, data, item_category_regex):
+    # process categories to tags
+    # find last ---- in the data plus the categories below it
+    m = re.search(r'\n\r?\s*-----*', data[::-1])
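+    # note: data is searched in reverse, so the match is the *last* separator
+    # line and m.start()/m.end() are offsets counted from the end of data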
+    if m:
+        start = m.start()
+        end = m.end()
+        # categories are after the ---- line
+        if start > 0:
+            categories = data[-start:]
+        else:
+            categories = u''
+        # remove the ---- line from the content
+        data = data[:-end]
+        if categories:
+            # for CategoryFoo, group 'all' matches CategoryFoo, group 'key' matches just Foo
+            # we use 'all' so we don't need to rename category items
+            matches = list(item_category_regex.finditer(categories))
+            if matches:
+                tags = [m.group('all') for m in matches]
+                meta.setdefault(TAGS, []).extend(tags)
+                # remove everything between first and last category from the content
+                start = matches[0].start()
+                end = matches[-1].end()
+                rest = categories[:start] + categories[end:]
+                data += u'\r\n' + rest.lstrip()
+        data = data.rstrip() + u'\r\n'
+    return data
+
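+# Hypothetical illustration (not part of the commit) of the conversion:
+#   meta = {}
+#   data = process_categories(meta, u'Some text\r\n----\r\nCategoryFoo CategoryBar\r\n',
+#                             re.compile(ur'(?P<all>Category(?P<key>(?!Template)\S+))', re.UNICODE))
+#   # data == u'Some text\r\n', meta == {TAGS: [u'CategoryFoo', u'CategoryBar']}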
+
+class AttachmentRevision(object):
+    """
+    moin 1.9 attachment (there is no revisioning, just 1 revision per attachment)
+    """
+    def __init__(self, item_name, attach_name, attpath, editlog, acl):
+        try:
+            meta = editlog.find_attach(attach_name)
+        except KeyError:
+            meta = { # make something up
+                MTIME: int(os.path.getmtime(attpath)),
+                ACTION: u'SAVE',
+            }
+        meta[NAME] = u'%s/%s' % (item_name, attach_name)
+        if acl is not None:
+            meta[ACL] = acl
+        meta[CONTENTTYPE] = unicode(MimeType(filename=attach_name).content_type())
+        f = open(attpath, 'rb')
+        size, hash_name, hash_digest = hash_hexdigest(f)
+        f.seek(0)
+        self.data = f
+        meta[hash_name] = hash_digest
+        meta[SIZE] = size
+        meta[ITEMID] = make_uuid()
+        meta[REVID] = make_uuid()
+        self.meta = meta
+
+
+class EditLog(LogFile):
+    """ Access the edit-log and return metadata as the new api wants it. """
+    def __init__(self, filename, buffer_size=4096):
+        LogFile.__init__(self, filename, buffer_size)
+        self._NUM_FIELDS = 9
+
+    def parser(self, line):
+        """ Parse edit-log line into fields """
+        fields = line.strip().split(u'\t')
+        fields = (fields + [u''] * self._NUM_FIELDS)[:self._NUM_FIELDS]
+        keys = (MTIME, '__rev', ACTION, NAME, ADDRESS, HOSTNAME, USERID, EXTRA, COMMENT)
+        result = dict(zip(keys, fields))
+        # do some conversions/cleanups/fallbacks:
+        result[MTIME] = int(long(result[MTIME] or 0) / 1000000) # convert usecs to secs
+        result['__rev'] = int(result['__rev']) - 1 # old storage is 1-based, we want 0-based
+        result[NAME] = unquoteWikiname(result[NAME])
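+        # e.g. a line u'1296733990000000\t00000002\tSAVE\tFrontPage\t...' now has
+        # MTIME 1296733990 (usecs -> secs), '__rev' 1 (0-based) and NAME u'FrontPage'
+        # (illustrative values, not part of the commit)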
+        action = result[ACTION]
+        extra = result[EXTRA]
+        if extra:
+            if action.startswith('ATT'):
+                result[NAME] += u'/' + extra # append filename to pagename
+                # keep EXTRA for find_attach
+            elif action == 'SAVE/RENAME':
+                result[NAME_OLD] = extra
+                del result[EXTRA]
+                result[ACTION] = u'RENAME'
+            elif action == 'SAVE/REVERT':
+                result[REVERTED_TO] = int(extra)
+                del result[EXTRA]
+                result[ACTION] = u'REVERT'
+        userid = result[USERID]
+        #TODO
+        #if userid:
+        #    result[USERID] = self.idx.user_uuid(old_id=userid, refcount=True)
+        return result
+
+    def find_rev(self, revno):
+        """ Find metadata for some revno revision in the edit-log. """
+        for meta in self:
+            if meta['__rev'] == revno:
+                break
+        else:
+            self.to_begin()
+            raise KeyError
+        del meta['__rev']
+        meta = dict([(k, v) for k, v in meta.items() if v]) # remove keys with empty values
+        if meta.get(ACTION) == u'SAVENEW':
+            # replace SAVENEW with just SAVE
+            meta[ACTION] = u'SAVE'
+        return meta
+
+    def find_attach(self, attachname):
+        """ Find metadata for some attachment name in the edit-log. """
+        for meta in self.reverse(): # use reverse iteration to get the latest upload's data
+            if (meta['__rev'] == 99999998 and  # 99999999-1 because of 0-based
+                meta[ACTION] == 'ATTNEW' and
+                meta[EXTRA] == attachname):
+                break
+        else:
+            self.to_end()
+            raise KeyError
+        del meta['__rev']
+        del meta[EXTRA] #  we have full name in NAME
+        meta[ACTION] = u'SAVE'
+        meta = dict([(k, v) for k, v in meta.items() if v]) # remove keys with empty values
+        return meta
+
+
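+# Hypothetical example (not part of the commit): rights valid in 1.9 but not in
+# moin2 (e.g. 'delete', 'revert') are dropped by the ACL iterator:
+#   regenerate_acl(u'JoeDoe:read,write,delete') -> u'JoeDoe:read,write'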
+def regenerate_acl(acl_string, acl_rights_valid=ACL_RIGHTS_CONTENTS):
+    """ recreate ACL string to remove invalid rights """
+    assert isinstance(acl_string, unicode)
+    result = []
+    for modifier, entries, rights in security.ACLStringIterator(acl_rights_valid, acl_string):
+        if (entries, rights) == (['Default'], []):
+            result.append("Default")
+        else:
+            result.append("%s%s:%s" % (
+                          modifier,
+                          u','.join(entries),
+                          u','.join(rights) # iterator has removed invalid rights
+                         ))
+    result = u' '.join(result)
+    logging.debug("regenerate_acl %r -> %r" % (acl_string, result))
+    return result
+
+
+def _decode_list(line):
+    """
+    Decode list of items from user data file
+
+    :param line: line containing a list of items, encoded with _encode_list
+    :rtype: tuple of unicode strings
+    :returns: the items encoded in line
+    """
+    items = [item.strip() for item in line.split('\t')]
+    items = [item for item in items if item]
+    return tuple(items)
+
+def _decode_dict(line):
+    """
+    Decode dict of key:value pairs from user data file
+
+    :param line: line containing a dict, encoded with _encode_dict
+    :rtype: dict
+    :returns: dict of unicode:unicode items
+    """
+    items = [item.strip() for item in line.split('\t')]
+    items = [item for item in items if item]
+    items = [item.split(':', 1) for item in items]
+    return dict(items)
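+
+# Examples (illustration, not part of the commit):
+#   _decode_list(u'foo\tbar\t')             -> (u'foo', u'bar')
+#   _decode_dict(u'en:English\tde:Deutsch') -> {u'en': u'English', u'de': u'Deutsch'}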
+
+
+class UserRevision(object):
+    """
+    moin 1.9 user
+    """
+    def __init__(self, path, uid):
+        self.path = path
+        self.uid = uid
+        meta = self._process_usermeta(self._parse_userprofile())
+        meta[CONTENTTYPE] = CONTENTTYPE_USER
+        meta[UID_OLD] = uid
+        meta[ITEMID] = make_uuid()
+        meta[REVID] = make_uuid()
+        meta[SIZE] = 0
+        meta[ACTION] = u'SAVE'
+        self.meta = meta
+        self.data = StringIO('')
+
+    def _parse_userprofile(self):
+        with codecs.open(os.path.join(self.path, self.uid), "r", CHARSET) as meta_file:
+            metadata = {}
+            for line in meta_file:
+                if line.startswith('#') or line.strip() == "":
+                    continue
+                key, value = line.strip().split('=', 1)
+                # Decode list values
+                if key.endswith('[]'):
+                    key = key[:-2]
+                    value = _decode_list(value)
+
+                # Decode dict values
+                elif key.endswith('{}'):
+                    key = key[:-2]
+                    value = _decode_dict(value)
+
+                metadata[key] = value
+        return metadata
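+        # Example profile lines handled above (illustration, not part of the commit):
+        #   email=joe@example.org        -> metadata['email'] = u'joe@example.org'
+        #   quicklinks[]=FrontPage\tHelp -> metadata['quicklinks'] = (u'FrontPage', u'Help')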
+
+    def _process_usermeta(self, metadata):
+        # stuff we want to have stored as boolean:
+        bool_defaults = [ # taken from cfg.checkbox_defaults
+            ('show_comments', 'False'),
+            ('edit_on_doubleclick', 'True'),
+            ('want_trivial', 'False'),
+            ('mailto_author', 'False'),
+            ('disabled', 'False'),
+        ]
+        for key, default in bool_defaults:
+            metadata[key] = metadata.get(key, default) in ['True', 'true', '1']
+
+        # stuff we want to have stored as integer:
+        int_defaults = [
+            ('edit_rows', '0'),
+        ]
+        for key, default in int_defaults:
+            metadata[key] = int(metadata.get(key, default))
+
+        # rename last_saved to MTIME, int MTIME should be enough:
+        metadata[MTIME] = int(float(metadata.get('last_saved', '0')))
+
+        # rename subscribed_pages to subscribed_items
+        metadata['subscribed_items'] = metadata.get('subscribed_pages', [])
+
+        # convert bookmarks from usecs (and str) to secs (int)
+        metadata['bookmarks'] = [(interwiki, int(long(bookmark)/1000000))
+                                 for interwiki, bookmark in metadata.get('bookmarks', {}).items()]
+
+        # stuff we want to get rid of:
+        kill = ['real_language', # crap (use 'language')
+                'wikiname_add_spaces', # crap magic (you get it like it is)
+                'recoverpass_key', # user can recover again if needed
+                'editor_default', # not used any more
+                'editor_ui', # not used any more
+                'external_target', # ancient, not used any more
+                'passwd', # ancient, not used any more (use enc_password)
+                'show_emoticons', # ancient, not used any more
+                'show_fancy_diff', # kind of diff display now depends on mimetype
+                'show_fancy_links', # not used any more (now link rendering depends on theme)
+                'show_toolbar', # not used any more
+                'show_topbottom', # crap
+                'show_nonexist_qm', # crap, can be done by css
+                'show_page_trail', # theme decides whether to show trail
+                'remember_last_visit', # we show trail, user can click there
+                'remember_me', # don't keep sessions open for a long time
+                'subscribed_pages', # renamed to subscribed_items
+                'edit_cols', # not used any more
+                'jid', # no jabber support
+                'tz_offset', # we have real timezone now
+                'date_fmt', # not used any more
+                'datetime_fmt', # not used any more
+                'last_saved', # renamed to MTIME
+                'email_subscribed_events', # XXX no support yet
+                'jabber_subscribed_events', # XXX no support yet
+               ]
+        for key in kill:
+            if key in metadata:
+                del metadata[key]
+
+        # finally, remove some empty values (that have empty defaults anyway or
+        # make no sense when empty):
+        empty_kill = ['aliasname', 'bookmarks', 'enc_password',
+                      'language', 'css_url', 'email', ] # XXX check subscribed_items, quicklinks
+        for key in empty_kill:
+            if key in metadata and metadata[key] in [u'', tuple(), {}, [], ]:
+                del metadata[key]
+
+        return metadata
+
+
+class UserBackend(object):
+    """
+    moin 1.9 user directory
+    """
+    def __init__(self, path):
+        """
+        :param path: user_dir path
+        """
+        self.path = path
+
+    def __iter__(self):
+        user_re = re.compile(r'^\d+\.\d+(\.\d+)?$')
+        for uid in os.listdir(self.path):
+            if user_re.match(uid):
+                try:
+                    rev = UserRevision(self.path, uid)
+                except Exception as err:
+                    logging.exception("Exception in user item processing %s" % uid)
+                else:
+                    yield rev
+
+
+def hash_hexdigest(content, bufsize=4096):
+    size = 0
+    hash = hashlib.new(HASH_ALGORITHM)
+    if hasattr(content, "read"):
+        while True:
+            buf = content.read(bufsize)
+            hash.update(buf)
+            size += len(buf)
+            if not buf:
+                break
+    elif isinstance(content, str):
+        hash.update(content)
+        size = len(content)
+    else:
+        raise ValueError("unsupported content object: %r" % content)
+    return size, HASH_ALGORITHM, unicode(hash.hexdigest())
+

contrib/migration/__init__.py

Empty file removed.

contrib/migration/_logfile19.py

-# Copyright: 2005-2007 MoinMoin:ThomasWaldmann
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - LogFile package
-
-    This module supports buffered log reads, iterating forward and backward line-by-line, etc.
-"""
-
-
-from MoinMoin import log
-logging = log.getLogger(__name__)
-
-import os, codecs, errno
-from MoinMoin import config, wikiutil
-
-class LogError(Exception):
-    """ Base class for log errors """
-
-class LogMissing(LogError):
-    """ Raised when the log is missing """
-
-
-class LineBuffer:
-    """
-    Reads lines from a file
-
-    :ivar len: number of lines in self.lines
-    :ivar lines: list of lines (unicode)
-    :ivar offsets: list of file offsets for each line. additionally the position
-                   after the last read line is stored into self.offsets[-1]
-    """
-    def __init__(self, file, offset, size, forward=True):
-        """
-
-        TODO: when this gets refactored, don't use "file" (is a builtin)
-
-        :param file: open file object
-        :param offset: position in file to start from
-        :param size: aproximate number of bytes to read
-        :param forward : read from offset on or from offset-size to offset
-        :type forward: boolean
-        """
-        self.loglevel = logging.NOTSET
-        if forward:
-            begin = offset
-            logging.log(self.loglevel, "LineBuffer.init: forward seek %d read %d" % (begin, size))
-            file.seek(begin)
-            lines = file.readlines(size)
-        else:
-            if offset < 2 * size:
-                begin = 0
-                size = offset
-            else:
-                begin = offset - size
-            logging.log(self.loglevel, "LineBuffer.init: backward seek %d read %d" % (begin, size))
-            file.seek(begin)
-            lines = file.read(size).splitlines(True)
-            if begin != 0:
-                # remove potentially incomplete first line
-                begin += len(lines[0])
-                lines = lines[1:]
-                # XXX check for min one line read
-
-        linecount = len(lines)
-
-        # now calculate the file offsets of all read lines
-        offsets = [len(line) for line in lines]
-        offsets.append(0) # later this element will have the file offset after the last read line
-
-        lengthpreviousline = 0
-        offset = begin
-        for i in xrange(linecount+1):
-            offset += lengthpreviousline
-            lengthpreviousline = offsets[i]
-            offsets[i] = offset
-
-        self.offsets = offsets
-        self.len = linecount
-        # Decode lines after offset in file is calculated
-        self.lines = [unicode(line, config.charset) for line in lines]
-
-
-class LogFile:
-    """
-    .filter: function that gets the values from .parser.
-             must return True to keep it or False to remove it
-
-    Overwrite .parser() and .add() to customize this class to special log files
-    """
-
-    def __init__(self, filename, buffer_size=4096):
-        """
-        :param filename: name of the log file
-        :param buffer_size: approx. size of one buffer in bytes
-        """
-        self.loglevel = logging.NOTSET
-        self.__filename = filename
-        self.__buffer = None # currently used buffer, points to one of the following:
-        self.__buffer1 = None
-        self.__buffer2 = None
-        self.buffer_size = buffer_size
-        self.__lineno = 0
-        self.filter = None
-
-    def __iter__(self):
-        return self
-
-    def reverse(self):
-        """ yield log entries in reverse direction starting from last one
-
-        :rtype: iterator
-        """
-        self.to_end()
-        while True:
-            try:
-                logging.log(self.loglevel, "LogFile.reverse %s" % self.__filename)
-                result = self.previous()
-            except StopIteration:
-                return
-            yield result
-
-    def sanityCheck(self):
-        """ Check for log file write access.
-
-        :rtype: string (error message) or None
-        """
-        if not os.access(self.__filename, os.W_OK):
-            return "The log '%s' is not writable!" % (self.__filename, )
-        return None
-
-    def __getattr__(self, name):
-        """
-        generate some attributes when needed
-        """
-        if name == "_LogFile__rel_index": # Python black magic: this is the real name of the __rel_index attribute
-            # starting iteration from begin
-            self.__buffer1 = LineBuffer(self._input, 0, self.buffer_size)
-            self.__buffer2 = LineBuffer(self._input,
-                                        self.__buffer1.offsets[-1],
-                                        self.buffer_size)
-            self.__buffer = self.__buffer1
-            self.__rel_index = 0
-            return 0
-        elif name == "_input":
-            try:
-                # Open the file (NOT using codecs.open, it breaks our offset calculation. We decode it later.).
-                # Use binary mode in order to retain \r - otherwise the offset calculation would fail.
-                self._input = file(self.__filename, "rb", )
-            except IOError as err:
-                if err.errno == errno.ENOENT: # "file not found"
-                    # XXX workaround if edit-log does not exist: just create it empty
-                    # if this workaround raises another error, we don't catch
-                    # it, so the admin will see it.
-                    f = file(self.__filename, "ab")
-                    f.write('')
-                    f.close()
-                    self._input = file(self.__filename, "rb", )
-                else:
-                    logging.error("logfile: %r IOERROR errno %d (%s)" % (self.__filename, err.errno, os.strerror(err.errno)))
-                    raise
-            return self._input
-        elif name == "_output":
-            self._output = codecs.open(self.__filename, 'a', config.charset)
-            return self._output
-        else:
-            raise AttributeError(name)
-
-    def size(self):
-        """ Return log size in bytes
-
-        Return 0 if the file does not exist. Raises other OSError.
-
-        :returns: size of log file in bytes
-        :rtype: Int
-        """
-        try:
-            return os.path.getsize(self.__filename)
-        except OSError as err:
-            if err.errno == errno.ENOENT:
-                return 0
-            raise
-
-    def lines(self):
-        """ Return number of lines in the log file
-
-        Return 0 if the file does not exist. Raises other OSError.
-
-        Expensive for big log files - O(n)
-
-        :returns: size of log file in lines
-        :rtype: Int
-        """
-        try:
-            f = file(self.__filename, 'r')
-            try:
-                count = 0
-                for line in f:
-                    count += 1
-                return count
-            finally:
-                f.close()
-        except (OSError, IOError) as err:
-            if err.errno == errno.ENOENT:
-                return 0
-            raise
-
-    def peek(self, lines):
-        """ Move position in file forward or backwards by "lines" count
-
-        It adjusts .__lineno if set.
-        This function is not aware of filters!
-
-        :param lines: number of lines, may be negative to move backward
-        :rtype: boolean
-        :returns: True if moving more than to the beginning and moving
-                 to the end or beyond
-        """
-        logging.log(self.loglevel, "LogFile.peek %s" % self.__filename)
-        self.__rel_index += lines
-        while self.__rel_index < 0:
-            if self.__buffer is self.__buffer2:
-                if self.__buffer.offsets[0] == 0:
-                    # already at the beginning of the file
-                    self.__rel_index = 0
-                    self.__lineno = 0
-                    return True
-                else:
-                    # change to buffer 1
-                    self.__buffer = self.__buffer1
-                    self.__rel_index += self.__buffer.len
-            else: # self.__buffer is self.__buffer1
-                if self.__buffer.offsets[0] == 0:
-                    # already at the beginning of the file
-                    self.__rel_index = 0
-                    self.__lineno = 0
-                    return True
-                else:
-                    # load previous lines
-                    self.__buffer2 = self.__buffer1
-                    self.__buffer1 = LineBuffer(self._input,
-                                                self.__buffer.offsets[0],
-                                                self.buffer_size,
-                                                forward=False)
-                    self.__buffer = self.__buffer1
-                    self.__rel_index += self.__buffer.len
-
-        while self.__rel_index >= self.__buffer.len:
-            if self.__buffer is self.__buffer1:
-                # change to buffer 2
-                self.__rel_index -= self.__buffer.len
-                self.__buffer = self.__buffer2
-            else: # self.__buffer is self.__buffer2
-                # try to load next buffer
-                tmpbuff = LineBuffer(self._input,
-                                     self.__buffer.offsets[-1],
-                                     self.buffer_size)
-                if tmpbuff.len == 0:
-                    # end of file
-                    if self.__lineno is not None:
-                        self.__lineno += (lines -
-                                         (self.__rel_index - self.__buffer.len))
-                    self.__rel_index = self.__buffer.len # point to after last read line
-                    return True
-                # shift buffers
-                self.__rel_index -= self.__buffer.len
-                self.__buffer1 = self.__buffer2
-                self.__buffer2 = tmpbuff
-                self.__buffer = self.__buffer2
-
-        if self.__lineno is not None:
-            self.__lineno += lines
-        return False
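
A quick sketch of peek()'s boundary semantics (assuming the LogFile(filename, buffer_size) constructor from the earlier, unshown part of this file, and a hypothetical edit-log path):

    log = LogFile('data/edit-log')
    log.to_begin()
    hit_boundary = log.peek(10)   # try to move 10 lines forward
    if hit_boundary:
        print 'reached (or passed) the end of the file'
    log.peek(-3)                  # move 3 lines back again
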
-
-    def __next(self):
-        """get next line already parsed"""
-        if self.peek(0):
-            raise StopIteration
-        result = self.parser(self.__buffer.lines[self.__rel_index])
-        self.peek(1)
-        return result
-
-    def next(self):
-        """get next line that passes through the filter
-        :returns: next entry
-        raises StopIteration at file end
-        """
-        result = None
-        while result is None:
-            while result is None:
-                logging.log(self.loglevel, "LogFile.next %s" % self.__filename)
-                result = self.__next()
-            if self.filter and not self.filter(result):
-                result = None
-        return result
-
-    def __previous(self):
-        """get previous line already parsed"""
-        if self.peek(-1):
-            raise StopIteration
-        return self.parser(self.__buffer.lines[self.__rel_index])
-
-    def previous(self):
-        """get previous line that passes through the filter
-        :returns: previous entry
-        raises StopIteration at file begin
-        """
-        result = None
-        while result is None:
-            while result is None:
-                logging.log(self.loglevel, "LogFile.previous %s" % self.__filename)
-                result = self.__previous()
-            if self.filter and not self.filter(result):
-                result = None
-        return result
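
Together with to_end(), previous() gives newest-first iteration over the log; a minimal sketch under the same assumptions as above:

    log = LogFile('data/edit-log')
    log.to_end()
    try:
        while True:
            entry = log.previous()   # parsed entry that passed the filter
            # process entry here, newest first
    except StopIteration:
        pass   # reached the beginning of the file
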
-
-    def to_begin(self):
-        """moves file position to the begin"""
-        logging.log(self.loglevel, "LogFile.to_begin %s" % self.__filename)
-        if self.__buffer1 is None or self.__buffer1.offsets[0] != 0:
-            self.__buffer1 = LineBuffer(self._input,
-                                        0,
-                                        self.buffer_size)
-            self.__buffer2 = LineBuffer(self._input,
-                                        self.__buffer1.offsets[-1],
-                                        self.buffer_size)
-        self.__buffer = self.__buffer1
-        self.__rel_index = 0
-        self.__lineno = 0
-
-    def to_end(self):
-        """moves file position to the end"""
-        logging.log(self.loglevel, "LogFile.to_end %s" % self.__filename)
-        self._input.seek(0, 2) # to end of file
-        size = self._input.tell()
-        if self.__buffer2 is None or size > self.__buffer2.offsets[-1]:
-            self.__buffer2 = LineBuffer(self._input,
-                                        size,
-                                        self.buffer_size,
-                                        forward=False)
-
-            self.__buffer1 = LineBuffer(self._input,
-                                        self.__buffer2.offsets[0],
-                                        self.buffer_size,
-                                        forward=False)
-        self.__buffer = self.__buffer2
-        self.__rel_index = self.__buffer2.len
-        self.__lineno = None
-
-    def position(self):
-        """ Return the current file position
-
-        This can be converted into a string (e.g. via repr()) and rebuilt from it later.
-        For this plain file implementation, the position is an integer (a byte offset).
-        """
-        return self.__buffer.offsets[self.__rel_index]
-
-    def seek(self, position, line_no=None):
-        """ moves file position to an value formerly gotten from .position().
-        To enable line counting line_no must be provided.
-        .seek is much more efficient for moving long distances than .peek.
-        raises ValueError if position is invalid
-        """
-        logging.log(self.loglevel, "LogFile.seek %s pos %d" % (self.__filename, position))
-        if self.__buffer1:
-            logging.log(self.loglevel, "b1 %r %r" % (self.__buffer1.offsets[0], self.__buffer1.offsets[-1]))
-        if self.__buffer2:
-            logging.log(self.loglevel, "b2 %r %r" % (self.__buffer2.offsets[0], self.__buffer2.offsets[-1]))
-        if self.__buffer1 and self.__buffer1.offsets[0] <= position < self.__buffer1.offsets[-1]:
-            # position is in .__buffer1
-            self.__rel_index = self.__buffer1.offsets.index(position)
-            self.__buffer = self.__buffer1
-        elif self.__buffer2 and self.__buffer2.offsets[0] <= position < self.__buffer2.offsets[-1]:
-            # position is in .__buffer2
-            self.__rel_index = self.__buffer2.offsets.index(position)
-            self.__buffer = self.__buffer2
-        elif self.__buffer1 and self.__buffer1.offsets[-1] == position:
-            # we already have one buffer directly before where we want to go
-            self.__buffer2 = LineBuffer(self._input,
-                                        position,
-                                        self.buffer_size)
-            self.__buffer = self.__buffer2
-            self.__rel_index = 0
-        elif self.__buffer2 and self.__buffer2.offsets[-1] == position:
-            # we already have one buffer directly before where we want to go
-            self.__buffer1 = self.__buffer2
-            self.__buffer2 = LineBuffer(self._input,
-                                        position,
-                                        self.buffer_size)
-            self.__buffer = self.__buffer2
-            self.__rel_index = 0
-        else:
-            # load buffers around position
-            self.__buffer1 = LineBuffer(self._input,
-                                        position,
-                                        self.buffer_size,
-                                        forward=False)
-            self.__buffer2 = LineBuffer(self._input,
-                                        position,
-                                        self.buffer_size)
-            self.__buffer = self.__buffer2
-            self.__rel_index = 0
-            # XXX test for valid position
-        self.__lineno = line_no
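
position() and seek() form a round-trip pair: remember the integer offset of an entry, move elsewhere, then jump straight back without re-reading the file. A short sketch:

    log.to_begin()
    log.peek(3)
    pos = log.position()     # plain integer byte offset
    lineno = log.line_no()   # may be None if line counting got lost
    log.to_end()
    log.seek(pos, lineno)    # back at the remembered entry
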
-
-    def line_no(self):
-        """:returns: the current line number or None if line number is unknown"""
-        return self.__lineno
-
-    def calculate_line_no(self):
-        """ Calculate the current line number from buffer offsets
-
-        If line number is unknown it is calculated by parsing the whole file.
-        This may be expensive.
-        """
-        self._input.seek(0, 0)
-        lines = self._input.read(self.__buffer.offsets[self.__rel_index])
-        self.__lineno = len(lines.splitlines())
-        return self.__lineno
-
-    def parser(self, line):
-        """
-        Converts the line from file to program representation.
-        This implementation uses TAB separated strings.
-        This method should be overridden by subclasses.
-
-        :param line: line as read from file
-        :returns: parsed line or None on error
-        """
-        return line.split("\t")
-
-    def add(self, *data):
-        """
-        add line to log file
-        This implementation saves the values as TAB separated strings.
-        This method should be overridden by subclasses.
-        """
-        line = "\t".join(data)
-        self._add(line)
-
-    def _add(self, line):
-        """
-        write one entry to the log file
-
-        :param line: flat line
-        :type line: String
-        """
-        if line is not None:
-            if not line.endswith('\n'): # also safe for an empty line
-                line += '\n'
-            self._output.write(line)
-            self._output.close() # maybe this helps against the sporadic 160 \0 bytes some fedora wikis got in their edit-log?
-            del self._output # re-open the output file automagically
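
On the write side, add() joins its arguments with TABs and _add() appends the resulting line, closing and re-opening the output handle around each write. Usage with hypothetical values:

    log.add('1234567890000000', '00000001', 'SAVE', 'FrontPage')
    # appends: 1234567890000000<TAB>00000001<TAB>SAVE<TAB>FrontPage<LF>
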

contrib/migration/_utils19.py

-# Copyright: 2010 MoinMoin:ThomasWaldmann
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-MoinMoin - helpers for 1.9 migration
-"""
-
-import re
-
-from MoinMoin.config import NAME, ACL, CONTENTTYPE, MTIME, LANGUAGE
-
-CHARSET = 'utf-8'
-
-# Precompiled patterns for file name [un]quoting
-UNSAFE = re.compile(r'[^a-zA-Z0-9_]+')
-QUOTED = re.compile(r'\(([a-fA-F0-9]+)\)')
-
-
-def split_body(body):
-    """
-    Extract the processing instructions / acl / etc. at the beginning of a page's body.
-
-    Hint: if you have a Page object p, you already have the result of this function in
-          p.meta and (even better) parsed/processed stuff in p.pi.
-
-    Returns a dict of processing instructions and a string with the rest of the body.
-    """
-    pi = {}
-    while body.startswith('#'):
-        try:
-            line, body = body.split('\n', 1) # extract first line
-            line = line.rstrip('\r')
-        except ValueError:
-            line = body
-            body = ''
-
-        # end parsing on empty (invalid) PI
-        if line == "#":
-            body = line + '\n' + body
-            break
-
-        if line[1] == '#': # two hash marks are a comment
-            comment = line[2:]
-            if not comment.startswith(' '):
-                # we don't require a blank after the ##, so we put one there
-                comment = ' ' + comment
-                line = '##%s' % comment
-
-        verb, args = (line[1:] + ' ').split(' ', 1) # split at the first blank
-        pi.setdefault(verb.lower(), []).append(args.strip())
-
-    for key, value in pi.iteritems():
-        if key in ['#', ]:
-            # transform the lists to tuples:
-            pi[key] = tuple(value)
-        elif key in ['acl', ]:
-            # join the list of values to a single value
-            pi[key] = u' '.join(value)
-        else:
-            # for keys that can't occur multiple times, don't use a list:
-            pi[key] = value[-1] # use the last value to copy 1.9 parsing behaviour
-
-    return pi, body
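
For illustration, this is what split_body() yields for a typical 1.9 page body; note how repeated #acl lines get joined, while other PIs keep only their last value:

    body = (u"#format wiki\n"
            u"#acl All:read\n"
            u"#acl AdminGroup:read,write\n"
            u"= Heading =\n")
    pi, rest = split_body(body)
    # pi == {u'format': u'wiki', u'acl': u'All:read AdminGroup:read,write'}
    # rest == u'= Heading =\n'
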
-
-
-def add_metadata_to_body(metadata, data):
-    """
-    Prepends the processing instruction lines for the given metadata to the data.
-    """
-    meta_keys = [NAME, ACL, CONTENTTYPE, MTIME, LANGUAGE, ]
-
-    metadata_data = ""
-    for key, value in metadata.iteritems():
-        if key not in meta_keys:
-            continue
-        # special handling for list metadata
-        if isinstance(value, (list, tuple)):
-            for line in value:
-                metadata_data += "#%s %s\n" % (key, line)
-        else:
-            metadata_data += "#%s %s\n" % (key, value)
-    return metadata_data + data
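
The inverse direction turns metadata back into #key value PI lines; a sketch (assuming the config constants ACL and LANGUAGE hold the key names 'acl' and 'language'; line order follows dict iteration order):

    meta = {ACL: u'All:read', LANGUAGE: u'en'}
    text = add_metadata_to_body(meta, u'= Heading =\n')
    # one possible result:
    # u'#acl All:read\n#language en\n= Heading =\n'
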
-
-
-def quoteWikinameFS(wikiname, charset=CHARSET):
-    """
-    Return file system representation of a Unicode WikiName.
-
-    Warning: will raise UnicodeError if wikiname can not be encoded using
-    charset. The default value 'utf-8' can encode any character.
-
-    :param wikiname: wiki name [unicode]
-    :param charset: charset to encode string (before quoting)
-    :rtype: string
-    :returns: quoted name, safe for any file system
-    """
-    filename = wikiname.encode(charset)
-
-    quoted = []
-    location = 0
-    for needle in UNSAFE.finditer(filename):
-        # append leading safe stuff
-        quoted.append(filename[location:needle.start()])
-        location = needle.end()
-        # Quote and append unsafe stuff
-        quoted.append('(')
-        for character in needle.group():
-            quoted.append('%02x' % ord(character))
-        quoted.append(')')
-
-    # append rest of string
-    quoted.append(filename[location:])
-    return ''.join(quoted)
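
The effect: only [a-zA-Z0-9_] survives verbatim, every other run of bytes (after encoding) becomes a parenthesized hex group. For example:

    quoteWikinameFS(u'FrontPage')       # -> 'FrontPage'
    quoteWikinameFS(u'Main/Sub Page')   # -> 'Main(2f)Sub(20)Page'
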
-
-
-class InvalidFileNameError(Exception):
-    """ Called when we find an invalid file name """
-    pass
-
-
-def unquoteWikiname(filename, charset=CHARSET):
-    """
-    Return Unicode WikiName from quoted file name.
-
-    raises an InvalidFileNameError in case of unquoting problems.
-
-    :param filename: quoted wiki name
-    :param charset: charset to use for decoding (after unquoting)
-    :rtype: unicode
-    :returns: WikiName
-    """
-    # From some places we get called with Unicode strings
-    if isinstance(filename, unicode):
-        filename = filename.encode(charset)
-
-    parts = []
-    start = 0
-    for needle in QUOTED.finditer(filename):
-        # append leading unquoted stuff
-        parts.append(filename[start:needle.start()])
-        start = needle.end()
-        # Append quoted stuff
-        group = needle.group(1)
-        # Filter invalid filenames
-        if len(group) % 2 != 0:
-            raise InvalidFileNameError(filename)
-        try:
-            for i in range(0, len(group), 2):
-                byte = group[i:i+2]
-                character = chr(int(byte, 16))
-                parts.append(character)
-        except ValueError:
-            # byte not in hex, e.g. 'xy'
-            raise InvalidFileNameError(filename)
-
-    # append rest of string
-    if start == 0:
-        wikiname = filename
-    else:
-        parts.append(filename[start:len(filename)])
-        wikiname = ''.join(parts)
-
-    return wikiname.decode(charset)
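
unquoteWikiname() undoes the quoting above, so the two functions round-trip; a malformed (odd-length) hex group raises InvalidFileNameError:

    unquoteWikiname('Main(2f)Sub(20)Page')   # -> u'Main/Sub Page'
    name = u'Tom\xe4to Soup'
    assert unquoteWikiname(quoteWikinameFS(name)) == name
    unquoteWikiname('Bad(abc)Name')          # raises InvalidFileNameError
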
-

contrib/migration/save19.py

-# Copyright: 2008 MoinMoin:JohannesBerg
-# Copyright: 2008-2011 MoinMoin:ThomasWaldmann
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-MoinMoin - save a moin 1.9 compatible storage to a file you can load into moin 2.0.
-
-Usage
------
-
- python save19.py DATA_DIR >saved19.moin 2>error.log
- # after this, review error.log. if there are too many errors, you may want
- # to clean up the 1.9 data first and then retry.
-
- moin load --file saved19.moin
-
-TODO
-----
-
-* translate revno numbering into revid parents
-* userid old -> user itemid
-* rename enc_password -> password?
-"""
-
-
-import sys
-import os
-import re
-import codecs
-import hashlib
-from StringIO import StringIO
-import logging
-
-from _utils19 import quoteWikinameFS, unquoteWikiname, split_body
-from _logfile19 import LogFile
-
-from MoinMoin.config import ACL, CONTENTTYPE, NAME, NAME_OLD, REVERTED_TO, \
-                            ACTION, ADDRESS, HOSTNAME, USERID, MTIME, EXTRA, COMMENT, \
-                            IS_SYSITEM, SYSITEM_VERSION, \
-                            TAGS, SIZE, HASH_ALGORITHM, \
-                            ITEMID, REVID, DATAID
-from MoinMoin.storage.error import NoSuchRevisionError
-from MoinMoin.util.mimetype import MimeType
-from MoinMoin.util.crypto import make_uuid
-from MoinMoin.storage.middleware.serialization import serialize_rev
-from MoinMoin import security
-
-
-CHARSET = 'utf-8'
-ACL_RIGHTS_CONTENTS = ['read', 'write', 'create', 'destroy', 'admin', ]
-
-DELETED_MODE_KEEP = 'keep'
-DELETED_MODE_KILL = 'kill'
-
-CONTENTTYPE_DEFAULT = u'text/plain;charset=utf-8'
-CONTENTTYPE_USER = u'text/x.moin.userprofile'
-CONTENTTYPE_MOINWIKI = u'text/x.moin.wiki;charset=utf-8'
-FORMAT_TO_CONTENTTYPE = {
-    'wiki': CONTENTTYPE_MOINWIKI,
-    'text/wiki': CONTENTTYPE_MOINWIKI,
-    'text/moin-wiki': CONTENTTYPE_MOINWIKI,
-    'creole': u'text/x.moin.creole;charset=utf-8',
-    'text/creole': u'text/x.moin.creole;charset=utf-8',
-    'rst': u'text/rst;charset=utf-8',
-    'text/rst': u'text/rst;charset=utf-8',
-    'plain': u'text/plain;charset=utf-8',
-    'text/plain': u'text/plain;charset=utf-8',
-}
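
This table drives the translation of a page's #format PI into a moin2 contenttype. A lookup along these lines would be used (the helper name is illustrative; unknown formats presumably fall back to CONTENTTYPE_DEFAULT):

    def contenttype_for(format_pi):
        # hypothetical helper, not part of this module
        return FORMAT_TO_CONTENTTYPE.get(format_pi, CONTENTTYPE_DEFAULT)

    contenttype_for('creole')   # -> u'text/x.moin.creole;charset=utf-8'
    contenttype_for('html')     # -> u'text/plain;charset=utf-8' (fallback)
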
-
-
-class KillRequested(Exception):
-    """raised if item killing is requested by DELETED_MODE"""
-
-
-class PageBackend(object):
-    """
-    moin 1.9 page directory
-    """
-    def __init__(self, path, deleted_mode=DELETED_MODE_KEEP,
-                 default_markup=u'wiki',
-                 item_category_regex=ur'(?P<all>Category(?P<key>(?!Template)\S+))'):
-        """
-        :param path: storage path (data_dir)
-        :param deleted_mode: 'kill' - just ignore deleted pages (pages with
-                                      non-existing current revision) and their attachments
-                                      as if they were not there.
-                                      Non-deleted pages (pages with an existing current
-                                      revision) that have non-current deleted revisions
-                                      will be treated as for 'keep'.
-                             'keep' - keep deleted pages as items with empty revisions,
-                                      keep their attachments. (default)
-        :param default_markup: used if a page has no #format line; moin 1.9's
-                               default is 'wiki' and we use the same default here.
-        """
-        self._path = path
-        assert deleted_mode in (DELETED_MODE_KILL, DELETED_MODE_KEEP, )
-        self.deleted_mode = deleted_mode
-        self.format_default = default_markup
-        self.item_category_regex = re.compile(item_category_regex, re.UNICODE)
-
-    def __iter__(self):
-        pages_dir = os.path.join(self._path, 'pages')
-        for f in os.listdir(pages_dir):
-            itemname = unquoteWikiname(f)
-            try:
-                item = PageItem(self, os.path.join(pages_dir, f), itemname)
-            except Exception:
-                logging.exception("PageItem %r raised exception:" % itemname)
-            else:
-                for rev in item.iter_revisions():
-                    yield rev
-                for rev in item.iter_attachments():
-                    yield rev
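
PageBackend is thus just an iterable of page and attachment revisions, which the surrounding script can stream out one by one. A minimal hedged sketch (the data_dir path is hypothetical):

    backend = PageBackend('/srv/moin19/data', deleted_mode=DELETED_MODE_KEEP)
    count = 0
    for rev in backend:
        count += 1   # the real script serializes each rev instead
    print '%d revisions found' % count
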
-
-
-class PageItem(object):
-    """
-    moin 1.9 page
-    """
-    def __init__(self, backend, path, itemname):
-        self.backend = backend
-        self.name = itemname