# mbox.py - qimport patches from mailboxes
#
# Copyright 2008 Patrick Mezard <pmezard@gmail.com>
#
# This extension was heavily inspired by Chris Mason's mseries utility,
# which can be found here:
#
#    http://oss.oracle.com/~mason/mseries/
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

'''qimport patches from mailboxes

This extension lets you read patches from mailboxes and append them to an
existing Mercurial patch queue, as the qimport command would do. Patch
selection is done interactively with the 'mimport' command.

To enable this extension:

  [extensions]
  hgext.mbox =
  # MQ extension must be enabled as well
  hgext.mq =

  [mbox]
  # A list of mailbox paths separated by the platform-specific path
  # separator: a colon on Unix, a semicolon on Windows.
  paths = mailbox1_path;mailbox2_path
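
Once configured, patch groups can be selected and imported interactively
with the 'mimport' command, for instance:

  hg mimport -d ">2008-01-01" fix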
'''

import email, email.utils, mailbox, os, re, tempfile, time
from mercurial import util, extensions
from mercurial.i18n import gettext, _

class mboxerror(Exception):
    pass

re_patch = re.compile(r'^.*\[PATCH(?:\s+(\d+)\s+of\s+(\d+))?\]\s*(.+)$')

def parsesubject(s):
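    """Parse a patchbomb-style subject line.

    Return (subject, title, index, count) where index is the message
    position in its series and count is the expected number of messages
    in the group, including the '[PATCH 0 of N]' introduction (so a
    series of N patches gives count == N + 1). A plain '[PATCH]' subject
    gives index=0 and count=1.
    """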
    s = s.replace('\n\t', ' ')
    m = re_patch.search(s)
    if not m:
        raise mboxerror(_('does not look like a patch message'))
    try:
        index = int(m.group(1))
        count = int(m.group(2)) + 1
    except (TypeError, IndexError):
        index = 0
        count = 1
    if count < 1:
        raise mboxerror(_('invalid patch count: %d') % count)
    title = m.group(3)
    return s, title, index, count

class patchmessage:
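    """Wrap an email message carrying a patch.

    Exposes the parsed subject metadata (title, series index and expected
    group count) together with the Message-ID threading information used
    to group related messages.
    """
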
    def __init__(self, msg):
        self.msg = msg

        s = msg.get('Subject', '')
        self.subject, self.title, self.index, self.count = parsesubject(s)

        # Extract threading information
        self.id = msg.get('Message-ID')
        if not self.id:
            raise mboxerror(_('no message id'))
        self.parentid = msg.get('In-Reply-to')
        if not self.parentid:
            self.parentid = msg.get_all('References', [None])[-1]

    def __cmp__(self, other):
        return cmp(self.id, other.id)

    def __hash__(self):
        return hash(self.id)

    def date(self):
        d = email.utils.parsedate(self.msg['Date'])
        if d is None:
            # Missing or unparseable Date header, fall back to the epoch
            return 0.0
        return time.mktime(d)

    def sender(self):
        return self.msg.get('From')

def listmessages(paths):
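    """Yield a patchmessage for every message found in the mbox files at
    paths; messages that cannot be parsed as patches are skipped.
    """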
    for path in paths:
        mbox = mailbox.mbox(path, create=False)
        for m in mbox:
            try:
                msg = patchmessage(m)
            except mboxerror:
                continue
            yield msg

def clustermessages(msgs):
    """Yield sublists of msgs eligible as message groups, looking at their
    author, date locality and items count.
    """
    def cmpdate(a, b):
        return cmp(a.date(), b.date())

    def findclusters(msgs):
        """If msgs has a many messages as the first one expects to have,
        return it. Otherwise, split the list at the widest time gap and recurse.
        """
        if len(msgs) == msgs[0].count:
            yield msgs
        elif len(msgs) < 2:
            return
        else:
            mincount = min([m.count for m in msgs])
            if mincount > len(msgs):
                return
            msgs.sort(cmpdate)
            split = max([(msgs[i].date() - msgs[i-1].date(), i) 
                         for i in range(1, len(msgs))])[1]
            for remaining in (msgs[:split], msgs[split:]):
                for c in findclusters(remaining):
                    yield c

    senders = {}
    for m in msgs:
        senders.setdefault(m.sender(), []).append(m)
    for groups in senders.itervalues():
        for c in findclusters(groups):
            yield c

def listgroups(paths, datefn, all=False):
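    """Yield (intro, patches) pairs for the patch groups found in the mbox
    files at paths. intro is the series introduction message, or None for
    standalone patches; patches is the list of patch messages ordered by
    series index. Messages rejected by datefn are ignored. With all=True,
    messages which could not be grouped are also yielded as standalone
    patches.
    """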
    def makegroup(msgs):
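        # Sort by series index and check the group is complete (indexes
        # form an unbroken 0..N sequence); return None otherwise.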
        msgs = [(m.index, m) for m in msgs]
        msgs.sort()
        indexes = [m[0] for m in msgs]
        if indexes != list(range(len(msgs))):
            return None
        msgs = [m[1] for m in msgs]
        return msgs

    pendings = {}
    orphaneds = {}
    for m in listmessages(paths):
        if not datefn(m.date()):
            continue
        if m.count == 1:
            yield None, [m]
            continue
        msgid = m.parentid
        if msgid is None:
            orphaneds[m.id] = m
        if m.index == 0:
            msgid = m.id
        if not msgid:
            continue
        pendings.setdefault(msgid, []).append(m)
        msgs = pendings[msgid]
        if len(msgs) != m.count:
            continue
        msgs = makegroup(msgs)
        if msgs is None:
            continue
        for m in msgs:
            if m.id in orphaneds:
                del orphaneds[m.id]
        del pendings[msgid]
        yield msgs[0], msgs[1:]

    # Try to find more groups using sender and date locality
    for msgs in pendings.itervalues():
        for m in msgs:
            orphaneds[m.id] = m
    for msgs in clustermessages(orphaneds.values()):
        msgs = makegroup(msgs)
        if msgs is None:
            continue
        for m in msgs:
            del orphaneds[m.id]
        yield msgs[0], msgs[1:]

    if not all:
        return
    # Return orphaned messages as standalone groups
    for m in orphaneds.values():
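        # Skip series introduction messages, they are not patches themselves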
        if m.count > 0 and m.index == 0:
            continue
        m.index, m.count = 0, 1
        yield None, [m]

def makepatchname(existing, title):
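    """Derive a patch name from title, lowercased and stripped of
    non-word characters, appending a numeric suffix if needed to avoid
    the names in existing.
    """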
    def cleanup(s):
        s = re.sub(r'\s', '_', s.lower())
        s = re.sub(r'\W', '', s)
        return s

    name = cleanup(title)
    for i in xrange(100):
        if i:
            n = '%s__%d' % (name, i)
        else:
            n = name
        if n not in existing:
            return n

re_ispatch = re.compile(r'^(# HG|diff\s)')

def ispatch(s):
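    """Return True if s contains a line starting with a Mercurial patch
    header ('# HG...') or a diff header ('diff ...').
    """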
    for line in s.splitlines():
        if re_ispatch.search(line):
            return True
    return False

def getpayload(msg):
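    """Return the first payload part of msg which looks like a patch,
    or None, descending into multipart messages.
    """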
    if msg.is_multipart():
        for m in msg.get_payload():
            s = getpayload(m)
            if s is not None:
                return s
    else:
        s = msg.get_payload()
        if ispatch(s):
            return s
    return None

def importpatch(ui, repo, patchname, msg):
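    """Extract the patch from msg, write it to a temporary file and
    qimport it into the repository queue as patchname.
    """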
    try:
        mq = extensions.find('mq')
    except KeyError:
        raise util.Abort(_("'mq' extension not loaded"))

    s = getpayload(msg)
    if s is None:
        raise util.Abort(_("cannot find patch in message content"))

    s = re.sub('\r\n', '\n', s)
    tmpfd, tmppath = tempfile.mkstemp(prefix='hg-mbox-')
    try:
        try:
            fp = os.fdopen(tmpfd, 'wb')
            fp.write(s)
            fp.close()
            tmpfd = None
        except IOError:
            if tmpfd:
                os.close(tmpfd)
            raise

        mq.qimport(ui, repo, tmppath, name=patchname, existing=False,
                   force=False, rev=[], git=False)
    finally:
        os.remove(tmppath)
        
def importpatches(ui, repo, groups):
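    """qimport every patch of the selected groups into the repository
    queue and report how many patches were imported.
    """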
    imported = []
    q = repo.mq
    for patches in groups:
        for p in patches[::-1]:
            name = makepatchname(q.series, p.title)
            importpatch(ui, repo, name, p.msg)
            imported.append(name)
    ui.status(_('%d patches imported\n') % len(imported))

def makematcher(patterns):
    """Return a matcher function match((intro, patches)) returning True if
    the patch group matches the set of patterns.
    """
    if not patterns:
        return util.always

    regexps = [re.compile(p, re.I) for p in patterns]
    
    def match(group):
        intro, patches = group
        text = []
        if intro:
            text.append(intro.title)
        text += [p.title for p in patches]
        text = '\n'.join(text)
        for r in regexps:
            if not r.search(text):
                return False
        return True

    return match

def removeduplicates(groups):
    """Remove group duplicates, preserving input order."""
    seen = {}
    kept = []
    for intro, patches in groups:
        p = intro or patches[0]
        if (p.title, p.sender()) in seen:
            continue
        seen[(p.title, p.sender())] = 1
        kept.append((intro, patches))
    return kept

def mimport(ui, repo, *patterns, **opts):
    """qimport patches from mailboxes

    You will be prompted for whether to qimport items from every patch
    group found in the configured mailboxes (see 'hg help mbox' for
    details). If patterns are passed, they are used to filter out patch
    groups not matching all of them. Duplicate groups (based on the title
    and sender of the introduction or first patch) are ignored as well.
    For each query, the following responses are possible:

    n - skip this patch group
    y - qimport this patch group

    d - done, import selected patches and quit
    q - quit, importing nothing

    ? - display help
    """
    if opts['mbox']:
        paths = [opts['mbox']]
    else:
        paths = ui.config('mbox', 'paths', '').split(os.pathsep)
    paths = [p.strip() for p in paths if p]
    if not paths:
        raise util.Abort(_('no mailbox path configured'))

    matcher = makematcher(patterns)
    selecteds = []
    stop = False

    datefn = util.always
    if opts.get('date'):
        datefn = util.matchdate(opts.get('date'))
    listall = opts.get('all')
    groups = filter(matcher, listgroups(paths, datefn, listall))
    def cmpgroup(a, b):
        return -cmp(a[1][0].date(), b[1][0].date())
    groups.sort(cmpgroup)
    groups = removeduplicates(groups)

    for intro, patches in groups:
        if intro:
            ui.status('%s\n' % intro.subject)
            for p in patches:
                ui.status('    %s\n' % p.subject)
        else:
            ui.status('%s\n' % patches[0].subject)

        while 1:
            allowed = _('[Nydq?]')
            choices = [_('&No'), _('&Yes'), _('&Done'), _('&Quit'), _('&?')]
            r = ui.promptchoice(_('import this group? %s ') % allowed, choices)
            if r == 4:
                doc = gettext(mimport.__doc__)
                c = doc.find(_('n - skip this patch group'))
                for l in doc[c:].splitlines():
                    if l:
                        ui.write(l.strip(), '\n')
                continue
            elif r == 1:
                selecteds.append(patches)
            elif r == 2:
                stop = True
            elif r == 3:
                raise util.Abort(_('user quit'))
            break

        if stop:
            break
        ui.status('\n')

    importpatches(ui, repo, selecteds)

cmdtable = {}

def extsetup():
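    # Register the mimport command only when the mq extension is available,
    # since the actual import work is delegated to mq's qimport.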
    try:
        mq = extensions.find('mq')
    except KeyError:
        return

    cmdtable['mimport'] = (
        mimport, 
        [('a', 'all', False, _('show all patches, including orphaned ones')),
         ('d', 'date', '', _('show patches matching date spec')),
         ('m', 'mbox', '', _('path to an mbox to parse')),
         ], 
        _('hg mimport [-a] [-d DATE] [-m MBOX] [PATTERN]...'))