# Andrew Godwin
# heechee
#
# heechee / heechee / repo / hg.py

"""
Repository backend for Mercurial.
"""

import logging
import datetime
import time
import tempfile
import os

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

from heechee.repo import File, Directory, RepositoryBase
from heechee.exceptions import *
from heechee.svndiff import undiff

from mercurial import hg
from mercurial import ui as ui_
from mercurial.context import memctx, memfilectx

try:
    from mercurial.encoding import tolocal
except ImportError:
    from mercurial.util import tolocal

class Repository(RepositoryBase):
    """
    Mercurial-backed repository exposed through Subversion-style revisions.

    Throughout this class SVN revision N maps onto Mercurial changeset
    N - 1; SVN revision 0 is the empty tree.
    """

    def __init__(self, directory):
        """
        Opens the Mercurial repository stored at `directory` and derives a
        UUID-shaped identifier from the hash of its first changeset.
        """
        super(Repository, self).__init__()
        # Set our repo
        self.path = directory
        self._hg_repo = hg.repository(ui_.ui(), directory)
        # Work out a new 'UUID' (it's not one) based on the first commit's hash
        hash_hex = self.hg_repo[0].node().encode("hex")
        self.uuid = "-".join([
            hash_hex[:8],
            hash_hex[8:12],
            hash_hex[12:16],
            hash_hex[16:20],
            hash_hex[20:32],
        ])

    def get_hg_repo(self):
        """
        We have to invalidate the repo each time, as changes may happen
        externally - Heechee holds repos objects open for hours, days, etc.
        """
        # NOTE(review): the invalidation described above was missing from
        # this copy of the file; restored here - confirm against upstream.
        self._hg_repo.invalidate()
        return self._hg_repo

    hg_repo = property(get_hg_repo)

    ########### Public functions (main heechee repo API) ###########

    def get_top_revision(self):
        "Returns the highest revision number in the entire repository."
        return int(self.hg_repo.changectx("tip")) + 1

    def tree_for_revision(self, revision):
        """
        Returns the entire tree for the given revision - including the
        top-level trunk/, tags/ and branches/ directories.
        `revision` is a Subversion revision.
        """
        revision = int(revision)
        # Root of the tree
        root = Directory(name=None, parent=None)
        # If they asked for the base revision, give them emptiness.
        if revision == 0:
            return root
        # Trunk
        try:
            self._tree_for_branch("default", revision, "trunk", root)
        except NoBranchError:
            # No trunk yet, so make an empty one.
            Directory(name="trunk", parent=root)
        # Branches
        branches = Directory(name="branches", parent=root)
        for branch in self._branches():
            try:
                self._tree_for_branch(branch, revision, branch, branches)
            except NoBranchError:
                # The branch doesn't exist yet.
                pass
        # Tags
        tags = Directory(name="tags", parent=root)
        for tag in self._tags(revision):
            # Don't include 'tip' as a tag.
            if tag == 'tip':
                continue
            # NOTE(review): the statement that attaches each tag's tree was
            # lost from this copy of the file; reconstructed with the
            # changectx helper - confirm against upstream.
            self._tree_for_changectx(
                changectx=self.hg_repo[tag],
                name=tag,
                parent=tags,
            )

        return root

    def file_changes(self, path, source, target):
        """
        Given two revisions and a path, yields the numbers of SVN commits which
        affected that file/directory. Note that source = 5 will first look at
        changeset 6, since that starts with commit 5.

        Each matching revision is yielded once, however many of its files
        match the path.
        """
        path = path.strip("/")
        source = int(source)
        target = int(target)
        # Make sure source is lower than target, and at least 0
        if source > target:
            target, source = source, target
        source = max(source, 0)
        # Loop, only yield changes that occurred to that file.
        # TODO: There must be a better way of doing this.
        # (_identify_path is not defined in this class.)
        path_type, value, subpath = self._identify_path(path)
        for i in range(source + 1, target + 1):
            # Get changectx
            changectx = self.hg_repo[i - 1]
            # Can we show this changeset? The path will match, but does the
            # branch?
            if path_type == "root":
                # Just show all logs.
                pass
            elif path_type == "branch":
                # Branch mode
                if changectx.branch() != value:
                    continue
            elif path_type == "tag":
                # Tag mode. Currently, show only the tag's log message.
                if self.hg_repo[value] != changectx:
                    continue
            elif path_type in ["tags-root", "branches-root"]:
                # We currently show no logs for these.
                continue
            # Alright, see if our file's in here. any() stops at the first
            # match, so a revision is never reported twice.
            if any(
                changed_path.startswith(subpath)
                for changed_path in changectx.files()
            ):
                yield i

    def logs_for_revisions(self, highest, lowest, path):
        "Returns log message dicts for any matching commit, in the given range."
        # Go through every HG revision, return ones which match the path
        for i in self.file_changes(path, lowest, highest):
            # Yes We Can! (send a log)
            changectx = self.hg_repo[i - 1]
            yield {
                "rev": i,
                "comment": unicode(tolocal(changectx.description()), 'utf-8'),
                "author": unicode(tolocal(changectx.user()), 'utf-8'),
                "date": datetime.datetime.fromtimestamp(changectx.date()[0]),
            }

    def commit(self, branch_name, parent, message, author, changes, deletions):
        """
        Commits a change to the repository.

        `changes` maps file paths to svndiff data; `deletions` is a list of
        path prefixes to remove. `parent` is an SVN revision; the actual
        parent is the latest changeset on `branch_name` at or before it.
        Raises NoBranchError if no such changeset exists.
        """
        # Work out the correct parent (i.e. go back till we hit the branch)
        parent = str(self._find_branch_from(branch_name, parent))

        # Gets called for every file, to yield the changes
        def filectxfn(repo, memctx, path):
            # No svndiff, so this is a deletion (signalled with IOError).
            # Checked on its own so a KeyError raised while applying the
            # diff is not mistaken for a deletion.
            if path not in changes:
                raise IOError()
            # Get the file's current source
            try:
                source = StringIO(self.hg_repo.changectx(parent)[path].data())
            except LookupError:
                # The file wasn't in Mercurial before.
                source = StringIO("")
            # Patch it with the svndiff
            result = undiff(source, StringIO(changes[path]))
            # Return that as a memfilectx
            return memfilectx(path, result, False, False, None)

        # Work out what to actually delete (some of the deleted paths may be
        # directories; do prefix checks). any() keeps each path at most once.
        # NOTE(review): this is a plain string-prefix test, so deleting "foo"
        # also matches "foobar" - confirm that is intended.
        actual_deletions = [
            path
            for path in self.hg_repo.changectx('tip')
            if any(path.startswith(delete_path) for delete_path in deletions)
        ]
        # Make the change context which represents this new commit
        # NOTE(review): no `extra` is passed, so the new changeset's branch
        # is whatever memctx defaults to - confirm for non-default branches.
        ctx = memctx(
            repo=self.hg_repo,
            parents=[parent, None],
            text=message,
            files=list(changes.keys()) + actual_deletions,
            filectxfn=filectxfn,
            user=author,
        )
        # Commit!
        # NOTE(review): the commit call itself was missing from this copy of
        # the file; reconstructed - confirm against upstream.
        self.hg_repo.commitctx(ctx)

    def mtime(self):
        """
        Returns effective mtime of the repo. This is just the max() of the
        mtimes of the changelog and manifest index files in .hg.
        """
        important_files = ["store/00changelog.i", "store/00manifest.i"]
        hg_dir = self.hg_repo.path
        return max(
            os.stat(os.path.join(hg_dir, file_path)).st_mtime
            for file_path in important_files
        )

    ########### Private functions (hg -> svn mapping) ###########

    def _get_first_ancestors(self, changectx):
        """
        Given a changectx, yields itself and then its ancestors in a line.
        Since hg changesets can have more than one parent, this always
        chooses the older of the two parents.
        """
        curr = changectx
        null = self.hg_repo.changectx("null")
        while curr != null:
            yield curr
            curr = sorted(curr.parents())[0]

    def _branches(self):
        "Returns a list of branch names, excluding 'default'."
        return [x for x in self.hg_repo.branchtags().keys() if x != "default"]

    def _tags(self, revision):
        "Yields the tags which existed at SVN revision 'revision'."
        hg_revision = int(revision) - 1
        repo = self.hg_repo
        # Read the tags
        for tag, node in repo.tags().items():
            chgctx = repo.changectx(node)
            # Tags containing a slash are skipped.
            if int(chgctx) <= hg_revision and "/" not in tag:
                yield tag

    def _find_branch_from(self, branch, revision):
        """
        Given an SVN revision, returns the changectx on `branch` which was
        most recently committed but is still older or equal to the revision.
        Raises NoBranchError if there is none.
        """
        hg_revision = int(revision) - 1
        repo = self.hg_repo
        for i in range(hg_revision, -1, -1):
            changectx = repo[i]
            if changectx.branch() == branch:
                return changectx
        # We've not found a match!
        raise NoBranchError("Cannot find matching changeset for branch '%s'." % branch)

    def _tree_for_branch(self, branch, revision, name, parent):
        """
        Returns the file tree for the given branch at the given revision.
        Actually finds the highest-numbered changeset that is in the branch
        but is not greater than the given revision, and uses that.
        `revision` is the SVN revision, not the hg one.
        Branch is lazy-loaded.
        """
        return self._tree_for_changectx(
            changectx=self._find_branch_from(branch, revision),
            name=name,
            parent=parent,
        )

    def _tree_for_changectx(self, changectx, name, parent):
        """
        Returns the file tree for the given changectx.
        Tree is lazy-loaded.
        """
        return LazyRepoDirectory(
            repository=self,
            changectx=changectx,
            name=name,
            parent=parent,
        )

    def _attach_for_changectx(self, changectx, root, subpath=None):
        """
        Attaches a file tree for the given changectx on the provided Directory.
        If 'subpath' is provided, only loads files with a matching prefix.
        Returns `root`.
        """
        logging.debug('hg: loading revision %r', changectx)
        # Loop through the files, and make a tree.
        if subpath:
            subpath = subpath + '/'
        # Directories made so far, keyed by "/"-prefixed path ("" is root).
        dirs = {"": root}
        # Add all the files
        for path in changectx:
            # Subpath limiter.
            if subpath and not path.startswith(subpath):
                continue
            parts = path.split("/")
            # Make sure we have directories for our full path.
            prev_path = ""
            for part in parts[:-1]:
                # If this directory isn't made yet...
                our_path = prev_path + "/" + part
                if our_path not in dirs:
                    # Make it.
                    dirs[our_path] = Directory(
                        name=part,
                        parent=dirs[prev_path],
                    )
                prev_path = our_path
            #logging.debug('hg: loading file %r\n' % path)
            # NOTE(review): the callee name was lost from this copy of the
            # file; File is the imported class these keywords fit - confirm.
            File(
                name=parts[-1],
                contents=changectx[path].data(),
                parent=dirs[prev_path],
            )
        # Return!
        return root

class LazyRepoDirectory(Directory):
    """
    Directory subclass that only loads its contents on request.

    The contents are attached from `changectx` the first time `children`
    is read.
    """

    def __init__(self, repository, changectx, *args, **kwargs):
        """
        `repository` must provide _attach_for_changectx(changectx, root);
        remaining arguments are passed up the class hierarchy.
        """
        # NOTE(review): super(Directory, ...) skips Directory.__init__ and
        # starts at Directory's own parent class. Left unchanged because
        # Directory is not visible here - confirm whether
        # super(LazyRepoDirectory, self) was intended.
        super(Directory, self).__init__(*args, **kwargs)
        self._loaded = False
        self._repository = repository
        self._changectx = changectx
        self._children = {}

    def possibly_load(self):
        "If we haven't loaded our revision yet, do so."
        if not self._loaded:
            # Flag first: presumably attaching children reads `children` on
            # this object again, and that must not start a second load.
            self._loaded = True
            self._repository._attach_for_changectx(self._changectx, self)

    def _get_children(self):
        # Reading the children is the request that triggers the lazy load.
        self.possibly_load()
        return self._children

    def _set_children(self, value):
        self._children = value

    children = property(_get_children, _set_children)