Commits

Kevin Bullock  committed 505d7cd

package with distutils

(patch tweaked slightly by Augie Fackler)

  • Participants
  • Parent commits 3e0eb85
  • Tags 0.1.0

Comments (0)

Files changed (13)

 *.pyc
+tests/*.err
+build
+dist
+*.egg-info
 syntax: glob
-
 *.pyc
 tests/*.err
+build
+dist
+*.egg-info

File __init__.py

-# git.py - git server bridge
-#
-# Copyright 2008 Scott Chacon <schacon at gmail dot com>
-#   also some code (and help) borrowed from durin42
-#
-# This software may be used and distributed according to the terms
-# of the GNU General Public License, incorporated herein by reference.
-
-'''push and pull from a Git server
-
-This extension lets you communicate (push and pull) with a Git server.
-This way you can use Git hosting for your project or collaborate with a
-project that is in Git.  A bridger of worlds, this plugin be.
-
-Try hg clone git:// or hg clone git+ssh://
-'''
-
-from mercurial import commands, extensions, hg, util
-from mercurial.i18n import _
-
-from dulwich.repo import Repo
-from dulwich.errors import NotGitRepository
-
-import gitrepo, hgrepo
-from git_handler import GitHandler
-
-# support for `hg clone git://github.com/defunkt/facebox.git`
-# also hg clone git+ssh://git@github.com/schacon/simplegit.git
-hg.schemes['git'] = gitrepo
-hg.schemes['git+ssh'] = gitrepo
-
-_oldlocal = hg.schemes['file']
-
-def _local(path):
-    p = util.drop_scheme('file', path)
-    try:
-        Repo(p)
-        return gitrepo
-    except NotGitRepository:
-        return _oldlocal(path)
-
-hg.schemes['file'] = _local
-
-def reposetup(ui, repo):
-    klass = hgrepo.generate_repo_subclass(repo.__class__)
-    repo.__class__ = klass
-
-def gimport(ui, repo, remote_name=None):
-    git = GitHandler(repo, ui)
-    git.import_commits(remote_name)
-
-def gexport(ui, repo):
-    git = GitHandler(repo, ui)
-    git.export_commits()
-
-def gclear(ui, repo):
-    repo.ui.status(_("clearing out the git cache data\n"))
-    git = GitHandler(repo, ui)
-    git.clear()
-
-cmdtable = {
-  "gimport":
-        (gimport, [], _('hg gimport')),
-  "gexport":
-        (gexport, [], _('hg gexport')),
-  "gclear":
-      (gclear, [], _('Clears out the Git cached data')),
-}

File git_handler.py

-import os, sys, math, urllib, re
-import toposort
-
-from dulwich.errors import HangupException
-from dulwich.index import commit_tree
-from dulwich.objects import Blob, Commit, Tag, Tree, parse_timezone
-from dulwich.pack import create_delta, apply_delta
-from dulwich.repo import Repo
-
-from hgext import bookmarks
-from mercurial.i18n import _
-from mercurial.node import hex, bin, nullid
-from mercurial import context, util as hgutil
-
-try:
-    from mercurial.error import RepoError
-except ImportError:
-    from mercurial.repo import RepoError
-
-
-class GitHandler(object):
-
-    def __init__(self, dest_repo, ui):
-        self.repo = dest_repo
-        self.ui = ui
-        self.mapfile = 'git-mapfile'
-        self.tagsfile = 'git-tags'
-
-        if ui.config('git', 'intree'):
-            self.gitdir = self.repo.wjoin('.git')
-        else:
-            self.gitdir = self.repo.join('git')
-
-        self.paths = ui.configitems('paths')
-
-        self.init_if_missing()
-        self.load_git()
-        self.load_map()
-        self.load_tags()
-
-    # make the git data directory
-    def init_if_missing(self):
-        if not os.path.exists(self.gitdir):
-            os.mkdir(self.gitdir)
-            Repo.init_bare(self.gitdir)
-
-    def load_git(self):
-        self.git = Repo(self.gitdir)
-
-    ## FILE LOAD AND SAVE METHODS
-
-    def map_set(self, gitsha, hgsha):
-        self._map_git[gitsha] = hgsha
-        self._map_hg[hgsha] = gitsha
-
-    def map_hg_get(self, gitsha):
-        return self._map_git.get(gitsha)
-
-    def map_git_get(self, hgsha):
-        return self._map_hg.get(hgsha)
-
-    def load_map(self):
-        self._map_git = {}
-        self._map_hg = {}
-        if os.path.exists(self.repo.join(self.mapfile)):
-            for line in self.repo.opener(self.mapfile):
-                gitsha, hgsha = line.strip().split(' ', 1)
-                self._map_git[gitsha] = hgsha
-                self._map_hg[hgsha] = gitsha
-
-    def save_map(self):
-        file = self.repo.opener(self.mapfile, 'w+', atomictemp=True)
-        for hgsha, gitsha in sorted(self._map_hg.iteritems()):
-            file.write("%s %s\n" % (gitsha, hgsha))
-        file.rename()
-
-
-    def load_tags(self):
-        self.tags = {}
-        if os.path.exists(self.repo.join(self.tagsfile)):
-            for line in self.repo.opener(self.tagsfile):
-                sha, name = line.strip().split(' ', 1)
-                self.tags[name] = sha
-
-    def save_tags(self):
-        file = self.repo.opener(self.tagsfile, 'w+', atomictemp=True)
-        for name, sha in sorted(self.tags.iteritems()):
-            if not self.repo.tagtype(name) == 'global':
-                file.write("%s %s\n" % (sha, name))
-        file.rename()
-
-    ## END FILE LOAD AND SAVE METHODS
-
-    ## COMMANDS METHODS
-
-    def import_commits(self, remote_name):
-        self.import_git_objects(remote_name)
-        self.save_map()
-
-    def fetch(self, remote, heads):
-        self.export_commits()
-        refs = self.fetch_pack(remote, heads)
-        remote_name = self.remote_name(remote)
-
-        if refs:
-            self.import_git_objects(remote_name, refs)
-            self.import_tags(refs)
-            self.update_hg_bookmarks(refs)
-            if remote_name:
-                self.update_remote_branches(remote_name, refs)
-            elif not self.paths:
-                # intial cloning
-                self.update_remote_branches('default', refs)
-        else:
-            self.ui.status(_("nothing new on the server\n"))
-
-        self.save_map()
-
-    def export_commits(self):
-        try:
-            self.export_git_objects()
-            self.export_hg_tags()
-            self.update_references()
-        finally:
-            self.save_map()
-
-    def get_refs(self, remote):
-        self.export_commits()
-        client, path = self.get_transport_and_path(remote)
-        old_refs = {}
-        new_refs = {}
-        def changed(refs):
-            old_refs.update(refs)
-            to_push = set(self.local_heads().values() + self.tags.values())
-            new_refs.update(self.get_changed_refs(refs, to_push, True))
-            # don't push anything
-            return {}
-
-        try:
-            client.send_pack(path, changed, None)
-
-            changed_refs = [ref for ref, sha in new_refs.iteritems()
-                            if sha != old_refs.get(ref)]
-            new = [bin(self.map_hg_get(new_refs[ref])) for ref in changed_refs]
-            old = dict( (bin(self.map_hg_get(old_refs[r])), 1)
-                       for r in changed_refs if r in old_refs)
-
-            return old, new
-        except HangupException:
-            raise hgutil.Abort("the remote end hung up unexpectedly")
-
-    def push(self, remote, revs, force):
-        self.export_commits()
-        changed_refs = self.upload_pack(remote, revs, force)
-        remote_name = self.remote_name(remote)
-
-        if remote_name and changed_refs:
-            for ref, sha in changed_refs.iteritems():
-                self.ui.status("    "+ remote_name + "::" + ref + " => GIT:" + sha[0:8] + "\n")
-
-            self.update_remote_branches(remote_name, changed_refs)
-
-    def clear(self):
-        mapfile = self.repo.join(self.mapfile)
-        if os.path.exists(self.gitdir):
-            for root, dirs, files in os.walk(self.gitdir, topdown=False):
-                for name in files:
-                    os.remove(os.path.join(root, name))
-                for name in dirs:
-                    os.rmdir(os.path.join(root, name))
-            os.rmdir(self.gitdir)
-        if os.path.exists(mapfile):
-            os.remove(mapfile)
-
-    ## CHANGESET CONVERSION METHODS
-
-    def export_git_objects(self):
-        self.ui.status(_("importing Hg objects into Git\n"))
-        nodes = [self.repo.lookup(n) for n in self.repo]
-        export = [node for node in nodes if not hex(node) in self._map_hg]
-        total = len(export)
-        if total:
-          magnitude = int(math.log(total, 10)) + 1
-        else:
-          magnitude = 1
-        for i, rev in enumerate(export):
-            if i%100 == 0:
-                self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total))
-
-            ctx = self.repo.changectx(rev)
-            state = ctx.extra().get('hg-git', None)
-            if state == 'octopus':
-                self.ui.debug("revision %d is a part of octopus explosion\n" % ctx.rev())
-                continue
-            self.export_hg_commit(rev)
-
-    # convert this commit into git objects
-    # go through the manifest, convert all blobs/trees we don't have
-    # write the commit object (with metadata info)
-    def export_hg_commit(self, rev):
-        self.ui.note(_("converting revision %s\n") % rev)
-
-        oldenc = self.swap_out_encoding()
-
-        ctx = self.repo.changectx(rev)
-        extra = ctx.extra()
-
-        commit = Commit()
-
-        (time, timezone) = ctx.date()
-        commit.author = self.get_git_author(ctx)
-        commit.author_time = int(time)
-        commit.author_timezone = -timezone
-
-        if 'committer' in extra:
-            # fixup timezone
-            (name, timestamp, timezone) = extra['committer'].rsplit(' ', 2)
-            commit.committer = name
-            commit.commit_time = timestamp
-
-            # work around a timezone format change
-            if int(timezone) % 60 != 0: #pragma: no cover
-                timezone = parse_timezone(timezone)
-            else:
-                timezone = -int(timezone)
-            commit.commit_timezone = timezone
-        else:
-            commit.committer = commit.author
-            commit.commit_time = commit.author_time
-            commit.commit_timezone = commit.author_timezone
-
-        commit.parents = []
-        for parent in self.get_git_parents(ctx):
-            hgsha = hex(parent.node())
-            git_sha = self.map_git_get(hgsha)
-            if git_sha:
-                commit.parents.append(git_sha)
-
-        commit.message = self.get_git_message(ctx)
-
-        if 'encoding' in extra:
-            commit.encoding = extra['encoding']
-
-        tree_sha = commit_tree(self.git.object_store, self.iterblobs(ctx))
-        commit.tree = tree_sha
-
-        self.git.object_store.add_object(commit)
-        self.map_set(commit.id, ctx.hex())
-
-        self.swap_out_encoding(oldenc)
-        return commit.id
-
-    def get_git_author(self, ctx):
-        # hg authors might not have emails
-        author = ctx.user()
-
-        # check for git author pattern compliance
-        regex = re.compile('^(.*?) \<(.*?)\>(.*)$')
-        a = regex.match(author)
-
-        if a:
-            name = a.group(1)
-            email = a.group(2)
-            if len(a.group(3)) > 0:
-                name += ' ext:(' + urllib.quote(a.group(3)) + ')'
-            author = name + ' <' + email + '>'
-        else:
-            author = author + ' <none@none>'
-
-        if 'author' in ctx.extra():
-            author = apply_delta(author, ctx.extra()['author'])
-
-        return author
-
-    def get_git_parents(self, ctx):
-        def is_octopus_part(ctx):
-            return ctx.extra().get('hg-git', None) in ('octopus', 'octopus-done')
-
-        parents = []
-        if ctx.extra().get('hg-git', None) == 'octopus-done':
-            # implode octopus parents
-            part = ctx
-            while is_octopus_part(part):
-                (p1, p2) = part.parents()
-                assert not is_octopus_part(p1)
-                parents.append(p1)
-                part = p2
-            parents.append(p2)
-        else:
-            parents = ctx.parents()
-
-        return parents
-
-    def get_git_message(self, ctx):
-        extra = ctx.extra()
-
-        message = ctx.description() + "\n"
-        if 'message' in extra:
-            message = apply_delta(message, extra['message'])
-
-        # HG EXTRA INFORMATION
-        add_extras = False
-        extra_message = ''
-        if not ctx.branch() == 'default':
-            add_extras = True
-            extra_message += "branch : " + ctx.branch() + "\n"
-
-        renames = []
-        for f in ctx.files():
-            if f not in ctx.manifest():
-                continue
-            rename = ctx.filectx(f).renamed()
-            if rename:
-                renames.append((rename[0], f))
-
-        if renames:
-            add_extras = True
-            for oldfile, newfile in renames:
-                extra_message += "rename : " + oldfile + " => " + newfile + "\n"
-
-        for key, value in extra.iteritems():
-            if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
-                continue
-            else:
-                add_extras = True
-                extra_message += "extra : " + key + " : " +  urllib.quote(value) + "\n"
-
-        if add_extras:
-            message += "\n--HG--\n" + extra_message
-
-        return message
-
-    def iterblobs(self, ctx):
-        for f in ctx:
-            fctx = ctx[f]
-            blobid = self.map_git_get(hex(fctx.filenode()))
-
-            if not blobid:
-                blob = Blob.from_string(fctx.data())
-                self.git.object_store.add_object(blob)
-                self.map_set(blob.id, hex(fctx.filenode()))
-                blobid = blob.id
-
-            if 'l' in ctx.flags(f):
-                mode = 0120000
-            elif 'x' in ctx.flags(f):
-                mode = 0100755
-            else:
-                mode = 0100644
-
-            yield f, blobid, mode
-
-    def import_git_objects(self, remote_name=None, refs=None):
-        self.ui.status(_("importing Git objects into Hg\n"))
-        # import heads and fetched tags as remote references
-        todo = []
-        done = set()
-        convert_list = {}
-
-        # get a list of all the head shas
-        if refs:
-          for head, sha in refs.iteritems():
-              # refs contains all the refs in the server, not just the ones
-              # we are pulling
-              if sha in self.git.object_store:
-                  todo.append(sha)
-        else:
-            todo = self.git.refs.values()[:]
-
-        # traverse the heads getting a list of all the unique commits
-        while todo:
-            sha = todo.pop()
-            assert isinstance(sha, str)
-            if sha in done:
-                continue
-            done.add(sha)
-            obj = self.git.get_object(sha)
-            if isinstance (obj, Commit):
-                convert_list[sha] = obj
-                todo.extend([p for p in obj.parents if p not in done])
-            if isinstance(obj, Tag):
-                (obj_type, obj_sha) = obj.get_object()
-                obj = self.git.get_object(obj_sha)
-                if isinstance (obj, Commit):
-                    convert_list[sha] = obj
-                    todo.extend([p for p in obj.parents if p not in done])
-
-        # sort the commits
-        commits = toposort.TopoSort(convert_list).items()
-
-        commits = [commit for commit in commits if not commit in self._map_git]
-        # import each of the commits, oldest first
-        total = len(commits)
-        if total:
-            magnitude = int(math.log(total, 10)) + 1
-        else:
-            magnitude = 1
-        for i, csha in enumerate(commits):
-            if i%100 == 0:
-                self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total))
-            commit = convert_list[csha]
-            self.import_git_commit(commit)
-
-    def import_git_commit(self, commit):
-        self.ui.debug(_("importing: %s\n") % commit.id)
-        # TODO: Do something less coarse-grained than try/except on the
-        #        get_file call for removed files
-
-        (strip_message, hg_renames, hg_branch, extra) = self.extract_hg_metadata(commit.message)
-
-        # get a list of the changed, added, removed files
-        files = self.get_files_changed(commit)
-
-        date = (commit.author_time, -commit.author_timezone)
-        text = strip_message
-
-        origtext = text
-        try:
-            text.decode('utf-8')
-        except UnicodeDecodeError:
-            text = self.decode_guess(text, commit.encoding)
-
-        text = '\n'.join([l.rstrip() for l in text.splitlines()]).strip('\n')
-        if text + '\n' != origtext:
-            extra['message'] = create_delta(text +'\n', origtext)
-
-        author = commit.author
-
-        # convert extra data back to the end
-        if ' ext:' in commit.author:
-            regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$')
-            m = regex.match(commit.author)
-            if m:
-                name = m.group(1)
-                ex = urllib.unquote(m.group(2))
-                email = m.group(3)
-                author = name + ' <' + email + '>' + ex
-
-        if ' <none@none>' in commit.author:
-            author = commit.author[:-12]
-
-        try:
-            author.decode('utf-8')
-        except UnicodeDecodeError:
-            origauthor = author
-            author = self.decode_guess(author, commit.encoding)
-            extra['author'] = create_delta(author, origauthor)
-
-        oldenc = self.swap_out_encoding()
-
-        def getfilectx(repo, memctx, f):
-            try:
-                (mode, sha, data) = self.get_file(commit, f)
-                e = self.convert_git_int_mode(mode)
-            except (TypeError, KeyError):
-                raise IOError()
-            if f in hg_renames:
-                copied_path = hg_renames[f]
-            else:
-                copied_path = None
-            return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path)
-
-        gparents = map(self.map_hg_get, commit.parents)
-        p1, p2 = (nullid, nullid)
-        octopus = False
-
-        if len(gparents) > 1:
-            # merge, possibly octopus
-            def commit_octopus(p1, p2):
-                ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx,
-                                     author, date, {'hg-git': 'octopus'})
-                return hex(self.repo.commitctx(ctx))
-
-            octopus = len(gparents) > 2
-            p2 = gparents.pop()
-            p1 = gparents.pop()
-            while len(gparents) > 0:
-                p2 = commit_octopus(p1, p2)
-                p1 = gparents.pop()
-        else:
-            if gparents:
-                p1 = gparents.pop()
-
-        files = list(set(files))
-
-        pa = None
-        if not (p2 == nullid):
-            node1 = self.repo.changectx(p1)
-            node2 = self.repo.changectx(p2)
-            pa = node1.ancestor(node2)
-
-        # if named branch, add to extra
-        if hg_branch:
-            extra['branch'] = hg_branch
-
-        # if committer is different than author, add it to extra
-        if commit.author != commit.committer \
-               or commit.author_time != commit.commit_time \
-               or commit.author_timezone != commit.commit_timezone:
-            extra['committer'] = "%s %d %d" % (commit.committer, commit.commit_time, -commit.commit_timezone)
-
-        if commit.encoding:
-            extra['encoding'] = commit.encoding
-
-        if hg_branch:
-            extra['branch'] = hg_branch
-
-        if octopus:
-            extra['hg-git'] ='octopus-done'
-
-        ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx,
-                             author, date, extra)
-
-        node = self.repo.commitctx(ctx)
-
-        self.swap_out_encoding(oldenc)
-
-        # save changeset to mapping file
-        cs = hex(node)
-        self.map_set(commit.id, cs)
-
-    ## PACK UPLOADING AND FETCHING
-
-    def upload_pack(self, remote, revs, force):
-        client, path = self.get_transport_and_path(remote)
-        def changed(refs):
-            to_push = revs or set(self.local_heads().values() + self.tags.values())
-            return self.get_changed_refs(refs, to_push, force)
-
-        genpack = self.git.object_store.generate_pack_contents
-        try:
-            self.ui.status(_("creating and sending data\n"))
-            changed_refs = client.send_pack(path, changed, genpack)
-            return changed_refs
-        except HangupException:
-            raise hgutil.Abort("the remote end hung up unexpectedly")
-
-    def get_changed_refs(self, refs, revs, force):
-        new_refs = refs.copy()
-
-        #The remote repo is empty and the local one doesn't have bookmarks/tags
-        if refs.keys()[0] == 'capabilities^{}':
-            del new_refs['capabilities^{}']
-            if not self.local_heads():
-                tip = hex(self.repo.lookup('tip'))
-                bookmarks.bookmark(self.ui, self.repo, 'master', tip)
-                bookmarks.setcurrent(self.repo, 'master')
-                new_refs['refs/heads/master'] = self.map_git_get(tip)
-
-        for rev in revs:
-            ctx = self.repo[rev]
-            heads = [t for t in ctx.tags() if t in self.local_heads()]
-            tags = [t for t in ctx.tags() if t in self.tags]
-
-            if not (heads or tags):
-                raise hgutil.Abort("revision %s cannot be pushed since"
-                                   " it doesn't have a ref" % ctx)
-
-            for r in heads + tags:
-                if r in heads:
-                    ref = 'refs/heads/'+r
-                else:
-                    ref = 'refs/tags/'+r
-
-                if ref not in refs:
-                    new_refs[ref] = self.map_git_get(ctx.hex())
-                elif new_refs[ref] in self._map_git:
-                    rctx = self.repo[self.map_hg_get(new_refs[ref])]
-                    if rctx.ancestor(ctx) == rctx or force:
-                        new_refs[ref] = self.map_git_get(ctx.hex())
-                    else:
-                        raise hgutil.Abort("pushing %s overwrites %s"
-                                           % (ref, ctx))
-                else:
-                    raise hgutil.Abort("%s changed on the server, please pull "
-                                       "and merge before pushing" % ref)
-
-        return new_refs
-
-
-    def fetch_pack(self, remote_name, heads):
-        client, path = self.get_transport_and_path(remote_name)
-        graphwalker = self.git.get_graph_walker()
-        def determine_wants(refs):
-            if heads:
-                want = []
-                for h in heads:
-                    r = [ref for ref in refs if ref.endswith('/'+h)]
-                    if not r:
-                        raise hgutil.Abort("ref %s not found on remote server")
-                    elif len(r) == 1:
-                        want.append(refs[r[0]])
-                    else:
-                        raise hgutil.Abort("ambiguous reference %s: %r"%(h, r))
-            else:
-                want = [sha for ref, sha in refs.iteritems()
-                        if not ref.endswith('^{}')]
-            return want
-        f, commit = self.git.object_store.add_pack()
-        try:
-            return client.fetch_pack(path, determine_wants, graphwalker, f.write, self.ui.status)
-        except HangupException:
-            raise hgutil.Abort("the remote end hung up unexpectedly")
-        finally:
-            commit()
-
-    ## REFERENCES HANDLING
-
-    def update_references(self):
-        heads = self.local_heads()
-
-        # Create a local Git branch name for each
-        # Mercurial bookmark.
-        for key in heads:
-            self.git.refs['refs/heads/' + key] = self.map_git_get(heads[key])
-
-    def export_hg_tags(self):
-        for tag, sha in self.repo.tags().iteritems():
-            if self.repo.tagtype(tag) in ('global', 'git'):
-                self.git.refs['refs/tags/' + tag] = self.map_git_get(hex(sha))
-                self.tags[tag] = hex(sha)
-
-    def local_heads(self):
-        try:
-            bms = bookmarks.parse(self.repo)
-            return dict([(bm, hex(bms[bm])) for bm in bms])
-        except AttributeError: #pragma: no cover
-            return {}
-
-    def import_tags(self, refs):
-        keys = refs.keys()
-        if not keys:
-            return
-        for k in keys[:]:
-            ref_name = k
-            parts = k.split('/')
-            if parts[0] == 'refs' and parts[1] == 'tags':
-                ref_name = "/".join([v for v in parts[2:]])
-                # refs contains all the refs in the server, not just
-                # the ones we are pulling
-                if refs[k] not in self.git.object_store:
-                    continue
-                if ref_name[-3:] == '^{}':
-                    ref_name = ref_name[:-3]
-                if not ref_name in self.repo.tags():
-                    obj = self.git.get_object(refs[k])
-                    sha = None
-                    if isinstance (obj, Commit): # lightweight
-                        sha = self.map_hg_get(refs[k])
-                        self.tags[ref_name] = sha
-                    elif isinstance (obj, Tag): # annotated
-                        (obj_type, obj_sha) = obj.get_object()
-                        obj = self.git.get_object(obj_sha)
-                        if isinstance (obj, Commit):
-                            sha = self.map_hg_get(obj_sha)
-                            # TODO: better handling for annotated tags
-                            self.tags[ref_name] = sha
-        self.save_tags()
-
-    def update_hg_bookmarks(self, refs):
-        try:
-            bms = bookmarks.parse(self.repo)
-            heads = dict([(ref[11:],refs[ref]) for ref in refs
-                          if ref.startswith('refs/heads/')])
-
-            for head, sha in heads.iteritems():
-                # refs contains all the refs in the server, not just
-                # the ones we are pulling
-                if sha not in self.git.object_store:
-                    continue
-                hgsha = bin(self.map_hg_get(sha))
-                if not head in bms:
-                    # new branch
-                    bms[head] = hgsha
-                else:
-                    bm = self.repo[bms[head]]
-                    if bm.ancestor(self.repo[hgsha]) == bm:
-                        # fast forward
-                        bms[head] = hgsha
-            if heads:
-                bookmarks.write(self.repo, bms)
-
-        except AttributeError:
-            self.ui.warn(_('creating bookmarks failed, do you have'
-                         ' bookmarks enabled?\n'))
-
-    def update_remote_branches(self, remote_name, refs):
-        heads = dict([(ref[11:],refs[ref]) for ref in refs
-                      if ref.startswith('refs/heads/')])
-
-        for head, sha in heads.iteritems():
-            # refs contains all the refs in the server, not just the ones
-            # we are pulling
-            if sha not in self.git.object_store:
-                continue
-            hgsha = bin(self.map_hg_get(sha))
-            tag = '%s/%s' % (remote_name, head)
-            self.repo.tag(tag, hgsha, '', True, None, None)
-
-        for ref_name in refs:
-            if ref_name.startswith('refs/heads'):
-                new_ref = 'refs/remotes/%s/%s' % (remote_name, ref_name[10:])
-                self.git.refs[new_ref] = refs[ref_name]
-            elif ref_name.startswith('refs/tags'):
-                self.git.refs[ref_name] = refs[ref_name]
-
-
-    ## UTILITY FUNCTIONS
-
-    def convert_git_int_mode(self, mode):
-        # TODO: make these into constants
-        convert = {
-         0100644: '',
-         0100755: 'x',
-         0120000: 'l'}
-        if mode in convert:
-            return convert[mode]
-        return ''
-
-    def extract_hg_metadata(self, message):
-        split = message.split("\n--HG--\n", 1)
-        renames = {}
-        extra = {}
-        branch = False
-        if len(split) == 2:
-            message, meta = split
-            lines = meta.split("\n")
-            for line in lines:
-                if line == '':
-                    continue
-
-                command, data = line.split(" : ", 1)
-
-                if command == 'rename':
-                    before, after = data.split(" => ", 1)
-                    renames[after] = before
-                if command == 'branch':
-                    branch = data
-                if command == 'extra':
-                    before, after = data.split(" : ", 1)
-                    extra[before] = urllib.unquote(after)
-        return (message, renames, branch, extra)
-
-    def get_file(self, commit, f):
-        otree = self.git.tree(commit.tree)
-        parts = f.split('/')
-        for part in parts:
-            (mode, sha) = otree[part]
-            obj = self.git.get_object(sha)
-            if isinstance (obj, Blob):
-                return (mode, sha, obj._text)
-            elif isinstance(obj, Tree):
-                otree = obj
-
-    def get_files_changed(self, commit):
-        def filenames(basetree, comptree, prefix):
-            basefiles = set()
-            changes = list()
-            csha = None
-            cmode = None
-            if basetree is not None:
-                for (bmode, bname, bsha) in basetree.entries():
-                    if bmode == 0160000: # TODO: properly handle submodules
-                        continue
-                    basefiles.add(bname)
-                    bobj = self.git.get_object(bsha)
-                    if comptree is not None:
-                        if bname in comptree:
-                            (cmode, csha) = comptree[bname]
-                        else:
-                            (cmode, csha) = (None, None)
-                    if not ((csha == bsha) and (cmode == bmode)):
-                        if isinstance (bobj, Blob):
-                            changes.append (prefix + bname)
-                        elif isinstance(bobj, Tree):
-                            ctree = None
-                            if csha:
-                                ctree = self.git.get_object(csha)
-                            changes.extend(filenames(bobj,
-                                                     ctree,
-                                                     prefix + bname + '/'))
-
-            # handle removals
-            if comptree is not None:
-                for (bmode, bname, bsha) in comptree.entries():
-                    if bmode == 0160000: # TODO: handle submodles
-                        continue
-                    if bname not in basefiles:
-                        bobj = self.git.get_object(bsha)
-                        if isinstance(bobj, Blob):
-                            changes.append(prefix + bname)
-                        elif isinstance(bobj, Tree):
-                            changes.extend(filenames(None, bobj,
-                                                     prefix + bname + '/'))
-            return changes
-
-        all_changes = list()
-        otree = self.git.tree(commit.tree)
-        if len(commit.parents) == 0:
-            all_changes = filenames(otree, None, '')
-        for parent in commit.parents:
-            pcommit = self.git.commit(parent)
-            ptree = self.git.tree(pcommit.tree)
-            all_changes.extend(filenames(otree, ptree, ''))
-
-        return all_changes
-
-    def remote_name(self, remote):
-        names = [name for name, path in self.paths if path == remote]
-        if names:
-            return names[0]
-
-    # Stolen from hgsubversion
-    def swap_out_encoding(self, new_encoding='UTF-8'):
-        try:
-            from mercurial import encoding
-            old = encoding.encoding
-            encoding.encoding = new_encoding
-        except ImportError:
-            old = hgutil._encoding
-            hgutil._encoding = new_encoding
-        return old
-
-    def decode_guess(self, string, encoding):
-        # text is not valid utf-8, try to make sense of it
-        if encoding:
-            try:
-                return string.decode(encoding).encode('utf-8')
-            except UnicodeDecodeError:
-                pass
-
-        try:
-            return string.decode('latin-1').encode('utf-8')
-        except UnicodeDecodeError:
-            return string.decode('ascii', 'replace').encode('utf-8')
-
-    def get_transport_and_path(self, uri):
-        from dulwich.client import TCPGitClient, SSHGitClient, SubprocessGitClient
-        for handler, transport in (("git://", TCPGitClient), ("git@", SSHGitClient), ("git+ssh://", SSHGitClient)):
-            if uri.startswith(handler):
-                host, path = uri[len(handler):].split("/", 1)
-                return transport(host, thin_packs=False), '/' + path
-        # if its not git or git+ssh, try a local url..
-        return SubprocessGitClient(thin_packs=False), uri

File gitrepo.py

-from mercurial import repo, util
-from git_handler import GitHandler
-
-class gitrepo(repo.repository):
-    capabilities = ['lookup']
-    def __init__(self, ui, path, create):
-        if create: # pragma: no cover
-            raise util.Abort('Cannot create a git repository.')
-        self.path = path
-    def lookup(self, key):
-        if isinstance(key, str):
-            return key
-
-instance = gitrepo

File hggit/__init__.py

+# git.py - git server bridge
+#
+# Copyright 2008 Scott Chacon <schacon at gmail dot com>
+#   also some code (and help) borrowed from durin42
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+'''push and pull from a Git server
+
+This extension lets you communicate (push and pull) with a Git server.
+This way you can use Git hosting for your project or collaborate with a
+project that is in Git.  A bridger of worlds, this plugin be.
+
+Try hg clone git:// or hg clone git+ssh://
+'''
+
+from mercurial import commands, extensions, hg, util
+from mercurial.i18n import _
+
+from dulwich.repo import Repo
+from dulwich.errors import NotGitRepository
+
+import gitrepo, hgrepo
+from git_handler import GitHandler
+
+# Route Git URLs to the gitrepo module, which enables e.g.
+#   hg clone git://github.com/defunkt/facebox.git
+#   hg clone git+ssh://git@github.com/schacon/simplegit.git
+hg.schemes['git'] = gitrepo
+hg.schemes['git+ssh'] = gitrepo
+
+# Remember the stock 'file' handler so that _local can fall back to it.
+_oldlocal = hg.schemes['file']
+
+def _local(path):
+    p = util.drop_scheme('file', path)
+    try:
+        Repo(p)
+        return gitrepo
+    except NotGitRepository:
+        return _oldlocal(path)
+
+hg.schemes['file'] = _local
+
+def reposetup(ui, repo):
+    klass = hgrepo.generate_repo_subclass(repo.__class__)
+    repo.__class__ = klass
+
+def gimport(ui, repo, remote_name=None):
+    git = GitHandler(repo, ui)
+    git.import_commits(remote_name)
+
+def gexport(ui, repo):
+    git = GitHandler(repo, ui)
+    git.export_commits()
+
+def gclear(ui, repo):
+    repo.ui.status(_("clearing out the git cache data\n"))
+    git = GitHandler(repo, ui)
+    git.clear()
+
+cmdtable = {
+  "gimport":
+        (gimport, [], _('hg gimport')),
+  "gexport":
+        (gexport, [], _('hg gexport')),
+  "gclear":
+      (gclear, [], _('Clears out the Git cached data')),
+}

File hggit/git_handler.py

+import os, sys, math, urllib, re
+import toposort
+
+from dulwich.errors import HangupException
+from dulwich.index import commit_tree
+from dulwich.objects import Blob, Commit, Tag, Tree, parse_timezone
+from dulwich.pack import create_delta, apply_delta
+from dulwich.repo import Repo
+
+from hgext import bookmarks
+from mercurial.i18n import _
+from mercurial.node import hex, bin, nullid
+from mercurial import context, util as hgutil
+
+try:
+    from mercurial.error import RepoError
+except ImportError:
+    from mercurial.repo import RepoError
+
+
+class GitHandler(object):
+
+    def __init__(self, dest_repo, ui):
+        self.repo = dest_repo
+        self.ui = ui
+        self.mapfile = 'git-mapfile'
+        self.tagsfile = 'git-tags'
+
+        if ui.config('git', 'intree'):
+            self.gitdir = self.repo.wjoin('.git')
+        else:
+            self.gitdir = self.repo.join('git')
+
+        self.paths = ui.configitems('paths')
+
+        self.init_if_missing()
+        self.load_git()
+        self.load_map()
+        self.load_tags()
+
+    # make the git data directory
+    def init_if_missing(self):
+        if not os.path.exists(self.gitdir):
+            os.mkdir(self.gitdir)
+            Repo.init_bare(self.gitdir)
+
+    def load_git(self):
+        """Open the Git repository at self.gitdir and keep it as self.git."""
+        self.git = Repo(self.gitdir)
+
+    ## FILE LOAD AND SAVE METHODS
+
+    def map_set(self, gitsha, hgsha):
+        self._map_git[gitsha] = hgsha
+        self._map_hg[hgsha] = gitsha
+
+    def map_hg_get(self, gitsha):
+        """Return the Mercurial sha recorded for a Git sha, or None.
+
+        Note the naming: this *gets the hg sha*, so it is keyed by Git sha.
+        """
+        return self._map_git.get(gitsha)
+
+    def map_git_get(self, hgsha):
+        """Return the Git sha recorded for a Mercurial sha, or None.
+
+        Note the naming: this *gets the git sha*, so it is keyed by hg sha.
+        """
+        return self._map_hg.get(hgsha)
+
+    def load_map(self):
+        self._map_git = {}
+        self._map_hg = {}
+        if os.path.exists(self.repo.join(self.mapfile)):
+            for line in self.repo.opener(self.mapfile):
+                gitsha, hgsha = line.strip().split(' ', 1)
+                self._map_git[gitsha] = hgsha
+                self._map_hg[hgsha] = gitsha
+
+    def save_map(self):
+        file = self.repo.opener(self.mapfile, 'w+', atomictemp=True)
+        for hgsha, gitsha in sorted(self._map_hg.iteritems()):
+            file.write("%s %s\n" % (gitsha, hgsha))
+        file.rename()
+
+
+    def load_tags(self):
+        self.tags = {}
+        if os.path.exists(self.repo.join(self.tagsfile)):
+            for line in self.repo.opener(self.tagsfile):
+                sha, name = line.strip().split(' ', 1)
+                self.tags[name] = sha
+
+    def save_tags(self):
+        file = self.repo.opener(self.tagsfile, 'w+', atomictemp=True)
+        for name, sha in sorted(self.tags.iteritems()):
+            if not self.repo.tagtype(name) == 'global':
+                file.write("%s %s\n" % (sha, name))
+        file.rename()
+
+    ## END FILE LOAD AND SAVE METHODS
+
+    ## COMMANDS METHODS
+
+    def import_commits(self, remote_name):
+        """Import Git objects into Mercurial, then persist the sha map."""
+        self.import_git_objects(remote_name)
+        self.save_map()
+
+    def fetch(self, remote, heads):
+        self.export_commits()
+        refs = self.fetch_pack(remote, heads)
+        remote_name = self.remote_name(remote)
+
+        if refs:
+            self.import_git_objects(remote_name, refs)
+            self.import_tags(refs)
+            self.update_hg_bookmarks(refs)
+            if remote_name:
+                self.update_remote_branches(remote_name, refs)
+            elif not self.paths:
+                # intial cloning
+                self.update_remote_branches('default', refs)
+        else:
+            self.ui.status(_("nothing new on the server\n"))
+
+        self.save_map()
+
+    def export_commits(self):
+        """Export changesets and tags to Git and refresh the Git refs.
+
+        The sha map is saved even if one of the export steps raises.
+        """
+        try:
+            self.export_git_objects()
+            self.export_hg_tags()
+            self.update_references()
+        finally:
+            self.save_map()
+
+    def get_refs(self, remote):
+        self.export_commits()
+        client, path = self.get_transport_and_path(remote)
+        old_refs = {}
+        new_refs = {}
+        def changed(refs):
+            old_refs.update(refs)
+            to_push = set(self.local_heads().values() + self.tags.values())
+            new_refs.update(self.get_changed_refs(refs, to_push, True))
+            # don't push anything
+            return {}
+
+        try:
+            client.send_pack(path, changed, None)
+
+            changed_refs = [ref for ref, sha in new_refs.iteritems()
+                            if sha != old_refs.get(ref)]
+            new = [bin(self.map_hg_get(new_refs[ref])) for ref in changed_refs]
+            old = dict( (bin(self.map_hg_get(old_refs[r])), 1)
+                       for r in changed_refs if r in old_refs)
+
+            return old, new
+        except HangupException:
+            raise hgutil.Abort("the remote end hung up unexpectedly")
+
+    def push(self, remote, revs, force):
+        self.export_commits()
+        changed_refs = self.upload_pack(remote, revs, force)
+        remote_name = self.remote_name(remote)
+
+        if remote_name and changed_refs:
+            for ref, sha in changed_refs.iteritems():
+                self.ui.status("    "+ remote_name + "::" + ref + " => GIT:" + sha[0:8] + "\n")
+
+            self.update_remote_branches(remote_name, changed_refs)
+
+    def clear(self):
+        mapfile = self.repo.join(self.mapfile)
+        if os.path.exists(self.gitdir):
+            for root, dirs, files in os.walk(self.gitdir, topdown=False):
+                for name in files:
+                    os.remove(os.path.join(root, name))
+                for name in dirs:
+                    os.rmdir(os.path.join(root, name))
+            os.rmdir(self.gitdir)
+        if os.path.exists(mapfile):
+            os.remove(mapfile)
+
+    ## CHANGESET CONVERSION METHODS
+
+    def export_git_objects(self):
+        self.ui.status(_("importing Hg objects into Git\n"))
+        nodes = [self.repo.lookup(n) for n in self.repo]
+        export = [node for node in nodes if not hex(node) in self._map_hg]
+        total = len(export)
+        if total:
+          magnitude = int(math.log(total, 10)) + 1
+        else:
+          magnitude = 1
+        for i, rev in enumerate(export):
+            if i%100 == 0:
+                self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total))
+
+            ctx = self.repo.changectx(rev)
+            state = ctx.extra().get('hg-git', None)
+            if state == 'octopus':
+                self.ui.debug("revision %d is a part of octopus explosion\n" % ctx.rev())
+                continue
+            self.export_hg_commit(rev)
+
+    # convert this commit into git objects
+    # go through the manifest, convert all blobs/trees we don't have
+    # write the commit object (with metadata info)
+    def export_hg_commit(self, rev):
+        self.ui.note(_("converting revision %s\n") % rev)
+
+        oldenc = self.swap_out_encoding()
+
+        ctx = self.repo.changectx(rev)
+        extra = ctx.extra()
+
+        commit = Commit()
+
+        (time, timezone) = ctx.date()
+        commit.author = self.get_git_author(ctx)
+        commit.author_time = int(time)
+        commit.author_timezone = -timezone
+
+        if 'committer' in extra:
+            # fixup timezone
+            (name, timestamp, timezone) = extra['committer'].rsplit(' ', 2)
+            commit.committer = name
+            commit.commit_time = timestamp
+
+            # work around a timezone format change
+            if int(timezone) % 60 != 0: #pragma: no cover
+                timezone = parse_timezone(timezone)
+            else:
+                timezone = -int(timezone)
+            commit.commit_timezone = timezone
+        else:
+            commit.committer = commit.author
+            commit.commit_time = commit.author_time
+            commit.commit_timezone = commit.author_timezone
+
+        commit.parents = []
+        for parent in self.get_git_parents(ctx):
+            hgsha = hex(parent.node())
+            git_sha = self.map_git_get(hgsha)
+            if git_sha:
+                commit.parents.append(git_sha)
+
+        commit.message = self.get_git_message(ctx)
+
+        if 'encoding' in extra:
+            commit.encoding = extra['encoding']
+
+        tree_sha = commit_tree(self.git.object_store, self.iterblobs(ctx))
+        commit.tree = tree_sha
+
+        self.git.object_store.add_object(commit)
+        self.map_set(commit.id, ctx.hex())
+
+        self.swap_out_encoding(oldenc)
+        return commit.id
+
+    def get_git_author(self, ctx):
+        # hg authors might not have emails
+        author = ctx.user()
+
+        # check for git author pattern compliance
+        regex = re.compile('^(.*?) \<(.*?)\>(.*)$')
+        a = regex.match(author)
+
+        if a:
+            name = a.group(1)
+            email = a.group(2)
+            if len(a.group(3)) > 0:
+                name += ' ext:(' + urllib.quote(a.group(3)) + ')'
+            author = name + ' <' + email + '>'
+        else:
+            author = author + ' <none@none>'
+
+        if 'author' in ctx.extra():
+            author = apply_delta(author, ctx.extra()['author'])
+
+        return author
+
+    def get_git_parents(self, ctx):
+        def is_octopus_part(ctx):
+            return ctx.extra().get('hg-git', None) in ('octopus', 'octopus-done')
+
+        parents = []
+        if ctx.extra().get('hg-git', None) == 'octopus-done':
+            # implode octopus parents
+            part = ctx
+            while is_octopus_part(part):
+                (p1, p2) = part.parents()
+                assert not is_octopus_part(p1)
+                parents.append(p1)
+                part = p2
+            parents.append(p2)
+        else:
+            parents = ctx.parents()
+
+        return parents
+
+    def get_git_message(self, ctx):
+        extra = ctx.extra()
+
+        message = ctx.description() + "\n"
+        if 'message' in extra:
+            message = apply_delta(message, extra['message'])
+
+        # HG EXTRA INFORMATION
+        add_extras = False
+        extra_message = ''
+        if not ctx.branch() == 'default':
+            add_extras = True
+            extra_message += "branch : " + ctx.branch() + "\n"
+
+        renames = []
+        for f in ctx.files():
+            if f not in ctx.manifest():
+                continue
+            rename = ctx.filectx(f).renamed()
+            if rename:
+                renames.append((rename[0], f))
+
+        if renames:
+            add_extras = True
+            for oldfile, newfile in renames:
+                extra_message += "rename : " + oldfile + " => " + newfile + "\n"
+
+        for key, value in extra.iteritems():
+            if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
+                continue
+            else:
+                add_extras = True
+                extra_message += "extra : " + key + " : " +  urllib.quote(value) + "\n"
+
+        if add_extras:
+            message += "\n--HG--\n" + extra_message
+
+        return message
+
+    def iterblobs(self, ctx):
+        for f in ctx:
+            fctx = ctx[f]
+            blobid = self.map_git_get(hex(fctx.filenode()))
+
+            if not blobid:
+                blob = Blob.from_string(fctx.data())
+                self.git.object_store.add_object(blob)
+                self.map_set(blob.id, hex(fctx.filenode()))
+                blobid = blob.id
+
+            if 'l' in ctx.flags(f):
+                mode = 0120000
+            elif 'x' in ctx.flags(f):
+                mode = 0100755
+            else:
+                mode = 0100644
+
+            yield f, blobid, mode
+
+    def import_git_objects(self, remote_name=None, refs=None):
+        self.ui.status(_("importing Git objects into Hg\n"))
+        # import heads and fetched tags as remote references
+        todo = []
+        done = set()
+        convert_list = {}
+
+        # get a list of all the head shas
+        if refs:
+          for head, sha in refs.iteritems():
+              # refs contains all the refs in the server, not just the ones
+              # we are pulling
+              if sha in self.git.object_store:
+                  todo.append(sha)
+        else:
+            todo = self.git.refs.values()[:]
+
+        # traverse the heads getting a list of all the unique commits
+        while todo:
+            sha = todo.pop()
+            assert isinstance(sha, str)
+            if sha in done:
+                continue
+            done.add(sha)
+            obj = self.git.get_object(sha)
+            if isinstance (obj, Commit):
+                convert_list[sha] = obj
+                todo.extend([p for p in obj.parents if p not in done])
+            if isinstance(obj, Tag):
+                (obj_type, obj_sha) = obj.get_object()
+                obj = self.git.get_object(obj_sha)
+                if isinstance (obj, Commit):
+                    convert_list[sha] = obj
+                    todo.extend([p for p in obj.parents if p not in done])
+
+        # sort the commits
+        commits = toposort.TopoSort(convert_list).items()
+
+        commits = [commit for commit in commits if not commit in self._map_git]
+        # import each of the commits, oldest first
+        total = len(commits)
+        if total:
+            magnitude = int(math.log(total, 10)) + 1
+        else:
+            magnitude = 1
+        for i, csha in enumerate(commits):
+            if i%100 == 0:
+                self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total))
+            commit = convert_list[csha]
+            self.import_git_commit(commit)
+
+    def import_git_commit(self, commit):
+        self.ui.debug(_("importing: %s\n") % commit.id)
+        # TODO: Do something less coarse-grained than try/except on the
+        #        get_file call for removed files
+
+        (strip_message, hg_renames, hg_branch, extra) = self.extract_hg_metadata(commit.message)
+
+        # get a list of the changed, added, removed files
+        files = self.get_files_changed(commit)
+
+        date = (commit.author_time, -commit.author_timezone)
+        text = strip_message
+
+        origtext = text
+        try:
+            text.decode('utf-8')
+        except UnicodeDecodeError:
+            text = self.decode_guess(text, commit.encoding)
+
+        text = '\n'.join([l.rstrip() for l in text.splitlines()]).strip('\n')
+        if text + '\n' != origtext:
+            extra['message'] = create_delta(text +'\n', origtext)
+
+        author = commit.author
+
+        # convert extra data back to the end
+        if ' ext:' in commit.author:
+            regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$')
+            m = regex.match(commit.author)
+            if m:
+                name = m.group(1)
+                ex = urllib.unquote(m.group(2))
+                email = m.group(3)
+                author = name + ' <' + email + '>' + ex
+
+        if ' <none@none>' in commit.author:
+            author = commit.author[:-12]
+
+        try:
+            author.decode('utf-8')
+        except UnicodeDecodeError:
+            origauthor = author
+            author = self.decode_guess(author, commit.encoding)
+            extra['author'] = create_delta(author, origauthor)
+
+        oldenc = self.swap_out_encoding()
+
+        def getfilectx(repo, memctx, f):
+            try:
+                (mode, sha, data) = self.get_file(commit, f)
+                e = self.convert_git_int_mode(mode)
+            except (TypeError, KeyError):
+                raise IOError()
+            if f in hg_renames:
+                copied_path = hg_renames[f]
+            else:
+                copied_path = None
+            return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path)
+
+        gparents = map(self.map_hg_get, commit.parents)
+        p1, p2 = (nullid, nullid)
+        octopus = False
+
+        if len(gparents) > 1:
+            # merge, possibly octopus
+            def commit_octopus(p1, p2):
+                ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx,
+                                     author, date, {'hg-git': 'octopus'})
+                return hex(self.repo.commitctx(ctx))
+
+            octopus = len(gparents) > 2
+            p2 = gparents.pop()
+            p1 = gparents.pop()
+            while len(gparents) > 0:
+                p2 = commit_octopus(p1, p2)
+                p1 = gparents.pop()
+        else:
+            if gparents:
+                p1 = gparents.pop()
+
+        files = list(set(files))
+
+        pa = None
+        if not (p2 == nullid):
+            node1 = self.repo.changectx(p1)
+            node2 = self.repo.changectx(p2)
+            pa = node1.ancestor(node2)
+
+        # if named branch, add to extra
+        if hg_branch:
+            extra['branch'] = hg_branch
+
+        # if committer is different than author, add it to extra
+        if commit.author != commit.committer \
+               or commit.author_time != commit.commit_time \
+               or commit.author_timezone != commit.commit_timezone:
+            extra['committer'] = "%s %d %d" % (commit.committer, commit.commit_time, -commit.commit_timezone)
+
+        if commit.encoding:
+            extra['encoding'] = commit.encoding
+
+        if hg_branch:
+            extra['branch'] = hg_branch
+
+        if octopus:
+            extra['hg-git'] ='octopus-done'
+
+        ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx,
+                             author, date, extra)
+
+        node = self.repo.commitctx(ctx)
+
+        self.swap_out_encoding(oldenc)
+
+        # save changeset to mapping file
+        cs = hex(node)
+        self.map_set(commit.id, cs)
+
+    ## PACK UPLOADING AND FETCHING
+
+    def upload_pack(self, remote, revs, force):
+        client, path = self.get_transport_and_path(remote)
+        def changed(refs):
+            to_push = revs or set(self.local_heads().values() + self.tags.values())
+            return self.get_changed_refs(refs, to_push, force)
+
+        genpack = self.git.object_store.generate_pack_contents
+        try:
+            self.ui.status(_("creating and sending data\n"))
+            changed_refs = client.send_pack(path, changed, genpack)
+            return changed_refs
+        except HangupException:
+            raise hgutil.Abort("the remote end hung up unexpectedly")
+
+    def get_changed_refs(self, refs, revs, force):
+        new_refs = refs.copy()
+
+        #The remote repo is empty and the local one doesn't have bookmarks/tags
+        if refs.keys()[0] == 'capabilities^{}':
+            del new_refs['capabilities^{}']
+            if not self.local_heads():
+                tip = hex(self.repo.lookup('tip'))
+                bookmarks.bookmark(self.ui, self.repo, 'master', tip)
+                bookmarks.setcurrent(self.repo, 'master')
+                new_refs['refs/heads/master'] = self.map_git_get(tip)
+
+        for rev in revs:
+            ctx = self.repo[rev]
+            heads = [t for t in ctx.tags() if t in self.local_heads()]
+            tags = [t for t in ctx.tags() if t in self.tags]
+
+            if not (heads or tags):
+                raise hgutil.Abort("revision %s cannot be pushed since"
+                                   " it doesn't have a ref" % ctx)
+
+            for r in heads + tags:
+                if r in heads:
+                    ref = 'refs/heads/'+r
+                else:
+                    ref = 'refs/tags/'+r
+
+                if ref not in refs:
+                    new_refs[ref] = self.map_git_get(ctx.hex())
+                elif new_refs[ref] in self._map_git:
+                    rctx = self.repo[self.map_hg_get(new_refs[ref])]
+                    if rctx.ancestor(ctx) == rctx or force:
+                        new_refs[ref] = self.map_git_get(ctx.hex())
+                    else:
+                        raise hgutil.Abort("pushing %s overwrites %s"
+                                           % (ref, ctx))
+                else:
+                    raise hgutil.Abort("%s changed on the server, please pull "
+                                       "and merge before pushing" % ref)
+
+        return new_refs
+
+
+    def fetch_pack(self, remote_name, heads):
+        client, path = self.get_transport_and_path(remote_name)
+        graphwalker = self.git.get_graph_walker()
+        def determine_wants(refs):
+            if heads:
+                want = []
+                for h in heads:
+                    r = [ref for ref in refs if ref.endswith('/'+h)]
+                    if not r:
+                        raise hgutil.Abort("ref %s not found on remote server")
+                    elif len(r) == 1:
+                        want.append(refs[r[0]])
+                    else:
+                        raise hgutil.Abort("ambiguous reference %s: %r"%(h, r))
+            else:
+                want = [sha for ref, sha in refs.iteritems()
+                        if not ref.endswith('^{}')]
+            return want
+        f, commit = self.git.object_store.add_pack()
+        try:
+            return client.fetch_pack(path, determine_wants, graphwalker, f.write, self.ui.status)
+        except HangupException:
+            raise hgutil.Abort("the remote end hung up unexpectedly")
+        finally:
+            commit()
+
+    ## REFERENCES HANDLING
+
+    def update_references(self):
+        heads = self.local_heads()
+
+        # Create a local Git branch name for each
+        # Mercurial bookmark.
+        for key in heads:
+            self.git.refs['refs/heads/' + key] = self.map_git_get(heads[key])
+
+    def export_hg_tags(self):
+        for tag, sha in self.repo.tags().iteritems():
+            if self.repo.tagtype(tag) in ('global', 'git'):
+                self.git.refs['refs/tags/' + tag] = self.map_git_get(hex(sha))
+                self.tags[tag] = hex(sha)
+
+    def local_heads(self):
+        try:
+            bms = bookmarks.parse(self.repo)
+            return dict([(bm, hex(bms[bm])) for bm in bms])
+        except AttributeError: #pragma: no cover
+            return {}
+
+    def import_tags(self, refs):
+        keys = refs.keys()
+        if not keys:
+            return
+        for k in keys[:]:
+            ref_name = k
+            parts = k.split('/')
+            if parts[0] == 'refs' and parts[1] == 'tags':
+                ref_name = "/".join([v for v in parts[2:]])
+                # refs contains all the refs in the server, not just
+                # the ones we are pulling
+                if refs[k] not in self.git.object_store:
+                    continue
+                if ref_name[-3:] == '^{}':
+                    ref_name = ref_name[:-3]
+                if not ref_name in self.repo.tags():
+                    obj = self.git.get_object(refs[k])
+                    sha = None
+                    if isinstance (obj, Commit): # lightweight
+                        sha = self.map_hg_get(refs[k])
+                        self.tags[ref_name] = sha
+                    elif isinstance (obj, Tag): # annotated
+                        (obj_type, obj_sha) = obj.get_object()
+                        obj = self.git.get_object(obj_sha)
+                        if isinstance (obj, Commit):
+                            sha = self.map_hg_get(obj_sha)
+                            # TODO: better handling for annotated tags
+                            self.tags[ref_name] = sha
+        self.save_tags()
+
+    def update_hg_bookmarks(self, refs):
+        try:
+            bms = bookmarks.parse(self.repo)
+            heads = dict([(ref[11:],refs[ref]) for ref in refs
+                          if ref.startswith('refs/heads/')])
+
+            for head, sha in heads.iteritems():
+                # refs contains all the refs in the server, not just
+                # the ones we are pulling
+                if sha not in self.git.object_store:
+                    continue
+                hgsha = bin(self.map_hg_get(sha))
+                if not head in bms:
+                    # new branch
+                    bms[head] = hgsha
+                else:
+                    bm = self.repo[bms[head]]
+                    if bm.ancestor(self.repo[hgsha]) == bm:
+                        # fast forward
+                        bms[head] = hgsha
+            if heads:
+                bookmarks.write(self.repo, bms)
+
+        except AttributeError:
+            self.ui.warn(_('creating bookmarks failed, do you have'
+                         ' bookmarks enabled?\n'))
+
+    def update_remote_branches(self, remote_name, refs):
+        heads = dict([(ref[11:],refs[ref]) for ref in refs
+                      if ref.startswith('refs/heads/')])
+
+        for head, sha in heads.iteritems():
+            # refs contains all the refs in the server, not just the ones
+            # we are pulling
+            if sha not in self.git.object_store:
+                continue
+            hgsha = bin(self.map_hg_get(sha))
+            tag = '%s/%s' % (remote_name, head)
+            self.repo.tag(tag, hgsha, '', True, None, None)
+
+        for ref_name in refs:
+            if ref_name.startswith('refs/heads'):
+                new_ref = 'refs/remotes/%s/%s' % (remote_name, ref_name[10:])
+                self.git.refs[new_ref] = refs[ref_name]
+            elif ref_name.startswith('refs/tags'):
+                self.git.refs[ref_name] = refs[ref_name]
+
+
+    ## UTILITY FUNCTIONS
+
+    def convert_git_int_mode(self, mode):
+        # TODO: make these into constants
+        convert = {
+         0100644: '',
+         0100755: 'x',
+         0120000: 'l'}
+        if mode in convert:
+            return convert[mode]
+        return ''
+
+    def extract_hg_metadata(self, message):
+        split = message.split("\n--HG--\n", 1)
+        renames = {}
+        extra = {}
+        branch = False
+        if len(split) == 2:
+            message, meta = split
+            lines = meta.split("\n")
+            for line in lines:
+                if line == '':
+                    continue
+
+                command, data = line.split(" : ", 1)
+
+                if command == 'rename':
+                    before, after = data.split(" => ", 1)
+                    renames[after] = before
+                if command == 'branch':
+                    branch = data
+                if command == 'extra':
+                    before, after = data.split(" : ", 1)
+                    extra[before] = urllib.unquote(after)
+        return (message, renames, branch, extra)
+
+    def get_file(self, commit, f):
+        otree = self.git.tree(commit.tree)
+        parts = f.split('/')
+        for part in parts:
+            (mode, sha) = otree[part]
+            obj = self.git.get_object(sha)
+            if isinstance (obj, Blob):
+                return (mode, sha, obj._text)
+            elif isinstance(obj, Tree):
+                otree = obj
+
+    def get_files_changed(self, commit):
+        """Return the paths that differ between a commit and its parents.
+
+        For a commit without parents every file of its tree is listed. For
+        other commits the tree is compared with each parent's tree in turn
+        and the results are concatenated, so a path may appear more than
+        once. Submodule entries (mode 0160000) are ignored.
+        """
+        def filenames(basetree, comptree, prefix):
+            # Recursively collect, below prefix, the paths that were added or
+            # modified in basetree relative to comptree, plus the paths that
+            # exist only in comptree (removals). Either tree may be None.
+            basefiles = set()
+            changes = list()
+            # NOTE: csha/cmode are only refreshed when comptree is given;
+            # with comptree None they stay None and every entry is reported.
+            csha = None
+            cmode = None
+            if basetree is not None:
+                for (bmode, bname, bsha) in basetree.entries():
+                    if bmode == 0160000: # TODO: properly handle submodules
+                        continue
+                    basefiles.add(bname)
+                    bobj = self.git.get_object(bsha)
+                    if comptree is not None:
+                        if bname in comptree:
+                            (cmode, csha) = comptree[bname]
+                        else:
+                            (cmode, csha) = (None, None)
+                    if not ((csha == bsha) and (cmode == bmode)):
+                        if isinstance (bobj, Blob):
+                            changes.append (prefix + bname)
+                        elif isinstance(bobj, Tree):
+                            # descend, comparing with the matching entry of
+                            # comptree when there is one
+                            ctree = None
+                            if csha:
+                                ctree = self.git.get_object(csha)
+                            changes.extend(filenames(bobj,
+                                                     ctree,
+                                                     prefix + bname + '/'))
+
+            # handle removals
+            if comptree is not None:
+                for (bmode, bname, bsha) in comptree.entries():
+                    if bmode == 0160000: # TODO: handle submodules
+                        continue
+                    if bname not in basefiles:
+                        bobj = self.git.get_object(bsha)
+                        if isinstance(bobj, Blob):
+                            changes.append(prefix + bname)
+                        elif isinstance(bobj, Tree):
+                            changes.extend(filenames(None, bobj,
+                                                     prefix + bname + '/'))
+            return changes
+
+        all_changes = list()
+        otree = self.git.tree(commit.tree)
+        if len(commit.parents) == 0:
+            # root commit: everything in the tree is new
+            all_changes = filenames(otree, None, '')
+        for parent in commit.parents:
+            pcommit = self.git.commit(parent)
+            ptree = self.git.tree(pcommit.tree)
+            all_changes.extend(filenames(otree, ptree, ''))
+
+        return all_changes
+
+    def remote_name(self, remote):
+        names = [name for name, path in self.paths if path == remote]
+        if names:
+            return names[0]
+
+    # Stolen from hgsubversion
+    def swap_out_encoding(self, new_encoding='UTF-8'):
+        try:
+            from mercurial import encoding
+            old = encoding.encoding
+            encoding.encoding = new_encoding
+        except ImportError:
+            old = hgutil._encoding
+            hgutil._encoding = new_encoding
+        return old
+
+    def decode_guess(self, string, encoding):
+        # text is not valid utf-8, try to make sense of it
+        if encoding:
+            try:
+                return string.decode(encoding).encode('utf-8')
+            except UnicodeDecodeError:
+                pass
+
+        try:
+            return string.decode('latin-1').encode('utf-8')
+        except UnicodeDecodeError:
+            return string.decode('ascii', 'replace').encode('utf-8')
+
+    def get_transport_and_path(self, uri):
+        from dulwich.client import TCPGitClient, SSHGitClient, SubprocessGitClient
+        for handler, transport in (("git://", TCPGitClient), ("git@", SSHGitClient), ("git+ssh://", SSHGitClient)):
+            if uri.startswith(handler):
+                host, path = uri[len(handler):].split("/", 1)
+                return transport(host, thin_packs=False), '/' + path
+        # if its not git or git+ssh, try a local url..
+        return SubprocessGitClient(thin_packs=False), uri

File hggit/gitrepo.py

+from mercurial import repo, util
+from git_handler import GitHandler
+
+class gitrepo(repo.repository):
+    capabilities = ['lookup']
+    def __init__(self, ui, path, create):
+        if create: # pragma: no cover
+            raise util.Abort('Cannot create a git repository.')
+        self.path = path
+    def lookup(self, key):
+        if isinstance(key, str):
+            return key
+
+# Module-level alias for the gitrepo class.  Presumably this is the factory
+# Mercurial calls on a module registered in hg.schemes -- TODO confirm.
+instance = gitrepo

File hggit/hgrepo.py

+from mercurial.node import bin
+
+from git_handler import GitHandler
+from gitrepo import gitrepo
+
+
+def generate_repo_subclass(baseclass):
+    class hgrepo(baseclass):
+        def pull(self, remote, heads=None, force=False):
+            if isinstance(remote, gitrepo):
+                git = GitHandler(self, self.ui)
+                git.fetch(remote.path, heads)
+            else: #pragma: no cover
+                return super(hgrepo, self).pull(remote, heads, force)
+
+        def push(self, remote, force=False, revs=None):
+            if isinstance(remote, gitrepo):
+                git = GitHandler(self, self.ui)
+                git.push(remote.path, revs, force)
+            else: #pragma: no cover
+                return super(hgrepo, self).push(remote, force, revs)
+
+        def findoutgoing(self, remote, base=None, heads=None, force=False):
+            if isinstance(remote, gitrepo):
+                git = GitHandler(self, self.ui)
+                base, heads = git.get_refs(remote.path)
+                out, h = super(hgrepo, self).findoutgoing(remote, base, heads, force)
+                return out
+            else: #pragma: no cover
+                return super(hgrepo, self).findoutgoing(remote, base, heads, force)
+
+        def _findtags(self):
+            (tags, tagtypes) = super(hgrepo, self)._findtags()
+
+            git = GitHandler(self, self.ui)
+            for tag, rev in git.tags.iteritems():
+                if tag in tags:
+                    continue
+
+                tags[tag] = bin(rev)
+                tagtypes[tag] = 'git'
+
+            return (tags, tagtypes)
+
+        def tags(self):
+            if not hasattr(self, 'tagscache'):
+                # mercurial 1.4
+                return super(hgrepo, self).tags()
+
+            if self.tagscache:
+                return self.tagscache
+
+            git = GitHandler(self, self.ui)
+            tagscache = super(hgrepo, self).tags()
+            for tag, rev in git.tags.iteritems():
+                if tag in tagscache:
+                    continue
+
+                tagscache[tag] = bin(rev)
+                self._tagstypecache[tag] = 'git'
+
+            return tagscache
+
+    return hgrepo

File hggit/toposort.py

+''
+"""
+   Tarjan's algorithm and topological sorting implementation in Python
+   by Paul Harrison
+   Public domain, do with it as you will
+"""
+class TopoSort(object):
+
+    def __init__(self, commitdict):
+        self._sorted = self.robust_topological_sort(commitdict)
+        self._shas = []
+        for level in self._sorted:
+            for sha in level:
+                self._shas.append(sha)
+
+    def items(self):
+        self._shas.reverse()
+        return self._shas
+
+    def strongly_connected_components(self, graph):
+        """ Find the strongly connected components in a graph using
+            Tarjan's algorithm.
+
+            graph should be a dictionary mapping node names to
+            lists of successor nodes.
+            """
+
+        result = [ ]
+        stack = [ ]
+        low = { }
+
+        def visit(node):
+            if node in low: return
+
+            num = len(low)
+            low[node] = num
+            stack_pos = len(stack)
+            stack.append(node)
+
+            for successor in graph[node].parents:
+                visit(successor)
+                low[node] = min(low[node], low[successor])
+
+            if num == low[node]:
+                component = tuple(stack[stack_pos:])
+                del stack[stack_pos:]
+                result.append(component)
+                for item in component:
+                    low[item] = len(graph)
+
+        for node in graph:
+            visit(node)
+
+        return result
+
+    def strongly_connected_components_non(self, G):
+        """Returns a list of strongly connected components in G.
+
+         Uses Tarjan's algorithm with Nuutila's modifications.
+         Nonrecursive version of algorithm.
+
+         References:
+
+          R. Tarjan (1972). Depth-first search and linear graph algorithms.
+          SIAM Journal of Computing 1(2):146-160.
+
+          E. Nuutila and E. Soisalon-Soinen (1994).
+          On finding the strongly connected components in a directed graph.
+          Information Processing Letters 49(1): 9-14.
+
+         """
+        preorder={}
+        lowlink={}
+        scc_found={}
+        scc_queue = []
+        scc_list=[]
+        i=0     # Preorder counter
+        for source in G:
+            if source not in scc_found:
+                queue=[source]
+                while queue:
+                    v=queue[-1]
+                    if v not in preorder:
+                        i=i+1
+                        preorder[v]=i
+                    done=1
+                    v_nbrs=G[v]
+                    for w in v_nbrs.parents:
+                        if w not in preorder:
+                            queue.append(w)
+                            done=0
+                            break
+                    if done==1:
+                        lowlink[v]=preorder[v]
+                        for w in v_nbrs.parents:
+                            if w not in scc_found:
+                                if preorder[w]>preorder[v]:
+                                    lowlink[v]=min([lowlink[v],lowlink[w]])
+                                else:
+                                    lowlink[v]=min([lowlink[v],preorder[w]])
+                        queue.pop()
+                        if lowlink[v]==preorder[v]:
+                            scc_found[v]=True
+                            scc=(v,)
+                            while scc_queue and preorder[scc_queue[-1]]>preorder[v]:
+                                k=scc_queue.pop()
+                                scc_found[k]=True
+                                scc.append(k)
+                            scc_list.append(scc)
+                        else:
+                            scc_queue.append(v)
+        scc_list.sort(lambda x, y: cmp(len(y),len(x)))
+        return scc_list
+
+    def topological_sort(self, graph):
+        count = { }
+        for node in graph:
+            count[node] = 0
+        for node in graph:
+            for successor in graph[node]:
+                count[successor] += 1
+
+        ready = [ node for node in graph if count[node] == 0 ]
+
+        result = [ ]
+        while ready:
+            node = ready.pop(-1)
+            result.append(node)
+
+            for successor in graph[node]:
+                count[successor] -= 1
+                if count[successor] == 0:
+                    ready.append(successor)
+
+        return result
+
+    def robust_topological_sort(self, graph):
+        """ First identify strongly connected components,
+            then perform a topological sort on these components. """
+
+        components = self.strongly_connected_components_non(graph)
+        
+        node_component = { }
+        for component in components:
+            for node in component:
+                node_component[node] = component
+
+        component_graph = { }
+        for component in components:
+            component_graph[component] = [ ]
+
+        for node in graph:
+            node_c = node_component[node]
+            for successor in graph[node].parents:
+                successor_c = node_component[successor]
+                if node_c != successor_c:
+                    component_graph[node_c].append(successor_c)
+
+        return self.topological_sort(component_graph)

File hgrepo.py

-from mercurial.node import bin
-
-from git_handler import GitHandler
-from gitrepo import gitrepo
-
-
-def generate_repo_subclass(baseclass):
-    class hgrepo(baseclass):
-        def pull(self, remote, heads=None, force=False):
-            if isinstance(remote, gitrepo):
-                git = GitHandler(self, self.ui)
-                git.fetch(remote.path, heads)
-            else: #pragma: no cover
-                return super(hgrepo, self).pull(remote, heads, force)
-
-        def push(self, remote, force=False, revs=None):
-            if isinstance(remote, gitrepo):
-                git = GitHandler(self, self.ui)
-                git.push(remote.path, revs, force)
-            else: #pragma: no cover
-                return super(hgrepo, self).push(remote, force, revs)
-
-        def findoutgoing(self, remote, base=None, heads=None, force=False):
-            if isinstance(remote, gitrepo):
-                git = GitHandler(self, self.ui)
-                base, heads = git.get_refs(remote.path)
-                out, h = super(hgrepo, self).findoutgoing(remote, base, heads, force)
-                return out
-            else: #pragma: no cover
-                return super(hgrepo, self).findoutgoing(remote, base, heads, force)
-
-        def _findtags(self):
-            (tags, tagtypes) = super(hgrepo, self)._findtags()
-
-            git = GitHandler(self, self.ui)
-            for tag, rev in git.tags.iteritems():
-                if tag in tags:
-                    continue
-
-                tags[tag] = bin(rev)
-                tagtypes[tag] = 'git'
-
-            return (tags, tagtypes)
-
-        def tags(self):
-            if not hasattr(self, 'tagscache'):
-                # mercurial 1.4
-                return super(hgrepo, self).tags()
-
-            if self.tagscache:
-                return self.tagscache
-
-            git = GitHandler(self, self.ui)
-            tagscache = super(hgrepo, self).tags()
-            for tag, rev in git.tags.iteritems():
-                if tag in tagscache:
-                    continue
-