Marcin Kuzminski committed 90243de

Multiple git fixes
- added option to get changeset by tag or branch name
- fixed issues with Tag objects in dulwich
- disabled the global gitconfig when running git commands so the user's global configuration doesn't influence the output data
- added pull & fetch options into git

Comments (0)

Files changed (5)

vcs/backends/git/changeset.py

 from vcs.exceptions import ChangesetDoesNotExistError
 from vcs.exceptions import ImproperArchiveTypeError
 from vcs.backends.base import BaseChangeset
-from vcs.nodes import FileNode, DirNode, NodeKind, RootNode, RemovedFileNode
+from vcs.nodes import FileNode, DirNode, NodeKind, RootNode, \
+    RemovedFileNode, SubModuleNode
 from vcs.utils import safe_unicode
 from vcs.utils import date_fromtimestamp
 from vcs.utils.lazy import LazyProperty
     def __init__(self, repository, revision):
         self._stat_modes = {}
         self.repository = repository
+
+        try:
+            commit = self.repository._repo.get_object(revision)
+            if isinstance(commit, objects.Tag):
+                revision = commit.object[1]
+                commit = self.repository._repo.get_object(commit.object[1])
+        except KeyError:
+            raise RepositoryError("Cannot get object with id %s" % revision)
         self.raw_id = revision
+        self.id = self.raw_id
+        self.short_id = self.raw_id[:12]
+        self._commit = commit
+
+        self._tree_id = commit.tree
+        self._commiter_property = 'committer'
+        self._date_property = 'commit_time'
+        self._date_tz_property = 'commit_timezone'
         self.revision = repository.revisions.index(revision)
 
-        self.short_id = self.raw_id[:12]
-        self.id = self.raw_id
-        try:
-            commit = self.repository._repo.get_object(self.raw_id)
-        except KeyError:
-            raise RepositoryError("Cannot get object with id %s" % self.raw_id)
-        self._commit = commit
-        self._tree_id = commit.tree
-
-        try:
-            self.message = safe_unicode(commit.message[:-1])
-            # Always strip last eol
-        except UnicodeDecodeError:
-            self.message = commit.message[:-1].decode(commit.encoding
-                or 'utf-8')
+        self.message = safe_unicode(commit.message)
         #self.branch = None
         self.tags = []
-        #tree = self.repository.get_object(self._tree_id)
         self.nodes = {}
         self._paths = {}
 
     @LazyProperty
     def author(self):
-        return safe_unicode(self._commit.committer)
+        return safe_unicode(getattr(self._commit, self._commiter_property))
 
     @LazyProperty
     def date(self):
-        return date_fromtimestamp(self._commit.commit_time,
-                                  self._commit.commit_timezone)
+        return date_fromtimestamp(getattr(self._commit, self._date_property),
+                                  getattr(self._commit, self._date_tz_property))
 
     @LazyProperty
     def status(self):
 
     @LazyProperty
     def branch(self):
-        # TODO: Cache as we walk (id <-> branch name mapping)
-        refs = self.repository._repo.get_refs()
-        heads = [(key[len('refs/heads/'):], val) for key, val in refs.items()
-            if key.startswith('refs/heads/')]
 
-        for name, id in heads:
-            walker = self.repository._repo.object_store.get_graph_walker([id])
-            while True:
-                id = walker.next()
-                if not id:
-                    break
-                if id == self.id:
-                    return safe_unicode(name)
-        raise ChangesetError("This should not happen... Have you manually "
-            "change id of the changeset?")
+        heads = self.repository._heads(reverse=False)
+
+        ref = heads.get(self.raw_id)
+        if ref:
+            return safe_unicode(ref)
 
     def _fix_path(self, path):
         """
         return path
 
     def _get_id_for_path(self, path):
+
         # FIXME: Please, spare a couple of minutes and make those codes cleaner;
         if not path in self._paths:
             path = path.strip('/')
             # set root tree
-            tree = self.repository._repo[self._commit.tree]
+            tree = self.repository._repo[self._tree_id]
             if path == '':
                 self._paths[''] = tree.id
                 return tree.id
             splitted = path.split('/')
             dirs, name = splitted[:-1], splitted[-1]
             curdir = ''
+
+            # initially extract things from root dir
+            for item, stat, id in tree.iteritems():
+                if curdir:
+                    name = '/'.join((curdir, item))
+                else:
+                    name = item
+                self._paths[name] = id
+                self._stat_modes[name] = stat
+
             for dir in dirs:
                 if curdir:
                     curdir = '/'.join((curdir, dir))
                 else:
                     curdir = dir
-                #if curdir in self._paths:
-                    ## This path have been already traversed
-                    ## Update tree and continue
-                    #tree = self.repository._repo[self._paths[curdir]]
-                    #continue
                 dir_id = None
                 for item, stat, id in tree.iteritems():
-                    if curdir:
-                        item_path = '/'.join((curdir, item))
-                    else:
-                        item_path = item
-                    self._paths[item_path] = id
-                    self._stat_modes[item_path] = stat
                     if dir == item:
                         dir_id = id
                 if dir_id:
                         raise ChangesetError('%s is not a directory' % curdir)
                 else:
                     raise ChangesetError('%s have not been found' % curdir)
-            for item, stat, id in tree.iteritems():
-                if curdir:
-                    name = '/'.join((curdir, item))
-                else:
-                    name = item
-                self._paths[name] = id
-                self._stat_modes[name] = stat
+
+                # cache all items from the given traversed tree
+                for item, stat, id in tree.iteritems():
+                    if curdir:
+                        name = '/'.join((curdir, item))
+                    else:
+                        name = item
+                    self._paths[name] = id
+                    self._stat_modes[name] = stat
             if not path in self._paths:
                 raise NodeDoesNotExistError("There is no file nor directory "
                     "at the given path %r at revision %r"
         return self._paths[path]
 
     def _get_kind(self, path):
-        id = self._get_id_for_path(path)
-        obj = self.repository._repo[id]
+        obj = self.repository._repo[self._get_id_for_path(path)]
         if isinstance(obj, objects.Blob):
             return NodeKind.FILE
         elif isinstance(obj, objects.Tree):
         Returns list of parents changesets.
         """
         return [self.repository.get_changeset(parent)
-            for parent in self._commit.parents]
+                for parent in self._commit.parents]
 
     def next(self, branch=None):
 
 
         return _prev(self, branch)
 
+    def diff(self, ignore_whitespace=True, context=3):
+        rev1 = self.parents[0] if self.parents else self.repository.EMPTY_CHANGESET
+        rev2 = self
+        return ''.join(self.repository.get_diff(rev1, rev2,
+                                    ignore_whitespace=ignore_whitespace,
+                                    context=context))
+
     def get_file_mode(self, path):
         """
         Returns stat mode of the file at the given ``path``.
         which is generally not good. Should be replaced with algorithm
         iterating commits.
         """
-        cmd = 'log --pretty="format: %%H" --name-status -p %s -- "%s"' % (
+        cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (
                   self.id, path
                )
         so, se = self.repository.run_git_command(cmd)
-        ids = re.findall(r'\w{40}', so)
+        ids = re.findall(r'[0-9a-fA-F]{40}', so)
         return [self.repository.get_changeset(id) for id in ids]
 
     def get_file_annotate(self, path):
         # --root ==> doesn't put '^' character for boundaries
         # -r sha ==> blames for the given revision
         so, se = self.repository.run_git_command(cmd)
+
         annotate = []
         for i, blame_line in enumerate(so.split('\n')[:-1]):
             ln_no = i + 1
-            id, line = re.split(r' \(.+?\) ', blame_line, 1)
+            id, line = re.split(r' ', blame_line, 1)
             annotate.append((ln_no, self.repository.get_changeset(id), line))
         return annotate
 
         tree = self.repository._repo[id]
         dirnodes = []
         filenodes = []
+        als = self.repository.alias
         for name, stat, id in tree.iteritems():
+            if objects.S_ISGITLINK(stat):
+                dirnodes.append(SubModuleNode(name, url=None, changeset=id,
+                                              alias=als))
+                continue
+
             obj = self.repository._repo.get_object(id)
             if path != '':
                 obj_path = '/'.join((path, name))
         path = self._fix_path(path)
         if not path in self.nodes:
             try:
-                id = self._get_id_for_path(path)
+                id_ = self._get_id_for_path(path)
             except ChangesetError:
                 raise NodeDoesNotExistError("Cannot find one of parents' "
                     "directories for a given path: %s" % path)
-            obj = self.repository._repo.get_object(id)
-            if isinstance(obj, objects.Tree):
-                if path == '':
-                    node = RootNode(changeset=self)
+
+            _GL = lambda m: m and objects.S_ISGITLINK(m)
+            if _GL(self._stat_modes.get(path)):
+                node = SubModuleNode(path, url=None, changeset=id_,
+                                     alias=self.repository.alias)
+            else:
+                obj = self.repository._repo.get_object(id_)
+
+                if isinstance(obj, objects.Tree):
+                    if path == '':
+                        node = RootNode(changeset=self)
+                    else:
+                        node = DirNode(path, changeset=self)
+                    node._tree = obj
+                elif isinstance(obj, objects.Blob):
+                    node = FileNode(path, changeset=self)
+                    node._blob = obj
                 else:
-                    node = DirNode(path, changeset=self)
-                node._tree = obj
-            elif isinstance(obj, objects.Blob):
-                node = FileNode(path, changeset=self)
-                node._blob = obj
-            else:
-                raise NodeDoesNotExistError("There is no file nor directory "
-                    "at the given path %r at revision %r"
-                    % (path, self.short_id))
+                    raise NodeDoesNotExistError("There is no file nor directory "
+                        "at the given path %r at revision %r"
+                        % (path, self.short_id))
             # cache node
             self.nodes[path] = node
         return self.nodes[path]
     def _diff_name_status(self):
         output = []
         for parent in self.parents:
-            cmd = 'diff --name-status %s %s' % (parent.raw_id, self.raw_id)
+            cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id, self.raw_id)
             so, se = self.repository.run_git_command(cmd)
             output.append(so.strip())
         return '\n'.join(output)
         for line in self._diff_name_status.splitlines():
             if not line:
                 continue
+
             if line.startswith(char):
-                splitted = line.split(char,1)
+                splitted = line.split(char, 1)
                 if not len(splitted) == 2:
                     raise VCSError("Couldn't parse diff result:\n%s\n\n and "
                         "particularly that line: %s" % (self._diff_name_status,
                         line))
-                paths.add(splitted[1].strip())
+                _path = splitted[1].strip()
+                paths.add(_path)
         return sorted(paths)
 
     @LazyProperty

vcs/backends/git/inmemory.py

 from dulwich.repo import Repo
 from vcs.backends.base import BaseInMemoryChangeset
 from vcs.exceptions import RepositoryError
+from vcs.utils import safe_str
 
 
 class GitInMemoryChangeset(BaseInMemoryChangeset):
 
     def commit(self, message, author, parents=None, branch=None, date=None,
-            **kwargs):
+               **kwargs):
         """
         Performs in-memory commit (doesn't check workdir in any way) and
         returns newly created ``Changeset``. Updates repository's
                     curtree = newtree
                 parent[reversed_dirnames[-1]] = DIRMOD, curtree.id
             else:
-                parent.add(node.mode, node_path, blob.id)
+                parent.add(name=node_path, mode=node.mode, hexsha=blob.id)
+
             new_trees.append(parent)
             # Update ancestors
             for parent, tree, path in reversed([(a[1], b[1], b[0]) for a, b in
         commit = objects.Commit()
         commit.tree = commit_tree.id
         commit.parents = [p._commit.id for p in self.parents if p]
-        commit.author = commit.committer = author
+        commit.author = commit.committer = safe_str(author)
         commit.encoding = ENCODING
-        commit.message = message + ' '
+        commit.message = safe_str(message)
 
         # Compute date
         if date is None:
         # Update vcs repository object & recreate dulwich repo
         self.repository.revisions.append(commit.id)
         self.repository._repo = Repo(self.repository.path)
+        # invalidate parsed refs after commit
+        self.repository._parsed_refs = self.repository._get_parsed_refs()
         tip = self.repository.get_changeset()
         self.reset()
         return tip

vcs/backends/git/repository.py

                 'config'),
             abspath(get_user_home(), '.gitconfig'),
         ]
+        self.bare = self._repo.bare
 
     @LazyProperty
     def revisions(self):
 
         :param cmd: git command to be executed
         """
-        #cmd = '(cd %s && git %s)' % (self.path, cmd)
+
+        _copts = ['-c', 'core.quotepath=false', ]
+        _str_cmd = False
         if isinstance(cmd, basestring):
-            cmd = 'git %s' % cmd
-        else:
-            cmd = ['git'] + cmd
+            cmd = [cmd]
+            _str_cmd = True
+
+        gitenv = os.environ
+        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
+
+        cmd = ['git'] + _copts + cmd
+        if _str_cmd:
+            cmd = ' '.join(cmd)
         try:
             opts = dict(
                 shell=isinstance(cmd, basestring),
                 stdout=PIPE,
-                stderr=PIPE)
+                stderr=PIPE,
+                env=gitenv,
+            )
             if os.path.isdir(self.path):
                 opts['cwd'] = self.path
             p = Popen(cmd, **opts)
             raise RepositoryError(err)
 
     def _get_all_revisions(self):
-        cmd = 'rev-list --all --date-order'
+        cmd = 'rev-list --all --reverse --date-order'
         try:
             so, se = self.run_git_command(cmd)
         except RepositoryError:
             # Can be raised for empty repositories
             return []
-        revisions = so.splitlines()
-        revisions.reverse()
-        return revisions
+        return so.splitlines()
+
+    def _get_all_revisions2(self):
+        #alternate implementation using dulwich
+        includes = [x[1][0] for x in self._parsed_refs.iteritems()
+                    if x[1][1] != 'T']
+        return [c.commit.id for c in self._repo.get_walker(include=includes)]
 
     def _get_revision(self, revision):
         """
                     "for this repository %s" % (revision, self))
 
         elif is_bstr(revision):
-            if not pattern.match(revision) or revision not in self.revisions:
+            # get by branch/tag name
+            _ref_revision = self._parsed_refs.get(revision)
+            _tags_shas = self.tags.values()
+            if _ref_revision:  # and _ref_revision[1] in ['H', 'RH', 'T']:
+                return _ref_revision[0]
+
+            # maybe it's a tag ? we don't have them in self.revisions
+            elif revision in _tags_shas:
+                return _tags_shas[_tags_shas.index(revision)]
+
+            elif not pattern.match(revision) or revision not in self.revisions:
                 raise ChangesetDoesNotExistError("Revision %r does not exist "
                     "for this repository %s" % (revision, self))
 
         try:
             return time.mktime(self.get_changeset().date.timetuple())
         except RepositoryError:
+            idx_loc = '' if self.bare else '.git'
             # fallback to filesystem
-            in_path = os.path.join(self.path, '.git', "index")
-            he_path = os.path.join(self.path, '.git', "HEAD")
+            in_path = os.path.join(self.path, idx_loc, "index")
+            he_path = os.path.join(self.path, idx_loc, "HEAD")
             if os.path.exists(in_path):
                 return os.stat(in_path).st_mtime
             else:
 
     @LazyProperty
     def description(self):
+        idx_loc = '' if self.bare else '.git'
         undefined_description = u'unknown'
-        description_path = os.path.join(self.path, '.git', 'description')
+        description_path = os.path.join(self.path, idx_loc, 'description')
         if os.path.isfile(description_path):
             return safe_unicode(open(description_path).read())
         else:
     def branches(self):
         if not self.revisions:
             return {}
-        refs = self._repo.refs.as_dict()
         sortkey = lambda ctx: ctx[0]
-        _branches = [('/'.join(ref.split('/')[2:]), head)
-            for ref, head in refs.items()
-            if ref.startswith('refs/heads/') or
-            ref.startswith('refs/remotes/') and not ref.endswith('/HEAD')]
+        _branches = [(x[0], x[1][0])
+                     for x in self._parsed_refs.iteritems() if x[1][1] == 'H']
         return OrderedDict(sorted(_branches, key=sortkey, reverse=False))
 
+    @LazyProperty
+    def tags(self):
+        return self._get_tags()
+
     def _get_tags(self):
         if not self.revisions:
             return {}
+
         sortkey = lambda ctx: ctx[0]
-        _tags = [('/'.join(ref.split('/')[2:]), head) for ref, head in
-            self._repo.get_refs().items() if ref.startswith('refs/tags/')]
+        _tags = [(x[0], x[1][0])
+                 for x in self._parsed_refs.iteritems() if x[1][1] == 'T']
         return OrderedDict(sorted(_tags, key=sortkey, reverse=True))
 
-    @LazyProperty
-    def tags(self):
-        return self._get_tags()
-
     def tag(self, name, user, revision=None, message=None, date=None,
             **kwargs):
         """
             changeset.raw_id)
         self._repo.refs["refs/tags/%s" % name] = changeset._commit.id
 
+        self._parsed_refs = self._get_parsed_refs()
         self.tags = self._get_tags()
         return changeset
 
         tagpath = posixpath.join(self._repo.refs.path, 'refs', 'tags', name)
         try:
             os.remove(tagpath)
+            self._parsed_refs = self._get_parsed_refs()
             self.tags = self._get_tags()
         except OSError, e:
             raise RepositoryError(e.strerror)
 
+    @LazyProperty
+    def _parsed_refs(self):
+        return self._get_parsed_refs()
+
+    def _get_parsed_refs(self):
+        refs = self._repo.get_refs()
+        keys = [('refs/heads/', 'H'),
+                ('refs/remotes/origin/', 'RH'),
+                ('refs/tags/', 'T')]
+        _refs = {}
+        for ref, sha in refs.iteritems():
+            for k, type_ in keys:
+                if ref.startswith(k):
+                    _key = ref[len(k):]
+                    _refs[_key] = [sha, type_]
+                    break
+        return _refs
+
+    def _heads(self, reverse=False):
+        refs = self._repo.get_refs()
+        heads = {}
+
+        for key, val in refs.items():
+            for ref_key in ['refs/heads/', 'refs/remotes/origin/']:
+                if key.startswith(ref_key):
+                    n = key[len(ref_key):]
+                    if n not in ['HEAD']:
+                        heads[n] = val
+
+        return heads if reverse else dict((y, x) for x, y in heads.iteritems())
+
     def get_changeset(self, revision=None):
         """
         Returns ``GitChangeset`` object representing commit from git repository
             yield self.get_changeset(rev)
 
     def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
-            context=3):
+                 context=3):
         """
         Returns (git like) *diff*, as plain text. Shows changes introduced by
         ``rev2`` since ``rev1``.
         if ignore_whitespace:
             flags.append('-w')
 
+        if hasattr(rev1, 'raw_id'):
+            rev1 = getattr(rev1, 'raw_id')
+
+        if hasattr(rev2, 'raw_id'):
+            rev2 = getattr(rev2, 'raw_id')
+
         if rev1 == self.EMPTY_CHANGESET:
             rev2 = self.get_changeset(rev2).raw_id
             cmd = ' '.join(['show'] + flags + [rev2])
         # If error occurs run_git_command raises RepositoryError already
         self.run_git_command(cmd)
 
+    def pull(self, url):
+        """
+        Tries to pull changes from external location.
+        """
+        url = self._get_url(url)
+        cmd = ['pull']
+        cmd.append("--ff-only")
+        cmd.append(url)
+        cmd = ' '.join(cmd)
+        # If error occurs run_git_command raises RepositoryError already
+        self.run_git_command(cmd)
+
+    def fetch(self, url):
+        """
+        Tries to fetch changes from external location.
+        """
+        url = self._get_url(url)
+        cmd = ['fetch']
+        cmd.append(url)
+        cmd = ' '.join(cmd)
+        # If error occurs run_git_command raises RepositoryError already
+        self.run_git_command(cmd)
+
     @LazyProperty
     def workdir(self):
         """

vcs/utils/diffs.py

 from vcs.nodes import FileNode, NodeError
 
 
-def get_udiff(filenode_old, filenode_new,show_whitespace=True):
+def get_udiff(filenode_old, filenode_new, show_whitespace=True):
     """
     Returns unified diff between given ``filenode_old`` and ``filenode_new``.
     """
     try:
-        filenode_old_date = filenode_old.last_changeset.date
+        filenode_old_date = filenode_old.changeset.date
     except NodeError:
         filenode_old_date = None
 
     try:
-        filenode_new_date = filenode_new.last_changeset.date
+        filenode_new_date = filenode_new.changeset.date
     except NodeError:
         filenode_new_date = None
 

vcs/utils/hgcompat.py

-"""Mercurial libs compatibility
+"""
+Mercurial libs compatibility
+"""
 
-"""
 from mercurial import archival, merge as hg_merge, patch, ui
 from mercurial.commands import clone, nullid, pull
 from mercurial.context import memctx, memfilectx
 from mercurial.localrepo import localrepository
 from mercurial.match import match
 from mercurial.mdiff import diffopts
-from mercurial.node import hex
+from mercurial.node import hex
+from mercurial.encoding import tolocal
+from mercurial import discovery
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.