Commits

Keshav Kini committed e58a6d0

Remove illegal characters from username/email

Includes a doctest and tests in tests/test-hg-author for the correct behavior.

Comments (0)

Files changed (4)

 help:
 	@echo 'Commonly used make targets:'
 	@echo '  tests              - run all tests in the automatic test suite'
-	@echo '  all-version-tests - run all tests against many hg versions'
+	@echo '  all-version-tests  - run all tests against many hg versions'
 	@echo '  tests-%s           - run all tests in the specified hg version'
 
 all: help

hggit/git_handler.py

         return commit.id
 
     def get_valid_git_username_email(self, name):
-        return name.lstrip('< ').rstrip('> ')
+        r"""Sanitize usernames and emails to fit git's restrictions.
+
+        The following is taken from the man page of git's fast-import
+        command:
+
+            [...] Likewise LF means one (and only one) linefeed [...]
+
+            committer
+                The committer command indicates who made this commit,
+                and when they made it.
+
+                Here <name> is the person's display name (for example
+                "Com M Itter") and <email> is the person's email address
+                ("cm@example.com[1]"). LT and GT are the literal
+                less-than (\x3c) and greater-than (\x3e) symbols. These
+                are required to delimit the email address from the other
+                fields in the line. Note that <name> and <email> are
+                free-form and may contain any sequence of bytes, except
+                LT, GT and LF. <name> is typically UTF-8 encoded.
+
+        Accordingly, this function makes sure that there are none of the
+        characters <, >, or \n in any string which will be used for
+        a git username or email. Before this, it first removes left
+        angle brackets and spaces from the beginning, and right angle
+        brackets and spaces from the end, of this string, to convert
+        such things as " <john@doe.com> " to "john@doe.com" for
+        convenience.
+
+        TESTS:
+
+        >>> from mercurial.ui import ui
+        >>> g = GitHandler('', ui()).get_valid_git_username_email
+        >>> g('John Doe')
+        'John Doe'
+        >>> g('john@doe.com')
+        'john@doe.com'
+        >>> g(' <john@doe.com> ')
+        'john@doe.com'
+        >>> g('    <random<\n<garbage\n>  > > ')
+        'random???garbage?'
+        >>> g('Typo in hgrc >but.hg-git@handles.it.gracefully>')
+        'Typo in hgrc ?but.hg-git@handles.it.gracefully'
+        """
+        return re.sub('[<>\n]', '?', name.lstrip('< ').rstrip('> '))
 
     def get_git_author(self, ctx):
         # hg authors might not have emails
         a = regex.match(author)
 
         if a:
-            name = a.group(1)
-            email = a.group(2)
+            name = self.get_valid_git_username_email(a.group(1))
+            email = self.get_valid_git_username_email(a.group(2))
             if a.group(3) != None and len(a.group(3)) != 0:
                 name += ' ext:(' + urllib.quote(a.group(3)) + ')'
             author = self.get_valid_git_username_email(name) + ' <' + self.get_valid_git_username_email(email) + '>'
             ctx = self.repo[rev]
             if getattr(ctx, 'bookmarks', None):
                 labels = lambda c: ctx.tags() + [
-                                fltr for fltr, bm 
+                                fltr for fltr, bm
                                 in self._filter_for_bookmarks(ctx.bookmarks())
                             ]
             else:
                 bms = bookmarks.parse(self.repo)
             else:
                 bms = self.repo._bookmarks
-            return dict([(filtered_bm, hex(bms[bm])) for 
+            return dict([(filtered_bm, hex(bms[bm])) for
                         filtered_bm, bm in self._filter_for_bookmarks(bms)])
         except AttributeError: #pragma: no cover
             return {}
                 real_branch_names = self.repo.branchmap()
                 bms = dict(
                     (
-                        bm_name + self.branch_bookmark_suffix 
+                        bm_name + self.branch_bookmark_suffix
                             if bm_name in real_branch_names
                         else bm_name,
                         bms[bm_name]

tests/test-hg-author

 hgcommit -u "test < test@example.com >" -m 'add eta'
 hg push
 
+echo theta > theta
+hg add theta
+hgcommit -u "test >test@example.com>" -m 'add theta'
+hg push
+
 hg log --graph | egrep -v ': *(not-master|master)'
 
 cd ..

tests/test-hg-author.out

 creating and sending data
     default::refs/heads/not-master => GIT:7eeab2ea
     default::refs/heads/master => GIT:8c878c97
-@  changeset:   7:b90e988091a2
+pushing to git://localhost/gitrepo
+exporting hg objects to git
+creating and sending data
+    default::refs/heads/not-master => GIT:7eeab2ea
+    default::refs/heads/master => GIT:1e03e913
+@  changeset:   8:d3c51ce68cfd
 |  tag:         default/master
 |  tag:         tip
+|  user:        test >test@example.com>
+|  date:        Mon Jan 01 00:00:18 2007 +0000
+|  summary:     add theta
+|
+o  changeset:   7:b90e988091a2
 |  user:        test < test@example.com >
 |  date:        Mon Jan 01 00:00:17 2007 +0000
 |  summary:     add eta
    summary:     add alpha
 
 importing git objects into hg
-7 files updated, 0 files merged, 0 files removed, 0 files unresolved
-@  changeset:   7:8ab87d5066e4
+8 files updated, 0 files merged, 0 files removed, 0 files unresolved
+@  changeset:   8:efec0270e295
 |  tag:         default/master
 |  tag:         tip
+|  user:        test ?test@example.com <test ?test@example.com>
+|  date:        Mon Jan 01 00:00:18 2007 +0000
+|  summary:     add theta
+|
+o  changeset:   7:8ab87d5066e4
 |  user:        test <test@example.com>
 |  date:        Mon Jan 01 00:00:17 2007 +0000
 |  summary:     add eta
    date:        Mon Jan 01 00:00:10 2007 +0000
    summary:     add alpha
 
+commit 1e03e913eca571b86ee06d3c1ddd795dde9ca917
+Author: test ?test@example.com <test ?test@example.com>
+Date:   Mon Jan 1 00:00:18 2007 +0000
+
+    add theta
+
 commit 8c878c9764e96e67ed9f62b3f317d156bf71bc52
 Author: test <test@example.com>
 Date:   Mon Jan 1 00:00:17 2007 +0000