Commits

Anonymous committed 895a1b8

Do a better job of tracking different lines of code.
Allow setting bookname in a rule. At the end of a conversion, any
commits recorded against bookmarks are applied to the bookmarks in the
repo. This adds another dimension to the codeline tracking features.

Expose max_rev as a command line option.

Add option to defer recording tags until all processing is complete.
This is useful because hg will take the superset of all tags on all
heads, rather than just those on the default branch. In repos that
branch off from trunk, "hg tags" would otherwise show old, deleted tags.

Comments (0)

Files changed (2)

svn2hg/SvnImporter.py

 # See LICENSE.md for usage and distribution terms
 
 from mercurial import encoding, cmdutil, util, commands, hg, context, scmutil
+from mercurial import bookmarks
 from mercurial.node import bin, hex, nullid
 from mercurial.i18n import _
 
 branch, tag and bookmark name and allows us to track tag and bookmarks
 as separate heads from the bare branch heads"""
 def _compute_commit_key(rdata):
-  k = rdata['kind'] + '@' + rdata['name']
+  k = rdata['kind'] + '@' + rdata['name'] + '@' + rdata['codeline']
   if 'tagname' in rdata:
     k = k + '@T@' + rdata['tagname']
   if 'bookname' in rdata:
     k = k + '@B@' + rdata['bookname']
+  if 'min' in rdata:
+    k = k + '<' + rdata['min']
+  if 'max' in rdata:
+    k = k + '>' + rdata['max']
   return k
 
 """Represents the compiled ruleset"""
         # the container; caller needs this to locate the file properly
         rem_name = path[m.end():]
 
+        codeline = path[:m.end()]
+        if codeline.endswith('/'):
+          codeline = codeline[:-1]
+
         rdata = {
             "name": container_name,
             "kind": kind,
-            "itemname": rem_name
+            "itemname": rem_name,
+            "codeline": codeline,
         }
-        for n in ['tagname', 'bookname']:
+        if maxrev:
+          rdata['max'] = maxrev
+        if minrev:
+          rdata['min'] = minrev
+        for n in ['tagname', 'bookname', 'codeline']:
           tn = rule.get(n)
           if tn:
             rdata[n] = m.expand(tn)
         return (kind, container_name, rem_name, rdata)
 
     raise RuntimeError("no rule matches path %s, cannot continue" % path)
-    return ('unknown', None, None, None)
 
 
 """Represents changes in a revision, groups by branches"""
 
 class SvnImport:
   def __init__(self, ui, repo, path):
+    self.tag_at_end = False
     self.ui = ui
     self.repo = repo
     self.path = path
     # tags created in the last svn commit
     self.tags = {}
     self.tags_changed = False
+    # bookmarks recorded during conversion (applied at end)
+    self.bookmarks = {}
 
     self.repourl = 'file://%s' % path
     # Set up access to the SVN repo
     return self.get_client().list(path = "%s/%s" % (self.repourl, path),
         peg_revision = rev, revision = rev, depth = depth)
 
+  """records the bookmarks set by 'bookname' properties"""
+  def record_bookmarks(self):
+    if len(self.bookmarks):
+      self.ui.status("updating bookmarks\n")
+      for mark in self.bookmarks:
+        self.repo._bookmarks[mark] = self.bookmarks[mark]
+      bookmarks.write(self.repo)
+
   """records the head created by a mercurial commit."""
   def record_commit(self, rdata, srev, hrev):
     k = _compute_commit_key(rdata)
       self.commithashmap[k] = {}
     self.commitmap[k].append((srev, bin(hrev)))
     self.commithashmap[k][srev] = hrev
-    self.ui.write("r%d %s commit %s\n" % (srev, k, hrev))
+    if 'bookname' in rdata:
+      name = rdata['bookname']
+      self.bookmarks[name] = bin(hrev)
+      self.ui.write("r%d %s commit %s %s\n" % (srev, k, hrev, name))
+    else:
+      self.ui.write("r%d %s commit %s\n" % (srev, k, hrev))
 
   """Write out a file recording the svn -> hg revision mapping"""
   def record_mapping(self):
 
     rule = changes['rule']
     files = {}
-    extra = { 'branch': rule['name'] }
+    extra = {
+        'branch': rule['name'],
+        'svn2hg_rev': srev.rev,
+        'svn2hg_codeline': rule['codeline'],
+    }
     parents = {}
 
     p1 = self.search_commit(rule, srev.rev)
       parents[p1] = p1
 
     fctx = None
+    # if we end the iteration below with this set to true, we deleted
+    # the files from the branch and need to close its head
     branch_delete = False
+    # if we end the iteration below with this set to true, we deleted
+    # a tag and don't need to record any file changes; just a tag removal
+    tag_delete = False
 
     for (action, iname, sname, skind, copysname, copysrev,
           copykind, copycontainer, copyitem, itemrule, copyrule
           ) in changes['items']:
       self.ui.debug("  %s %d %s\n" % (action, skind, iname))
 
+      # If we set branch_create in this loop, we don't need to copy source
+      # files into hg from svn as they are already present in the source
+      # branch that we already have tracked in hg
       branch_create = False
+
+      # Track deletion state
+      branch_delete = False
+      tag_delete = False
       if iname == '' and action == 'D':
         branch_delete = True
+        if rule['kind'] == 'tag':
+          tag_delete = True
+          continue
 
-      if p1 is None and iname == '' and copyrule and (
+      if p1 is None and iname == '' and copyitem == '' and copyrule and (
           action == 'A' or action == 'R'):
         # when we set branch_create, we are telling the dir copy code
         # below that it does not need to explicitly enumerate the
     if branch_delete or rule['kind'] == 'tag':
       extra['close'] = 1;
 
-    if len(files) == 0 and branch_delete and rule['kind'] == 'tag':
+    if tag_delete:
       del self.tags[rule['tagname']]
       self.tags_changed = True
     else:
         self.tags[rule['tagname']] = hex(res)
         self.tags_changed = True
 
-    self.record_commit(rule, srev.rev, hex(res))
+      self.record_commit(rule, srev.rev, hex(res))
 
   """If we recorded activity for tags, apply those changes to the default
   branch so that they are tracked in the tags file"""
 
     rule = {
         'name': 'default',
-        'kind': 'branch'
+        'kind': 'branch',
+        'codeline': 'trunk'
     }
+
+    if self.tag_at_end:
+      srev.rev = srev.rev + 1
+      srev.logmsg = "(apply deferred tags)"
+      srev.author = self.authors['anonymous']
+
     p1 = self.search_commit(rule, srev.rev)
 
     def gettagctx(repo, memctx, f):
     res = self.repo.commitctx(ctx)
     self.record_commit(rule, srev.rev, hex(res))
 
-  def process(self, rules, authors):
+  def process(self, rules, authors, opts):
     ra = self.get_RA(False)
     max_rev = ra.get_latest_revnum()
 
     # compile rules
     rules = import_rules(rules, authors)
     self.rules = rules
+    self.authors = authors
 
-    #max_rev = 790
+    if opts.get('max_rev'):
+      max_rev = int(opts.get('max_rev'))
+    if opts.get('tag_at_end'):
+      self.tag_at_end = True
     step = 8192
 
-
     for rmin in xrange(0, max_rev, step):
       ra = self.get_RA(False)
       end = rmin + step - 1
         for k in srev.items:
           self.apply_changes(root, k, srev, srev.items[k])
 
-        self.update_tags(srev)
-
-        del srev
+        if not self.tag_at_end:
+          self.update_tags(srev)
 
       self.prune()
 
+    if self.tag_at_end:
+      self.update_tags(srev)
+    self.record_bookmarks()
     self.record_mapping()
   if target:
     repo = hg.repository(ui, target, create = True)
   imp = SvnImport(ui, repo, repopaths[0])
-  imp.process(rules, authors)
+  imp.process(rules, authors, opts)
 
 
 cmdtable = {
           ('', 'target', '', _('destination hg repo path'), _('TREPO')),
           ('', 'rules', '', _('rules file (.json)'), _('RULES')),
           ('', 'authors', '', _('authors file (.json)'), _('AUTHORS')),
+          ('', 'tag-at-end', '', _('whether to record tags at the end'), _('TAGATEND')),
+          ('', 'max-rev', '', _('maximum svn revision to process'), _('MAXREV')),
         ],
         'hg svn2hg repo'
       )