Commits

Wez Furlong committed fff45e1

tidy up output and refine the branch creation optimization

  • Participants
  • Parent commits d0bc1d0

Comments (0)

Files changed (2)

File svn2hg/SvnImporter.py

     self.path = path
     # maps branch->svnrev back to hg commit
     self.commitmap = {}
+    self.commithashmap = {}
     self.last_search_key = None
     self.last_search_res = None
     # tags created in the last svn commit
     k = _compute_commit_key(rdata)
     if not (k in self.commitmap):
       self.commitmap[k] = []
+      self.commithashmap[k] = {}
     self.commitmap[k].append((srev, bin(hrev)))
-    self.ui.write("commit [%s][%d] -> %s\n" % (k, srev, hrev))
+    self.commithashmap[k][srev] = hrev
+    self.ui.write("r%d %s commit %s\n" % (srev, k, hrev))
+
+  """Write out a file recording the svn -> hg revision mapping"""
+  def record_mapping(self):
+    f = open(self.repo.path + "/svn2hg.json", "w")
+    try:
+      json.dump(self.commithashmap, f, indent = 1)
+    finally:
+      f.close()
 
   """searches for the hg commit best matching a given svn revision"""
   def search_commit(self, rdata, srev):
     if not (k in self.commitmap):
       return None
     if (k, srev) == self.last_search_key:
-      self.ui.write("searching for commit [%s][%d] => %s (cached)\n" % (
+      self.ui.debug("searching for commit [%s][%d] => %s (cached)\n" % (
         k, srev, self.last_search_res))
       return self.last_search_res
     m = self.commitmap[k]
 
-    self.ui.write("searching for commit [%s][%d]\n" % (k, srev))
+    self.ui.debug("searching for commit [%s][%d]\n" % (k, srev))
     hi = len(m)
 
     # likely that we want the very last one
     (midval, hrev) = m[hi - 1]
     if midval <= srev:
       hrev = hex(hrev)
-      self.ui.write("last element is %d, best match %s\n" % (midval, hrev))
+      self.ui.debug("last element is %d, best match %s\n" % (midval, hrev))
       self.last_search_key = (k, srev)
       self.last_search_res = hrev;
       return hrev
     while lo < hi:
       mid = (lo+hi)//2
       (midval, hrev) = m[mid]
-      self.ui.write("consider %d (want %d)\n" % (midval, srev))
+      self.ui.debug("consider %d (want %d)\n" % (midval, srev))
       if midval < srev:
         lo = mid+1
       elif midval > srev:
         hi = mid
       else:
         hrev = hex(hrev)
-        self.ui.write("  took %d steps -> %s\n" % (steps, hrev))
+        self.ui.debug("  took %d steps -> %s\n" % (steps, hrev))
         self.last_search_key = (k, srev)
         self.last_search_res = hrev;
         return hrev
       steps = steps + 1
 
-    self.ui.write("  took %d steps -> <none>\n" % (steps))
+    self.ui.debug("  took %d steps -> <none>\n" % (steps))
     self.last_search_key = (k, srev)
     self.last_search_res = None;
 
     return None
 
   def apply_changes(self, root, k, srev, changes):
-    self.ui.write("r%d %s\n" % (srev.rev, srev.author))
-    self.ui.write("%s\n" % k)
+    self.ui.write("\nr%d %s %s\n" % (srev.rev, k, srev.author))
 
     rule = changes['rule']
     files = {}
       parents[p1] = p1
 
     fctx = None
-    branch_create = False
     branch_delete = False
 
     for (action, iname, sname, skind, copysname, copysrev,
           copykind, copycontainer, copyitem, itemrule, copyrule
           ) in changes['items']:
-      self.ui.write("  %s %d %s\n" % (action, skind, iname))
+      self.ui.debug("  %s %d %s\n" % (action, skind, iname))
 
+      branch_create = False
       if iname == '' and action == 'D':
         branch_delete = True
 
       if p1 is None and iname == '' and copyrule and (
           action == 'A' or action == 'R'):
+        # when we set branch_create, we are telling the dir copy code
+        # below that it does not need to explicitly enumerate the
+        # files and tell hg about them.  Doing this saves about 20 minutes
+        # of conversion runtime and 20MB of repo size for our 10yr old repo
         branch_create = True
         p1 = self.search_commit(copyrule, copysrev)
 
         nameslash = "%s/" % iname
         for f in fctx:
           if f.startswith(nameslash):
-            self.ui.write("  -> D %s\n" % f)
+            self.ui.debug("  -> D %s\n" % f)
             files[f] = ('D', None, None)
 
       if copyrule:
         p2 = self.search_commit(copyrule, copysrev)
         if p2:
           parents[p2] = p2
+          # probably unsafe to do branch creation optimizations when we
+          # have two parents
+          if branch_create and p2 != p1:
+            branch_create = False
+
 
       # hg doesn't version directories, so we need to perform some
       # translation
       if skind == svn_node_dir:
-        if copysname and (action == 'R' or action == 'A') and not (
-            branch_create and p2):
+        if copysname and (action == 'R' or action == 'A') and not branch_create:
           # Now additions for those from svn
           for f, ent in self.svn_ls(sname, srev.rev).iteritems():
             if f == '' or f == None or ent['kind'] == svn_node_dir:
                     f, rule['name'], fcont))
             # in theory, we could populate the copysrc too
             files[frem] = (action, fname, frem)
-        elif copysname and not (branch_create and p2):
+        elif copysname and not branch_create:
           raise RuntimeError("need to handle dir copy %s@%d %s %s %s" % (
             copysname, copysrev, copykind, copycontainer, copyitem))
 
     else:
       p2 = plist.pop(0)
       log = srev.logmsg
-      self.ui.write("%d files\n" % len(files))
+      self.ui.debug("%d files\n" % len(files))
       while plist:
         p1 = p2
         p2 = plist.pop(0)
         del srev
 
       self.prune()
+
+    self.record_mapping()

File svn2hg/__init__.py

       authors = json.load(f)
     finally:
       f.close()
+  else:
+    authors = None
 
   target = opts.get('target')
   if target: