Commits

Patrick Mézard  committed 8648ccf

editor: process missing files with regular files

Missing files were stored directly in RevisionMeta and resolved after
the revision was replayed. This means the set of missing files was not
pruned by delete_entry() actions or by the filemap, and some of them
were fetched for no reason.

Say you convert:

A branch/foo/bar (from trunk/foo/bar:123)

with a filemap excluding "foo/bar". Since the directory was excluded in
trunk, the files could not be found and were marked as missing even
though they were discarded afterwards.

  • Participants
  • Parent commits b729909

Comments (0)

Files changed (5)

File hgsubversion/editor.py

 
     __slots__ = [
         'file', 'added', 'deleted', 'rev', 'execfiles', 'symlinks',
-        'copies', 'missing', 'emptybranches', 'base', 'externals', 'ui',
-        'exception', 'store', '_failonmissing',
+        'copies', 'emptybranches', 'base', 'externals', 'ui',
+        'exception', 'store',
     ]
 
     def __init__(self, ui):
         self.symlinks = {}
         # Map fully qualified destination file paths to module source path
         self.copies = {}
-        self.missing = set()
-        # Used in tests and debugging
-        self._failonmissing = self.ui.config(
-            'hgsubversion', 'failonmissing', False)
         self.emptybranches = {}
         self.externals = {}
         self.exception = None
         self.symlinks[path] = islink
         if path in self.deleted:
             del self.deleted[path]
-        if path in self.missing:
-            self.missing.remove(path)
         if copypath is not None:
             self.copies[path] = copypath
 
             files.update(g)
         return sorted(files)
 
-    def addmissing(self, path):
-        if self._failonmissing:
-            raise EditingError('missing entry: %s' % path)
-        self.missing.add(path)
-
-    def findmissing(self, svn):
-
-        if not self.missing:
-            return
-
-        msg = 'fetching %s files that could not use replay.\n'
-        self.ui.debug(msg % len(self.missing))
-        root = svn.subdir and svn.subdir[1:] or ''
-        r = self.rev.revnum
-
-        files = set()
-        for p in self.missing:
-            self.ui.note('.')
-            self.ui.flush()
-            if p[-1] == '/':
-                dir = p[len(root):]
-                new = [p + f for f, k in svn.list_files(dir, r) if k == 'f']
-                files.update(new)
-            else:
-                files.add(p)
-
-        i = 1
-        self.ui.note('\nfetching files...\n')
-        for p in files:
-            if self.ui.debugflag:
-                self.ui.debug('fetching %s\n' % p)
-            else:
-                self.ui.note('.')
-            self.ui.flush()
-            if i % 50 == 0:
-                svn.init_ra_and_client()
-            i += 1
-            data, mode = svn.get_file(p[len(root):], r)
-            self.set(p, data, 'x' in mode, 'l' in mode)
-
-        self.missing = set()
-        self.ui.note('\n')
-
     def close(self):
         self.store.close()
 
         self.current = RevisionData(meta.ui)
         self._clear()
 
+    def setsvn(self, svn):
+        self._svn = svn
+
     def _clear(self):
         self._filecounter = 0
         # A mapping of svn paths to CopiedFile entries
         self._getctx = util.lrucachefunc(self.repo.changectx, 3)
         # A stack of opened directory (baton, path) pairs.
         self._opendirs = []
+        self._missing = set()
 
     def _openfile(self, path, data, isexec, islink, copypath, create=False):
         if path in self._openpaths:
         self._deleted.add(path)
         if path in self._svncopies:
             del self._svncopies[path]
+        self._missing.discard(path)
+
+    def addmissing(self, path, isdir=False):
+        svn = self._svn
+        root = svn.subdir and svn.subdir[1:] or ''
+        if not isdir:
+            self._missing.add(path[len(root):])
+        else:
+            # Resolve missing directories content immediately so the
+            # missing files may be processed by delete actions.
+            rev = self.current.rev.revnum
+            path = path + '/'
+            parentdir = path[len(root):]
+            for f, k in svn.list_files(parentdir, rev):
+                if k != 'f':
+                    continue
+                f = parentdir + f
+                if not self.meta.is_path_valid(f, False):
+                    continue
+                self._missing.add(f)
 
     @svnwrap.ieditor
     def delete_entry(self, path, revision_bogus, parent_baton, pool=None):
         for f in list(self._svncopies):
             if f.startswith(prefix):
                 self._deletefile(f)
+        if path in self._missing:
+            self._missing.remove(path)
+        else:
+            for f in list(self._missing):
+                if f.startswith(prefix):
+                    self._missing.remove(f)
 
         if br_path is not None:
             ha = self.meta.get_parent_revision(self.current.rev.revnum, branch)
         parent = self.meta.get_parent_revision(baserev + 1, branch, True)
         ctx = self._getctx(parent)
         if fpath not in ctx:
-            self.current.addmissing(path)
+            self.addmissing(path)
             return None
 
         fctx = ctx.filectx(fpath)
         (from_file,
          from_branch) = self.meta.split_branch_path(copyfrom_path)[:2]
         if not from_file:
-            self.current.addmissing(path)
+            self.addmissing(path)
             return None
         # Use exact=True because during replacements ('R' action) we select
         # replacing branch as parent, but svn delta editor provides delta
                                            from_branch, True)
         ctx = self._getctx(ha)
         if from_file not in ctx:
-            self.current.addmissing(path)
+            self.addmissing(path)
             return None
 
         fctx = ctx.filectx(from_file)
                 # existing=False to guess a possible branch location and
                 # test it against the filemap. The actual path and
                 # revision will be resolved below if necessary.
-                self.current.addmissing('%s/' % path)
+                self.addmissing(path, isdir=True)
                 return baton
         if tag:
             changeid = self.meta.tags[tag]
             frompath, source_branch = self.meta.split_branch_path(copyfrom_path)[:2]
         new_hash = self.meta.get_parent_revision(source_rev + 1, source_branch, True)
         if new_hash == node.nullid:
-            self.current.addmissing('%s/' % path)
+            self.addmissing(path, isdir=True)
             return baton
         fromctx = self._getctx(new_hash)
         if frompath != '/' and frompath != '':
                         path, target, isexec, islink, copypath)
             except svnwrap.SubversionException, e: # pragma: no cover
                 if e.args[1] == svnwrap.ERR_INCOMPLETE_DATA:
-                    self.current.addmissing(path)
+                    self.addmissing(path)
                 else: # pragma: no cover
                     raise hgutil.Abort(*e.args)
             except: # pragma: no cover
                 self.current.set(path, data, isexec, islink, copied)
         self._svncopies.clear()
 
+        # Resolve missing files
+        if self._missing:
+            missing = sorted(self._missing)
+            self.ui.debug('fetching %s files that could not use replay.\n'
+                    % len(missing))
+            if self.ui.configbool('hgsubversion', 'failonmissing', False):
+                raise EditingError('missing entry: %s' % missing[0])
+
+            svn = self._svn
+            rev = self.current.rev.revnum
+            root = svn.subdir and svn.subdir[1:] or ''
+            i = 1
+            for f in missing:
+                if self.ui.debugflag:
+                    self.ui.debug('fetching %s\n' % f)
+                else:
+                    self.ui.note('.')
+                self.ui.flush()
+                if i % 50 == 0:
+                    svn.init_ra_and_client()
+                i += 1
+                data, mode = svn.get_file(f, rev)
+                self.current.set(f, data, 'x' in mode, 'l' in mode)
+            if not self.ui.debugflag:
+                self.ui.note('\n')
+
         for f in self._deleted:
             self.current.delete(f)
         self._deleted.clear()

File hgsubversion/replay.py

     editor = meta.editor
     editor.current.clear()
     editor.current.rev = r
+    editor.setsvn(svn)
 
     if firstrun and meta.revmap.oldest <= 0:
         # We know nothing about this project, so fetch everything before
     editor.close()
 
     current = editor.current
-    current.findmissing(svn)
 
     updateexternals(ui, meta, current)
 
     if current.exception is not None:  # pragma: no cover
         traceback.print_exception(*current.exception)
         raise ReplayException()
-    if current.missing:
-        raise MissingPlainTextError()
 
     files_to_commit = current.files()
     branch_batches = {}

File tests/fixtures/copies.sh

 svn cp trunk/dir trunk/dir2
 echo b >> trunk/dir2/a
 svn ci -m 'copy/edit trunk/dir/a'
+svn up
+svn cp trunk/dir2 trunk/dir3
+svn ci -m 'copy dir2 to dir3'
 cd ..
 
 svnadmin dump testrepo > ../copies.svndump

File tests/fixtures/copies.svndump

 SVN-fs-dump-format-version: 2
 
-UUID: 707bea87-43e4-45d9-8f28-5d06ca9e3f3b
+UUID: f9962aa6-eec5-4335-8af9-9ae89f4b18b2
 
 Revision-number: 0
 Prop-content-length: 56
 K 8
 svn:date
 V 27
-2012-10-14T12:41:39.387675Z
+2012-10-14T14:22:33.372222Z
 PROPS-END
 
 Revision-number: 1
 K 8
 svn:date
 V 27
-2012-10-14T12:41:39.409053Z
+2012-10-14T14:22:33.393643Z
 K 7
 svn:log
 V 4
 K 8
 svn:date
 V 27
-2012-10-14T12:41:41.048526Z
+2012-10-14T14:22:35.042430Z
 K 7
 svn:log
 V 21
 b
 
 
+Revision-number: 3
+Prop-content-length: 119
+Content-length: 119
+
+K 10
+svn:author
+V 7
+pmezard
+K 8
+svn:date
+V 27
+2012-10-14T14:22:38.041919Z
+K 7
+svn:log
+V 17
+copy dir2 to dir3
+PROPS-END
+
+Node-path: trunk/dir3
+Node-kind: dir
+Node-action: add
+Node-copyfrom-rev: 2
+Node-copyfrom-path: trunk/dir2
+
+

File tests/test_fetch_mappings.py

         all_tests = set(test)
         self.assertEqual(fromself.symmetric_difference(all_tests), set())
 
-    def _loadwithfilemap(self, svndump, filemapcontent, stupid=False):
+    def _loadwithfilemap(self, svndump, filemapcontent, stupid=False,
+            failonmissing=True):
         repo_path = self.load_svndump(svndump)
         filemap = open(self.filemap, 'w')
         filemap.write(filemapcontent)
         ui = self.ui(stupid)
         ui.setconfig('hgsubversion', 'filemap', self.filemap)
         ui.setconfig('hgsubversion', 'failoninvalidreplayfile', 'true')
-        ui.setconfig('hgsubversion', 'failonmissing', 'true')
+        ui.setconfig('hgsubversion', 'failonmissing', failonmissing)
         commands.clone(ui, test_util.fileurl(repo_path),
                        self.wc_path, filemap=self.filemap)
         return self.repo
                          ['alpha', 'beta'])
 
     def test_file_map_copy(self):
-        repo = self._loadwithfilemap('copies.svndump', "exclude dir2\n")
+        # Exercise excluding files copied from a non-excluded directory.
+        # There will be missing files as we are copying from an excluded
+        # directory.
+        repo = self._loadwithfilemap('copies.svndump', "exclude dir2\n",
+                failonmissing=False)
+        self.assertEqual(['dir/a', 'dir3/a'], list(repo['tip']))
+
+    def test_file_map_exclude_copy_source_and_dest(self):
+        # dir3 is excluded and copied from dir2 which is also excluded.
+        # dir3 files should not be marked as missing and fetched.
+        repo = self._loadwithfilemap('copies.svndump',
+                "exclude dir2\nexclude dir3\n")
         self.assertEqual(['dir/a'], list(repo['tip']))
 
+    def test_file_map_include_file_exclude_dir(self):
+        # dir3 is excluded but we want dir3/a, which is also copied from
+        # an excluded dir2. dir3/a should be fetched.
+        repo = self._loadwithfilemap('copies.svndump',
+                "include .\nexclude dir2\nexclude dir3\ninclude dir3/a\n",
+                failonmissing=False)
+        self.assertEqual(['dir/a', 'dir3/a'], list(repo['tip']))
+
     def test_branchmap(self, stupid=False):
         repo_path = self.load_svndump('branchmap.svndump')
         branchmap = open(self.branchmap, 'w')