hgsubversion-queue / verify-editor.diff

# HG changeset patch
# User Dan Villiom Podlaski Christiansen <danchr@gmail.com>
# Date 1323817678 -3600
# Node ID 6b969673ca25c13f47117a9c5a0f4c9c9d9feedd
# Parent  0cc046e3b3af81cda55617c59573b9d5b51a27e1
svn verify: use a custom editor and get_revision()

Previously, we would fetch each file in the revision/changeset
individually. With this change, we fetch the entire revision in one
request, and use a custom editor to verify its contents. This is quite
a lot faster than the previous means when verifying over the internet.
By an order of magnitude or two, in fact. As data is transferred in a
single operation, verifying a revision from PyPy took 30 seconds
rather than 30 minutes, and saturated my 10Mbps connection.

In addition, flag corruption is now distinguished from data
corruption. Other than that, the only difference should be that files
appear to be visited in the reverse of the previous order.

diff --git a/hgsubversion/editor.py b/hgsubversion/editor.py
--- a/hgsubversion/editor.py
+++ b/hgsubversion/editor.py
@@ -368,3 +368,113 @@ class HgEditor(svnwrap.Editor):
                 self._exception_info = sys.exc_info()
                 raise
         return txdelt_window
+
+class Verifier(svnwrap.Editor):
+    '''editor that verifies a repository against the given context
+
+    Each file reported by Subversion is compared, contents and flags,
+    against the same path in ctx. Problems are reported through
+    ui.warn() and remembered; call check() afterwards for the verdict.
+    '''
+    def __init__(self, ui, ctx):
+        self.ui = ui
+        self.ctx = ctx
+        # files in ctx that Subversion has not reported yet
+        self.unseen = set(ctx) - util.ignoredfiles
+        self.failed = False
+        # path and properties of the file being verified; both stay None
+        # while there is no file to compare against
+        self.file = None
+        self.props = None
+
+    def open_root(self, base_revnum, pool=None):
+        pass
+
+    def add_directory(self, path, parent_baton, copyfrom_path,
+                      copyfrom_revision, pool=None):
+        # entering a directory: no file is being verified
+        self.file = None
+        self.props = None
+
+    def open_directory(self, path, parent_baton, base_revision, pool=None):
+        # entering a directory: no file is being verified
+        self.file = None
+        self.props = None
+
+    def add_file(self, path, parent_baton=None, copyfrom_path=None,
+                 copyfrom_revision=None, file_pool=None):
+        '''start verifying path, or report it as extra if unexpected'''
+        try:
+            self.unseen.remove(path)
+            self.file = path
+            self.props = {}
+        except KeyError:
+            self.ui.warn('extra file: %s\n' % path)
+            self.failed = True
+            self.file = None
+            self.props = None
+
+    def open_file(self, path, base_revnum):
+        raise NotImplementedError()
+
+    def apply_textdelta(self, file_baton, base_checksum, pool=None):
+        '''return a window handler that verifies the current file
+
+        Returns None when there is no current file to verify.
+        '''
+        stream = cStringIO.StringIO()
+        handler = svnwrap.apply_txdelta('', stream)
+        if not callable(handler):
+            raise hgutil.Abort('Error in Subversion bindings: '
+                               'cannot call handler!')
+        def txdelt_window(window):
+            handler(window)
+            # window being None means we're done
+            if window:
+                return
+
+            fctx = self.ctx[self.file]
+            hgdata = fctx.data()
+            svndata = stream.getvalue()
+
+            if 'svn:executable' in self.props:
+                if fctx.flags() != 'x':
+                    self.ui.warn('wrong flags for: %s\n' % self.file)
+                    self.failed = True
+            elif 'svn:special' in self.props:
+                # compare against the hg data prefixed with 'link '
+                hgdata = 'link ' + hgdata
+                if fctx.flags() != 'l':
+                    self.ui.warn('wrong flags for: %s\n' % self.file)
+                    self.failed = True
+            elif fctx.flags():
+                self.ui.warn('wrong flags for: %s\n' % self.file)
+                self.failed = True
+
+            if hgdata != svndata:
+                self.ui.warn('difference in: %s\n' % self.file)
+                self.failed = True
+
+        if self.file is not None:
+            return txdelt_window
+
+    def change_dir_prop(self, dir_baton, name, value, pool=None):
+        pass
+
+    def change_file_prop(self, file_baton, name, value, pool=None):
+        # properties are only recorded for a file being verified
+        if self.props is not None:
+            self.props[name] = value
+
+    def close_directory(self, dir_baton, pool=None):
+        pass
+
+    def delete_entry(self, path, revnum, pool=None):
+        raise NotImplementedError()
+
+    def check(self):
+        '''warn about files never seen; return True if nothing failed'''
+        for f in self.unseen:
+            self.ui.warn('missing file: %s\n' % f)
+            self.failed = True
+        return not self.failed
diff --git a/hgsubversion/svncommands.py b/hgsubversion/svncommands.py
--- a/hgsubversion/svncommands.py
+++ b/hgsubversion/svncommands.py
@@ -15,6 +15,7 @@ import svnwrap
 import svnrepo
 import util
 import svnexternals
+import editor
 
 
 def verify(ui, repo, args=None, **opts):
@@ -47,39 +48,48 @@ def verify(ui, repo, args=None, **opts):
 
     ui.write('verifying %s against %s@%i\n' % (ctx, branchurl, srev))
 
-    svnfiles = set()
-    result = 0
+    if ui.configbool('hgsubversion', 'stupid'):
+        svnfiles = set()
+        result = 0
 
-    svndata = svn.list_files(branchpath, srev)
-    for i, (fn, type) in enumerate(svndata):
-        util.progress(ui, 'verify', i)
-        if type != 'f':
-            continue
-        elif fn not in ctx:
-            ui.write('extra file: %s\n' % fn)
-            result = 1
-            continue
-        svnfiles.add(fn)
-        fp = fn
-        if branchpath:
-            fp = branchpath + '/' + fn
-        data, mode = svn.get_file(posixpath.normpath(fp), srev)
-        try:
-            fctx = ctx[fn]
-        except error.LookupError:
-            result = 1
-            continue
-        if not fctx.data() == data:
-            ui.write('difference in: %s\n' % fn)
-            result = 1
-        if not fctx.flags() == mode:
-            ui.write('wrong flags for: %s\n' % fn)
+        svndata = svn.list_files(branchpath, srev)
+        for i, (fn, type) in enumerate(svndata):
+            util.progress(ui, 'verify', i)
+            if type != 'f':
+                continue
+            elif fn not in ctx:
+                ui.write('extra file: %s\n' % fn)
+                result = 1
+                continue
+            svnfiles.add(fn)
+            fp = fn
+            if branchpath:
+                fp = branchpath + '/' + fn
+            data, mode = svn.get_file(posixpath.normpath(fp), srev)
+            try:
+                fctx = ctx[fn]
+            except error.LookupError:
+                result = 1
+                continue
+            if not fctx.data() == data:
+                ui.write('difference in: %s\n' % fn)
+                result = 1
+            if not fctx.flags() == mode:
+                ui.write('wrong flags for: %s\n' % fn)
+                result = 1
+
+        missing = set(ctx) - util.ignoredfiles - svnfiles
+        for fn in missing:
+            ui.write('missing file: %s\n' % fn)
             result = 1
 
-    missing = set(ctx) - util.ignoredfiles - svnfiles
-    for fn in missing:
-        ui.write('missing file: %s\n' % fn)
-        result = 1
+    else:
+        v = editor.Verifier(ui, ctx)
+        svnrepo.svnremoterepo(ui, branchurl).svn.get_revision(srev, v)
+        if v.check():
+            result = 0
+        else:
+            result = 1
 
     return result
 
diff --git a/tests/comprehensive/test_verify.py b/tests/comprehensive/test_verify.py
--- a/tests/comprehensive/test_verify.py
+++ b/tests/comprehensive/test_verify.py
@@ -23,6 +23,9 @@ def _do_case(self, name, stupid, layout)
     for i in repo:
         ctx = repo[i]
         self.assertEqual(svncommands.verify(repo.ui, repo, rev=ctx.node()), 0)
+        stupidui = ui.ui(repo.ui)
+        stupidui.setconfig('hgsubversion', 'stupid', True)
+        self.assertEqual(svncommands.verify(stupidui, repo, rev=ctx.node()), 0)
 
 def buildmethod(case, name, stupid, layout):
     m = lambda self: self._do_case(case, stupid, layout)
@@ -33,10 +36,11 @@ def buildmethod(case, name, stupid, layo
 
 skipall = set([
     'project_root_not_repo_root.svndump',
+    'corrupt.svndump',
 ])
 skipstandard = set([
     'subdir_is_file_prefix.svndump',
-    'corrupt.svndump',
+    'correct.svndump',
 ])
 
 attrs = {'_do_case': _do_case}
diff --git a/tests/test_corruption.py b/tests/test_corruption.py
--- a/tests/test_corruption.py
+++ b/tests/test_corruption.py
@@ -12,14 +12,14 @@ from hgsubversion import svncommands
 
 class TestCorruption(test_util.TestBase):
 
-    def test_verify(self):
+    def test_verify(self, stupid=False):
         SUCCESS = 0
         FAILURE = 1
 
         repo, repo_path = self.load_and_fetch('correct.svndump', layout='single',
                                               subdir='')
 
-        ui = self.ui()
+        ui = self.ui(stupid=stupid)
 
         self.assertEqual(SUCCESS, svncommands.verify(ui, self.repo, rev='tip'))
 
@@ -47,5 +47,8 @@ class TestCorruption(test_util.TestBase)
 
         self.assertEqual((FAILURE, expected), (code, actual))
 
+    def test_verify_stupid(self):
+        self.test_verify(True)
+
 def suite():
     return unittest.TestLoader().loadTestsFromTestCase(TestCorruption)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.