Commits

jacobmason committed 2a9ca4e Merge

merge with version-tracking

Comments (0)

Files changed (5)

     :license: BSD, see LICENSE for details.
 """
 from uuid import uuid4
+from operator import itemgetter
+from collections import defaultdict
 from itertools import product
-try:
-    from itertools import izip_longest as zip_longest
-except ImportError:
-    from itertools import zip_longest
-from difflib import SequenceMatcher
 
 from sphinx.util import PeekableIterator
 
         node.uid = uuid4().hex
         yield node
 
-def merge_node(old, new):
-    """
-    Merges the `old` node with the `new` one, if it's successful the `new` node
-    get's the unique identifier of the `new` one and ``True`` is returned. If
-    the merge is unsuccesful ``False`` is returned.
-    """
-    equals, changed, replaced = make_diff(old.rawsource,
-                                          new.rawsource)
-    if equals or changed:
-        new.uid = old.uid
-        return True
-    return False
-
 def merge_doctrees(old, new, condition):
     """
     Merges the `old` doctree with the `new` one while looking at nodes matching
     :param condition:
         A callable which returns either ``True`` or ``False`` for a given node.
     """
-    old_iter = PeekableIterator(old.traverse(condition))
-    new_iter = PeekableIterator(new.traverse(condition))
-    old_nodes = []
-    new_nodes = []
-    for old_node, new_node in zip_longest(old_iter, new_iter):
-        if old_node is None:
-            new_nodes.append(new_node)
+    old_nodes = old.traverse(condition)
+    new_nodes = new.traverse(condition)
+    ratios = defaultdict(list)
+    for old_node, new_node in product(old_nodes, new_nodes):
+        ratios[old_node, new_node] = get_ratio(old_node.rawsource,
+                                               new_node.rawsource)
+    ratios = sorted(ratios.iteritems(), key=itemgetter(1))
+    seen = set()
+    for (old_node, new_node), ratio in ratios:
+        if new_node in seen:
             continue
-        if new_node is None:
-            old_nodes.append(old_node)
-            continue
-        if not merge_node(old_node, new_node):
-            if old_nodes:
-                for i, very_old_node in enumerate(old_nodes):
-                    if merge_node(very_old_node, new_node):
-                        del old_nodes[i]
-                        # If the last identified node which has not matched the
-                        # unidentified node matches the current one, we have to
-                        # assume that the last unidentified one has been
-                        # inserted.
-                        #
-                        # As the required time multiplies with each insert, we
-                        # want to avoid that by checking if the next
-                        # unidentified node matches the current identified one
-                        # and if so we make a shift.
-                        if i == len(old_nodes):
-                            next_new_node = new_iter.next()
-                            if not merge_node(old_node, next_new_node):
-                                new_iter.push(next_new_node)
-                        break
-            else:
-                old_nodes.append(old_node)
-                new_nodes.append(new_node)
-    for (i, new_node), (j, old_node) in product(enumerate(new_nodes),
-                                                enumerate(old_nodes)):
-        if merge_node(old_node, new_node):
-            del new_nodes[i]
-            del old_nodes[j]
-    for node in new_nodes:
-        node.uid = uuid4().hex
-        # Yielding the new nodes here makes it possible to use this generator
-        # like add_uids
-        yield node
+        else:
+            seen.add(new_node)
+        if ratio < 65:
+            new_node.uid = old_node.uid
+        else:
+            new_node.uid = uuid4().hex
+            yield new_node
 
-def make_diff(old, new):
+def get_ratio(old, new):
     """
-    Takes two strings `old` and `new` and returns a :class:`tuple` of boolean
-    values ``(equals, changed, replaced)``.
-
-    equals
-
-        ``True`` if the `old` string and the `new` one are equal.
-
-    changed
-
-        ``True`` if the `new` string is a changed version of the `old` one.
-
-    replaced
-
-        ``True`` if the `new` string and the `old` string are totally
-        different.
-
-    .. note:: This assumes the two strings are human readable text or at least
-              something very similar to that, otherwise it can not detect if
-              the string has been changed or replaced. In any case the
-              detection should not be considered reliable.
+    Returns a "similiarity ratio" representing the similarity between the two
+    strings where 0 is equal and anything above less than equal.
     """
     if old == new:
-        return True, False, False
-    if new in old or levenshtein_distance(old, new) / (len(old) / 100.0) < 70:
-        return False, True, False
-    return False, False, True
+        return 0
+    ratio = levenshtein_distance(old, new) / (len(old) / 100.0)
+    return ratio
 
 def levenshtein_distance(a, b):
     if len(a) < len(b):
         """
         shutil.rmtree(self, ignore_errors=ignore_errors, onerror=onerror)
 
-    def copytree(self, destination, symlinks=False, ignore=None):
+    def copytree(self, destination, symlinks=False):
         """
         Recursively copy a directory to the given `destination`. If the given
         `destination` does not exist it will be created.
             A callback which gets called with the path of the directory being
             copied and a list of paths as returned by :func:`os.listdir`.
         """
-        shutil.copytree(self, destination, symlinks=symlinks, ignore=ignore)
+        shutil.copytree(self, destination, symlinks=symlinks)
 
     def movetree(self, destination):
         """

tests/root/versioning/index.txt

     deleted_end
     modified
     insert_beginning
+    insert_similar

tests/root/versioning/insert_similar.txt

+Versioning test text
+====================
+
+So the thing is I need some kind of text - not the lorem ipsum stuff, that
+doesn't work out that well - to test :mod:`sphinx.versioning`. I couldn't find
+a good text for that under public domain so I thought the easiest solution is
+to write one by myself. It's not really interesting, in fact it is *really*
+boring.
+
+Anyway I need more
+
+Anyway I need more than one paragraph, at least three for the original
+document, I think, and another one for two different ones.
+
+So the previous paragraph was a bit short because I don't want to test this
+only on long paragraphs, I hope it was short enough to cover most stuff.
+Anyway I see this lacks ``some markup`` so I have to add a **little** bit.

tests/test_versioning.py

     assert len(uids) == 4
     assert original_uids == uids[1:]
     assert original_uids[0] != uids[0]
+
+def test_insert_similar():
+    insert_similar = doctrees['versioning/insert_similar']
+    new_nodes = list(merge_doctrees(original, insert_similar, is_paragraph))
+    uids = [n.uid for n in insert_similar.traverse(is_paragraph)]
+    assert len(new_nodes) == 1
+    assert new_nodes[0].rawsource == u'Anyway I need more'
+    assert original_uids[0] == uids[0]
+    assert original_uids[1:] == uids[2:]