James Clemence avatar James Clemence committed 9419e87

Upstream release 20120106

Comments (0)

Files changed (7)

+Version 20120106
+----------------
+Upstream release
+
 Version 20111010
 ----------------
 Upstream release
Add a comment to this file

python2/diff_match_patch/__init__.py

File contents unchanged.

python2/diff_match_patch/diff_match_patch.py

     # A match this many characters away from the expected location will add
     # 1.0 to the score (0.0 is a perfect match).
     self.Match_Distance = 1000
-    # When deleting a large block of text (over ~64 characters), how close does
-    # the contents have to match the expected contents. (0.0 = perfection,
+    # When deleting a large block of text (over ~64 characters), how close do
+    # the contents have to be to match the expected contents. (0.0 = perfection,
     # 1.0 = very loose).  Note that Match_Threshold controls how closely the
     # end points of a delete need to match.
     self.Patch_DeleteThreshold = 0.5
       # Walk the front path one step.
       for k1 in xrange(-d + k1start, d + 1 - k1end, 2):
         k1_offset = v_offset + k1
-        if (k1 == -d or k1 != d and
+        if k1 == -d or (k1 != d and
             v1[k1_offset - 1] < v1[k1_offset + 1]):
           x1 = v1[k1_offset + 1]
         else:
       # Walk the reverse path one step.
       for k2 in xrange(-d + k2start, d + 1 - k2end, 2):
         k2_offset = v_offset + k2
-        if (k2 == -d or k2 != d and
+        if k2 == -d or (k2 != d and
             v2[k2_offset - 1] < v2[k2_offset + 1]):
           x2 = v2[k2_offset + 1]
         else:
     """
     changes = False
     equalities = []  # Stack of indices where equalities are found.
-    lastequality = None  # Always equal to equalities[-1][1]
+    lastequality = None  # Always equal to diffs[equalities[-1]][1]
     pointer = 0  # Index of current position.
     # Number of chars that changed prior to the equality.
     length_insertions1, length_deletions1 = 0, 0
           length_deletions2 += len(diffs[pointer][1])
         # Eliminate an equality that is smaller or equal to the edits on both
         # sides of it.
-        if (lastequality != None and (len(lastequality) <=
+        if (lastequality and (len(lastequality) <=
             max(length_insertions1, length_deletions1)) and
             (len(lastequality) <= max(length_insertions2, length_deletions2))):
           # Duplicate record.
     # Find any overlaps between deletions and insertions.
     # e.g: <del>abcxxx</del><ins>xxxdef</ins>
     #   -> <del>abc</del>xxx<ins>def</ins>
+    # e.g: <del>xxxabc</del><ins>defxxx</ins>
+    #   -> <ins>def</ins>xxx<del>abc</del>
     # Only extract an overlap if it is as big as the edit ahead or behind it.
     pointer = 1
     while pointer < len(diffs):
           diffs[pointer][0] == self.DIFF_INSERT):
         deletion = diffs[pointer - 1][1]
         insertion = diffs[pointer][1]
-        overlap_length = self.diff_commonOverlap(deletion, insertion)
-        if (overlap_length >= len(deletion) / 2.0 or
-            overlap_length >= len(insertion) / 2.0):
-          # Overlap found.  Insert an equality and trim the surrounding edits.
-          diffs.insert(pointer, (self.DIFF_EQUAL, insertion[:overlap_length]))
-          diffs[pointer - 1] = (self.DIFF_DELETE,
-                                deletion[:len(deletion) - overlap_length])
-          diffs[pointer + 1] = (self.DIFF_INSERT, insertion[overlap_length:])
-          pointer += 1
+        overlap_length1 = self.diff_commonOverlap(deletion, insertion)
+        overlap_length2 = self.diff_commonOverlap(insertion, deletion)
+        if overlap_length1 >= overlap_length2:
+          if (overlap_length1 >= len(deletion) / 2.0 or
+              overlap_length1 >= len(insertion) / 2.0):
+            # Overlap found.  Insert an equality and trim the surrounding edits.
+            diffs.insert(pointer, (self.DIFF_EQUAL,
+                                   insertion[:overlap_length1]))
+            diffs[pointer - 1] = (self.DIFF_DELETE,
+                                  deletion[:len(deletion) - overlap_length1])
+            diffs[pointer + 1] = (self.DIFF_INSERT,
+                                  insertion[overlap_length1:])
+            pointer += 1
+        else:
+          if (overlap_length2 >= len(deletion) / 2.0 or
+              overlap_length2 >= len(insertion) / 2.0):
+            # Reverse overlap found.
+            # Insert an equality and swap and trim the surrounding edits.
+            diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]))
+            diffs[pointer - 1] = (self.DIFF_INSERT,
+                                  insertion[:len(insertion) - overlap_length2])
+            diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:])
+            pointer += 1
         pointer += 1
       pointer += 1
 
     def diff_cleanupSemanticScore(one, two):
       """Given two strings, compute a score representing whether the
       internal boundary falls on logical boundaries.
-      Scores range from 5 (best) to 0 (worst).
+      Scores range from 6 (best) to 0 (worst).
       Closure, but does not reference any external variables.
 
       Args:
       """
       if not one or not two:
         # Edges are the best.
-        return 5
+        return 6
 
       # Each port of this function behaves slightly differently due to
       # subtle differences in each language's definition of things like
       # 'whitespace'.  Since this function's purpose is largely cosmetic,
       # the choice has been made to use each language's native features
       # rather than force total conformity.
-      score = 0
-      # One point for non-alphanumeric.
-      if not one[-1].isalnum() or not two[0].isalnum():
-        score += 1
+      char1 = one[-1]
+      char2 = two[0]
+      nonAlphaNumeric1 = not char1.isalnum()
+      nonAlphaNumeric2 = not char2.isalnum()
+      whitespace1 = nonAlphaNumeric1 and char1.isspace()
+      whitespace2 = nonAlphaNumeric2 and char2.isspace()
+      lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")
+      lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")
+      blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)
+      blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)
+
+      if blankLine1 or blankLine2:
+        # Five points for blank lines.
+        return 5
+      elif lineBreak1 or lineBreak2:
+        # Four points for line breaks.
+        return 4
+      elif nonAlphaNumeric1 and not whitespace1 and whitespace2:
+        # Three points for end of sentences.
+        return 3
+      elif whitespace1 or whitespace2:
         # Two points for whitespace.
-        if one[-1].isspace() or two[0].isspace():
-          score += 1
-          # Three points for line breaks.
-          if (one[-1] == "\r" or one[-1] == "\n" or
-              two[0] == "\r" or two[0] == "\n"):
-            score += 1
-            # Four points for blank lines.
-            if (re.search("\\n\\r?\\n$", one) or
-                re.match("^\\r?\\n\\r?\\n", two)):
-              score += 1
-      return score
+        return 2
+      elif nonAlphaNumeric1 or nonAlphaNumeric2:
+        # One point for non-alphanumeric.
+        return 1
+      return 0
 
     pointer = 1
     # Intentionally ignore the first and last element (don't need checking).
             pointer -= 1
       pointer += 1
 
+  # Define some regex patterns for matching boundaries.
+  BLANKLINEEND = re.compile(r"\n\r?\n$");
+  BLANKLINESTART = re.compile(r"^\r?\n\r?\n");
+
   def diff_cleanupEfficiency(self, diffs):
     """Reduce the number of edits by eliminating operationally trivial
     equalities.
     """
     changes = False
     equalities = []  # Stack of indices where equalities are found.
-    lastequality = ''  # Always equal to equalities[-1][1]
+    lastequality = None  # Always equal to diffs[equalities[-1]][1]
     pointer = 0  # Index of current position.
     pre_ins = False  # Is there an insertion operation before the last equality.
     pre_del = False  # Is there a deletion operation before the last equality.
         else:
           # Not a candidate, and can never become one.
           equalities = []
-          lastequality = ''
+          lastequality = None
 
         post_ins = post_del = False
       else:  # An insertion or deletion.
           diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
               diffs[equalities[-1] + 1][1])
           equalities.pop()  # Throw away the equality we just deleted.
-          lastequality = ''
+          lastequality = None
           if pre_ins and pre_del:
             # No changes made which could affect previous entry, keep going.
             post_ins = post_del = True
       HTML representation.
     """
     html = []
-    i = 0
     for (op, data) in diffs:
       text = (data.replace("&", "&amp;").replace("<", "&lt;")
                  .replace(">", "&gt;").replace("\n", "&para;<br>"))
         html.append("<del style=\"background:#ffe6e6;\">%s</del>" % text)
       elif op == self.DIFF_EQUAL:
         html.append("<span>%s</span>" % text)
-      if op != self.DIFF_DELETE:
-        i += len(data)
     return "".join(html)
 
   def diff_text1(self, diffs):
         if d == 0:  # First pass: exact match.
           rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
         else:  # Subsequent passes: fuzzy match.
-          rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (
+          rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | (
               ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
         if rd[j] & matchmask:
           score = match_bitapScore(d, j - 1)
           undefined (methods 1,2,3).
 
     Returns:
-      Array of patch objects.
+      Array of Patch objects.
     """
     text1 = None
     diffs = None
     """Given an array of patches, return another array that is identical.
 
     Args:
-      patches: Array of patch objects.
+      patches: Array of Patch objects.
 
     Returns:
-      Array of patch objects.
+      Array of Patch objects.
     """
     patchesCopy = []
     for patch in patches:
     as a list of true/false values indicating which patches were applied.
 
     Args:
-      patches: Array of patch objects.
+      patches: Array of Patch objects.
       text: Old text.
 
     Returns:
     something.  Intended to be called only from within patch_apply.
 
     Args:
-      patches: Array of patch objects.
+      patches: Array of Patch objects.
 
     Returns:
       The padding string added to each side.
     Intended to be called only from within patch_apply.
 
     Args:
-      patches: Array of patch objects.
+      patches: Array of Patch objects.
     """
     patch_size = self.Match_MaxBits
     if patch_size == 0:
       # to handle integers of arbitrary precision.
       return
     for x in xrange(len(patches)):
-      if patches[x].length1 > patch_size:
-        bigpatch = patches[x]
-        # Remove the big old patch.
-        del patches[x]
-        x -= 1
-        start1 = bigpatch.start1
-        start2 = bigpatch.start2
-        precontext = ''
-        while len(bigpatch.diffs) != 0:
-          # Create one of several smaller patches.
-          patch = patch_obj()
-          empty = True
-          patch.start1 = start1 - len(precontext)
-          patch.start2 = start2 - len(precontext)
-          if precontext:
-            patch.length1 = patch.length2 = len(precontext)
-            patch.diffs.append((self.DIFF_EQUAL, precontext))
+      if patches[x].length1 <= patch_size:
+        continue
+      bigpatch = patches[x]
+      # Remove the big old patch.
+      del patches[x]
+      x -= 1
+      start1 = bigpatch.start1
+      start2 = bigpatch.start2
+      precontext = ''
+      while len(bigpatch.diffs) != 0:
+        # Create one of several smaller patches.
+        patch = patch_obj()
+        empty = True
+        patch.start1 = start1 - len(precontext)
+        patch.start2 = start2 - len(precontext)
+        if precontext:
+          patch.length1 = patch.length2 = len(precontext)
+          patch.diffs.append((self.DIFF_EQUAL, precontext))
 
-          while (len(bigpatch.diffs) != 0 and
-                 patch.length1 < patch_size - self.Patch_Margin):
-            (diff_type, diff_text) = bigpatch.diffs[0]
-            if diff_type == self.DIFF_INSERT:
-              # Insertions are harmless.
+        while (len(bigpatch.diffs) != 0 and
+               patch.length1 < patch_size - self.Patch_Margin):
+          (diff_type, diff_text) = bigpatch.diffs[0]
+          if diff_type == self.DIFF_INSERT:
+            # Insertions are harmless.
+            patch.length2 += len(diff_text)
+            start2 += len(diff_text)
+            patch.diffs.append(bigpatch.diffs.pop(0))
+            empty = False
+          elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
+              patch.diffs[0][0] == self.DIFF_EQUAL and
+              len(diff_text) > 2 * patch_size):
+            # This is a large deletion.  Let it pass in one chunk.
+            patch.length1 += len(diff_text)
+            start1 += len(diff_text)
+            empty = False
+            patch.diffs.append((diff_type, diff_text))
+            del bigpatch.diffs[0]
+          else:
+            # Deletion or equality.  Only take as much as we can stomach.
+            diff_text = diff_text[:patch_size - patch.length1 -
+                                  self.Patch_Margin]
+            patch.length1 += len(diff_text)
+            start1 += len(diff_text)
+            if diff_type == self.DIFF_EQUAL:
               patch.length2 += len(diff_text)
               start2 += len(diff_text)
-              patch.diffs.append(bigpatch.diffs.pop(0))
+            else:
               empty = False
-            elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
-                patch.diffs[0][0] == self.DIFF_EQUAL and
-                len(diff_text) > 2 * patch_size):
-              # This is a large deletion.  Let it pass in one chunk.
-              patch.length1 += len(diff_text)
-              start1 += len(diff_text)
-              empty = False
-              patch.diffs.append((diff_type, diff_text))
+
+            patch.diffs.append((diff_type, diff_text))
+            if diff_text == bigpatch.diffs[0][1]:
               del bigpatch.diffs[0]
             else:
-              # Deletion or equality.  Only take as much as we can stomach.
-              diff_text = diff_text[:patch_size - patch.length1 -
-                                    self.Patch_Margin]
-              patch.length1 += len(diff_text)
-              start1 += len(diff_text)
-              if diff_type == self.DIFF_EQUAL:
-                patch.length2 += len(diff_text)
-                start2 += len(diff_text)
-              else:
-                empty = False
+              bigpatch.diffs[0] = (bigpatch.diffs[0][0],
+                                   bigpatch.diffs[0][1][len(diff_text):])
 
-              patch.diffs.append((diff_type, diff_text))
-              if diff_text == bigpatch.diffs[0][1]:
-                del bigpatch.diffs[0]
-              else:
-                bigpatch.diffs[0] = (bigpatch.diffs[0][0],
-                                     bigpatch.diffs[0][1][len(diff_text):])
+        # Compute the head context for the next patch.
+        precontext = self.diff_text2(patch.diffs)
+        precontext = precontext[-self.Patch_Margin:]
+        # Append the end context for this patch.
+        postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
+        if postcontext:
+          patch.length1 += len(postcontext)
+          patch.length2 += len(postcontext)
+          if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
+            patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
+                               postcontext)
+          else:
+            patch.diffs.append((self.DIFF_EQUAL, postcontext))
 
-          # Compute the head context for the next patch.
-          precontext = self.diff_text2(patch.diffs)
-          precontext = precontext[-self.Patch_Margin:]
-          # Append the end context for this patch.
-          postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
-          if postcontext:
-            patch.length1 += len(postcontext)
-            patch.length2 += len(postcontext)
-            if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
-              patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
-                                 postcontext)
-            else:
-              patch.diffs.append((self.DIFF_EQUAL, postcontext))
-
-          if not empty:
-            x += 1
-            patches.insert(x, patch)
+        if not empty:
+          x += 1
+          patches.insert(x, patch)
 
   def patch_toText(self, patches):
     """Take a list of patches and return a textual representation.
 
     Args:
-      patches: Array of patch objects.
+      patches: Array of Patch objects.
 
     Returns:
       Text representation of patches.
       textline: Text representation of patches.
 
     Returns:
-      Array of patch objects.
+      Array of Patch objects.
 
     Raises:
       ValueError: If invalid input.

python2/diff_match_patch/diff_match_patch_test.py

     # Unicode.
     # Some overly clever languages (C#) may treat ligatures as equal to their
     # component letters.  E.g. U+FB01 == 'fi'
-    self.assertEquals(0, self.dmp.diff_commonOverlap("fi", u"\ubf01i"))
+    self.assertEquals(0, self.dmp.diff_commonOverlap("fi", u"\ufb01i"))
 
   def testDiffHalfMatch(self):
     # Detect a halfmatch.
     self.dmp.diff_cleanupSemanticLossless(diffs)
     self.assertEquals([(self.dmp.DIFF_EQUAL, "xaa"), (self.dmp.DIFF_DELETE, "a")], diffs)
 
+    # Sentence boundaries.
+    diffs = [(self.dmp.DIFF_EQUAL, "The xxx. The "), (self.dmp.DIFF_INSERT, "zzz. The "), (self.dmp.DIFF_EQUAL, "yyy.")]
+    self.dmp.diff_cleanupSemanticLossless(diffs)
+    self.assertEquals([(self.dmp.DIFF_EQUAL, "The xxx."), (self.dmp.DIFF_INSERT, " The zzz."), (self.dmp.DIFF_EQUAL, " The yyy.")], diffs)
+
   def testDiffCleanupSemantic(self):
     # Cleanup semantically trivial equalities.
     # Null case.
     self.dmp.diff_cleanupSemantic(diffs)
     self.assertEquals([(self.dmp.DIFF_DELETE, "abc"), (self.dmp.DIFF_EQUAL, "xxx"), (self.dmp.DIFF_INSERT, "def")], diffs)
 
+    # Reverse overlap elimination.
+    diffs = [(self.dmp.DIFF_DELETE, "xxxabc"), (self.dmp.DIFF_INSERT, "defxxx")]
+    self.dmp.diff_cleanupSemantic(diffs)
+    self.assertEquals([(self.dmp.DIFF_INSERT, "def"), (self.dmp.DIFF_EQUAL, "xxx"), (self.dmp.DIFF_DELETE, "abc")], diffs)
+
     # Two overlap eliminations.
     diffs = [(self.dmp.DIFF_DELETE, "abcd1212"), (self.dmp.DIFF_INSERT, "1212efghi"), (self.dmp.DIFF_EQUAL, "----"), (self.dmp.DIFF_DELETE, "A3"), (self.dmp.DIFF_INSERT, "3BC")]
     self.dmp.diff_cleanupSemantic(diffs)

python3/diff_match_patch/diff_match_patch.py

     # A match this many characters away from the expected location will add
     # 1.0 to the score (0.0 is a perfect match).
     self.Match_Distance = 1000
-    # When deleting a large block of text (over ~64 characters), how close does
-    # the contents have to match the expected contents. (0.0 = perfection,
+    # When deleting a large block of text (over ~64 characters), how close do
+    # the contents have to be to match the expected contents. (0.0 = perfection,
     # 1.0 = very loose).  Note that Match_Threshold controls how closely the
     # end points of a delete need to match.
     self.Patch_DeleteThreshold = 0.5
       # Walk the front path one step.
       for k1 in range(-d + k1start, d + 1 - k1end, 2):
         k1_offset = v_offset + k1
-        if (k1 == -d or k1 != d and
+        if k1 == -d or (k1 != d and
             v1[k1_offset - 1] < v1[k1_offset + 1]):
           x1 = v1[k1_offset + 1]
         else:
       # Walk the reverse path one step.
       for k2 in range(-d + k2start, d + 1 - k2end, 2):
         k2_offset = v_offset + k2
-        if (k2 == -d or k2 != d and
+        if k2 == -d or (k2 != d and
             v2[k2_offset - 1] < v2[k2_offset + 1]):
           x2 = v2[k2_offset + 1]
         else:
     """
     changes = False
     equalities = []  # Stack of indices where equalities are found.
-    lastequality = None  # Always equal to equalities[-1][1]
+    lastequality = None  # Always equal to diffs[equalities[-1]][1]
     pointer = 0  # Index of current position.
     # Number of chars that changed prior to the equality.
     length_insertions1, length_deletions1 = 0, 0
           length_deletions2 += len(diffs[pointer][1])
         # Eliminate an equality that is smaller or equal to the edits on both
         # sides of it.
-        if (lastequality != None and (len(lastequality) <=
+        if (lastequality and (len(lastequality) <=
             max(length_insertions1, length_deletions1)) and
             (len(lastequality) <= max(length_insertions2, length_deletions2))):
           # Duplicate record.
     # Find any overlaps between deletions and insertions.
     # e.g: <del>abcxxx</del><ins>xxxdef</ins>
     #   -> <del>abc</del>xxx<ins>def</ins>
+    # e.g: <del>xxxabc</del><ins>defxxx</ins>
+    #   -> <ins>def</ins>xxx<del>abc</del>
     # Only extract an overlap if it is as big as the edit ahead or behind it.
     pointer = 1
     while pointer < len(diffs):
           diffs[pointer][0] == self.DIFF_INSERT):
         deletion = diffs[pointer - 1][1]
         insertion = diffs[pointer][1]
-        overlap_length = self.diff_commonOverlap(deletion, insertion)
-        if (overlap_length >= len(deletion) / 2.0 or
-            overlap_length >= len(insertion) / 2.0):
-          # Overlap found.  Insert an equality and trim the surrounding edits.
-          diffs.insert(pointer, (self.DIFF_EQUAL, insertion[:overlap_length]))
-          diffs[pointer - 1] = (self.DIFF_DELETE,
-                                deletion[:len(deletion) - overlap_length])
-          diffs[pointer + 1] = (self.DIFF_INSERT, insertion[overlap_length:])
-          pointer += 1
+        overlap_length1 = self.diff_commonOverlap(deletion, insertion)
+        overlap_length2 = self.diff_commonOverlap(insertion, deletion)
+        if overlap_length1 >= overlap_length2:
+          if (overlap_length1 >= len(deletion) / 2.0 or
+              overlap_length1 >= len(insertion) / 2.0):
+            # Overlap found.  Insert an equality and trim the surrounding edits.
+            diffs.insert(pointer, (self.DIFF_EQUAL,
+                                   insertion[:overlap_length1]))
+            diffs[pointer - 1] = (self.DIFF_DELETE,
+                                  deletion[:len(deletion) - overlap_length1])
+            diffs[pointer + 1] = (self.DIFF_INSERT,
+                                  insertion[overlap_length1:])
+            pointer += 1
+        else:
+          if (overlap_length2 >= len(deletion) / 2.0 or
+              overlap_length2 >= len(insertion) / 2.0):
+            # Reverse overlap found.
+            # Insert an equality and swap and trim the surrounding edits.
+            diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]))
+            diffs[pointer - 1] = (self.DIFF_INSERT,
+                                  insertion[:len(insertion) - overlap_length2])
+            diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:])
+            pointer += 1
         pointer += 1
       pointer += 1
 
     def diff_cleanupSemanticScore(one, two):
       """Given two strings, compute a score representing whether the
       internal boundary falls on logical boundaries.
-      Scores range from 5 (best) to 0 (worst).
+      Scores range from 6 (best) to 0 (worst).
       Closure, but does not reference any external variables.
 
       Args:
       """
       if not one or not two:
         # Edges are the best.
-        return 5
+        return 6
 
       # Each port of this function behaves slightly differently due to
       # subtle differences in each language's definition of things like
       # 'whitespace'.  Since this function's purpose is largely cosmetic,
       # the choice has been made to use each language's native features
       # rather than force total conformity.
-      score = 0
-      # One point for non-alphanumeric.
-      if not one[-1].isalnum() or not two[0].isalnum():
-        score += 1
+      char1 = one[-1]
+      char2 = two[0]
+      nonAlphaNumeric1 = not char1.isalnum()
+      nonAlphaNumeric2 = not char2.isalnum()
+      whitespace1 = nonAlphaNumeric1 and char1.isspace()
+      whitespace2 = nonAlphaNumeric2 and char2.isspace()
+      lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")
+      lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")
+      blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)
+      blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)
+
+      if blankLine1 or blankLine2:
+        # Five points for blank lines.
+        return 5
+      elif lineBreak1 or lineBreak2:
+        # Four points for line breaks.
+        return 4
+      elif nonAlphaNumeric1 and not whitespace1 and whitespace2:
+        # Three points for end of sentences.
+        return 3
+      elif whitespace1 or whitespace2:
         # Two points for whitespace.
-        if one[-1].isspace() or two[0].isspace():
-          score += 1
-          # Three points for line breaks.
-          if (one[-1] == "\r" or one[-1] == "\n" or
-              two[0] == "\r" or two[0] == "\n"):
-            score += 1
-            # Four points for blank lines.
-            if (re.search("\\n\\r?\\n$", one) or
-                re.match("^\\r?\\n\\r?\\n", two)):
-              score += 1
-      return score
+        return 2
+      elif nonAlphaNumeric1 or nonAlphaNumeric2:
+        # One point for non-alphanumeric.
+        return 1
+      return 0
 
     pointer = 1
     # Intentionally ignore the first and last element (don't need checking).
             pointer -= 1
       pointer += 1
 
+  # Define some regex patterns for matching boundaries.
+  BLANKLINEEND = re.compile(r"\n\r?\n$");
+  BLANKLINESTART = re.compile(r"^\r?\n\r?\n");
+
   def diff_cleanupEfficiency(self, diffs):
     """Reduce the number of edits by eliminating operationally trivial
     equalities.
     """
     changes = False
     equalities = []  # Stack of indices where equalities are found.
-    lastequality = ''  # Always equal to equalities[-1][1]
+    lastequality = None  # Always equal to diffs[equalities[-1]][1]
     pointer = 0  # Index of current position.
     pre_ins = False  # Is there an insertion operation before the last equality.
     pre_del = False  # Is there a deletion operation before the last equality.
         else:
           # Not a candidate, and can never become one.
           equalities = []
-          lastequality = ''
+          lastequality = None
 
         post_ins = post_del = False
       else:  # An insertion or deletion.
           diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
               diffs[equalities[-1] + 1][1])
           equalities.pop()  # Throw away the equality we just deleted.
-          lastequality = ''
+          lastequality = None
           if pre_ins and pre_del:
             # No changes made which could affect previous entry, keep going.
             post_ins = post_del = True
       HTML representation.
     """
     html = []
-    i = 0
     for (op, data) in diffs:
       text = (data.replace("&", "&amp;").replace("<", "&lt;")
                  .replace(">", "&gt;").replace("\n", "&para;<br>"))
         html.append("<del style=\"background:#ffe6e6;\">%s</del>" % text)
       elif op == self.DIFF_EQUAL:
         html.append("<span>%s</span>" % text)
-      if op != self.DIFF_DELETE:
-        i += len(data)
     return "".join(html)
 
   def diff_text1(self, diffs):
         if d == 0:  # First pass: exact match.
           rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
         else:  # Subsequent passes: fuzzy match.
-          rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (
+          rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | (
               ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
         if rd[j] & matchmask:
           score = match_bitapScore(d, j - 1)
           undefined (methods 1,2,3).
 
     Returns:
-      Array of patch objects.
+      Array of Patch objects.
     """
     text1 = None
     diffs = None
     """Given an array of patches, return another array that is identical.
 
     Args:
-      patches: Array of patch objects.
+      patches: Array of Patch objects.
 
     Returns:
-      Array of patch objects.
+      Array of Patch objects.
     """
     patchesCopy = []
     for patch in patches:
     as a list of true/false values indicating which patches were applied.
 
     Args:
-      patches: Array of patch objects.
+      patches: Array of Patch objects.
       text: Old text.
 
     Returns:
     something.  Intended to be called only from within patch_apply.
 
     Args:
-      patches: Array of patch objects.
+      patches: Array of Patch objects.
 
     Returns:
       The padding string added to each side.
     Intended to be called only from within patch_apply.
 
     Args:
-      patches: Array of patch objects.
+      patches: Array of Patch objects.
     """
     patch_size = self.Match_MaxBits
     if patch_size == 0:
       # to handle integers of arbitrary precision.
       return
     for x in range(len(patches)):
-      if patches[x].length1 > patch_size:
-        bigpatch = patches[x]
-        # Remove the big old patch.
-        del patches[x]
-        x -= 1
-        start1 = bigpatch.start1
-        start2 = bigpatch.start2
-        precontext = ''
-        while len(bigpatch.diffs) != 0:
-          # Create one of several smaller patches.
-          patch = patch_obj()
-          empty = True
-          patch.start1 = start1 - len(precontext)
-          patch.start2 = start2 - len(precontext)
-          if precontext:
-            patch.length1 = patch.length2 = len(precontext)
-            patch.diffs.append((self.DIFF_EQUAL, precontext))
+      if patches[x].length1 <= patch_size:
+        continue
+      bigpatch = patches[x]
+      # Remove the big old patch.
+      del patches[x]
+      x -= 1
+      start1 = bigpatch.start1
+      start2 = bigpatch.start2
+      precontext = ''
+      while len(bigpatch.diffs) != 0:
+        # Create one of several smaller patches.
+        patch = patch_obj()
+        empty = True
+        patch.start1 = start1 - len(precontext)
+        patch.start2 = start2 - len(precontext)
+        if precontext:
+          patch.length1 = patch.length2 = len(precontext)
+          patch.diffs.append((self.DIFF_EQUAL, precontext))
 
-          while (len(bigpatch.diffs) != 0 and
-                 patch.length1 < patch_size - self.Patch_Margin):
-            (diff_type, diff_text) = bigpatch.diffs[0]
-            if diff_type == self.DIFF_INSERT:
-              # Insertions are harmless.
+        while (len(bigpatch.diffs) != 0 and
+               patch.length1 < patch_size - self.Patch_Margin):
+          (diff_type, diff_text) = bigpatch.diffs[0]
+          if diff_type == self.DIFF_INSERT:
+            # Insertions are harmless.
+            patch.length2 += len(diff_text)
+            start2 += len(diff_text)
+            patch.diffs.append(bigpatch.diffs.pop(0))
+            empty = False
+          elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
+              patch.diffs[0][0] == self.DIFF_EQUAL and
+              len(diff_text) > 2 * patch_size):
+            # This is a large deletion.  Let it pass in one chunk.
+            patch.length1 += len(diff_text)
+            start1 += len(diff_text)
+            empty = False
+            patch.diffs.append((diff_type, diff_text))
+            del bigpatch.diffs[0]
+          else:
+            # Deletion or equality.  Only take as much as we can stomach.
+            diff_text = diff_text[:patch_size - patch.length1 -
+                                  self.Patch_Margin]
+            patch.length1 += len(diff_text)
+            start1 += len(diff_text)
+            if diff_type == self.DIFF_EQUAL:
               patch.length2 += len(diff_text)
               start2 += len(diff_text)
-              patch.diffs.append(bigpatch.diffs.pop(0))
+            else:
               empty = False
-            elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
-                patch.diffs[0][0] == self.DIFF_EQUAL and
-                len(diff_text) > 2 * patch_size):
-              # This is a large deletion.  Let it pass in one chunk.
-              patch.length1 += len(diff_text)
-              start1 += len(diff_text)
-              empty = False
-              patch.diffs.append((diff_type, diff_text))
+
+            patch.diffs.append((diff_type, diff_text))
+            if diff_text == bigpatch.diffs[0][1]:
               del bigpatch.diffs[0]
             else:
-              # Deletion or equality.  Only take as much as we can stomach.
-              diff_text = diff_text[:patch_size - patch.length1 -
-                                    self.Patch_Margin]
-              patch.length1 += len(diff_text)
-              start1 += len(diff_text)
-              if diff_type == self.DIFF_EQUAL:
-                patch.length2 += len(diff_text)
-                start2 += len(diff_text)
-              else:
-                empty = False
+              bigpatch.diffs[0] = (bigpatch.diffs[0][0],
+                                   bigpatch.diffs[0][1][len(diff_text):])
 
-              patch.diffs.append((diff_type, diff_text))
-              if diff_text == bigpatch.diffs[0][1]:
-                del bigpatch.diffs[0]
-              else:
-                bigpatch.diffs[0] = (bigpatch.diffs[0][0],
-                                     bigpatch.diffs[0][1][len(diff_text):])
+        # Compute the head context for the next patch.
+        precontext = self.diff_text2(patch.diffs)
+        precontext = precontext[-self.Patch_Margin:]
+        # Append the end context for this patch.
+        postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
+        if postcontext:
+          patch.length1 += len(postcontext)
+          patch.length2 += len(postcontext)
+          if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
+            patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
+                               postcontext)
+          else:
+            patch.diffs.append((self.DIFF_EQUAL, postcontext))
 
-          # Compute the head context for the next patch.
-          precontext = self.diff_text2(patch.diffs)
-          precontext = precontext[-self.Patch_Margin:]
-          # Append the end context for this patch.
-          postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
-          if postcontext:
-            patch.length1 += len(postcontext)
-            patch.length2 += len(postcontext)
-            if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
-              patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
-                                 postcontext)
-            else:
-              patch.diffs.append((self.DIFF_EQUAL, postcontext))
-
-          if not empty:
-            x += 1
-            patches.insert(x, patch)
+        if not empty:
+          x += 1
+          patches.insert(x, patch)
 
   def patch_toText(self, patches):
     """Take a list of patches and return a textual representation.
 
     Args:
-      patches: Array of patch objects.
+      patches: Array of Patch objects.
 
     Returns:
       Text representation of patches.
       textline: Text representation of patches.
 
     Returns:
-      Array of patch objects.
+      Array of Patch objects.
 
     Raises:
       ValueError: If invalid input.

python3/diff_match_patch/diff_match_patch_test.py

     # Unicode.
     # Some overly clever languages (C#) may treat ligatures as equal to their
     # component letters.  E.g. U+FB01 == 'fi'
-    self.assertEqual(0, self.dmp.diff_commonOverlap("fi", "\ubf01i"))
+    self.assertEqual(0, self.dmp.diff_commonOverlap("fi", "\ufb01i"))
 
   def testDiffHalfMatch(self):
     # Detect a halfmatch.
     self.dmp.diff_cleanupSemanticLossless(diffs)
     self.assertEqual([(self.dmp.DIFF_EQUAL, "xaa"), (self.dmp.DIFF_DELETE, "a")], diffs)
 
+    # Sentence boundaries.
+    diffs = [(self.dmp.DIFF_EQUAL, "The xxx. The "), (self.dmp.DIFF_INSERT, "zzz. The "), (self.dmp.DIFF_EQUAL, "yyy.")]
+    self.dmp.diff_cleanupSemanticLossless(diffs)
+    self.assertEqual([(self.dmp.DIFF_EQUAL, "The xxx."), (self.dmp.DIFF_INSERT, " The zzz."), (self.dmp.DIFF_EQUAL, " The yyy.")], diffs)
+
   def testDiffCleanupSemantic(self):
     # Cleanup semantically trivial equalities.
     # Null case.
     self.dmp.diff_cleanupSemantic(diffs)
     self.assertEqual([(self.dmp.DIFF_DELETE, "abc"), (self.dmp.DIFF_EQUAL, "xxx"), (self.dmp.DIFF_INSERT, "def")], diffs)
 
+    # Reverse overlap elimination.
+    diffs = [(self.dmp.DIFF_DELETE, "xxxabc"), (self.dmp.DIFF_INSERT, "defxxx")]
+    self.dmp.diff_cleanupSemantic(diffs)
+    self.assertEqual([(self.dmp.DIFF_INSERT, "def"), (self.dmp.DIFF_EQUAL, "xxx"), (self.dmp.DIFF_DELETE, "abc")], diffs)
+
     # Two overlap eliminations.
     diffs = [(self.dmp.DIFF_DELETE, "abcd1212"), (self.dmp.DIFF_INSERT, "1212efghi"), (self.dmp.DIFF_EQUAL, "----"), (self.dmp.DIFF_DELETE, "A3"), (self.dmp.DIFF_INSERT, "3BC")]
     self.dmp.diff_cleanupSemantic(diffs)
 
 setup(
     name='diff-match-patch',
-    version='20111010',
+    version='20120106',
     description = "The Diff Match and Patch libraries offer robust algorithms to perform the operations required for synchronizing plain text.",
     long_description = read('README.rst') + "\n\n" + read("CHANGES.rst"),
     packages = ['diff_match_patch'],
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.