Commits

Matt Hurne committed de2e186

SortedZipDiffScope now uses SHA-1 hashes to detect file renames

Comments (0)

Files changed (2)

     </build>
     <dependencies>
         <dependency>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+            <version>1.6</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>3.1</version>
+        </dependency>
+        <dependency>
             <groupId>log4j</groupId>
             <artifactId>log4j</artifactId>
             <version>1.2.16</version>

engine/src/main/java/deepdiff/scope/SortedZipDiffScope.java

 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TreeMap;
-import java.util.TreeSet;
+import java.util.*;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipException;
 import java.util.zip.ZipInputStream;
 
+import deepdiff.core.*;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.log4j.Logger;
 
-import deepdiff.core.DiffPointProcessor;
-import deepdiff.core.DiffScope;
-import deepdiff.core.DiffUnit;
-import deepdiff.core.DiffUnitProcessor;
-
 /**
  * An implementation of {@link DiffScope} that compares two Zip files. These Zip files do not need
  * to be physical files; any {@link InputStream} can be used. Thus, it is possible to process Zip
 public class SortedZipDiffScope implements DiffScope {
     private static final Logger log = Logger.getLogger(SortedZipDiffScope.class);
 
-    private SortedMap<String, CachedZipEntry> leftEntries;
-    private SortedMap<String, CachedZipEntry> rightEntries;
+    private SortedMap<String, CachedZipEntry> leftEntriesByPath;
+    private SortedMap<String, CachedZipEntry> rightEntriesByPath;
+
+    private Map<String, Map<String, CachedZipEntry>> leftEntriesBySha;
+    private Map<String, Map<String, CachedZipEntry>> rightEntriesBySha;
 
     private InputStream isLeft;
     private InputStream isRight;
         this.path = path;
         this.isLeft = isLeft;
         this.isRight = isRight;
-        leftEntries = new TreeMap<String, CachedZipEntry>();
-        rightEntries = new TreeMap<String, CachedZipEntry>();
+        leftEntriesByPath = new TreeMap<String, CachedZipEntry>();
+        rightEntriesByPath = new TreeMap<String, CachedZipEntry>();
+        leftEntriesBySha = new HashMap<String, Map<String, CachedZipEntry>>();
+        rightEntriesBySha = new HashMap<String, Map<String, CachedZipEntry>>();
     }
 
     /**
     public void scan(DiffUnitProcessor unitProcessor, DiffPointProcessor pointProcessor) {
         try {
             log.debug("Starting to scan scope " + path);
-            loadEntries(isLeft, leftEntries);
-            loadEntries(isRight, rightEntries);
-            SortedSet<String> keySet = new TreeSet<String>();
-            keySet.addAll(leftEntries.keySet());
-            keySet.addAll(rightEntries.keySet());
-            for (String path : keySet) {
-                CachedZipEntry leftEntry = leftEntries.get(path);
-                CachedZipEntry rightEntry = rightEntries.get(path);
+
+            loadEntries(isLeft, leftEntriesByPath, leftEntriesBySha);
+            loadEntries(isRight, rightEntriesByPath, rightEntriesBySha);
+
+            SortedSet<String> pathSet = new TreeSet<String>();
+            pathSet.addAll(leftEntriesByPath.keySet());
+            pathSet.addAll(rightEntriesByPath.keySet());
+
+            Set<String> shaSet = new HashSet<String>();
+            shaSet.addAll(leftEntriesBySha.keySet());
+            shaSet.addAll(rightEntriesBySha.keySet());
+
+            for (String sha : shaSet) {
+                Map<String, CachedZipEntry> leftEntries = leftEntriesBySha.get(sha);
+                Map<String, CachedZipEntry> rightEntries = rightEntriesBySha.get(sha);
+                if (leftEntries != null && rightEntries != null) {
+                    for (String leftPath : leftEntries.keySet()) {
+                        if (!rightEntries.containsKey(leftPath)) {
+                            for (String rightPath : rightEntries.keySet()) {
+                                String leftDirName = StringUtils.substringBeforeLast(leftPath, "/");
+                                String rightDirName = StringUtils.substringBeforeLast(rightPath, "/");
+                                if (StringUtils.equals(leftDirName, rightDirName)) {
+                                    CachedZipEntry leftEntry = leftEntries.get(leftPath);
+                                    CachedZipEntry rightEntry = rightEntries.get(rightPath);
+                                    CachedZipDiffUnit diffUnit = new CachedZipDiffUnit(this, leftEntry, rightEntry);
+                                    DiffPoint diffPoint = new DiffPoint(diffUnit, "Renamed in right to " + rightPath);
+                                    pointProcessor.processDiffPoint(diffPoint);
+                                    pathSet.remove(leftEntry.getName());
+                                    pathSet.remove(rightEntry.getName());
+                                    leftEntry.cleanup();
+                                    rightEntry.cleanup();
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            for (String path : pathSet) {
+                CachedZipEntry leftEntry = leftEntriesByPath.get(path);
+                CachedZipEntry rightEntry = rightEntriesByPath.get(path);
                 CachedZipDiffUnit diffUnit = new CachedZipDiffUnit(this, leftEntry, rightEntry);
                 unitProcessor.processDiffUnit(diffUnit, pointProcessor);
                 if (leftEntry != null) {
                     rightEntry.cleanup();
                 }
             }
-            leftEntries.clear();
-            rightEntries.clear();
+            leftEntriesByPath.clear();
+            rightEntriesByPath.clear();
+            leftEntriesBySha.clear();
+            rightEntriesBySha.clear();
             log.debug("Completed scan of scope " + path);
         } finally {
             if (isLeft != null) {
         return path;
     }
 
-    private void loadEntries(InputStream is, Map<String, CachedZipEntry> cachedEntries) {
+    private void loadEntries(InputStream is, Map<String, CachedZipEntry> entriesByPath, Map<String, Map<String, CachedZipEntry>> entriesBySha) {
         ZipInputStream zis = new ZipInputStream(is);
         ZipEntry entry = null;
         boolean validEntry = false;
                     if (entry != null) {
                         String path = entry.getName();
                         CachedZipEntry cachedEntry = new CachedZipEntry(entry, zis);
-                        cachedEntries.put(path, cachedEntry);
+                        entriesByPath.put(path, cachedEntry);
+                        String sha = cachedEntry.getSha();
+                        if (sha != null) {
+                            if (!entriesBySha.containsKey(sha)) {
+                                Map<String, CachedZipEntry> map = new TreeMap<String, CachedZipEntry>();
+                                entriesBySha.put(sha, map);
+                            }
+                            entriesBySha.get(sha).put(path, cachedEntry);
+                        }
                     }
                 } catch (ZipException ze) {
                     if ("encrypted ZIP entry not supported".equals(ze.getMessage())) {
         private File tmpFile;
         private byte[] data;
         int length;
+        private String sha;
 
         public CachedZipEntry(ZipEntry ze, ZipInputStream zis) throws IOException {
             super(ze.getName());
             this.directory = ze.isDirectory();
             if (!this.directory) {
                 readAll(zis, ze);
+
+                // Compute the SHA-1 hex digest of the entry content so that entries can be
+                // matched by content. Directories are skipped, leaving sha null for them.
+                if (tmpFile != null) {
+                    // Content is available through tmpFile: hash it from disk, and always
+                    // release the file handle, even when hashing throws an IOException.
+                    InputStream is = new FileInputStream(tmpFile);
+                    try {
+                        sha = DigestUtils.shaHex(is);
+                    } finally {
+                        is.close();
+                    }
+                } else {
+                    // No temp file was set, so hash the in-memory data array instead.
+                    // NOTE(review): presumably readAll always populates data in this
+                    // case - confirm against readAll.
+                    sha = DigestUtils.shaHex(data);
+                }
             }
         }
 
             return directory;
         }
 
+        /**
+         * Returns the hex digest stored for this entry's content.
+         *
+         * @return the digest string held in the sha field, or null when none was set
+         */
+        public String getSha() {
+            return this.sha;
+        }
+
         private void readAll(ZipInputStream zis, ZipEntry entry) throws IOException {
             final int maxMemSize = 100000;
             long totalSize = entry.getSize();