1. Cat's Eye Technologies
  2. Dipple

Commits

Cat's Eye Technologies  committed 807b62c

Add --move-to option to find-unique. Increase buffer size.

  • Participants
  • Parent commits 7c129de
  • Branches default

Comments (0)

Files changed (4)

File python/find-dups

View file
  • Ignore whitespace
     file = open(filename, "r")
     eof = False
     while not eof:
-        data = file.read(128)
+        data = file.read(1024)
         if data:
             hash.update(data)
         else:

File python/find-unique

View file
  • Ignore whitespace
 import os
 import re
 import sys
+from optparse import OptionParser
 
 
 def load_index(filename, hashindex):
     file = open(filename, "r")
     eof = False
     while not eof:
-        data = file.read(128)
+        data = file.read(1024)
         if data:
             hash.update(data)
         else:
 ### MAIN ###
 
 def main(argv):
-    dir_a = os.path.normpath(argv[1])
+    parser = OptionParser()
+    parser.add_option("--move-to",
+                      dest="move_to",
+                      default=None,
+                      help="move unique files to this directory "
+                           "(DOES NOT CHECK THAT THE FILE BASENAMES "
+                           "ARE UNIQUE)")
+
+    (options, args) = parser.parse_args()
+    dir_a = os.path.normpath(args[0])
     print "traversing %s..." % dir_a
     a = {}
     build_hashmap(dir_a, a)
 
     b = {}
-    for arg in argv[2:]:
+    for arg in args[1:]:
         dest = os.path.normpath(arg)
         if os.path.isdir(dest):
             print "traversing %s..." % dest
         if hash not in b:
             for filename in a[hash]:
                 print filename
+                if options.move_to is not None:
+                    basename = os.path.basename(filename)
+                    destname = os.path.join(options.move_to, basename)
+                    print "renaming to %s..." % destname
+                    os.rename(filename, destname)
+
 
 if __name__ == '__main__':
     main(sys.argv)

File python/find-unique-and-move

  • Ignore whitespace
-#!/usr/bin/env python
-
-"""
-Find unique files and move them into b.
-
-Usage: find-unique a b
-
-List all files in a which are not to be found anywhere (under any name) in b.
-
-a may be somewhere inside b.  a will not be traversed during b traversal.
-
-DOES NOT CHECK THAT THE FILE BASENAMES ARE UNIQUE.
-
-"""
-
-import hashlib
-import os
-import sys
-
-
-def md5(filename):
-    """Compute and return the MD5 hash of the named file.
-
-    """
-    hash = hashlib.md5()
-    file = open(filename, "r")
-    eof = False
-    while not eof:
-        data = file.read(128)
-        if data:
-            hash.update(data)
-        else:
-            eof = True
-    file.close()
-    return hash.hexdigest()
-
-
-def build_hashmap(dir, exclude=None):
-    hashmap = {}
-    for root, dirs, files in os.walk(dir):
-        if os.path.normpath(root) == exclude:
-            print "(skipping %s)" % exclude
-            dirs[:] = []
-        else:
-            for filename in files:
-                full = os.path.normpath(os.path.join(root, filename))
-                try:
-                    hash = md5(full)
-                    hashmap.setdefault(hash, []).append(full)
-                except IOError as e:
-                    print str(e)
-    return hashmap
-
-
-### MAIN ###
-
-def main(argv):
-    dir_a = os.path.normpath(argv[1])
-    dir_b = os.path.normpath(argv[2])
-    print "traversing %s..." % dir_a
-    a = build_hashmap(dir_a)
-    print "traversing %s..." % dir_b
-    b = build_hashmap(dir_b, exclude=dir_a)
-    for hash in a:
-        if hash not in b:
-            for filename in a[hash]:
-                basename = os.path.basename(filename)
-                destname = os.path.join(dir_b, basename)
-                print "renaming %s to %s" % (filename, destname)
-                os.rename(filename, destname)
-
-if __name__ == '__main__':
-    main(sys.argv)

File python/mk-dups-index

View file
  • Ignore whitespace
     file = open(filename, "r")
     eof = False
     while not eof:
-        data = file.read(128)
+        data = file.read(1024)
         if data:
             hash.update(data)
         else: