Phillip Alday committed d0e974d

Added the ability to use unit suffixes (e.g. KB, MiB) in size specifications, and fixed a logic error where pretty_size() was called with a hard-coded base=2 instead of the requested base.

Comments (0)

Files changed (1)

 
 # map each file size base (10 or 2) to its unit suffixes and their size in bytes
 # i.e. KB vs KiB
-SIZE_BASES = {2:"i",10:""}
+SIZE_BASES = {
+    10:{
+        'TB' : 10**12,
+        'GB' : 10**9,
+        'MB' : 10**6,
+        'KB' : 10**3,
+    },
+    2:{
+        'TiB': 2**40,
+        'GiB': 2**30,
+        'MiB': 2**20,
+        'KiB': 2**10,
+    }
+}
 
 def main(argv=None):
     parser = argparse.ArgumentParser()
-    parser.add_argument('--size-only', metavar="SIZE", type=int,
-                        default=sys.maxint,
+    parser.add_argument('--size-only', metavar="SIZE", type=str,
+                        default=str(sys.maxint),
                         help="Only use size comparison on files "
                         "larger than SIZE")
     parser.add_argument('--use-hash', type=str, default="sha1",
                         help="List of hashes to be carried out in further passes"
                         "but only upon an initial match.")
     parser.add_argument('-b','--human-readable',metavar="BASE",
-                        type=int, default=0, nargs=1, choices=SIZE_BASES.keys(),
+                        type=int, default=0, choices=SIZE_BASES.keys(),
                         help="Make file sizes human readble in base BASE")
-    parser.add_argument('--max-size', type=int, default=sys.maxint,
+    parser.add_argument('--max-size', type=str, default=str(sys.maxint),
                         help="Ignore files larger than MAX_SIZE")
-    parser.add_argument('--min-size', type=int, default=0,
+    parser.add_argument('--min-size', type=str, default="0",
                         help="Ignore files smaller than MIN_SIZE")
     parser.add_argument('-v', '--verbose', action="store_true", default=False,
                         help="Display progress information on STDERR")
 
     args = parser.parse_args(argv)
     args.final_byte_check = False
+    args.size_only, args.max_size, args.min_size = map(size_to_int, [args.size_only, args.max_size, args.min_size])
     find_duplicates(args.path, args)
 
 def find_files(args, ext=None):
                     hashes.sort()
                 else:
                     if not opts.summary_only:
-                        print('Size: {size} : Size: {size}'.format(size=pretty_size(sz,base=2)))
+                        print('Size: {size} : Size: {size}'.format(size=pretty_size(sz,base=base)))
                         for f in fnames:
                             print(' ', f)
                         if opts.dupe_cost:
                             # when taking action -- this seems like it would be
                             # a complicated and dynamic affair
                             if opts.dupe_cost:
-                                print("Duplicated space: {}".format(pretty_size(sz * (len(vals)-1), base=2)))
+                                print("Duplicated space: {}".format(pretty_size(sz * (len(vals)-1), base=base)))
 
                             if opts.prompt_for_action:
                                 action_on_file_list(vals)
         print("Found {0} non-empty duplicate sets".format(distincts),
                 file=stderr)
     if opts.dupe_cost:
-        print("\nSpace cost of duplicates: {}".format(pretty_size(dupe_cost,base=2)), file=stderr)
-
+        print("\nSpace cost of duplicates: {}".format(pretty_size(dupe_cost,base=base)), file=stderr)
 
 def additional_tests(fnames, hashfncs, hashnames, ncheck_bytes):
     if len(hashfncs) == 0:
     elif base not in SIZE_BASES:
         raise ValueError("Invalid metric prefix base: {}".format(base))
     else:
-        if bytes > base**40:
-            size = "{0:.2f}T{1}B".format(bytes / base**40,SIZE_BASES[base])
-        elif bytes > base**30:
-            size = "{0:.2f}G{1}B".format(bytes / base**30,SIZE_BASES[base])
-        elif bytes > base**20:
-            size = "{0:.2f}M{1}B".format(bytes / base**20,SIZE_BASES[base])
-        elif bytes > base**10:
-            size = "{0:.2f}K{1}B".format(bytes / base**10,SIZE_BASES[base])
+        for suffix in sorted(SIZE_BASES[base], key=SIZE_BASES[base].get, reverse=True):
+            if bytes > SIZE_BASES[base][suffix]:
+                return "{0:.2f}{1}".format(bytes / SIZE_BASES[base][suffix], suffix)
         else:
-            size = "{0:.2f}B".format(bytes,SIZE_BASES[base])
+            # we can always fail back to non pretty printed output
+            return bytes
 
-        return size
+def size_to_int(size):
+    """Convert a size specification string to a number of bytes.
+
+    Accepts a plain non-negative integer ("4096") or a number followed by
+    one of the unit suffixes listed in SIZE_BASES ("10KB", "1.5 GiB").
+    Whitespace around the value, and between the number and the unit,
+    is ignored.
+
+    Returns:
+        The size in bytes as an int (fractional bytes are truncated).
+
+    Raises:
+        ValueError: if the suffix is not a known unit or the numeric part
+            cannot be parsed.
+    """
+    size = size.strip()
+
+    if size.isdigit():
+        return int(size)
+
+    # No known suffix is the tail of another one ("KiB" does not end in
+    # "KB"), so the lookup order does not matter.
+    for multipliers in SIZE_BASES.values():
+        for suffix, factor in multipliers.items():
+            if not size.endswith(suffix):
+                continue
+            # Strip exactly the suffix; the previous slice removed one
+            # character too many, turning "10KB" into 1 * 1000.
+            number = size[:-len(suffix)]
+            try:
+                # float() tolerates the optional space in "10 KB".
+                return int(float(number) * factor)
+            except ValueError:
+                raise ValueError("Invalid size specification: {}".format(size))
+
+    raise ValueError("Invalid Suffix on {}".format(size))
 
 # Script entry point: run main() and hand its return value to sys.exit().
 if __name__ == '__main__':
     sys.exit(main())
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.