Commits

Henrik Tunedal committed c151d75

Improved recognition of licenses.

Comments (0)

Files changed (1)

 source_root = "/data/misc/fdroidbuild"
 metadata_root = "/home/henrik/src/fdroidserver/metadata"
 
-import os, subprocess, argparse, shutil
+import os, subprocess, argparse, shutil, sys, re
 
 
 ##############################################
     info = os.stat(filename)
     return info.st_size < 512
 
-def grep_snippets(filename, snippets):
+def grep_snippets(filename, snippets, regex=False):
+    max_distance = 800
+    if not isinstance(snippets, list):
+        raise ValueError, "snippets should be a list"
     data = open(filename, "r").read(8192)
-    pos = 0
-    for snippet in snippets:
-        match = data.find(snippet, pos)
-        if match == -1:
-            return False
-        pos = match
+    if regex:
+        remaining = data
+        for snippet in snippets:
+            match = re.search(snippet, remaining)
+            if not match or match.start() > max_distance:
+                return False
+            remaining = remaining[match.end():]
+    else:
+        pos = 0
+        for snippet in snippets:
+            match = data.find(snippet, pos)
+            if match == -1 or match > max_distance:
+                return False
+            pos = match
     return True
 
 def file_is_gplv2(filename):
     return grep_snippets(
         filename,
-        ["free software",
-         "GNU General Public License version 2"])
+        ["This program is free software",
+         "under the terms of the GNU General Public License",
+         "version 2"])
 
 def file_is_gplv2later(filename):
     return grep_snippets(
         filename,
         ["the terms of the GNU General Public License",
-         "Free Software Foundation; either version 2",
-         "(at your option) any later version"])
+         "Free Software Foundation[;,] either version 2",
+         "\(at your option\) any later version"],
+        True)
 
 def file_is_gplv3(filename, or_later=False):
-    data = open(filename, "r").read(8192)
-    start = data.find("under the terms of the GNU General Public License")
-    if start < 0: return False
-    end = data.find("You should have received a copy of the GNU General", start)
-    if end < 0 or start - end > 1000: return False
-    if or_later and data.find("your option) any later vers", start, end) == -1:
-        return False
-    return data.find("Free Software Foundation, either version 3 of the Lice",
-                     start, end) != -1
+    return grep_snippets(
+        filename,
+        ["the terms of the GNU General Public License",
+         "Free Software",
+         "version 3"])
 
 def file_is_gplv3later(filename):
-    return file_is_gplv3(filename, True)
+    return grep_snippets(
+        filename,
+        ["the terms of the GNU General Public License",
+         "Free Software",
+         "either version 3 of the License",
+         "or (at your option) any later"])
 
 def file_is_apache2(filename):
     data = open(filename, "r").read(8192)
-    return data.find("Licensed under the Apache License, Version 2.") != -1
+    return (data.find("Licensed under the Apache License, Version 2.") != -1
+            or data.find("http://www.apache.org/licenses/LICENSE-2.0") != -1)
+
+# Not really a license, so it's impossible to find all the ways people
+# try to put things in the public domain.
+def file_is_public_domain(filename):
+    data = open(filename, "r").read(16384)
+    for s in ["Not copyrighted -- provided to the public domain",
+              "I am placing this code in the Public Domain.",
+              "This file has been placed in the public domain by the authors.",
+              "This script is public domain, no copyright.",
+              "This code is explicitly placed into the public domain.",
+              "This file is put in the public domain."]:
+        if data.find(s) != -1:
+            return True
+    else:
+        return False
+
+# XML files can probably be ignored, assuming that no Android developer
+# is crazy enough to code in XSLT.
+def file_is_xml(filename):
+    return filename.endswith(".xml") and grep_snippets(filename, ["<?xml "])
 
 # Determine if a directory looks like the main project directory with
 # the customary layout.
              ("GPLv3+", file_is_gplv3later),
              ("GPLv3", file_is_gplv3),
              ("Apache2", file_is_apache2),
+             ("public domain", file_is_public_domain),
+             ("XML", file_is_xml),
              ("tiny", file_is_tiny)]
     results = {}
     for root, dirs, files in walk_package_dirs(package_id, args.apps):
             res = check_package(meta["package id"]).items()
             for test,files in sorted(res,key=lambda i: len(i[1]),reverse=True):
                 if files:
-                    print "  %-10s %i files" % (test + ":", len(files))
+                    print "  %-14s %i files" % (test + ":", len(files))
             print
 
 if __name__ == "__main__":
     parser.add_argument("packages", nargs="*", metavar="PACKAGE",
                       help='package to process (all packages if not specified)')
     args = parser.parse_args()
-    main()
+    try:
+        main()
+    except KeyboardInterrupt:
+        print >>sys.stderr, "Aborted."