Commits

Luke Plant committed 2209fab

Added initial support for introspection and autogeneration of anonymizers.

  • Participants
  • Parent commits 9c0444b

Comments (0)

Files changed (3)

 
 * Add 'anonymizer' to your ``INSTALLED_APPS`` setting.
 
-* AUTOMATIC INTROSPECTION - YET TO BE IMPLEMENTED:
-
-  To create some stub files for your anonymizers, do::
+* To create some stub files for your anonymizers, do::
 
     ./manage.py create_anonymizers app_name1 [app_name2...]
 

File anonymizer/introspect.py

+import re
+
+from django.db.models import EmailField
+
+attribute_template = "        '%(attname)s': %(replacer)s,"  # one entry line of a generated ``attributes`` dict
+
+class_template = """
+class %(modelname)sAnonymizer(Anonymizer):
+
+    model = %(modelname)s
+
+    attributes = {
+%(attributes)s
+    }
+"""  # source text of one generated Anonymizer subclass; filled in by create_anonymizer()
+
+field_replacers = {  # keyed by field.get_internal_type(); value is replacer source text
+    'AutoField': None,  # None: get_replacer_for_field() returns None and no entry is generated
+    'ForeignKey': None,
+    'ManyToManyField': None,
+    'OneToOneField': None,
+    'DateField': '"date"',
+    'DateTimeField': '"datetime"',
+    'BooleanField': '"bool"',
+    'NullBooleanField': '"bool"',
+    'IntegerField': '"integer"',
+    'SmallIntegerField': '"small_integer"',
+    'PositiveIntegerField': '"positive_integer"',
+    'PositiveSmallIntegerField': '"positive_small_integer"',
+}
+
+# NB - order matters. 'address' is more generic so should be at the end.
+charfield_replacers = [  # (regex tested against field.attname, replacer source text); first hit wins
+    (r'(\b|_)full_name\d*', '"name"'),
+    (r'(\b|_)first_name\d*', '"first_name"'),
+    (r'(\b|_)last_name\d*', '"last_name"'),
+    (r'(\b|_)user_name\d*', '"username"'),
+    (r'(\b|_)username\d*', '"username"'),
+    (r'(\b|_)name\d*', '"name"'),
+    (r'(\b|_)email\d*', '"email"'),
+    (r'(\b|_)town\d*', '"city"'),
+    (r'(\b|_)city\d*', '"city"'),
+    (r'(\b|_)county\d*', '"uk_county"'),
+    (r'(\b|_)post_code\d*', '"uk_postcode"'),
+    (r'(\b|_)postcode\d*', '"uk_postcode"'),
+    (r'(\b|_)zip\d*', '"zip_code"'),
+    (r'(\b|_)zipcode\d*', '"zip_code"'),
+    (r'(\b|_)zip_code\d*', '"zip_code"'),
+    (r'(\b|_)telephone\d*', '"phonenumber"'),
+    (r'(\b|_)mobile\d*', '"phonenumber"'),
+    (r'(\b|_)tel\d*\b', '"phonenumber"'),
+    (r'(\b|_)state\d*\b', '"state"'),
+    (r'(\b|_)address\d*', '"full_address"'),
+]
+
+def get_replacer_for_field(field):
+    # Some obvious ones:
+    if isinstance(field, EmailField):
+        return '"email"'
+
+    field_type = field.get_internal_type()
+    if field_type == "CharField" or field_type == "TextField":
+        #from IPython.Shell import IPShellEmbed; IPShellEmbed([])()
+
+        # Guess by the name
+
+        # First, go for complete match
+        for pattern, result in charfield_replacers:
+            if re.match(pattern + "$", field.attname):
+                return result
+
+        # Then, go for a partial match.
+        for pattern, result in charfield_replacers:
+            if re.search(pattern, field.attname):
+                return result
+
+        # Nothing matched.
+        if field_type == "TextField":
+            return '"lorem"'
+
+        # Just try some random chars
+        max_length = field.max_length
+        return "lambda self, obj, field, val: self.faker.varchar(%d, field=field)" % max_length
+
+
+    try:
+        r = field_replacers[field_type]
+    except KeyError:
+        r = "UNKNOWN_FIELD"
+        from IPython.Shell import IPShellEmbed; IPShellEmbed([])()
+
+
+    if r is None:
+        return None
+
+    #if field.attname == 'youth_work_declined':
+    #    from IPython.Shell import IPShellEmbed; IPShellEmbed([])()
+
+
+    return r
+
+def create_anonymizer(model):
+    replacers = []
+    for f in model._meta.fields:
+        replacer = get_replacer_for_field(f)
+        if replacer is not None:
+            replacers.append((f.attname, replacer))
+
+    attributes = "\n".join(attribute_template % {'attname': att,
+                                                 'replacer': replacer }
+                           for att, replacer in replacers)
+    return class_template % {'modelname':model.__name__,
+                             'attributes': attributes }
+

File anonymizer/management/commands/create_anonymizers.py

+"""
+create_anonymizers command
+"""
+from __future__ import with_statement
+
+import sys
+import os.path
+
+from django.db.models.loading import get_models
+from django.core.exceptions import ImproperlyConfigured
+from django.core.management.base import AppCommand, CommandError
+from django.utils import importlib
+
+from anonymizer import Anonymizer
+from anonymizer import introspect
+
+class Command(AppCommand):
+
+    def handle_app(self, app, **options):
+
+        anonymizers_module_parent = ".".join(app.__name__.split(".")[:-1])
+        mod = importlib.import_module(anonymizers_module_parent)
+
+        parent, discard = os.path.split(mod.__file__)  # lop off __init__.pyc
+        path = os.path.join(parent, 'anonymizers.py') # and add anonymizers.
+
+        if os.path.exists(path):
+            raise CommandError("File '%s' already exists." % path)
+
+        model_names = []
+        imports = []
+        output = []
+        output.append("")
+        imports.append("from anonymizer import Anonymizer")
+        for model in get_models(app):
+            model_names.append(model.__name__)
+            output.append(introspect.create_anonymizer(model))
+
+        imports.insert(0, "from %s import %s" % (app.__name__, ", ".join(model_names)))
+
+        with open(path, "w") as fd:
+            fd.write("\n".join(imports) + "\n".join(output))