Commits

Luke Plant committed e117b26

Initial import.

Comments (0)

Files changed (9)

+
+Version 0.1
+-----------
+
+Initial release
+
+
+include *.rst
+===================
+ Django Anonymizer
+===================
+
+This app aims to help you anonymize data in a database used for development.
+
+It is common practice in development to use a database that is very similar in
+content to the real data. The problem is that this can lead to having copies of
+sensitive customer data on development machines (and backups etc). This Django
+app helps by giving an easy and customizable way to anonymize data in your
+models.
+
+The basic method is to go through all the models that you specify, and generate
+fake data for all the fields specified. Introspection of the models will produce
+an anonymizer that will attempt to provide sensible fake data for each field,
+leaving you to fill in the gaps.
+
+Please note that the methods provided will not provide full anonymity. Even if
+you anonymize the names and other details of your customers, there may well be
+enough data to identify them. Relationships between records in the database are
+not altered, in order to preserve the characteristic structure of data in your
+application, but this may leave you open to information leaks which might not be
+acceptable for your data. This application **should** be good enough for simpler
+policies like 'remove all real telephone numbers from the database'.
+
+Usage:
+
+* Install using setup.py or pip/easy_install.
+
+* Add 'anonymizer' to your ``INSTALLED_APPS`` setting.
+
+* AUTOMATIC INTROSPECTION - YET TO BE IMPLEMENTED:
+
+  To create some stub files for your anonymizers, do::
+
+    ./manage.py create_anonymizers app_name1 [app_name2...]
+
+  This will create a file ``anonymizers.py`` in each of the apps you specify.
+  (It will not overwrite existing files).
+
+  The file will contain autogenerated classes that attempt to use appropriate
+  functions for generating fake data.
+
+* Edit the generated ``anonymizers.py`` files, filling out the details, and
+  adding any filtering. You can override any of the public methods defined in
+  ``anonymizer.base.Anonymizer`` in order to do filtering and other
+  customization.
+
+  The 'attributes' dictionary is the key attribute to edit. The keys
+  are the attribute names of attributes on the model that need to be set.
+  The values are callables that take the following arguments:
+
+  * The Anonymizer instance
+  * The object being edited.
+  * The field being edited
+  * The current value of the field.
+
+  The Anonymizer instance has an attribute 'faker' which is useful for
+  generating faked data.
+
+  An example Anonymizer for django.contrib.auth.models.User might look like
+  this::
+
+      from anonymizer import Anonymizer
+      from django.contrib.auth.models import User
+
+      class UserAnonymizer(Anonymizer):
+
+          model = User
+
+          attributes = {
+              'username':   lambda self, obj, field, val: self.faker.username(field=field),
+              'first_name': lambda self, obj, field, val: self.faker.first_name(field=field),
+              'last_name':  lambda self, obj, field, val: self.faker.last_name(field=field),
+              'email':      lambda self, obj, field, val: self.faker.email(field=field),
+          }
+
+          def alter_object(self, obj):
+              super(UserAnonymizer, self).alter_object(obj)
+              obj.set_unusable_password()
+
+* If you need to create anonymizers for apps that you do not control, you may
+  want to move the contents of the anonymizers.py file to an app that you **do**
+  control. It doesn't matter if the anonymizer classes are for models that do
+  not correspond to the applications they are contained in.
+
+  (For example, if you specify 'django.contrib.auth' as an app to anonymize, you
+  will probably want to move the contents of django/contrib/auth/anonymizers.py
+  into yourprojectapp/anonymizers.py)
+
+* To run the anonymizers, do::
+
+    ./manage.py anonymize_data
+
+  This will DESTRUCTIVELY UPDATE all your data. Make sure you have backups,
+  use at own risk, yada yada.

anonymizer/__init__.py

+from anonymizer.base import Anonymizer

anonymizer/base.py

+from django.db import transaction
+from django.db.utils import IntegrityError
+from faker import Faker
+
+class DjangoFaker(object):
+    """
+    Class that provides fake data, using Django specific knowledge to ensure
+    acceptable data for Django models.
+    """
+    faker = Faker()
+
+    def __init__(self):
+        self.init_values = {}
+
+    def _prep_init(self, field):
+        if field in self.init_values:
+            return
+
+        field_vals = set(x[0] for x in field.model._default_manager.values_list(field.name))
+        self.init_values[field] = field_vals
+
+
+    def get_allowed_value(self, source, field):
+        retval = source()
+
+        # Enforce unique.  Eensure we don't set the same values, as either
+        # any of the existing values, or any of the new ones we make up.
+        unique = getattr(field, 'unique', None)
+        if unique:
+            self._prep_init(field)
+            used = self.init_values[field]
+            for i in xrange(0, 10):
+                if retval in used:
+                    retval = source()
+                else:
+                    break
+
+            if retval in used:
+                raise Exception("Cannot generate unique data for field %s. Last value tried %s" % (field, retval))
+            used.add(retval)
+
+        # Enforce max_length
+        max_length = getattr(field, 'max_length', None)
+        if max_length is not None:
+            retval = retval[:max_length]
+
+        return retval
+
+    def __getattr__(self, name):
+        # we delegate all calls to faker, but add checks
+        def func(*args, **kwargs):
+            source = getattr(self.faker, name)
+            field = kwargs.get('field', None)
+            if field is not None:
+                return self.get_allowed_value(source, field)
+            else:
+                return source()
+        return func
+
+
+class Anonymizer(object):
+
+   model = None
+   # attributes is a dictionary of {attribute_name: replacer}, where replacer is
+   # a callable that takes as arguments this Anonymizer instance, the object to
+   # be altered, the field to be altered, and the current field value, and
+   # returns a replacement value.
+
+   # This signature is designed to be useful for making lambdas that call the
+   # 'faker' instance provided on this class, but it can be used with any
+   # function.
+
+   attributes = None
+
+   # To impose an order on Anonymizers within a module, this can be set - lower
+   # values are done first.
+   order = 0
+
+   faker = DjangoFaker()
+
+   def get_query_set(self):
+       """
+       Returns the QuerySet to be manipulated
+       """
+       if self.model is None:
+           raise Exception("'model' attribute must be set")
+       return self.model._default_manager.get_query_set().order_by('id')
+
+   def get_attributes(self):
+       if self.attributes is None:
+           raise Exception("'attributes' attribute must be set")
+       return self.attributes
+
+   def alter_object(self, obj):
+       """
+       Alters all the attributes in an individual object.
+
+       If it returns False, the object will not be saved
+       """
+       attributes = self.get_attributes()
+       for attname, replacer in attributes.items():
+           self.alter_object_attribute(obj, attname, replacer)
+
+   def alter_object_attribute(self, obj, attname, replacer):
+       """
+       Alters a single attribute in an object.
+       """
+       currentval = getattr(obj, attname)
+       field = obj._meta.get_field_by_name(attname)[0]
+       replacement = replacer(self, obj, field, currentval)
+       setattr(obj, attname, replacement)
+
+   def run(self):
+       for obj in self.get_query_set():
+           retval = self.alter_object(obj)
+           if retval is not False:
+               try:
+                   sid = transaction.savepoint()
+                   obj.save()
+                   transaction.savepoint_commit(sid)
+               except IntegrityError:
+                   transaction.savepoint_rollback(sid)
+                   from IPython.Shell import IPShellEmbed; IPShellEmbed([])()
+                   #raise
+

anonymizer/management/__init__.py

+

anonymizer/management/commands/__init__.py

Empty file added.

anonymizer/management/commands/anonymize_data.py

+"""
+anonymize_data command
+"""
+
+from django.core.exceptions import ImproperlyConfigured
+from django.core.management.base import AppCommand, CommandError
+from django.utils import importlib
+
+from anonymizer import Anonymizer
+
+class Command(AppCommand):
+
+    def handle_app(self, app, **options):
+
+        anonymizers_module = ".".join(app.__name__.split(".")[:-1] + ["anonymizers"])
+        mod = importlib.import_module(anonymizers_module)
+
+        anonymizers = []
+        for k, v in mod.__dict__.items():
+            is_anonymizer = False
+            if 'Anonymizer' in k:
+                is_anonymizer = True
+            try:
+                if issubclass(v, Anonymizer):
+                    is_anonymizer = True
+            except TypeError:
+                pass
+
+            if v is Anonymizer:
+                is_anonymizer = False
+
+            if k.startswith('_'):
+                is_anonymizer = False
+
+            if is_anonymizer:
+                anonymizers.append(v)
+
+        anonymizers.sort(key=lambda c:c.order)
+        for a in anonymizers:
+            a().run()
+
+#!/usr/bin/env python
+from setuptools import setup, find_packages
+import os
+
+
+def read(*rnames):
+    return open(os.path.join(os.path.dirname(__file__), *rnames)).read()
+
+
+setup(
+    name = "django-anonymizer",
+    version = '0.1',
+    packages = find_packages(),
+    include_package_data = True,
+
+    author = "Luke Plant",
+    author_email = "L.Plant.98@cantab.net",
+    url = "https://bitbucket.org/spookylukey/django-anonymizer/",
+    description = "App to anonymize data in Django models.",
+    long_description = (
+                        read('README.rst')
+                        + "\n\n" +
+                        read('CHANGES.rst')
+    ),
+    license = "MIT",
+    keywords = "django data database anonymize private",
+    classifiers = [
+        "Development Status :: 3 - Alpha",
+        "Environment :: Web Environment",
+        "Environment :: Console",
+        "Intended Audience :: Developers",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python",
+        "Framework :: Django",
+        "Topic :: Software Development :: Testing",
+        "Topic :: Database"
+        ]
+)