Commits

Luke Plant committed d45f2ff

Implemented 'similar_lorem' replacer.

  • Participants
  • Parent commits 796d561

Comments (0)

Files changed (2)

anonymizer/base.py

         source = lambda: random.choice(data.UK_COUNTRIES)
         return self._get_allowed_value(source, field)
 
+    def lorem(self, field=None, val=None):
+        """
+        Returns lorem ipsum text, obtained via self._get_allowed_value.
+
+        field -- passed through unchanged to self._get_allowed_value.
+        val   -- optional original text. If provided, the lorem ipsum text
+                 will be the same length as the original text, with its line
+                 breaks (LF, CRLF or a lone CR) kept verbatim in the same
+                 places. If omitted, self.faker.lorem is used as the source
+                 directly.
+        """
+        if val is None:
+            source = self.faker.lorem
+        else:
+            # Local import: only this branch needs the regex module.
+            import re
+
+            def generate(length):
+                # Get lorem ipsum of a specific length. Chunks are gathered
+                # in a list and joined once, rather than growing a string
+                # with repeated concatenation.
+                chunks = []
+                collected = 0
+                while collected < length:
+                    chunk = self.faker.lorem()
+                    chunks.append(chunk)
+                    collected += len(chunk)
+                return "".join(chunks)[:length]
+
+            # We want to match the pattern of the text - linebreaks
+            # in the same places.
+            def source():
+                # The capturing group makes re.split keep the separators:
+                # even indices hold text, odd indices hold the line breaks.
+                # Splitting on "\n" alone would leave the "\r" of a CRLF
+                # pair inside the text, where it would be overwritten.
+                pieces = re.split(r"(\r\n|\r|\n)", val)
+                # Replace each bit of text with lorem ipsum of the same
+                # length, leaving the line breaks untouched.
+                pieces[::2] = [generate(len(text)) for text in pieces[::2]]
+                return "".join(pieces)
+        return self._get_allowed_value(source, field)
+
     ## Other attributes provided by 'Faker':
 
     # username
     # state
     # zip_code
     # company
-    # lorem
 
     def __getattr__(self, name):
         # we delegate most calls to faker, but add checks

anonymizer/replacers.py

 # (within two years) of the original value.
 # Both replacers accept 'obj' but do not use it; they delegate to the matching
 # anon.faker method, passing the field and original value through.
 similar_datetime = lambda anon, obj, field, val: anon.faker.datetime(field=field, val=val)
 similar_date = lambda anon, obj, field, val: anon.faker.date(field=field, val=val)
+
+# similar_lorem produces lorem ipsum text with the same length and same pattern
+# of linebreaks as the original. If the original often takes a standard form
+# (e.g. a single word 'yes' or 'no'), this could easily fail to hide the
+# original data.
+# It delegates to anon.faker.lorem, passing the field and original value
+# through; the 'obj' argument is accepted but not used.
+similar_lorem = lambda anon, obj, field, val: anon.faker.lorem(field=field, val=val)
+