Commits

Reshef Mann committed 28b5889

Initial commit

  • Participants

Comments (0)

Files changed (10)

MANIFEST.in

Empty file added.
+dictmapper
+=============
+
+dictmapper was created to help transform a sequence of Python dictionaries into a tabular format, and specifically to transform JSON documents so they can be exported in CSV format.
+
+Usage
+------
+
+Create a mapper: ::
+
+    from dictmapper import Mapper, Mapping
+
+    class UserMapper(Mapper):
+
+        user_id = Mapping('user_id')
+        email = Mapping('email')
+        Name = Mapping(lambda u: '%s %s' % (u['first_name'], u['last_name']), name='User Name')
+        nickname = Mapping('nickname', default='N/A')
+        street = Mapping('address/street')
+        city = Mapping('address/city')
+        joined_at = Mapping('joined_at', transform=lambda d: d.strftime('%Y-%m-%d'))
+
+Sample input: ::
+
+    users_docs = [
+        {
+            'user_id': '1000001',
+            'first_name': 'Test',
+            'last_name': 'User',
+            'email': 'user@test.com',
+            'address': {
+                'street': 'Example Road',
+                'city': 'Emerald City',
+            },
+            'joined_at': datetime.now(),
+        },
+        {
+            'user_id': '1000002',
+            'first_name': 'Example',
+            'last_name': 'Member',
+            'nickname': 'exampy',
+            'email': 'example@member.com',
+            'address': {
+                'street': 'Sample Road',
+                'city': 'Emerald City',
+            },
+            'joined_at': datetime.now(),
+        }
+    ]
+
+Output: ::
+    
+    >>> mapper = UserMapper()
+    >>> mapper.headers()
+    ['User id', 'Email', 'Name', 'Nickname', 'Street', 'City', 'Joined at']
+    >>> res = mapper.map(users_docs)
+    >>> res
+    [['1000001',
+      'user@test.com',
+      'Test User',
+      'N/A',
+      'Example Road',
+      'Emerald City',
+      '2012-03-18'],
+     ['1000002',
+      'example@member.com',
+      'Example Member',
+      'exampy',
+      'Sample Road',
+      'Emerald City',
+      '2012-03-18']]
+
+Export to csv
+--------------
+
+I recommend using the excellent `tablib`_: ::
+
+    import tablib
+    data = tablib.Dataset(*res, headers=mapper.headers())
+    data.csv
+
+
+.. _tablib : http://github.com/kennethreitz/tablib

dictmapper/__init__.py

# Explicit relative import: the implicit form ``from mapper import ...`` only
# resolves on Python 2 and raises ImportError on Python 3.
from .mapper import Mapping, Mapper

dictmapper/__init__.pyc

Binary file added.

dictmapper/mapper.py

+import inspect
+
+
class Mapping(object):
    """Describe how a single output value is extracted from an input dict.

    Exactly one source of data is expected:

    * ``query`` as a string: a ``/``-separated path of keys that is followed
      through nested dictionaries (``'address/city'``).
    * ``query`` as a callable: called with the whole input record.
    * ``static_value``: a constant returned for every record.

    ``default`` is returned (or called, when it is callable) whenever the
    lookup yields ``None``; ``transform`` is applied to every value that was
    actually found.  ``name`` is an optional label for the mapping.
    """

    # Incremented for every instance so that the declaration order of
    # mappings inside a class body can be reconstructed later.
    creation_counter = 0

    def __init__(self, query=None, name=None, static_value=None, default=None, transform=None):
        """Create a mapping.

        Raises:
            AttributeError: if both ``query`` and ``static_value`` are given.
        """
        self.creation_order = Mapping.creation_counter
        Mapping.creation_counter += 1
        self.name = name
        self.default = default
        self.transform = transform if transform else lambda x: x

        # Compare against None so that falsy constants (0, '', False) count
        # as a real static value.
        if query and static_value is not None:
            raise AttributeError('Define either query or static_value')

        self._query_func = None
        self.query_list = []
        self.query_list_len = 0
        if query:
            if callable(query):
                self._query_func = query
            else:
                self.query_list = query.split('/')
                self.query_list_len = len(self.query_list)

        self.static_value = static_value

    @staticmethod
    def _value_or_call(obj):
        """Return ``obj()`` when ``obj`` is callable, otherwise ``obj``."""
        return obj() if callable(obj) else obj

    def map(self, context):
        """Return the mapped value for the input record ``context``."""
        return self._do_mapping(context)

    def _lookup(self, context):
        """Follow ``query_list`` through nested dicts; None when not found."""
        if not self.query_list:
            return None

        current = context
        for key in self.query_list:
            getter = getattr(current, 'get', None)
            if getter is None:
                # The path runs through a value that is not dict-like.
                return None
            current = getter(key)
            if current is None:
                return None
        return current

    def _do_mapping(self, context):
        """Resolve the value for ``context`` and apply default/transform."""
        if self.static_value is not None:
            return self.static_value

        if self._query_func is not None:
            value = self._query_func(context)
        else:
            value = self._lookup(context)

        # Only None means "missing"; falsy values such as 0 or '' are real
        # data and must reach the transform.
        if value is None:
            return Mapping._value_or_call(self.default)
        return self.transform(value)
+
+
class MapperMeta(type):
    """Metaclass that collects the ``Mapping`` attributes of a class.

    Every class created through it receives ``mappings``, a list of
    ``(attribute_name, mapping)`` pairs ordered by ``creation_order``, and
    ``nb_mappings``, the length of that list.  Mappings that were created
    without a name get their attribute name assigned.
    """

    def __new__(meta, classname, bases, classDict):
        cls = type.__new__(meta, classname, bases, classDict)

        def is_mapping(member):
            return isinstance(member, Mapping)

        # getmembers() yields pairs sorted by attribute name; re-sort them by
        # the order in which the Mapping objects were instantiated.
        declared = inspect.getmembers(cls, is_mapping)
        declared.sort(key=lambda pair: pair[1].creation_order)
        cls.mappings = declared

        for attr_name, mapping in declared:
            if not mapping.name:
                mapping.name = attr_name

        cls.nb_mappings = len(declared)
        return cls
+
+
# Build the base class by calling the metaclass directly.  The Python 2-only
# ``__metaclass__`` class attribute is silently ignored by Python 3, which
# would leave subclasses without ``mappings``/``nb_mappings``; calling
# ``MapperMeta`` behaves the same on both major versions.
MapperBase = MapperMeta('MapperBase', (object,), {
    '__doc__': 'Root class whose subclasses are processed by MapperMeta.',
})
+
+
class Mapper(MapperBase):
    """Turn a sequence of dicts into rows using the class's ``mappings``."""

    def headers(self):
        """Return one title per mapping, derived from the attribute name.

        The attribute name is capitalized and its underscores are replaced
        by spaces (``joined_at`` becomes ``'Joined at'``).
        """
        titles = []
        for attr_name, _ in self.mappings:
            titles.append(attr_name.capitalize().replace('_', ' '))
        return titles

    def map(self, inp):
        """Map every record of ``inp`` to one output row.

        Each row is a list holding the value produced by every entry of
        ``self.mappings``, in that order.

        >>> class TestMapper(Mapper):
        ...     name = Mapping('name')
        ...     value = Mapping('value')
        >>> TestMapper().map([{'name': 'test name', 'value': 'test value'}])
        [['test name', 'test value']]
        """
        return [
            [mapping.map(record) for _, mapping in self.mappings]
            for record in inp
        ]

dictmapper/mapper.pyc

Binary file added.

dictmapper_tests.py

+import unittest
+from dictmapper import Mapper, Mapping
+
+
# One sample document shared by the test cases in this module; ``metadata``
# is nested so that a 'metadata/os' path has something to traverse.
_sample_document = {
    'name': 'test',
    'creator': 'Reshef Mann',
    'version': '1.0',
    'metadata': {'os': 'ubuntu'},
}
test_stream = [_sample_document]
+
+
class SimpleMapper(Mapper):
    """Mapper with two plain top-level key lookups."""
    name = Mapping('name')
    creator = Mapping('creator')


class TestSimpleMapping(unittest.TestCase):
    """Header generation and plain key lookup."""

    def setUp(self):
        self.mapper = SimpleMapper()

    def test_simple_mapping(self):
        res = self.mapper.map(test_stream)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual('Name', self.mapper.headers()[0])
        self.assertEqual('test', res[0][0])
        # Also cover the second declared column.
        self.assertEqual('Creator', self.mapper.headers()[1])
        self.assertEqual('Reshef Mann', res[0][1])
+
+
class TransformingMapper(Mapper):
    """Mapper whose ``name`` column is post-processed by a transform."""
    name = Mapping('name', transform=lambda v: 'Doc-%s' % v)
    creator = Mapping('creator')


class TestTransformingMapper(unittest.TestCase):
    """The transform callable is applied to the looked-up value."""

    def setUp(self):
        self.mapper = TransformingMapper()

    def test_transforming_mapper(self):
        res = self.mapper.map(test_stream)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual('Doc-test', res[0][0])
+
+
class HierarchicalMapper(Mapper):
    """Mapper that reads a value from a nested dictionary."""
    os = Mapping('metadata/os')


class TestHierarchicalMapper(unittest.TestCase):
    """A '/'-separated query is followed through nested dicts."""

    def setUp(self):
        self.mapper = HierarchicalMapper()

    def test_hierarchical_mapper(self):
        res = self.mapper.map(test_stream)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual('ubuntu', res[0][0])

dictmapper_tests.pyc

Binary file added.
+[egg_info]
+tag_build = dev
+tag_svn_revision = true
+from setuptools import setup, find_packages
+import sys, os
+
version = '0.1'

# Read the long description up front so the file handle is closed
# deterministically instead of being left open until garbage collection.
with open('README.rst') as readme:
    long_description = readme.read()

setup(name='dictmapper',
      version=version,
      description="Maps hierarchical dictionaries to flat structures",
      long_description=long_description,
      classifiers=['Development Status :: 4 - Beta', 'License :: OSI Approved :: MIT License'],
      keywords='json,csv,mapping,export,transform',
      author='Reshef Mann',
      author_email='reshef.mann@gmail.com',
      url='http://reshefmann.com',
      license='MIT',
      packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
      include_package_data=True,
      zip_safe=True,
      install_requires=[
          # -*- Extra requirements: -*-
      ],
      entry_points="""
      # -*- Entry points: -*-
      """,
      test_suite='nose.collector',
      # The setuptools keyword is ``tests_require``; the previous
      # ``test_requires`` spelling is not a recognised option.
      tests_require=['Nose'],
      )