Source

commonlib / serialization.py

Full commit
"""All things serialization.

This module borrows heavily from the pickle, copy, and json modules.

"""

# Names exported by a star-import of this module.
__all__ = [
    # base class for serializable objects
    'Serializable',
    # serializer classes
    'Serializer',
    'XMLSerializer',
    # module-level helpers
    'serialize',
    'unserialize',
]

import datetime
import os
import sys
import warnings
from abc import ABCMeta, abstractmethod
from json import JSONDecoder, JSONEncoder
from xml.etree import ElementTree

try:
    from copyreg import dispatch_table
except ImportError:  # Python 2 spells the module "copy_reg"
    from copy_reg import dispatch_table


# ``basestring`` and ``long`` exist only on Python 2; referencing them
# unconditionally makes the module fail with a NameError on Python 3.
try:
    _string_type, _integer_types = basestring, (int, long)
except NameError:  # Python 3
    _string_type, _integer_types = str, (int,)

# Types listed as "atomic".  On Python 2 this is exactly
# (dict, list, tuple, basestring, int, long, float, bool, NoneType).
ATOMIC_TYPES = (dict, list, tuple, _string_type) + _integer_types + (
                float, bool, type(None))


class SerializableMeta(type):
    """Metaclass that gives classes a serializer registry API.

    Classes built with this metaclass are expected to provide a
    look_up_serializer(name, default) classmethod that returns a
    (serializer, matched_class) pair.

    """

    def register(cls, serializer=None, overwrite=False):
        """Add a serializer to the registry of cls.

        May be called directly with a serializer, used as a bare
        decorator (``@SomeClass.register``), or used as a decorator
        factory (``@SomeClass.register(overwrite=True)``).

        The serializer is stored under its NAME attribute in the
        ``_SERIALIZERS`` dict owned by cls itself; that dict is created
        if cls does not define one, so a subclass never writes into a
        registry inherited from a base class.

        Raises TypeError if the serializer has no (or an empty) NAME.
        Raises KeyError if the name is already in the registry of cls
        and overwrite is false.  A warning is issued whenever the new
        entry hides or replaces one found through
        cls.look_up_serializer().

        Returns the serializer, so the decorator forms leave the
        decorated object unchanged.

        """
        if serializer is None:
            # used as a decorator factory
            return lambda serializer: cls.register(serializer, overwrite)

        name = getattr(serializer, 'NAME', None)
        if not name:
            raise TypeError("Name is empty")

        # Only the registry owned by cls itself is modified.
        if '_SERIALIZERS' not in vars(cls):
            cls._SERIALIZERS = {}
        registry = vars(cls)['_SERIALIZERS']

        if name in registry and not overwrite:
            raise KeyError("Already registered: {0}".format(name))

        # A non-None default keeps the lookup from raising KeyError.
        matchedclass = cls.look_up_serializer(name, 'ignored')[1]
        if matchedclass is not None:
            msg = "Overwriting serializer in registry: {0} ({1})"
            warnings.warn(msg.format(name, matchedclass.__name__))

        registry[name] = serializer
        return serializer


class Serializable(object):
    """An object that can be serialized.

    Serializers are looked up by name in the ``_SERIALIZERS`` dict of
    each class along the MRO (see look_up_serializer()).  Subclasses
    are expected to implement __getstate__() and __setstate__().

    """
    # Python 2 metaclass hook.
    __metaclass__ = SerializableMeta

    # registry owned by this class: serializer name -> serializer
    _SERIALIZERS = {}

    @classmethod
    def look_up_serializer(cls, name, default=None):
        """Return (serializer, matched_class) for the named serializer.

        The MRO of the serializable is traversed to look in the registry
        of each class for the named serializer.  The class containing
        that matching registry is the one returned as matched_class.
        Only a registry defined on a class itself is consulted for that
        class, so an inherited registry is attributed to the class that
        actually owns it.

        If no serializer is found, a KeyError ensues.  However, a
        default may be passed as a second argument.  That value will be
        returned (paired with None) instead of raising an exception.
        Passing None as the default is the same as passing no default.

        """
        for klass in cls.__mro__:
            if not issubclass(klass, Serializable):
                # skip over any non-serializable base classes
                continue
            registry = vars(klass).get('_SERIALIZERS')
            if registry is not None and name in registry:
                return registry[name], klass

        if default is None:
            raise KeyError(name)
        return default, None

    def __getstate__(self):
        """Return the state of the object; subclasses must override."""
        raise NotImplementedError

    def __setstate__(self, state):
        """Restore the object from state; subclasses must override.

        The state argument is required because __setstate__ is always
        called with the state to restore.

        """
        raise NotImplementedError


########################
# serializers

class Serializer(object):
    """Something that can encode/decode Python objects.

    This borrows from the pickle/json modules.

    Subclasses set NAME and implement decode(), raw_decode() and
    encode().

    """
    # Python 2 metaclass hook; Python 3 ignores this attribute.
    __metaclass__ = ABCMeta

    # the name under which the serializer is registered
    NAME = None

    def serialize(self, obj):
        """Return the encoded state of obj.

        Like pickle, the state is taken from obj.__getstate__(), so obj
        must provide that method.  The state is then passed to encode().

        """
        state = obj.__getstate__()
        return self.encode(state)

    def unserialize(self, serialized_obj):
        """Return the decoded form of serialized_obj.

        Counterpart of serialize(); delegates to decode().

        """
        return self.decode(serialized_obj)

    @abstractmethod
    def decode(self, serialized_obj):
        """Return the Python representation of the serialized object."""

    @abstractmethod
    def raw_decode(self, serialized_obj):
        """Return the Python representation of the serialized object.

        This can be used to decode an object with extraneous data.

        """

    def default(self, obj):
        """Fallback called by encode() when obj is not a familiar type.

        Always raises TypeError naming the offending object.

        """
        raise TypeError("{0!r} is not serializable".format(obj))

    @abstractmethod
    def encode(self, obj):
        """Return a serialized object derived from the argument."""

    def iterencode(self, obj):
        """Return the encoded object, line by line, if possible.

        The base implementation always raises TypeError.

        """
        raise TypeError("Can't be encoded into a line format")


class JSONSerializer(Serializer):
    """Serializer that delegates to JSONDecoder and JSONEncoder.

    A fresh decoder or encoder is created for every call.

    """

    NAME = 'json'

    def encode(self, obj):
        """Return obj as encoded by a new JSONEncoder."""
        encoder = JSONEncoder()
        return encoder.encode(obj)

    def iterencode(self, obj):
        """Return whatever JSONEncoder.iterencode() yields for obj."""
        encoder = JSONEncoder()
        return encoder.iterencode(obj)

    def decode(self, serialized_obj):
        """Return serialized_obj as decoded by a new JSONDecoder."""
        decoder = JSONDecoder()
        return decoder.decode(serialized_obj)

    def raw_decode(self, serialized_obj):
        """Return the result of JSONDecoder.raw_decode()."""
        decoder = JSONDecoder()
        return decoder.raw_decode(serialized_obj)


class XMLSerializer(Serializer):
    """Placeholder XML serializer; every operation is unimplemented."""

    NAME = 'xml'

    def encode(self, obj):
        """Not implemented yet."""
        raise NotImplementedError

    def iterencode(self, obj):
        """Not implemented yet."""
        raise NotImplementedError

    def decode(self, serialized_obj):
        """Not implemented yet."""
        raise NotImplementedError

    def raw_decode(self, serialized_obj):
        """Not implemented yet."""
        raise NotImplementedError


########################
# module functions

def _atomic_getter(obj):
    """Return obj unchanged; an atomic value is used as its own state."""
    return obj

# Builtin types grouped by kind.  Types that exist on only one major
# Python version are added in the version check below.

_ATOMIC_TYPES = [
        bool,
        bytes,
        complex,  # XXX not always available?
        type(Ellipsis),
        float,
        int,
        type(None),
        str,
        ]

_CONTAINER_TYPES = [
        bytearray,
        dict,
        frozenset,
        list,
        set,
        tuple,
        ]

_ITERATOR_TYPES = [
        enumerate,
        slice,
        ]

_OTHER_TYPES = [
        classmethod,
        property,
        staticmethod,
        super,
        type, # classes
        ]


# sys.version_info is a tuple, so compare its major component rather
# than the tuple itself against an int.
if sys.version_info[0] < 3:
    # Py2-only builtin types, each added to exactly one list
    _ATOMIC_TYPES.extend([long, unicode])
    _CONTAINER_TYPES.append(buffer)
    _ITERATOR_TYPES.append(xrange)
    _OTHER_TYPES.insert(1, file)
else:
    _ITERATOR_TYPES.append(range)


# Default registry for get_state(): maps a type to the callable that
# returns the state of its instances.  Atomic values are their own state.
_getter_registry = dict.fromkeys(_ATOMIC_TYPES, _atomic_getter)


def get_state(obj, names=None, registry=None):
    """Return the state of the object as pickle.dump() would.

    The state is a dictionary with string keys and Python object values.
    Those values should be the actual values and not the states of those
    values.

    The registry maps a type to a callable that returns the state of an
    instance; it is consulted first.  When it is None the module-level
    _getter_registry is used (resolved at call time).  For other types
    the state is the third item of the value returned by, in order,
    a copyreg dispatch_table entry, obj.__reduce_ex__(2), or
    obj.__reduce__().  None is returned when that value carries no
    state.  For some objects (e.g. those using __slots__) the state is
    a (dict_state, slot_state) pair rather than a dictionary.

    The names argument is currently unused.

    Raises TypeError if the object offers no way to be reduced.

    """
    # borrows heavily from copy.deepcopy()
    # XXX incorporate memo?

    if registry is None:
        registry = _getter_registry

    cls = type(obj)

    # put the common case in the registry
    getter = registry.get(cls)
    if getter is not None:
        return getter(obj)

    reductor = dispatch_table.get(cls)
    if reductor is not None:
        # dispatch_table reductors take the object itself
        rv = reductor(obj)
    else:
        reductor = getattr(obj, "__reduce_ex__", None)
        if reductor is not None:
            rv = reductor(2)
        else:
            reductor = getattr(obj, "__reduce__", None)
            if reductor is None:
                raise TypeError(
                        "cannot get the state of object of type %s" % cls)
            rv = reductor()

    # A reduce value is either a string (a global name) or a tuple of
    # (callable, args[, state[, listitems[, dictitems]]]).
    if isinstance(rv, str) or len(rv) < 3:
        return None
    return rv[2]


def to_state(obj, regsitry=None):
    """Not implemented yet.

    Meant to be essentially a copy of object.__reduce__().

    """
    raise NotImplementedError()


def set_state(obj, state):
    """Set the object's state, as pickle.load() would.

    The state is a dictionary with string keys and Python object values.
    Those values should be the actual values and not the states of those
    values.  It may also be a (dict_state, slot_state) pair, in which
    case the first part updates obj.__dict__ and the second is applied
    with setattr().

    If obj defines __setstate__(), the state is handed to it and nothing
    else is done.

    At this point __new__() has been called already to instantiate obj.
    Also, __init__() has been called if __newinitargs__() is defined.
    See from_state().

    """
    #Mostly borrowed from pickle._Unpickler.load_build().

    setstate = getattr(obj, '__setstate__', None)
    if setstate is not None:
        # the object restores itself; do not also touch its __dict__
        setstate(state)
        return

    slotstate = None
    if isinstance(state, tuple) and len(state) == 2:
        state, slotstate = state

    if state:
        try:
            intern_key = sys.intern
        except AttributeError:  # Python 2: intern() is a builtin
            intern_key = intern
        obj_dict = obj.__dict__
        for k, v in state.items():
            # intern attribute names, as the unpickler does
            if type(k) is str:
                obj_dict[intern_key(k)] = v
            else:
                obj_dict[k] = v

    if slotstate:
        for k, v in slotstate.items():
            setattr(obj, k, v)


def from_state(cls, args=(), state={}, itemlist=(), sequence=()):
    """Not implemented yet; always raises NotImplementedError."""
    raise NotImplementedError()


def serialize(obj, format=None, serializer=Serializer):
    """Return the object in the requested serial format.

    If a format is passed, the serializer registered under that name is
    looked up through obj.look_up_serializer() (falling back to
    Serializable.look_up_serializer() for objects without one), with
    the serializer argument as the default when the name is unknown.

    If no format is passed, serializer is used.

    The serializer may be a class or an instance; a class is
    instantiated with no arguments before its serialize() method is
    called with obj.

    """
    if format is not None:
        finder = getattr(obj, "look_up_serializer",
                         Serializable.look_up_serializer)
        # the lookup returns (serializer, matched_class)
        serializer = finder(format, serializer)[0]
    if isinstance(serializer, type):
        serializer = serializer()
    return serializer.serialize(obj)


def unserialize(obj, format=None, serializer=Serializer):
    """Return the object, unserialized from the requested format.

    If a format is passed, the serializer registered under that name is
    looked up through obj.look_up_serializer() (falling back to
    Serializable.look_up_serializer() for objects without one), with
    the serializer argument as the default when the name is unknown.

    If no format is passed, serializer is used.

    The serializer may be a class or an instance; a class is
    instantiated with no arguments.  Its unserialize() method is used
    when it has one, otherwise its decode() method.

    """
    if format is not None:
        finder = getattr(obj, "look_up_serializer",
                         Serializable.look_up_serializer)
        # the lookup returns (serializer, matched_class)
        serializer = finder(format, serializer)[0]
    if isinstance(serializer, type):
        serializer = serializer()

    decode = getattr(serializer, "unserialize", None)
    if decode is None:
        decode = serializer.decode
    return decode(obj)


def as_jsonable(obj):
    """Return a copy of obj reduced to dicts, lists and atomic values.

    Dictionaries are copied with their values converted; lists and
    tuples become lists of converted items.  Any other object is
    replaced by the conversion of its get_state() value, unless
    get_state() returns the object itself, in which case it is returned
    as is.

    The object and its state are never modified; get_state() may return
    the object's own attribute dictionary, so updating it in place
    would alter the object.

    Reference cycles are not detected.

    """
    # use the pickle protocol
    # XXX loses 4 of the 5 object.__reduce__() values
    if isinstance(obj, dict):
        return dict((key, as_jsonable(value)) for key, value in obj.items())
    if isinstance(obj, (list, tuple)):
        return [as_jsonable(item) for item in obj]

    state = get_state(obj)
    if state is obj:
        # the object is its own state: nothing further to convert
        return obj
    return as_jsonable(state)