Commits

Andy Mikhailenko  committed 0af7eed

Started heavy refactoring (issue #22: keep Document light and simple).

  • Participants
  • Parent commits 79c48c8

Comments (0)

Files changed (6)

 # tests coverage, b) sphinx and setup.py properly import the version from a
 # single place so that I don't have to edit it always here and there.
 
-version = '0.27.2'
+version = '0.28.0'

File doqu/document_base.py

 #from backend import BaseStorage
 import validators
 from utils import camel_case_to_underscores
-from utils.data_structures import DotDict, ProxyDict
+from utils.data_structures import DotDict, ProxyDict, ReprMixin
 
 
 __all__ = ['Document', 'Many']
 
 
+RECURSIVE_RELATION_NAME = 'self'
+
+
 log = logging.getLogger(__name__)
 
 
         return type.__new__(cls, name, bases, attrs)
 
 
-class Document(DotDict):
-    """
-    Base class for document schemata.
+class Document(ReprMixin, DotDict):
 
-    Wrapper for a record with predefined metadata.
-
-    Usage::
-
-        >>> from doqu import Document
-        >>> from doqu.validators import AnyOf
-
-        >>> class Note(Document):
-        ...     structure = {
-        ...         'text': unicode,
-        ...         'is_note': bool,
-        ...     }
-        ...     defaults = {
-        ...         'is_note': True,
-        ...     }
-        ...     validators = {
-        ...         'is_note': [AnyOf([True])],
-        ...     }
-        ...
-        ...     def __unicode__(self):
-        ...         return u'{text}'.format(**self)
-
-    To save model instances and retrieve them you will want a storage::
-
-        >>> from doqu import get_db
-
-        >>> db = get_db(backend='doqu.ext.tokyo_tyrant', port=1983)
-
-        # and another one, just for testing (yep, the real storage is the same)
-        >>> other_db = get_db(backend='doqu.ext.tokyo_tyrant', port=1983)
-
-        # let's make sure the storage is empty
-        >>> db.clear()
-
-    See documentation on methods for more details.
-    """
     __metaclass__ = DocumentMetaclass
 
     #--------------------+
     #--------------------+
 
     def __eq__(self, other):
-        """
-        # unsaved instances are never equal
-        >>> Note() == Note()
-        False
-        >>> Note(text='foo') == Note(text='bar')
-        False
-
-        # saved instances are equal if they have the same key in same storage
-        # even if their data differs
-        >>> note1 = Note(text='foo')
-        >>> note1.save(db)
-        u'1'
-        >>> note1.text = 'quux'
-        >>> note1_retrieved = db.get(Note, note1.pk)
-        >>> note1 == note1_retrieved
-        True
-
-        # saved instances are different if they have different keys
-        >>> note2 = Note(text='bar')
-        >>> note2.save(db)
-        u'2'
-        >>> note1 == note2
-        False
-
-        # saved instances are different if they have different storages
-        # even if their keys are the same
-        >>> note2.save_as(note1.pk, other_db)
-        <Note bar>
-        >>> note1 == note2
-        False
-
-        """
         if not other:
             return False
         if not hasattr(other, '_saved_state'):
             value = self.meta.get_item_processors[key](value)
 
         # handle references to other documents    # XXX add support for nested structure?
-        ref_doc_class = self._get_related_document_class(key)
+        ref_doc_class = _get_related_document_class(self, key)
         if ref_doc_class:
-            """
-
-            if value and not isinstance(value, Document):
-                if not self._saved_state:
-                    raise RuntimeError(
-                        'Cannot resolve lazy reference {cls}.{name} {value} to'
-                        ' {ref}: storage is not defined'.format(
-                        cls=self.__class__.__name__, name=key,
-                        value=repr(value), ref=ref_model.__name__))
-                # retrieve the record and replace the PK in the data dictionary
-                value = self._saved_state.storage.get(ref_model, value)
-            """
-            value = self._get_document_by_ref(key, value)
+            value = _get_document_by_ref(self, key, value)
 
             # FIXME changes internal state!!! bad, bad, baaad
             # we need to cache the instances but keep PKs intact.
 
         self._data = dict.fromkeys(self.meta.structure)  # None per default
 
-#        errors = []
         for key, value in kw.iteritems():
             # this will validate the values against structure (if any) and
             # custom validators; will raise KeyError or ValidationError
                 if self.meta.break_on_invalid_incoming_data:
                     raise
                 log.warn(e)
-#                errors.append(key)
-
-        '''
-
-        if self.meta.structure:
-            self._data = dict((k, kw.pop(k, None))
-                                     for k in self.meta.structure)
-            if kw:
-                raise ValidationError('Properties do not fit structure: %s'
-                                      % ', '.join(kw.keys()))
-            self.validate()
-        else:
-            self._data = kw.copy()
-        '''
 
         # add backward relation descriptors to related classes
         for field in self.meta.structure:
-            ref_doc = self._get_related_document_class(field)
+            ref_doc = _get_related_document_class(self, field)
             if ref_doc:
                 descriptor = BackwardRelation(self, field)
                 rel_name = self.meta.lowercase_name + '_set'
                 setattr(ref_doc, rel_name, descriptor)
 
-#        if errors:
-#            msg = u'These fields failed validation: {0}'
-#            fields = ', '.join(errors)
-#            raise validators.ValidationError(msg.format(fields))
-
-    def __repr__(self):
-        try:
-            label = unicode(self)
-        except (UnicodeEncodeError, UnicodeDecodeError):
-            label = u'[bad unicode data]'
-        except TypeError:
-            type_name = type(self.__unicode__()).__name__
-            label = u'[__unicode__ returned {0}]'.format(type_name)
-        return u'<{class_name}: {label}>'.format(
-            class_name = self.__class__.__name__,
-            label = label,
-        ).encode('utf-8')
-
     def __setattr__(self, name, value):
         # FIXME this is already implemented in DotDict but that method doesn't
         # call *our* __setitem__ and therefore misses validation
         if key in self.meta.set_item_processors:
             value = self.meta.set_item_processors[key](value)
 
-        self._validate_value(key, value)  # will raise ValidationError if wrong
+        _validate_value(self, key, value)  # will raise ValidationError if wrong
         super(Document, self).__setitem__(key, value)
 
     def __unicode__(self):
         return repr(self._data)
 
-    #----------------------+
-    #  Private attributes  |
-    #----------------------+
-
-    def _clone(self, as_document=None):
-        """
-        Returns an exact copy of current instance with regard to model metadata.
-
-        :param as_document:
-            class of the new object (must be a :class:`Document` subclass).
-
-        .. note::
-            if `as_document` is set, it is not guaranteed that the resulting
-            document instance will validate even if the one being cloned is
-            valid. The document classes define different rules for validation.
-
-        """
-        cls = as_document or type(self)
-
-        new_obj = cls()
-
-        fields_to_copy = list(new_obj.meta.structure) or list(new_obj._data)
-        for name in fields_to_copy:
-            if name in self._data:
-                new_obj._data[name] = self._data[name]
-
-        if self._saved_state:
-            new_obj._saved_state = self._saved_state.clone()
-
-        return new_obj
-
-    def _fill_defaults(self):
-        """
-        Fills default values. Example::
-
-            class Foo(Document):
-                defaults = {
-                    # a value (non-callable)
-                    'text': 'no text provided',
-                    # a callable value but not a function, no args passed
-                    'date': datetime.date.today,  # not a simple function
-                    # a simple function, document instance passed as arg
-                    'slug': lambda doc: doc.text[:20].replace(' ','')
-                }
-                use_dot_notation = True
-
-        The "simple function" is any instance of `types.FunctionType` including
-        one created with ``def`` or with ``lambda``. Such functions will get a
-        single argument: the document instance. All other callable objects are
-        called without arguments. This may sound a bit confusing but it's not.
-        """
-        for name in self.meta.defaults:
-            current_value = self.get(name)
-            if current_value is None or current_value == '':
-                value = self.meta.defaults[name]
-                if hasattr(value, '__call__'):
-                    if isinstance(value, types.FunctionType):
-                        # functions are called with instance as argment, e.g.:
-                        #   defaults = {'slug': lambda d: d.text.replace(' ','')
-                        value = value(self)
-                    else:
-                        # methods, etc. are called without arguments, e.g.:
-                        #   defaults = {'date': datetime.date.today}
-                        value = value()
-                self[name] = value
-
-    def _get_document_by_ref(self, field, value):
-        if not value:
-            return value
-
-        # XXX needs refactoring:
-        # self._get_related_document_class is also called in __getitem__.
-        document_class = self._get_related_document_class(field)
-        if not document_class:
-            return value
-
-        def _resolve(ref, document_class):
-            if isinstance(ref, Document):
-                assert isinstance(ref, document_class), (
-                    'Expected {expected} instance, got {cls}'.format(
-                        expected=document_class.__name__,
-                        cls=ref.__class__.__name__))
-                return ref
-            if not self._saved_state:
-                raise RuntimeError(
-                    'Cannot resolve lazy reference {cls}.{name} {value} to'
-                    ' {ref}: storage is not defined'.format(
-                    cls=self.__class__.__name__, name=key,
-                    value=repr(ref), ref=document_class.__name__))
-            # retrieve the record and replace the PK in the data dictionary
-            return self._saved_state.storage.get(document_class, ref)
-
-        datatype = self.meta.structure.get(field)
-        if isinstance(datatype, OneToManyRelation):
-            # one-to-many (list of primary keys)
-            assert isinstance(value, list)
-            # NOTE: list is re-created; may be undesirable
-            return [_resolve(v, document_class) for v in value]
-        else:
-            # "foreign key" (plain single reference)
-            return _resolve(value, document_class)
-
-    # TODO: move outside of the class?
-    @classmethod
-    def _get_related_document_class(cls, field):
-        """
-        Returns the relevant document class for given `field` depending on the
-        declared document structure. (Field = property = column.)
-
-        If the declared data type is a :class:`Document` subclass, it is
-        returned. If the data type is a string, it is interpreted as a lazy
-        import path (e.g. `myapp.models.Foo` or `self`). If the import fails,
-        `ImportError` is raised.  If the data type is unrelated, `None` is
-        returned.
-
-        """
-        if not cls.meta.structure or not field in cls.meta.structure:
-            return
-
-        datatype = cls.meta.structure.get(field)
-
-        # model class
-        if issubclass(datatype, Document):
-            return datatype
-
-        if isinstance(datatype, OneToManyRelation):
-            return datatype.document_class
-
-        # dotted path to the model class (lazy import)
-        if isinstance(datatype, basestring):
-            return cls._resolve_model_path(datatype)
-
-    # TODO: mode outside of the class?
-    @classmethod
-    def _resolve_model_path(cls, path):
-        # XXX make better docstring. For now see _get_related_document_class.
-        if path == 'self':
-            return cls
-        if '.' in path:
-            module_path, attr_name = path.rsplit('.', 1)
-        else:
-            module_path, attr_name = cls.__module__, path
-        module = __import__(module_path, globals(), locals(), [attr_name], -1)
-        return getattr(module, attr_name)
-
-    def _validate_value(self, key, value):
-        # note: we intentionally provide the value instead of leaving the
-        # method get it by key because the method is used to check both
-        # existing values and values *to be set* (pre-check).
-        self._validate_value_type(key, value)
-        self._validate_value_custom(key, value)
-
-    def _validate_value_custom(self, key, value):
-        tests = self.meta.validators.get(key, [])
-        for test in tests:
-            try:
-                test(self, value)
-            except validators.StopValidation:
-                break
-            except validators.ValidationError:
-                # XXX should preserve call stack and add sensible message
-                msg = 'Value {value} is invalid for {cls}.{field} ({test})'
-                raise validators.ValidationError(msg.format(
-                    value=repr(value), cls=type(self).__name__,
-                    field=key, test=test))
-
-    def _validate_value_type(self, key, value):
-        if value is None:
-            return
-        datatype = self.meta.structure.get(key)
-        if isinstance(datatype, basestring):
-            # A text reference, i.e. "self" or document class name.
-            return
-        if issubclass(datatype, Document) and isinstance(value, basestring):
-            # A class reference; value is the PK, not the document object.
-            # This is a normal situation when a document instance is being
-            # created from a database record. The reference will be resolved
-            # later on __getitem__ call. We just skip it for now.
-            return
-        if isinstance(datatype, OneToManyRelation):
-            if not hasattr(value, '__iter__'):
-                msg = u'{cls}.{field}: expected list of documents, got {value}'
-                raise validators.ValidationError(msg.format(
-                    cls=type(self).__name__, field=key, value=repr(value)))
-            return
-        if datatype and not isinstance(value, datatype):
-            msg = u'{cls}.{field}: expected a {datatype} instance, got {value}'
-            raise validators.ValidationError(msg.format(
-                cls=type(self).__name__, field=key, datatype=datatype.__name__,
-                value=repr(value)))
-
     #---------------------+
     #  Public attributes  |
     #---------------------+
 
-    def convert_to(self, other_schema, overrides=None):
-        """
-        Returns the document as an instance of another model. Copies attributes
-        of current instance that can be applied to another model (i.e. only
-        overlapping attributes -- ones that matter for both models). All other
-        attributes are re-fetched from the database (if we know the key).
-
-        .. note::
-            The document key is *preserved*. This means that the new instance
-            represents *the same document*, not a new one. Remember that models
-            are "views", and to "convert" a document does not mean copying; it
-            can however imply *adding* attributes to the existing document.
-
-        Neither current instance nor the returned one are saved automatically.
-        You will have to do it yourself.
-
-        Please note that trying to work with the same document via different
-        instances of models whose properties overlap can lead to unpredictable
-        results: some properties can be overwritten, go out of sync, etc.
-
-        :param other_model:
-            the model to which the instance should be converted.
-        :param overrides:
-            a dictionary with attributes and their values that should be set on
-            the newly created model instance. This dictionary will override any
-            attributes that the models have in common.
-
-        Usage::
-
-            >>> class Contact(Note):
-            ...     structure = {'name': unicode}
-            ...     validators = {'name': [required()]}  # merged with Note's
-            ...
-            ...     def __unicode__(self):
-            ...         return u'{name} ({text})'.format(**self)
-
-            >>> note = Note(text='phone: 123-45-67')
-            >>> note
-            <Note phone: 123-45-67>
-
-            # same document, contact-specific data added
-            >>> contact = note.convert_to(Contact, {'name': 'John Doe'})
-            >>> contact
-            <Contact John Doe (phone: 123-45-67)>
-            >>> contact.name
-            'John Doe'
-            >>> contact.text
-            'phone: 123-45-67'
-
-            # same document, contact-specific data ignored
-            >>> note2 = contact.convert_to(Note)
-            >>> note2
-            <Note phone: 123-45-67>
-            >>> note2.name
-            Traceback (most recent call last):
-            ...
-            AttributeError: 'Note' object has no attribute 'name'
-            >>> note2.text
-            'phone: 123-45-67'
-
-        """
-        if self._saved_state.storage and self._saved_state.key:
-            # the record may be invalid for another document class so we are
-            # very careful about it
-#            try:
-            new_instance = self._saved_state.storage.get(other_schema, self.pk)
-#            except validators.ValidationError:
-#                pass
-##            new_instance = other_schema()
-##            new_instance._saved_state = self._saved_state.clone()
-##            for key, value in self.iteritems():
-##                try:
-##                    new_instance[key] = value
-##                except KeyError:
-##                    pass
-        else:
-            new_instance = self._clone(as_model=other_schema)
-
-        if overrides:
-            for attr, value in overrides.items():
-                setattr(new_instance, attr, value)
-
-        return new_instance
-
-    def delete(self):
-        """
-        Deletes the object from the associated storage.
-        """
-        if not self._saved_state.storage or not self._saved_state.key:
-            raise ValueError('Cannot delete object: not associated with '
-                             'a storage and/or primary key is not defined.')
-        self._saved_state.storage.delete(self._saved_state.key)
-
-    def dump(self, raw=False, as_repr=False):
-        width = max(len(k) for k in self.keys())
-        template = u' {key:>{width}} : {value}'
-        if raw:
-            assert self._saved_state
-            data = self._saved_state.data
-        else:
-            data = self
-        for key in sorted(data):
-            value = data[key]
-            if as_repr:
-                value = repr(value)
-            print template.format(key=key, value=value, width=width)
-
-    def is_field_changed(self, name):
-        if self.meta.structure:
-            assert name in self.meta.structure
-        if not self.pk:
-            return True
-        if self.get(name) == self._saved_state.data.get(name):
-            return False
-        return True
-
-    def is_valid(self):
-        try:
-            self.validate()
-        except validators.ValidationError:
-            return False
-        else:
-            return True
-
-    @classmethod
-    def object(cls, storage, pk):
-        """
-        Returns an instance of given document class associated with a record
-        stored with given primary key in given storage. Usage::
-
-            event = Event.object(db, key)
-
-        :param storage:
-            a :class:`~doqu.backend_base.BaseStorageAdapter` subclass (see
-            :doc:`ext`).
-        :param pk:
-            the record's primary key (a string).
-
-        """
-        return storage.get(cls, pk)
-
     @classmethod
     def objects(cls, storage):
         """
             storage = self._saved_state.storage
 
         # fill defaults before validation
-        self._fill_defaults()
+        for key, value in _collect_defaults(self):
+            self[key] = value
 
         self.validate()    # will raise ValidationError if something is wrong
 
         assert key == self.pk    # TODO: move this to tests
         return key
 
-    def save_as(self, key=None, storage=None, **kwargs):
+    def delete(self):
         """
-        Saves the document under another key (specified as `key` or generated)
-        and returns the newly created instance.
-
-        :param key:
-            the key by which the document will be identified in the storage.
-            Use with care: any existing record with that key will be
-            overwritten. Pay additional attention if you are saving the
-            document into another storage. Each storage has its own namespace
-            for keys (unless the storage objects just provide different ways to
-            access a single real storage). If the key is not specified, it is
-            generated automatically by the storage.
-
-        See `save()` for details on other params.
-
-        Usage::
-
-            >>> db.clear()
-            >>> note = Note(text="hello")   # just create the item
-
-            # WRONG:
-
-            >>> note.save()               # no storage; don't know where to save
-            Traceback (most recent call last):
-            ...
-            AttributeError: cannot save model instance: storage is not defined neither in instance nor as argument for the save() method
-            >>> note.save_as()            # same as above
-            Traceback (most recent call last):
-            ...
-            AttributeError: cannot save model instance: storage is not defined neither in instance nor as argument for the save() method
-
-            # CORRECT:
-
-            >>> new_key = note.save(db)                   # storage provided, key generated
-            >>> new_key
-            u'1'
-            >>> new_obj = note.save_as(storage=db)        # same as above
-            >>> new_obj
-            <Note hello>
-            >>> new_obj.pk  # new key
-            u'2'
-            >>> new_obj.text  # same data
-            'hello'
-            >>> new_key = note.save()                     # same storage, same key
-            >>> new_key
-            u'1'
-            >>> new_obj = note.save_as()                  # same storage, autogenerated new key
-            >>> new_obj.pk
-            u'3'
-            >>> new_obj = note.save_as('custom_key')      # same storage, key "123"
-            >>> new_obj.pk
-            'custom_key'
-
-            >>> note.save_as(123, other_db)     # other storage, key "123"
-            <Note hello>
-            >>> note.save_as(storage=other_db)  # other storage, autogenerated new key
-            <Note hello>
-
-        .. warning::
-
-            Current implementation may lead to data corruption if the document
-            comes from one database and is being saved to another one, managed
-            by a different backend. Use with care.
-
+        Deletes the object from the associated storage.
         """
-        # FIXME: this is totally wrong.  We need to completely pythonize all
-        # data. The _saved_state *must* be created using the new storage's
-        # datatype converters from pythonized data. Currently we just clone the
-        # old storage's native record representation. The pythonized data is
-        # stored as doc._data while the sort-of-native is at doc._saved_state.data
-        new_instance = self._clone()
-        new_instance._saved_state.update(storage=storage)
-        new_instance._saved_state.key = key    # reset to None
-        new_instance.save(**kwargs)
-        return new_instance
-
-        # TODO:
-        # param "crop_data" (default: False). Removes all fields that do not
-        # correspond to target document class structure (only if it has a
-        # structure). Use case: we need to copy a subset of data fields from a
-        # large database. Say, that second database is a view for calculations.
-        # Example::
-        #
-        #    for doc in BigDocument(heavy_db):
-        #        doc.save_as(TinyDocument, tmp_db)
-        #
-        # TinyDocument can even do some calculations on save, e.g. extract some
-        # datetime data for quick lookups, grouping and aggregate calculation.
+        if not self._saved_state.storage or not self._saved_state.key:
+            raise ValueError('Cannot delete object: not associated with '
+                             'a storage and/or primary key is not defined.')
+        self._saved_state.storage.delete(self._saved_state.key)
 
     def validate(self):
         """
 
         """
         for key, value in self.iteritems():
-            self._validate_value(key, value)
+            _validate_value(self, key, value)
 
 
 class OneToManyRelation(object):
         # TODO: 1. remove all existing references, 2. set new ones.
         # (there may be validation issues)
         raise NotImplementedError('sorry')
+
+#------------------------------+
+#  Document-related functions  |
+#------------------------------+
+
+def _collect_defaults(doc):
+    """
+    Returns pairs of keys and respective default values if needed (i.e. if
+    current value is empty).  Example::
+
+        class Foo(Document):
+            defaults = {
+                # a value (non-callable)
+                'text': 'no text provided',
+                # a callable value but not a function, no args passed
+                'date': datetime.date.today,  # not a simple function
+                # a simple function, document instance passed as arg
+                'slug': lambda doc: doc.text[:20].replace(' ','')
+            }
+            use_dot_notation = True
+
+    The "simple function" is any instance of `types.FunctionType` including
+    one created with ``def`` or with ``lambda``. Such functions will get a
+    single argument: the document instance. All other callable objects are
+    called without arguments. This may sound a bit confusing but it's not.
+    """
+    for name in doc.meta.defaults:
+        current_value = doc.get(name)
+        if current_value is None or current_value == '':
+            value = doc.meta.defaults[name]
+            if hasattr(value, '__call__'):
+                if isinstance(value, types.FunctionType):
+                    # functions are called with instance as argment, e.g.:
+                    #   defaults = {'slug': lambda d: d.text.replace(' ','')
+                    value = value(doc)
+                else:
+                    # methods, etc. are called without arguments, e.g.:
+                    #   defaults = {'date': datetime.date.today}
+                    value = value()
+            yield name, value
+
+
+
+def _get_document_by_ref(doc, field, value):
+    if not value:
+        return value
+
+    # XXX needs refactoring:
+    # cls._get_related_document_class is also called in __getitem__.
+    document_class = _get_related_document_class(doc, field)
+    if not document_class:
+        return value
+
+    def _resolve(ref, document_class):
+        if isinstance(ref, Document):
+            assert isinstance(ref, document_class), (
+                'Expected {expected} instance, got {cls}'.format(
+                    expected=document_class.__name__,
+                    cls=ref.__class__.__name__))
+            return ref
+        if not doc._saved_state:
+            raise RuntimeError(
+                'Cannot resolve lazy reference {cls}.{name} {value} to'
+                ' {ref}: storage is not defined'.format(
+                cls=doc.__class__.__name__, name=key,
+                value=repr(ref), ref=document_class.__name__))
+        # retrieve the record and replace the PK in the data dictionary
+        return doc._saved_state.storage.get(document_class, ref)
+
+    datatype = doc.meta.structure.get(field)
+    if isinstance(datatype, OneToManyRelation):
+        # one-to-many (list of primary keys)
+        assert isinstance(value, list)
+        # NOTE: list is re-created; may be undesirable
+        return [_resolve(v, document_class) for v in value]
+    else:
+        # "foreign key" (plain single reference)
+        return _resolve(value, document_class)
+
+def _get_related_document_class(cls, field):
+    """
+    Returns the relevant document class for given `field` depending on the
+    declared document structure. (Field = property = column.)
+
+    If the declared data type is a :class:`Document` subclass, it is
+    returned. If the data type is a string, it is interpreted as a lazy
+    import path (e.g. `myapp.models.Foo` or `cls`). If the import fails,
+    `ImportError` is raised.  If the data type is unrelated, `None` is
+    returned.
+    """
+    if not cls.meta.structure or not field in cls.meta.structure:
+        return
+
+    datatype = cls.meta.structure.get(field)
+
+    # model class
+    if issubclass(datatype, Document):
+        return datatype
+
+    if isinstance(datatype, OneToManyRelation):
+        return datatype.document_class
+
+    # dotted path to the model class (lazy import)
+    if isinstance(datatype, basestring):
+        return _resolve_model_path(datatype)
+
+def _resolve_model_path(cls, path):
+    # XXX make better docstring. For now see _get_related_document_class.
+    if path == RECURSIVE_RELATION_NAME:
+        return cls
+    if '.' in path:
+        module_path, attr_name = path.rsplit('.', 1)
+    else:
+        module_path, attr_name = cls.__module__, path
+    module = __import__(module_path, globals(), locals(), [attr_name], -1)
+    return getattr(module, attr_name)
+
+def _validate_value(doc, key, value):
+    # note: we intentionally provide the value instead of leaving the
+    # method get it by key because the method is used to check both
+    # existing values and values *to be set* (pre-check).
+    _validate_value_type(doc, key, value)
+    _validate_value_custom(doc, key, value)
+
+def _validate_value_custom(doc, key, value):
+    tests = doc.meta.validators.get(key, [])
+    for test in tests:
+        try:
+            test(doc, value)
+        except validators.StopValidation:
+            break
+        except validators.ValidationError:
+            # XXX should preserve call stack and add sensible message
+            msg = 'Value {value} is invalid for {cls}.{field} ({test})'
+            raise validators.ValidationError(msg.format(
+                value=repr(value), cls=type(doc).__name__,
+                field=key, test=test))
+
+def _validate_value_type(cls, key, value):
+    """Checks that `value` fits the type declared for `key`.
+
+    :param cls:
+        the object whose `meta.structure` maps keys to declared types.
+        Despite the name it is apparently a document instance: the error
+        messages use `type(cls).__name__` as the class name.
+    :param key:
+        the field name.
+    :param value:
+        the value to check. `None` is always accepted.
+    :raises:
+        `validators.ValidationError` if the value does not fit.
+
+    """
+    if value is None:
+        return
+    datatype = cls.meta.structure.get(key)
+    if isinstance(datatype, basestring):
+        # A text reference, i.e. "self" or document class name.
+        return
+    if isinstance(datatype, OneToManyRelation):
+        # Checked before the class-based tests below: a relation object is
+        # not a class and must never reach issubclass().
+        if not hasattr(value, '__iter__'):
+            msg = u'{cls}.{field}: expected list of documents, got {value}'
+            raise validators.ValidationError(msg.format(
+                cls=type(cls).__name__, field=key, value=repr(value)))
+        return
+    if not datatype:
+        # No type is declared for this key, so there is nothing to check.
+        return
+    if (isinstance(datatype, type) and issubclass(datatype, Document)
+            and isinstance(value, basestring)):
+        # A class reference; value is the PK, not the document object.
+        # This is a normal situation when a document instance is being
+        # created from a database record. The reference will be resolved
+        # later on __getitem__ call. We just skip it for now.
+        return
+    if not isinstance(value, datatype):
+        msg = u'{cls}.{field}: expected a {datatype} instance, got {value}'
+        raise validators.ValidationError(msg.format(
+            cls=type(cls).__name__, field=key, datatype=datatype.__name__,
+            value=repr(value)))

File doqu/ext/forms.py

 import wtforms.ext
 
 from doqu import Document
-from doqu.document_base import OneToManyRelation
+from doqu.document_base import OneToManyRelation, _get_related_document_class # XXX
 from doqu.validators import Required, Optional, AnyOf
 
 
         defaults = {}
         field_validators = document_class.meta.validators.get(name, [])
         # XXX private attr used, make it public?
-        doc_ref = document_class._get_related_document_class(name)
+        doc_ref = _get_related_document_class(document_class, name)
         if doc_ref:
             if not storage:
                 # we need a storage to fetch choices for the reference

File doqu/utils/__init__.py

 
 Various useful functions. Some can be imported from :mod:`doqu.utils`, some
 are available directly at :mod:`doqu`.
+
+These utilities are either stable and well-tested, or possible changes in their
+API are not considered harmful (i.e. they are marginal). Important functions
+whose design is likely to change or which lack proper tests are located in
+:mod:`doqu.future`.
 """
 
 import os
 import sys
 from functools import wraps
 
+from doqu import validators
 
-__all__ = ['get_db', 'camel_case_to_underscores', 'load_fixture']
+
+__all__ = ['dump_doc', 'get_db', 'camel_case_to_underscores', 'load_fixture']
 
 
 def get_db(settings_dict=None, **settings_kwargs):
             self.__cached_values[function.__name__] = value
         return self.__cached_values[function.__name__]
     return property(inner)
+
+def dump_doc(self, raw=False, as_repr=False, align=True, keys=None, exclude=None):
+    """Returns a multi-line string with document keys and values nicely
+    formatted and aligned. Returns an empty string if there is nothing to
+    show.
+
+    :param raw:
+        If `True`, uses "raw" values, as fetched from the database (note that
+        this will fail for unsaved documents). If not, the values are obtained
+        in the normal way, i.e. by `__getitem__()`. Default is `False`.
+    :param as_repr:
+        If `True`, uses `repr()` for values; if not, coerces them to Unicode.
+        Default is `False`.
+    :param align:
+        If `True`, the keys and values are aligned into two columns of equal
+        width. If `False`, no padding is used. Default is `True`.
+    :param keys:
+        a list of document keys to show. By default all existing keys are
+        included.
+    :param exclude:
+        a list of keys to exclude. By default no keys are excluded.
+
+    """
+    if raw:
+        assert self._saved_state
+        data = self._saved_state.data
+    else:
+        data = self
+    shown = [key for key in sorted(data)
+             if (not keys or key in keys)
+             and (not exclude or key not in exclude)]
+    if not shown:
+        # max() below would fail on an empty sequence
+        return u''
+    # The column is as wide as the longest key that is actually displayed.
+    width = max(len(key) for key in shown)
+    template = u' {key:>{width}} : {value}' if align else u'{key}: {value}'
+    lines = []
+    for key in shown:
+        value = data[key]
+        if as_repr:
+            value = repr(value)
+        lines.append(template.format(key=key, value=value, width=width))
+    return u'\n'.join(lines)
+
+def is_doc_valid(doc):
+    """Returns `True` if ``doc.validate()`` passes and `False` if it raises
+    `ValidationError`. Any other exception is propagated.
+    """
+    try:
+        doc.validate()
+    except validators.ValidationError:
+        return False
+    return True

File doqu/utils/data_structures.py

                 key=self._sort_key,
                 reverse=self._reverse)
         return iter(self._sorted_data)
+
+
+class ReprMixin(object):
+    """Adds informative and safe ``__repr__()`` based on ``__unicode__()``.
+
+    The default ``__unicode__()`` raises `NotImplementedError`, so it
+    presumably has to be overridden by the classes that use the mixin.
+    """
+
+    def __repr__(self):
+        # Builds "<ClassName: label>" and returns it encoded as UTF-8. The
+        # label is the Unicode form of the object or a placeholder that says
+        # why that form could not be obtained.
+        try:
+            label = unicode(self)
+        except (UnicodeEncodeError, UnicodeDecodeError):
+            label = u'[bad unicode data]'
+        except TypeError:
+            # Presumably __unicode__() returned a non-string value; its type
+            # name goes into the label.
+            # NOTE(review): __unicode__() is called a second time here with no
+            # guard, so an exception raised by it propagates out of __repr__().
+            type_name = type(self.__unicode__()).__name__
+            label = u'[__unicode__ returned {0}]'.format(type_name)
+        return u'<{class_name}: {label}>'.format(
+            class_name = self.__class__.__name__,
+            label = label,
+        ).encode('utf-8')
+
+    def __unicode__(self):
+        # Placeholder: always raises.
+        raise NotImplementedError

File tests/test_document.py

 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import unittest
+import unittest2 as unittest
 
-from doqu import Document
+from doqu import Document, get_db
+from doqu.utils import is_doc_valid
 from doqu import validators
 
 
             pass
         document = Doc(foo=123)
 
+
+class ReprTestCase(unittest.TestCase):
+    "Document representation test case"
+
     def test_repr_default(self):
         "document representation can be changed"
         class Doc(Document):
         class Doc(Document):
            pass
         doc = Doc(name=u'foo')
-        assert doc.is_valid()
+        assert is_doc_valid(doc)
         self.assertEqual(doc['name'], u'foo')
 
     def test_structure_correct(self):
         class Doc(Document):
            structure = {'name': unicode}
         doc = Doc(name=u'foo')
-        assert doc.is_valid()
+        assert is_doc_valid(doc)
         self.assertEqual(doc['name'], u'foo')
         doc['name'] = u'bar'
-        assert doc.is_valid()
+        assert is_doc_valid(doc)
 
     def test_structure_wrong_field(self):
         "Document structure is defined, wrong field ignored"
             structure = {'name': unicode}
         doc = Doc()
         # valid with no data:
-        assert doc.is_valid()
+        assert is_doc_valid(doc)
         # invalidates on creation:
         self.assertRaises(KeyError, lambda: Doc(location=u'foo'))
         # invalidates on setitem:
             structure = {'name': unicode}
         doc = Doc()
         # valid with no data:
-        assert doc.is_valid()
+        assert is_doc_valid(doc)
         # doesn't break on creation:
         doc = Doc(name=123)
         # invalidates on setitem:
             structure = {'name': unicode}
         doc = Doc()
         # valid with no data:
-        assert doc.is_valid()
+        assert is_doc_valid(doc)
         # invalidates on creation:
         self.assertRaises(validators.ValidationError, lambda: Doc(name=123))
         # invalidates on setitem:
         class Doc(Document):
             pass
         doc = Doc(name=u'foo')
-        assert doc.is_valid()
+        assert is_doc_valid(doc)
 
     def test_validators_correct(self):
         "Validators defined, value passes test, document is valid"
                 'name': [validators.Length(min=3)],
             }
         doc = Doc(name=u'foo')
-        assert doc.is_valid()
+        assert is_doc_valid(doc)
 
     def test_validators_wrong(self):
         "Validators defined, value fails test, document is invalid"
 class StateTestCase(unittest.TestCase):
     "Document state"
 
-#    def setUp(self):
-#        self.db1 = get_db(backend='doqu.ext.shove')
-#        self.db2 = get_db(backend='doqu.ext.shove')
-#        assert self.db1 != self.db2
-#
+    def setUp(self):
+        "Creates two storages so that documents can be compared across them."
+        self.db1 = get_db(backend='doqu.ext.shove_db')
+        self.db2 = get_db(backend='doqu.ext.shove_db')
+        # the tests below rely on the storages being distinct objects
+        self.assertNotEqual(self.db1, self.db2)
+
 #    def tearDown(self):
 #        self.db1.close()
 #        self.db2.close()
 
-    def test_equal_docs(self):
-        pass
+    def test_unsaved(self):
+        "Unsaved instances always differ."
+        class Note(Document):
+            structure = {'text':unicode}
+        self.assertNotEqual(Note(), Note())
+        # the field is declared as unicode, so unicode literals are used
+        self.assertNotEqual(Note(text=u'foo'), Note(text=u'bar'))
 
-    def test_different_docs(self):
-        pass
+    def test_saved_equal(self):
+        "Instances are same if the storage and the key are the same."
+        class Note(Document):
+            structure = {'text':unicode}
+        original = Note(text=u'foo')
+        original.save(self.db1)
+        # the data is changed after saving: equality must not depend on it
+        original.text = u'quux'
+        fetched = self.db1.get(Note, original.pk)
+        self.assertEqual(original, fetched)
 
+    def test_saved_different_pk(self):
+        "Instances differ if keys differ."
+        class Note(Document):
+            structure = {'text':unicode}
+        # both notes are saved to the same storage
+        first = Note(text=u'foo')
+        first.save(self.db1)
+        second = Note(text=u'bar')
+        second.save(self.db1)
+        self.assertNotEqual(first, second)
+
+    def test_different_storages(self):
+        "Instances differ if storages differ, even if keys are same."
+        class Note(Document):
+            structure = {'text':unicode}
+        source = Note(text=u'foo')
+        source.save(self.db1)
+        # copy the saved data into the second storage under the same key
+        self.db2.save(source._saved_state.data, source.pk)
+        clone = self.db2.get(Note, source.pk)
+        self.assertEqual(source.pk, clone.pk)
+        self.assertNotEqual(source, clone)
+
+    @unittest.expectedFailure
     def test_hash(self):
         # TODO: test if __hash__ works properly for saved and unsaved documents
         # including cross-database comparisons (storage AND key must be same)
-        pass
+        raise NotImplementedError
 
 
 class ReferenceTestCase(unittest.TestCase):