dynamodb-mapper-features / dynamodb_mapper / model.py

"""Object mapper for Amazon DynamoDB.

Based in part on mongokit's Document interface.

Released under the GNU LGPL, version 3 or later (see COPYING).
"""
from __future__ import absolute_import

import json
import logging
from datetime import datetime, timedelta, tzinfo

from onctuous.schema import Schema

import boto
from boto.dynamodb.item import Item
from boto.exception import DynamoDBResponseError
from boto.dynamodb.exceptions import DynamoDBConditionalCheckFailedError

log = logging.getLogger(__name__)
dblog = logging.getLogger(__name__+".database-access")


MAX_RETRIES = 100
# primary key of the magic item used for autoinc
MAGIC_KEY = -1

class SchemaError(Exception):
    """SchemaError exception is raised when a schema consistency check fails.
    Most of the checks are performed in :py:meth:`~.ConnectionBorg.create_table`.

    Common consistency failures include a missing ``__table__``, ``__hash_key__``
    or ``__schema__`` definition, or an :py:class:`~.autoincrement_int` ``hash_key``
    used together with a ``range_key``.
    """


class MaxRetriesExceededError(Exception):
    """Raised when a failed operation couldn't be completed after retrying
    ``MAX_RETRIES`` times (e.g. saving an autoincrementing hash_key).
    """


class ConflictError(Exception):
    """Atomic edition failure.
    Raised when an Item has been changed between the read and the write operation
    and this has been forbid by the ``raise_on_conflict`` argument of
    :meth:`DynamoDBModel.save` (i.e. when somebody changed the DB's version of
    your object behind your back).
    """


class OverwriteError(ConflictError):
    """Raised when saving a DynamoDBModel instance would overwrite something
    in the database and we've forbidden that because we believe we're creating
    a new one (see :meth:`DynamoDBModel.save`).
    """


class InvalidRegionError(Exception):
    """Raised when ``set_region()`` is called with an invalid region name.
    """


class autoincrement_int(int):
    """Dummy int subclass for use in your schemas.

    If you're using this class as the type for your key in a hash_key-only
    table, new objects in your table will have an auto-incrementing primary
    key.

    Note that you can still insert items with explicit values for your primary
    key -- the autoincrementing scheme is only used for objects whose hash_key
    is left unset (i.e. still ``None``, its default value).

    Auto-incrementing int keys are implemented by storing a special "magic"
    item in the table with the following properties:

        - ``hash_key_value = -1``
        - ``__max_hash_key__ = N``

    where N is the maximum used hash_key value.

    Inserting a new item issues an atomic add on the '__max_hash_key__' value.
    Its new value is returned and used as the primary key of the new element.

    Note that hash_key_value is set to '-1' while the initial ``__max_hash_key__``
    value is 0. This leaves the element at key '0' unused; it is effectively a
    garbage item covering the case where a value is manually added to an
    uninitialized index.
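
    A minimal sketch of a model opting in to auto-incrementing keys (the model
    and field names below are purely illustrative)::

        class LogEntry(DynamoDBModel):
            __table__ = "log_entry"
            __hash_key__ = "id"
            __schema__ = {
                "id": autoincrement_int,  # leave unset to auto-allocate on save
                "message": unicode,
            }

        entry = LogEntry(message=u"power on")
        entry.save()  # "id" is allocated automatically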
    """

_JSON_TYPES = frozenset([list, dict])


class UTC(tzinfo):
    """UTC timezone"""
    def utcoffset(self, dt):
        return timedelta(0)

    def tzname(self, dt):
        return "UTC"

    def dst(self, dt):
        return timedelta(0)


utc_tz = UTC()

def _get_proto_value(schema_entry):
    """Return a prototype value matching what schema_type will be serialized
    as in DynamoDB:

      - For strings and numbers, an instance of schema_type.
      - For "special" types implemented at the mapper level (list, dict,
        datetime), an empty string (this is what they're stored as in the DB).
    """
    # Those types must be serialized as strings
    if schema_entry in _JSON_TYPES or type(schema_entry) in _JSON_TYPES:
        return u""

    if schema_entry is datetime:
        return u""

    # Regular string/number
    if type(schema_entry) is type:
        return schema_entry()

    # callable validator
    return _get_proto_value(type(schema_entry(None)))

def _python_to_dynamodb(value):
    """Convert a Python object to a representation suitable to direct storage
    in DynamoDB, according to a type from a DynamoDBModel schema.

    If value should be represented as a missing value in DynamoDB
    (empty string or set), None is returned.

    ``_dynamodb_to_python(t, _python_to_dynamodb(v)) == v`` for any v.

    :param value: The Python object to convert.

    :return: ``value``, serialized to DynamoDB, or ``None`` if ``value`` must
        be represented as a missing attribute.
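
    A rough sketch of the mapping (the return values shown in comments are
    illustrative)::

        _python_to_dynamodb([1, 2])   # -> '[1, 2]' (JSON string)
        _python_to_dynamodb(True)     # -> 1 (bools are stored as numbers)
        _python_to_dynamodb(set())    # -> None (stored as a missing attribute)
        _python_to_dynamodb(datetime(2012, 5, 31, 12, 0, tzinfo=utc_tz))
        # -> '2012-05-31T12:00:00.000000+00:00'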
    """
    if isinstance(value, tuple(_JSON_TYPES)):
        # json serialization hooks for json_* data types.
        return json.dumps(value, sort_keys=True)

    if isinstance(value, datetime):
        # datetime instances are stored as UTC in the DB itself.
        # (that way, they become sortable)
        # datetime objects without tzinfo are not supported.
        s = value.astimezone(utc_tz).strftime("%Y-%m-%dT%H:%M:%S.%f%z")
        # there is no strftime code to output the timezone with the ':' that
        # is mandated by the W3CDTF format, so here's an ugly hack
        s = s[:-2] + ':' + s[-2:]
        return s

    # This case prevents `'field': False` from being added when generating the
    # expected values dict in save, as that would mean 'field does not exist'
    # instead of 'field exists and is False'.
    if isinstance(value, bool):
        return int(value)

    if value or value == 0:
        return value

    # Yes, that part is horrible. DynamoDB can't store empty
    # sets/strings, so we're representing them as missing
    # attributes on the DB side.
    return None


def _dynamodb_to_python(schema_entry, value):
    """Convert a DynamoDB attribute value to a Python object, according to a
    type from a DynamoDBModel schema.

    If ``schema_entry`` is a type (``int`` for example), a datetime or a list/dict
    validator, ``value`` is automatically de-serialized.

    The resulting value is then passed to the validation engine. If needed, make
    sure that your validator starts by manually coercing the input value.

    :param schema_entry: A type or validator supported by the mapper
    :param value: The DynamoDB attribute to convert to a Python object.
        May be ``None``.

    :return: coerced and validated ``value``.
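
    A rough sketch of the behaviour (values are illustrative)::

        _dynamodb_to_python(int, u"42")     # -> 42
        _dynamodb_to_python(unicode, None)  # -> u"" (neutral value of the type)
        _dynamodb_to_python(datetime, u"2012-05-31T12:00:00.000000+00:00")
        # -> datetime(2012, 5, 31, 12, 0, tzinfo=utc_tz)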
    """
    schema_type = type(schema_entry)

    # Handle json related type
    if schema_entry in _JSON_TYPES:
        # basic type => just load it and return
        if value is None:
            return schema_entry()
        return schema_entry(json.loads(value))
    elif schema_type in _JSON_TYPES:
        # looks like a validator => load and validate it
        if value is None:
            value = schema_type()
        else:
            value = schema_type(json.loads(value))
        return Schema(schema_entry)(value)

    # Handle datetime values.
    # if we enter this, it means this is not a validator so return directly
    if schema_entry is datetime:
        if value is None:
            return datetime.now(tz=utc_tz)
        else:
            # Parse TZ-aware isoformat

            # strptime doesn't support timezone parsing (%z flag), so we're forcing
            # the strings in the database to be UTC (+00:00) for now.
            # TODO Handle arbitrary timezones (with manual parsing).
            if value.endswith('Z'):
                value = value[:-1] + '+00:00'
            return datetime.strptime(
                value, "%Y-%m-%dT%H:%M:%S.%f+00:00").replace(tzinfo=utc_tz)

    # Handle "base" types like int and unicode: if there is no value, load the
    # type's "neutral" value. If coercion succeeds, validation is done, hence the
    # "return" shortcut. This must come after the json and datetime cases, as
    # json is only a special case of this one.
    if schema_type is type:
        if value is None:
            return schema_entry()
        return schema_entry(value)

    # if this is a regular callable, run it on the input as a validator
    elif callable(schema_entry):
        return schema_entry(value)

    raise SchemaError("Invalid schema entry {}; can not load value {}".format(schema_entry, value))



class ConnectionBorg(object):
    """Borg that handles access to DynamoDB.

    You should never make explicit/direct ``boto.dynamodb`` calls yourself,
    except for table maintenance operations:

        - ``boto.dynamodb.table.update_throughput()``
        - ``boto.dynamodb.table.delete()``

    Remember to call :meth:`set_credentials`, or to set the
    ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environment variables
    before making any calls.
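
    A minimal setup sketch (credentials and region are placeholders)::

        conn = ConnectionBorg()
        conn.set_credentials("<access_key_id>", "<secret_access_key>")
        conn.set_region("eu-west-1")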
    """
    _shared_state = {
        "_aws_access_key_id": None,
        "_aws_secret_access_key": None,
        "_region": None,
        "_connection": None,
        "_tables_cache": {},
    }

    def __init__(self):
        self.__dict__ = self._shared_state

    def _get_connection(self):
        """Return the DynamoDB connection for the mapper
        """
        if self._connection is None:
            self._connection = boto.connect_dynamodb(
                aws_access_key_id=self._aws_access_key_id,
                aws_secret_access_key=self._aws_secret_access_key,
                region=self._region,
            )
        return self._connection

    def set_credentials(self, aws_access_key_id, aws_secret_access_key):
        """Set the DynamoDB credentials. If boto is already configured on this
        machine, this step is optional.
        Access keys can be found in `Amazon's console
        <https://aws-portal.amazon.com/gp/aws/developer/account/index.html?action=access-key>`_.

        :param aws_access_key_id: AWS API access key ID

        :param aws_secret_access_key: AWS API secret access key

        """
        self._aws_access_key_id = aws_access_key_id
        self._aws_secret_access_key = aws_secret_access_key

    def set_region(self, region_name):
        """Set the DynamoDB region. If this is not set AWS defaults to 'us-east-1'.

        :param region_name: The name of the region to use
        """
        for region in boto.dynamodb.regions():
            if region.name == region_name:
                self._region = region
                return

        raise InvalidRegionError("Region name %s is invalid" % region_name)

    def create_table(self, cls, read_units, write_units, wait_for_active=False):
        """Create a table that'll be used to store instances of cls.

        See `Amazon's developer guide <http://docs.amazonwebservices.com/amazondynamodb/latest/developerguide/ProvisionedThroughputIntro.html>`_
        for more information about provisioned throughput.

        :param cls: The class whose instances will be stored in the table.

        :param read_units: The number of read units to provision for this table
            (minimum 5)

        :param write_units: The number of write units to provision for this
            table (minimum 5).

        :param wait_for_active: If True, create_table will wait for the table
            to become ACTIVE before returning (otherwise, it'll be CREATING).
            Note that this can take up to a minute.
            Defaults to False.
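
        For example, with ``GameScore`` being any ``DynamoDBModel`` subclass::

            ConnectionBorg().create_table(GameScore, 10, 10, wait_for_active=True)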
        """
        table_name = cls.__table__
        hash_key_name = cls.__hash_key__
        range_key_name = cls.__range_key__

        if not table_name:
            raise SchemaError("Class does not define __table__", cls)

        # FIXME: check key is defined in schema
        if not hash_key_name:
            raise SchemaError("Class does not define __hash_key__", cls)

        if not cls.__schema__:
            raise SchemaError("Class does not define __schema__", cls)

        hash_key_type = cls.__schema__[hash_key_name]

        if hash_key_type is autoincrement_int:
            if range_key_name:
                raise SchemaError(
                    "Class defines both a range key and an autoincrement_int hash key",
                    cls)
            if not wait_for_active:
                # Maybe we should raise ValueError instead?
                log.info(
                    "Class %s has autoincrement_int hash key -- forcing wait_for_active",
                    cls)
                wait_for_active = True

        conn = self._get_connection()
        # It's a prototype/an instance, not a type.
        hash_key_proto_value = _get_proto_value(hash_key_type)
        # None in the case of a hash-only table.
        if range_key_name:
            # We have a range key, its type must be specified.
            range_key_proto_value = _get_proto_value(
                cls.__schema__[range_key_name])
        else:
            range_key_proto_value = None

        schema = conn.create_schema(
            hash_key_name=hash_key_name,
            hash_key_proto_value=hash_key_proto_value,
            range_key_name=range_key_name,
            range_key_proto_value=range_key_proto_value
        )
        table = conn.create_table(cls.__table__, schema, read_units, write_units)
        table.refresh(wait_for_active=wait_for_active)

        dblog.debug("Created table %s(%s, %s)", cls.__table__, hash_key_name, range_key_name)

        return table

    def get_table(self, name):
        """Return the table with the requested name."""
        if name not in self._tables_cache:
            self._tables_cache[name] = self._get_connection().get_table(name)
        return self._tables_cache[name]


class DynamoDBModel(object):
    """Abstract base class for all models that use DynamoDB as their storage
    backend.

    Each subclass must define the following attributes:

      - ``__table__``: the name of the table used for storage.
      - ``__hash_key__``: the name of the primary hash key.
      - ``__range_key__``: (optional) if you're using a composite primary key,
          the name of the range key.
      - ``__schema__``: ``{attribute_name: attribute_type}`` mapping.
          Supported attribute_types are: int, long, float, str, unicode, set.
          Default values are obtained by calling the type with no args
          (so 0 for numbers, "" for strings and empty sets).
      - ``__defaults__``: (optional) ``{attribute_name: defaulter}`` mapping.
          This dict allows you to provide a default value for each attribute_name at
          object creation time. It will *never* be used when loading from the DB.
          It is fully optional. If no value is supplied the empty value
          corresponding to the type will be used.
          "defaulter" may either be a scalar value or a callable with no
          arguments.
      - ``__migrator__``: :py:class:`~.Migration` handler attached to this model
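
    For example, a minimal model with a composite key and default values could
    be declared as follows (table, model and attribute names are purely
    illustrative)::

        class GameScore(DynamoDBModel):
            __table__ = "game_scores"
            __hash_key__ = "player_id"
            __range_key__ = "game_id"
            __schema__ = {
                "player_id": unicode,
                "game_id": unicode,
                "score": int,
                "updated_at": datetime,
            }
            __defaults__ = {
                "score": 0,
                "updated_at": lambda: datetime.now(utc_tz),
            }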

    To redefine serialization/deserialization semantics (e.g. to have more
    complex schemas, like auto-serialized JSON data structures), override the
    _from_db_dict (deserialization) and _to_db_dict (serialization) methods.

    *Important implementation note regarding sets:* DynamoDB can't store empty
    sets/strings. Therefore, since we have schema information available to us,
    we're storing empty sets/strings as missing attributes in DynamoDB, and
    converting back and forth based on the schema.

    So if your schema looks like the following::

        {
            "id": unicode,
            "name": str,
            "cheats": set
        }

    then::

        {
            "id": "e1m1",
            "name": "Hangar",
            "cheats": set([
                "idkfa",
                "iddqd"
            ])
        }

    will be stored exactly as is, but::

        {
            "id": "e1m2",
            "name": "",
            "cheats": set()
        }

    will be stored as simply::

        {
            "id": "e1m2"
        }
    """

    # TODO Add checks to the various methods so that meaningful error messages
    # are raised when they're incorrectly overridden.
    __table__ = None
    __hash_key__ = None
    __range_key__ = None
    __schema__ = None
    __migrator__ = None
    __defaults__ = {}

    def __init__(self, **kwargs):
        """Create an instance of the model. All fields defined in the schema
        are created. In order of priority, each field's value is loaded from:

            - kwargs
            - __defaults__
            - None. Warning, this will most likely cause failures at save time

        We're supplying this method to avoid the need for extra checks in save and
        to ease initial object creation.

        Objects created and initialized with this method are considered as not
        coming from the DB.
        """
        cls = type(self)
        defaults = cls.__defaults__
        schema = cls.__schema__

        self._raw_data = {}

        for (name, type_) in schema.iteritems():
            if name in kwargs:
                value = kwargs.get(name)
            elif name in defaults:
                default = defaults[name]
                # "defaulter" may be a scalar or a callable with no arguments
                # (see the class docstring); call it if needed.
                value = default() if callable(default) else default
            else:
                value = None
            setattr(self, name, value)

        # instantiate the migrator only once per model *after* initialization
        # as it assumes a fully initialized model
        if isinstance(cls.__migrator__, type):
            cls.__migrator__ = cls.__migrator__(cls)

    def validate(self):
        """Return a ``dict`` of validated fields if validators passes. Otherwise
        ``InvalidList`` is raised.
        """
        # load schema
        schema = self.__schema__
        validate = Schema(schema)
        # load schema data from self
        data = {str(key):getattr(self, str(key)) for key in schema}
        # return validated data (or raise)
        return validate(data)

    @classmethod
    def get(cls, hash_key_value, range_key_value=None, consistent_read=False):
        """Retrieve a single object from DynamoDB according to its primary key.

        Note that this is not a query method -- it will only return the object
        matching the exact primary key provided. Meaning that if the table is
        using a composite primary key, you need to specify both the hash and
        range key values.

        Objects loaded by this method are marked as coming from the DB. Hence
        their initial state is saved in ``self._raw_data``.

        :param hash_key_value: The value of the requested item's hash_key.

        :param range_key_value: The value of the requested item's range_key,
            if the table has a composite key.

        :param consistent_read: If False (default), an eventually consistent
            read is performed. Set to True for strongly consistent reads.
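
        For example, with the hypothetical ``GameScore`` model described in the
        class docstring::

            score = GameScore.get(u"player1", u"game42", consistent_read=True)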
        """
        table = ConnectionBorg().get_table(cls.__table__)
        # Convert the keys to DynamoDB values.
        h_value = _python_to_dynamodb(hash_key_value)
        if cls.__range_key__:
            r_value = _python_to_dynamodb(range_key_value)
        else:
            r_value = None

        item = table.get_item(
                    hash_key=h_value,
                    range_key=r_value,
                    consistent_read=consistent_read)

        dblog.debug("Got item (%s, %s) from table %s", h_value, r_value, cls.__table__)

        return cls._from_db_dict(item)

    @classmethod
    def get_batch(cls, keys):
        """Retrieve multiple objects according to their primary keys.

        Like get, this isn't a query method -- you need to provide the exact
        primary key(s) for each object you want to retrieve:

          - If the primary keys are hash keys, keys must be a list of
            their values (e.g. ``[1, 2, 3, 4]``).
          - If the primary keys are composite (hash + range), keys must
            be a list of ``(hash_key, range_key)`` values
            (e.g. ``[("user1", 1), ("user1", 2), ("user1", 3)]``).

        get_batch *always* performs eventually consistent reads.

        Objects loaded by this method are marked as coming from the DB. Hence
        their initial state is saved in ``self._raw_data``.

        :param keys: An iterable of keys, e.g. ``[(hash1, range1), (hash2, range2)]``

        """
        table = ConnectionBorg().get_table(cls.__table__)

        # Convert all the keys to DynamoDB values.
        if cls.__range_key__:
            dynamo_keys = [
                (
                    _python_to_dynamodb(h),
                    _python_to_dynamodb(r)
                ) for (h, r) in keys
            ]
        else:
            dynamo_keys = map(_python_to_dynamodb, keys)

        res = table.batch_get_item(dynamo_keys)

        dblog.debug("Sent a batch get on table %s", cls.__table__)

        return [cls._from_db_dict(d) for d in res]

    @classmethod
    def query(cls, hash_key_value, range_key_condition=None, consistent_read=False, reverse=False, limit=None):
        """Query DynamoDB for items matching the requested key criteria.

        You need to supply an exact hash key value, and optionally, conditions
        on the range key. If no such conditions are supplied, all items matching
        the hash key value will be returned.

        This method can only be used on tables with composite (hash + range)
        primary keys -- since the exact hash key value is mandatory, on tables
        with hash-only primary keys, cls.get(k) does the same thing cls.query(k)
        would.

        Objects loaded by this method are marked as coming from the DB. Hence
        their initial state is saved in ``self._raw_data``.

        :param hash_key_value: The hash key's value for all requested items.

        :param range_key_condition: A condition instance from
            ``boto.dynamodb.condition`` -- one of

                - EQ(x)
                - LE(x)
                - LT(x)
                - GE(x)
                - GT(x)
                - BEGINS_WITH(x)
                - BETWEEN(x, y)

        :param consistent_read: If False (default), an eventually consistent
            read is performed. Set to True for strongly consistent reads.

        :param reverse: Ask DynamoDB to scan the ``range_key`` in the reverse
            order. For example, if you use dates here, the most recent elements
            will be returned first. Defaults to ``False``.

        :param limit: Specify the maximum number of items to read from the table.
            Even though Boto returns a generator, it works in batches of up to 1MB.
            Using this option may help save some read capacity. Defaults to
            ``None``.

        :rtype: generator
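
        For example, with the hypothetical ``GameScore`` model, all of a player's
        scores for games whose id starts with ``doom-`` could be fetched with::

            from boto.dynamodb.condition import BEGINS_WITH

            scores = GameScore.query(
                u"player1",
                range_key_condition=BEGINS_WITH(u"doom-"),
                limit=10)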
        """
        table = ConnectionBorg().get_table(cls.__table__)
        h_value = _python_to_dynamodb(hash_key_value)

        res = table.query(
                h_value,
                range_key_condition,
                consistent_read=consistent_read,
                scan_index_forward=not reverse,
                max_results=limit)

        dblog.debug("Queried (%s, %s) on table %s", h_value, range_key_condition, cls.__table__)

        return (cls._from_db_dict(d) for d in res)

    @classmethod
    def scan(cls, scan_filter=None):
        """Scan DynamoDB for items matching the requested criteria.

        You can scan based on any attribute and any criteria (including multiple
        criteria on multiple attributes), not just the primary keys.

        Scan is a very expensive operation -- it doesn't use any indexes and will
        look through the entire table. As much as possible, you should avoid it.

        Objects loaded by this method are marked as coming from the DB. Hence
        their initial state is saved in ``self._raw_data``.

        :param scan_filter: A ``{attribute_name: condition}`` dict, where
            condition is a condition instance from ``boto.dynamodb.condition``.

        :rtype: generator
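
        For example, with the hypothetical ``GameScore`` model::

            from boto.dynamodb.condition import GT

            high_scores = GameScore.scan({"score": GT(9000)})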
        """
        table = ConnectionBorg().get_table(cls.__table__)
        hash_key_name = table.schema.hash_key_name

        res = table.scan(scan_filter)

        dblog.debug("Scanned table %s with filter %s", cls.__table__, scan_filter)

        return (
            cls._from_db_dict(d)
            for d in res
            if d[hash_key_name] != MAGIC_KEY or cls.__schema__[hash_key_name] != autoincrement_int
        )

    @classmethod
    def _from_db_dict(cls, raw_data):
        """Build an instance from a dict-like mapping, according to the class's
        schema. Objects created with this method are considered as coming from
        the DB. The initial state is persisted in ``self._raw_data``.
        If a ``__migrator__`` has been declared, migration is triggered on a copy
        of the raw data.

        Default values are used for anything that's missing from the dict
        (see DynamoDBModel class docstring).

        Direct use of this method should be avoided as much as possible but still
        may be useful for "deep copy".

        Overload this method if you need special (de-)serialization semantics.

        :param raw_data: Raw db dict
        """
        #FIXME: type check. moving to __init__ syntax may break some implementations
        instance = cls()
        instance._raw_data = raw_data

        # If a migrator is registered, trigger it
        if cls.__migrator__ is not None:
            raw_data = cls.__migrator__(raw_data)

        # de-serialize data
        for (name, type_) in cls.__schema__.iteritems():
            # Set the value if we got one from DynamoDB. Otherwise, stick with the default
            value = _dynamodb_to_python(type_, raw_data.get(name)) # de-serialize
            setattr(instance, name, value)

        return instance

    def _to_db_dict(self):
        """Return a dict representation of the object according to the class's
        schema, suitable for direct storage in DynamoDB.

        Direct use of this method should be avoided as much as possible but still
        may be useful for "deep copy".

        Overload this method if you need special serialization semantics.
        """
        data = self.validate()
        return {key: _python_to_dynamodb(val) for key, val in data.iteritems() if val or val == 0}

    def to_json_dict(self):
        """Return a dict representation of the object, suitable for JSON
        serialization.

        This means the values must all be valid JSON object types
        (in particular, sets must be converted to lists), but types not
        suitable for DynamoDB (e.g. nested data structures) may be used.

        Note that this method is never used for interaction with the database.
        """
        out = {}
        for name in self.__schema__:
            value = getattr(self, name)
            if isinstance(value, (set, frozenset)):
                out[name] = sorted(value)
            elif isinstance(value, datetime):
                # astimezone + isoformat gives the right separator ('T') and
                # the ':' in the time offset notation required by W3CDTF
                out[name] = value.astimezone(utc_tz).isoformat()
            else:
                out[name] = value
        return out

    def _save_autoincrement_hash_key(self):
        """Compute an autoincremented hash_key for an item and save it to the DB.

        To achieve this, we keep a special item at ``hash_key=MAGIC_KEY`` to keep
        track of the counter status, and issue an atomic increment on the counter
        field.

        We do not need to read the counter item beforehand, as we already know its
        hash_key. The new counter value is sent back to us and used as the
        hash_key of the new element.
        """
        counter_key = '__max_hash_key__'
        hk_name = self.__hash_key__
        table = ConnectionBorg().get_table(self.__table__)
        tries = 0

        while tries < MAX_RETRIES:
            tries += 1
            # Create a 'new item' with key=-1 and trigger an atomic increment
            # This spares one read unit :)
            max_hash_item = Item(table, hash_key=MAGIC_KEY)
            max_hash_item.add_attribute(counter_key, 1)
            max_hash_item = max_hash_item.save(return_values='ALL_NEW')
            # We just reserved that value for the hash key
            hash_key = max_hash_item['Attributes'][counter_key]
            setattr(self, hk_name, autoincrement_int(hash_key))

            try:
                # Make sure this primary key was not 'stolen' by a direct DB access
                self.save(raise_on_conflict=True)
                dblog.debug("Saved autoinc (%s) in table %s", hash_key, table.name)
                return
            except ConflictError as e:
                log.debug(
                    "table=%s, An item seems to have been manually inserted at index %s (%s).",
                    table.name, hash_key, tries)

        # This table auto-incr has been screwed up...
        raise MaxRetriesExceededError()

    def save(self, raise_on_conflict=False):
        """Save the object to the database.

        This method may be used either to insert a new object in the DB, or to
        update an existing one (iff ``raise_on_conflict == False``).

        It also embeds the high level logic to avoid the 'lost update' syndrome.
        Internally, it uses ``expected_values`` set to ``self._raw_data``

        ``raise_on_conflict=True`` scenarios:

        - **object from database**: Use ``self._raw_data`` to generate ``expected_values``
        - **new object**: ``self._raw_data`` is empty, set ``allow_overwrite=False``
        - **new object with autoinc**: flag has no effect
        - **(accidentally) editing keys**: Use ``self._raw_data`` to generate ``expected_values``; will catch overwrites and insertions to an empty location

        :param raise_on_conflict: flag to toggle overwrite protection -- if any
            one of the original values doesn't match what is in the database
            (i.e. someone went ahead and modified the object in the DB behind
            your back), the operation fails and raises
            :class:`ConflictError` or ``OverwriteError``.

        :raise ConflictError: Target object has changed between read and write operation
        :raise OverwriteError: A new Item overwrites an existing one and ``raise_on_conflict=True``. Note: this exception inherits from ConflictError
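
        A typical optimistic-locking sketch (model and attribute names are
        illustrative)::

            score = GameScore.get(u"player1", u"game42")
            score.score += 10
            try:
                score.save(raise_on_conflict=True)
            except ConflictError:
                pass  # somebody else saved the item in the meantime; re-read and retry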
        """

        cls = type(self)
        expected_values = {}
        allow_overwrite = True
        schema = cls.__schema__
        hash_key = cls.__hash_key__
        range_key = cls.__range_key__
        table = ConnectionBorg().get_table(cls.__table__)

        # Detect magic elem manual overwrite
        if schema[hash_key] == autoincrement_int and getattr(self, hash_key) == MAGIC_KEY:
            raise SchemaError("Index {} is reserved in table with autoincrementing key".format(MAGIC_KEY))
        # We're inserting a new item in an autoincrementing table.
        if schema[hash_key] == autoincrement_int and getattr(self, hash_key) is None:
            # allocate the index and recursively call this method
            return self._save_autoincrement_hash_key()


        item_data = self._to_db_dict()
        item = Item(table, attrs=item_data)

        # Regular save
        if raise_on_conflict:
            if self._raw_data:
                expected_values = self._raw_data
                # Empty strings/sets must be represented as missing values
                for name in schema.iterkeys():
                    if name not in expected_values:
                        expected_values[name] = False
            else:
                # Forbid overwrites: do a conditional write on
                # "this hash_key doesn't exist"
                allow_overwrite = False
                expected_values = {hash_key: False}
                if range_key:
                    expected_values[range_key] = False
        try:
            item.put(expected_values)
        except DynamoDBResponseError as e:
            if e.error_code == "ConditionalCheckFailedException":
                if allow_overwrite:
                    # Conflict detected
                    raise ConflictError(item)
                # Forbidden overwrite
                raise OverwriteError(item)
            # Unhandled exception
            raise

        # Update _raw_data to reflect the DB state on success
        self._raw_data = item_data

        hash_key_value = getattr(self, hash_key)
        range_key_value = getattr(self, range_key, None) if range_key else None
        dblog.debug("Saved (%s, %s) in table %s raise_on_conflict=%s", hash_key_value, range_key_value, cls.__table__, raise_on_conflict)

    def delete(self, raise_on_conflict=False):
        """Delete the current object from the database.

        If the Item has been edited before the ``delete`` command is issued and
        ``raise_on_conflict=True`` then, :class:`ConflictError` is raised.

        :param raise_on_conflict: flag to toggle overwrite protection -- if any
            one of the original values doesn't match what is in the database
            (i.e. someone went ahead and modified the object in the DB behind
            your back), the operation fails and raises
            :class:`ConflictError`.

        :raise ConflictError: Target object has changed between read and write operation
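
        For example (names are illustrative)::

            score = GameScore.get(u"player1", u"game42")
            score.delete(raise_on_conflict=True)  # fails if the item changed since the read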
        """
        cls = type(self)
        schema = cls.__schema__
        expected_values = None
        hash_key_value = getattr(self, cls.__hash_key__)
        h_value = _python_to_dynamodb(hash_key_value)

        if raise_on_conflict:
            if self._raw_data:
                expected_values = self._raw_data
                # Empty strings/sets must be represented as missing values
                for name in schema.iterkeys():
                    if name not in expected_values:
                        expected_values[name] = False
            else:  # shortcut :D
                raise ConflictError("Attempt to delete an object which has not yet been persisted, with raise_on_conflict=True")

        # Range key is only present in composite primary keys
        if cls.__range_key__:
            range_key_value = getattr(self, cls.__range_key__)
            r_value = _python_to_dynamodb(range_key_value)
        else:
            r_value = None

        try:
            table = ConnectionBorg().get_table(cls.__table__)
            Item(table, h_value, r_value).delete(expected_values)
        except DynamoDBConditionalCheckFailedError as e:
            raise ConflictError(e)

        # Make sure any further save will be considered as *insertion*
        self._raw_data = {}

        dblog.debug("Deleted (%s, %s) from table %s", h_value, r_value, cls.__table__)