Commits

codernity committed a472a6c

Lots of changes...
- Added sharding (tests + docs + demo)
- Improved thread-safe database
- Improved indexcreator (prefix, infix, suffix support)
- Documentation updates
- Migrate script added
- Wrong indexes will now print warnings with error info
- Code prettified

Files changed (34)

CodernityDB/database.py

 import os
 import io
 from inspect import getsource
-from indexcreator import Parser
 
 # for custom indexes
 from CodernityDB.storage import Storage, IU_Storage
 
 from random import randrange
 
+import warnings
+
 
 def header_for_indexes(index_name, index_class, db_custom="", ind_custom="", classes_code=""):
     return """# %s
             raise IndexConflict("Already exists")
 
     def __write_index(self, new_index, number=0, edit=False, ind_kwargs=None):
-        #print new_index
+        # print new_index
         if ind_kwargs is None:
             ind_kwargs = {}
         p = os.path.join(self.path, '_indexes')
         if isinstance(new_index, basestring) and not new_index.startswith("path:"):
             if len(new_index.splitlines()) < 4 or new_index.splitlines()[3] != '# inserted automatically':
+                from indexcreator import Parser
                 par = Parser()
-                s = par.parse(new_index).splitlines()
+                custom_imports, s = par.parse(new_index)
+                s = s.splitlines()
                 name = s[0][2:]
                 c = s[1][2:]
                 comented = ['\n\n#SIMPLIFIED CODE']
                 comented.append('#SIMPLIFIED CODE END\n\n')
 
                 s = header_for_indexes(
-                    name, c) + "\n".join(s[2:]) + "\n".join(comented)
+                    name, c, ind_custom=custom_imports) + "\n".join(s[2:]) + "\n".join(comented)
                 new_index = s
-
             else:
                 name = new_index.splitlines()[0][2:]
                 name = name.strip()
         if name == 'id':
             self.__set_main_storage()
             self.__compat_things()
+        for patch in getattr(ind_obj, 'patchers', ()):  # index can patch db object
+            patch(self)
         return name
 
     def edit_index(self, index, reindex=False, ind_kwargs=None):
             ind_kwargs = {}
         ind_obj, name = self.__write_index(index, -1, edit=True)
         old = next(x for x in self.indexes if x.name == name)
+        old.close_index()
         index_of_index = self.indexes.index(old)
         ind_obj.open_index()
         self.indexes[index_of_index] = ind_obj
             try:
                 index.create_index()
             except IndexException:
-                raise DatabaseConflict("Already exists (detected on index=%s)" % index.name)
+                raise DatabaseConflict(
+                    "Already exists (detected on index=%s)" % index.name)
         return True
 
     def _read_indexes(self):
             return
         if self.id_ind.entry_line_format[4:6] == '4s':
             # rev compatibility...
-            import warnings
             warnings.warn("Your database is using old rev mechanizm \
 for ID index. You should update that index \
 (CodernityDB.migrate.migrate).")
         """
         try:
             old_should_index = index.make_key_value(db_data)
-        except:
+        except Exception as ex:
+            warnings.warn("""Problem during update for `%s`, ex = `%s`, \
+uou should check index code.""" % (index.name, ex), RuntimeWarning)
             old_should_index = None
         if old_should_index:
             old_key, old_value = old_should_index
             try:
                 new_should_index = index.make_key_value(data)
-            except:
+            except Exception as ex:
+                warnings.warn("""Problem during update for `%s`, ex = `%r`, \
+you should check index code.""" % (index.name, ex), RuntimeWarning)
                 new_should_index = None
             if new_should_index:
                 new_key, new_value = new_should_index
                 if new_key != old_key:
                     index.delete(doc_id, old_key)
-                    if new_value:
-                        storage = index.storage
-                        start, size = storage.insert(new_value)
-                    else:
-                        start = 1
-                        size = 0
-                    index.insert(doc_id, new_key, start, size)
+                    index.insert_with_storage(doc_id, new_key, new_value)
                 elif new_value != old_value:
-                    if new_value:
-                        storage = index.storage
-                        start, size = storage.insert(new_value)
-                    else:
-                        start = 1
-                        size = 0
                     try:
-                        index.update(doc_id, new_key, start, size)
+                        index.update_with_storage(doc_id, new_key, new_value)
                     except (ElemNotFound, DocIdNotFound):
                         # element should be in index but isn't
                         #(propably added new index without reindex)
-                        raise TryReindexException()
+                        warnings.warn("""Reindex might be required for index %s""" % index.name)
             else:
                 index.delete(doc_id, old_key)
         else:  # not previously indexed
         if db_data['_rev'] != _rev:
             raise RevConflict()
         new_rev = self.create_new_rev(_rev)
-        storage = self.storage
-        start, size = storage.update(value)
-        self.id_ind.update(_id, new_rev, start, size)
+        # storage = self.storage
+        # start, size = storage.update(value)
+        # self.id_ind.update(_id, new_rev, start, size)
+        self.id_ind.update_with_storage(_id, new_rev, value)
         return _id, new_rev, db_data
 
     def _update_indexes(self, _rev, data):
         """
         try:
             should_index = index.make_key_value(data)
-        except:
+        except Exception as ex:
+            warnings.warn("""Problem during insert for `%s`, ex = `%r`, \
+you should check index code.""" % (index.name, ex), RuntimeWarning)
             should_index = None
         if should_index:
             key, value = should_index
-            if value:
-                storage = index.storage
-                start, size = storage.insert(value)
-            else:
-                start = 1
-                size = 0
-            index.insert(doc_id, key, start, size)
+            index.insert_with_storage(doc_id, key, value)
+            # if value:
+            #     storage = index.storage
+            #     start, size = storage.insert(value)
+            # else:
+            #     start = 1
+            #     size = 0
+            # index.insert(doc_id, key, start, size)
 
     def _insert_id_index(self, _rev, data):
         """
         Performs insert on **id** index.
         """
         _id, value = self.id_ind.make_key_value(data)  # may be improved
-        storage = self.storage
-        start, size = storage.insert(value)
-        self.id_ind.insert(_id, _rev, start, size)
+#        storage = self.storage
+        # start, size = storage.insert(value)
+        # self.id_ind.insert(_id, _rev, start, size)
+        self.id_ind.insert_with_storage(_id, _rev, value)
         return _id
 
     def _insert_indexes(self, _rev, data):
         """
         Performs delete from **id** index
         """
-        #key, value = self.id_ind.make_key_value(data)
+        # key, value = self.id_ind.make_key_value(data)
         # key = data['_id']
         key = self.id_ind.make_key(_id)
         self.id_ind.delete(key)
             self.__not_opened()
             raise IndexNotFoundException(
                 "Index `%s` doesn't exists" % index_name)
-        storage = ind.storage
         try:
             l_key, _unk, start, size, status = ind.get(key)
         except ElemNotFound as ex:
         elif status == 'd':
             raise RecordDeleted("Deleted")
         if with_storage and size:
+            storage = ind.storage
             data = storage.get(start, size, status)
         else:
 
             data = {}
         if with_doc and index_name != 'id':
+            storage = ind.storage
             doc = self.get('id', l_key, False)
             if data:
                 data['doc'] = doc
             else:
-                data = dict(doc=doc)
+                data = {'doc': doc}
         data['_id'] = l_key
         if index_name == 'id':
             data['_rev'] = _unk
             data['key'] = _unk
         return data
 
-    def get_many(self, index_name, key=None, limit=1, offset=0, with_doc=False, with_storage=True, start=None, end=None, **kwargs):
+    def get_many(self, index_name, key=None, limit=-1, offset=0, with_doc=False, with_storage=True, start=None, end=None, **kwargs):
         """
         Allows to get **multiple** data for given ``key`` for *Hash based indexes*.
         Also allows get **range** queries for *Tree based indexes* with ``start`` and ``end`` arguments.
                     if data:
                         data['doc'] = doc
                     else:
-                        data = dict(doc=doc)
+                        data = {'doc': doc}
                 data['_id'] = doc_id
                 if key is None:
                     data['key'] = ind_data[1]
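
A minimal sketch of the new warning behaviour (not from the commit; the database path, index name and missing field are invented): an index whose make_key_value raises no longer fails silently, the database now emits a RuntimeWarning naming the broken index.

    import warnings

    from CodernityDB.database import Database
    from CodernityDB.hash_index import HashIndex


    class BrokenIndex(HashIndex):

        custom_header = 'from CodernityDB.hash_index import HashIndex'

        def __init__(self, *args, **kwargs):
            kwargs['key_format'] = 'I'
            super(BrokenIndex, self).__init__(*args, **kwargs)

        def make_key_value(self, data):
            # raises KeyError for every document without this field
            return data['no_such_field'], None

        def make_key(self, key):
            return key


    db = Database('/tmp/warn_demo')
    db.create()
    db.add_index(BrokenIndex(db.path, 'broken'))

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        db.insert({'x': 1})

    print caught[0].message  # Problem during insert for `broken`, ex = `...`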

CodernityDB/database_gevent.py

 cdb_environment['rlock_obj'] = RLock
 
 
-#from CodernityDB.database import Database
+# from CodernityDB.database import Database
 from CodernityDB.database_safe_shared import SafeDatabase
 
 

CodernityDB/database_safe_shared.py

 # limitations under the License.
 
 from CodernityDB.env import cdb_environment
-from CodernityDB.database import PreconditionsException
-from database import Database
+from CodernityDB.database import PreconditionsException, RevConflict, Database
+#from database import Database
 
 from collections import defaultdict
 from functools import wraps
 
     def __init__(self, path, *args, **kwargs):
         super(SafeDatabase, self).__init__(path, *args, **kwargs)
-        self.indexes_locks = defaultdict(lambda: cdb_environment['rlock_obj']())
+        self.indexes_locks = defaultdict(
+            lambda: cdb_environment['rlock_obj']())
         self.close_open_lock = cdb_environment['rlock_obj']()
         self.main_lock = cdb_environment['rlock_obj']()
+        self.id_revs = {}
 
     def __patch_index_gens(self, name):
         ind = self.indexes_names[name]
                 self.__patch_index(res)
             return res
 
+    def _single_update_index(self, index, data, db_data, doc_id):
+        with self.indexes_locks[index.name]:
+            super(SafeDatabase, self)._single_update_index(
+                index, data, db_data, doc_id)
+
+    def _single_delete_index(self, index, data, doc_id, old_data):
+        with self.indexes_locks[index.name]:
+            super(SafeDatabase, self)._single_delete_index(
+                index, data, doc_id, old_data)
+
     def edit_index(self, *args, **kwargs):
         with self.main_lock:
             res = super(SafeDatabase, self).edit_index(*args, **kwargs)
         if key in self.indexes_locks:
             lock = self.indexes_locks[index.name + "reind"]
         else:
-            self.indexes_locks[index.name + "reind"] = cdb_environment['rlock_obj']()
+            self.indexes_locks[index.name +
+                               "reind"] = cdb_environment['rlock_obj']()
             lock = self.indexes_locks[index.name + "reind"]
         self.main_lock.release()
         try:
             super(SafeDatabase, self).fsync()
         finally:
             self.main_lock.release()
+
+    def _update_id_index(self, _rev, data):
+        with self.indexes_locks['id']:
+            return super(SafeDatabase, self)._update_id_index(_rev, data)
+
+    def _delete_id_index(self, _id, _rev, data):
+        with self.indexes_locks['id']:
+            return super(SafeDatabase, self)._delete_id_index(_id, _rev, data)
+
+    def _update_indexes(self, _rev, data):
+        _id, new_rev, db_data = self._update_id_index(_rev, data)
+        with self.main_lock:
+            self.id_revs[_id] = new_rev
+        for index in self.indexes[1:]:
+            with self.main_lock:
+                curr_rev = self.id_revs.get(_id)  # get last _id, _rev
+                if curr_rev != new_rev:
+                    break  # new update on the way stop current
+            self._single_update_index(index, data, db_data, _id)
+        with self.main_lock:
+            if self.id_revs[_id] == new_rev:
+                del self.id_revs[_id]
+        return _id, new_rev
+
+    def _delete_indexes(self, _id, _rev, data):
+        old_data = self.get('id', _id)
+        if old_data['_rev'] != _rev:
+            raise RevConflict()
+        with self.main_lock:
+            self.id_revs[_id] = _rev
+        for index in self.indexes[1:]:
+            self._single_delete_index(index, data, _id, old_data)
+        self._delete_id_index(_id, _rev, data)
+        with self.main_lock:
+            if self.id_revs[_id] == _rev:
+                del self.id_revs[_id]
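
The id_revs bookkeeping above is what lets a writer that lost the _update_id_index race stop updating secondary indexes early instead of clobbering newer data. A rough usage sketch, assuming the thread-based variant (the module name CodernityDB.database_thread_safe and the path below are assumptions, not shown in this commit):

    from threading import Thread

    from CodernityDB.database_thread_safe import ThreadSafeDatabase

    db = ThreadSafeDatabase('/tmp/safe_demo')
    db.create()
    doc = db.insert({'counter': 0})


    def writer(n):
        for _ in xrange(n):
            try:
                curr = db.get('id', doc['_id'])
                curr['counter'] += 1
                db.update(curr)
            except Exception:
                pass  # RevConflict is expected under contention


    threads = [Thread(target=writer, args=(100,)) for _ in xrange(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    print db.get('id', doc['_id'])['counter']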

CodernityDB/database_super_thread_safe.py

                     if b_attr == 'flush' or b_attr == 'flush_indexes':
                         pass
                     else:
-                        #setattr(base, b_attr, SuperLock.wrapper(a))
+                        # setattr(base, b_attr, SuperLock.wrapper(a))
                         new_attr[b_attr] = SuperLock.wrapper(a)
         for attr_name, attr_value in attr.iteritems():
             if isinstance(attr_value, FunctionType) and not attr_name.startswith('_'):
     than ThreadSafe version (without super word)
     """
 
+    __metaclass__ = SuperLock
+
+    def __init__(self, *args, **kwargs):
+        super(SuperThreadSafeDatabase, self).__init__(*args, **kwargs)
 
     def __patch_index_gens(self, name):
         ind = self.indexes_names[name]
         res = super(SuperThreadSafeDatabase, self).edit_index(*args, **kwargs)
         self.__patch_index_gens(res)
         return res
-    __metaclass__ = SuperLock
-
-    def __init__(self, *args, **kwargs):
-        super(SuperThreadSafeDatabase, self).__init__(*args, **kwargs)

CodernityDB/debug_stuff.py

             meth(*line[1], **line[2])
 
 
-#def insert_for_debug(self, data):
+# def insert_for_debug(self, data):
 #
 #    _rev = data['_rev']
 #

CodernityDB/hash_index.py

     def _calculate_position(self, key):
         return abs(hash(key) & self.hash_lim) * self.bucket_line_size + self._start_ind
 
-    #TODO add cache!
+    # TODO add cache!
     def _locate_key(self, key, start):
         """
         Locate position of the key, it will iterate using `next` field in record
                                                           size,
                                                           status,
                                                           _next))
-                self.flush()
+#                self.flush()
                 self.buckets.seek(found_at)
                 self.buckets.write(self.entry_struct.pack(_doc_id,
                                                           _key,
                                                       size,
                                                       status,
                                                       0))
-            self.flush()
+#            self.flush()
             self._find_key.delete(key)
             self.buckets.seek(start_position)
             self.buckets.write(self.bucket_struct.pack(wrote_at))
         if curr_data:
             location = self.bucket_struct.unpack(curr_data)[0]
         else:
-            #case happens when trying to delete element with new index key in data
-            #after adding new index to database without reindex
+            # case happens when trying to delete element with new index key in data
+            # after adding new index to database without reindex
             raise TryReindexException()
         found_at, _doc_id, _key, start, size, status, _next = self._locate_doc_id(doc_id, key, location)
         self.buckets.seek(found_at)
         super(IU_UniqueHashIndex, self).__init__(db_path, name,
                                                  entry_line_format, *args, **kwargs)
         self.create_key = random_hex_32  # : set the function to create random key when no _id given
-        #self.entry_struct=struct.Struct(entry_line_format)
+        # self.entry_struct=struct.Struct(entry_line_format)
 
 #    @lfu_cache(100)
     def _find_key(self, key):
             return None, None, 0, 0, 'u'
 
     def _find_key_many(self, *args, **kwargs):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def _find_place(self, start, key):
         """
                                                       status,
                                                       _next))
 
-            self.flush()
+#            self.flush()
             self.buckets.seek(found_at)
             self.buckets.write(self.entry_struct.pack(_key,
                                                       _rev,
                                                       size,
                                                       status,
                                                       0))
-            self.flush()
+#            self.flush()
             self.buckets.seek(start_position)
             self.buckets.write(self.bucket_struct.pack(wrote_at))
             self.flush()
                     limit -= 1
 
     def get_many(self, *args, **kwargs):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def delete(self, key, start=0, size=0):
-        self.update(key, '0000', start, size, 'd')
+        self.update(key, '00000000', start, size, 'd')
 
     def make_key_value(self, data):
         _id = data['_id']
     def _clear_cache(self):
         self._find_key.clear()
 
+    def insert_with_storage(self, _id, _rev, value):
+        if value:
+            start, size = self.storage.insert(value)
+        else:
+            start = 1
+            size = 0
+        return self.insert(_id, _rev, start, size)
+
+    def update_with_storage(self, _id, _rev, value):
+        if value:
+            start, size = self.storage.insert(value)
+        else:
+            start = 1
+            size = 0
+        return self.update(_id, _rev, start, size)
+
 
 class DummyHashIndex(IU_HashIndex):
     def __init__(self, db_path, name, entry_line_format="<32s4sIIcI", *args, **kwargs):
         super(DummyHashIndex, self).__init__(db_path, name,
                                              entry_line_format, *args, **kwargs)
         self.create_key = random_hex_32  # : set the function to create random key when no _id given
-        #self.entry_struct=struct.Struct(entry_line_format)
+        # self.entry_struct=struct.Struct(entry_line_format)
 
     def update(self, *args, **kwargs):
         return True
         return super(IU_MultiHashIndex, self).get(key)
 
     def make_key_value(self, data):
-        raise NotImplemented
+        raise NotImplementedError()
 
 
 # classes for public use, done in this way because of

CodernityDB/index.py

         self._close()
 
     def create_index(self):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def _fix_params(self):
         self.buckets.seek(0)
         self.storage.destroy()
 
     def _find_key(self, key):
-        raise NotImplemented
+        raise NotImplementedError()
 
-    def update(self, key, start, size):
-        raise NotImplemented
+    def update(self, doc_id, key, start, size):
+        raise NotImplementedError()
 
-    def insert(self, key, start, size):
-        raise NotImplemented
+    def insert(self, doc_id, key, start, size):
+        raise NotImplementedError()
 
     def get(self, key):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def get_many(self, key, start_from=None, limit=0):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def all(self, start_pos):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def delete(self, key, start, size):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def make_key_value(self, data):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def make_key(self, data):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def compact(self, *args, **kwargs):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def destroy(self, *args, **kwargs):
         self._close()
             self.storage.fsync()
         except:
             pass
+
+    def update_with_storage(self, doc_id, key, value):
+        if value:
+            start, size = self.storage.insert(value)
+        else:
+            start = 1
+            size = 0
+        return self.update(doc_id, key, start, size)
+
+    def insert_with_storage(self, doc_id, key, value):
+        if value:
+            start, size = self.storage.insert(value)
+        else:
+            start = 1
+            size = 0
+        return self.insert(doc_id, key, start, size)
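
For reference, the two helpers above collapse a pattern that was open-coded at every call site in database.py; start=1, size=0 is the sentinel for a key that is indexed but carries no stored payload. Equivalence sketch (illustrative function, not in the codebase):

    def insert_with_storage_equivalent(index, doc_id, key, value):
        # what index.insert_with_storage(doc_id, key, value) does internally
        if value:
            start, size = index.storage.insert(value)
        else:
            start, size = 1, 0  # sentinel: key indexed, no payload stored
        return index.insert(doc_id, key, start, size)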

CodernityDB/indexcreator.py

         self.stage = 0
         self.logic = ['and', 'or', 'in']
         self.logic2 = ['&', '|']
-        self.allowed_props = {'TreeBasedIndex': ['type','name','key_format','node_capacity','pointer_format','meta_format'],
-                              'HashIndex': ['type','name','key_format','hash_lim','entry_line_format']
-                             }
+        self.allowed_props = {'TreeBasedIndex': ['type', 'name', 'key_format', 'node_capacity', 'pointer_format', 'meta_format'],
+                              'HashIndex': ['type', 'name', 'key_format', 'hash_lim', 'entry_line_format'],
+                              'MultiHashIndex': ['type', 'name', 'key_format', 'hash_lim', 'entry_line_format'],
+                              'MultiTreeBasedIndex': ['type', 'name', 'key_format', 'node_capacity', 'pointer_format', 'meta_format']
+                              }
         self.funcs = {'md5': (['md5'], ['.digest()']),
                       'len': (['len'], []),
                       'str': (['str'], []),
-                      'fix_r': (['self.fix_r'], [])
+                      'fix_r': (['self.fix_r'], []),
+                      'prefix': (['self.prefix'], []),
+                      'infix': (['self.infix'], []),
+                      'suffix': (['self.suffix'], [])
                       }
+        self.handle_int_imports = {'infix': "from itertools import izip\n"}
+
         self.funcs_with_body = {'fix_r':
-                                ("""   def fix_r(self,s,l):
+                                ("""    def fix_r(self,s,l):
         e = len(s)
         if e == l:
             return s
         elif e > l:
             return s[:l]
         else:
-            return s.rjust(l,'_')\n""", False)}
+            return s.rjust(l,'_')\n""", False),
+                                'prefix':
+                                ("""    def prefix(self,s,m,l,f):
+        t = len(s)
+        if m < 1:
+            m = 1
+        o = set()
+        if t > l:
+            s = s[:l]
+            t = l
+        while m <= t:
+            o.add(s.rjust(f,'_'))
+            s = s[:-1]
+            t -= 1
+        return o\n""", False),
+                                'suffix':
+                                ("""    def suffix(self,s,m,l,f):
+        t = len(s)
+        if m < 1:
+            m = 1
+        o = set()
+        if t > l:
+            s = s[t-l:]
+            t = len(s)
+        while m <= t:
+            o.add(s.rjust(f,'_'))
+            s = s[1:]
+            t -= 1
+        return o\n""", False),
+                                'infix':
+                                ("""    def infix(self,s,m,l,f):
+        t = len(s)
+        o = set()
+        for x in xrange(m - 1, l):
+            t = (s, )
+            for y in xrange(0, x):
+                t += (s[y + 1:],)
+            o.update(set(''.join(x).rjust(f, '_').lower() for x in izip(*t)))
+        return o\n""", False)}
         self.none = ['None', 'none', 'null']
         self.props_assign = ['=', ':']
         self.all_adj_num_comp = {token.NUMBER: (
 
     def add(self, l, i):
         def add_aux(*args):
-            #print args,self.ind
+            # print args,self.ind
             if len(l[i]) < self.ind:
                 l[i].append([])
             l[i][self.ind - 1].append(args)
         self.prop_name = True
         self.prop_assign = False
         self.is_one_arg_enough = False
-        self.to_import = []
         self.funcs_stack = []
-        self.last_line = [-1,-1,-1]
+        self.last_line = [-1, -1, -1]
         self.props_set = []
+        self.custom_header = set()
 
-        self.tokens = ['# %s\n' % self.name, 'class %s(' % self.name, '):\n', '   def __init__(self, *args, **kwargs):        ']
+        self.tokens = []
+        self.tokens_head = ['# %s\n' % self.name, 'class %s(' % self.name, '):\n', '    def __init__(self, *args, **kwargs):        ']
 
         for i in xrange(3):
             tokenize.tokenize(self.readline(i), self.add(self.pre_tokens, i))
-            # tokenize treats some keyword not in the right way, thats why we have to change some of them
+            # tokenize treats some keywords not in the right way, that's why
+            # we have to change some of them
             for nk, k in enumerate(self.pre_tokens[i]):
                 for na, a in enumerate(k):
                     if a[0] == token.NAME and a[1] in self.logic:
         if self.index_name == "":
             raise IndexCreatorValueException("Missing index name\n")
 
-        self.tokens[0] = "# " + self.index_name + "\n" + self.tokens[0]
+        self.tokens_head[0] = "# " + self.index_name + "\n" + \
+            self.tokens_head[0]
 
         for i in self.funcs_with_body:
             if self.funcs_with_body[i][1]:
-                self.tokens.insert(4, self.funcs_with_body[i][0])
+                self.tokens_head.insert(4, self.funcs_with_body[i][0])
 
-        for i in self.to_import:
-            self.tokens[0] += i
-        self.tokens[0] += self.tokens[1]
-        del self.tokens[1]
+        if None in self.custom_header:
+            self.custom_header.remove(None)
+        if self.custom_header:
+            s = '    custom_header = """'
+            for i in self.custom_header:
+                s += i
+            s += '"""\n'
+            self.tokens_head.insert(4, s)
 
         if self.index_type in self.allowed_props:
             for i in self.props_set:
                 if i not in self.allowed_props[self.index_type]:
-                    raise IndexCreatorValueException("Properity %s is not allowed for index type: %s"%(i,self.index_type))
+                    raise IndexCreatorValueException("Properity %s is not allowed for index type: %s" % (i, self.index_type))
 
-        #print " ".join(self.tokens)
-        return " ".join(self.tokens)
+        #print "".join(self.tokens_head)
+        #print "----------"
+        #print (" ".join(self.tokens))
+        return "".join(self.custom_header), "".join(self.tokens_head) + (" ".join(self.tokens))
 
     # has to be run BEFORE tokenize
     def check_enclosures(self, d, st):
     def cnt_line_nr(self, l, stage):
         nr = -1
         for n, i in enumerate(self.predata[stage]):
-            #print i,"|||",i.strip(),"|||",l
+            # print i,"|||",i.strip(),"|||",l
             if l == i.strip():
                 nr = n
         if nr == -1:
 
         if d[0][0] == token.NAME or d[0][0] == token.STRING:
             if d[0][1] in self.props_set:
-                raise IndexCreatorValueException("Properity %s is set more than once" % d[0][1],self.cnt_line_nr(d[0][4],0))
+                raise IndexCreatorValueException("Properity %s is set more than once" % d[0][1], self.cnt_line_nr(d[0][4], 0))
             self.props_set += [d[0][1]]
             if d[0][1] == "type" or d[0][1] == "name":
                 t, tk, _, _, line = d[2]
 
                 if d[0][1] == "type":
                     if d[2][1] == "TreeBasedIndex":
-                        self.to_import += ["from CodernityDB.tree_index import TreeBasedIndex\n"]
-                    self.tokens.insert(2, tk)
+                        self.custom_header.add("from CodernityDB.tree_index import TreeBasedIndex\n")
+                    elif d[2][1] == "MultiTreeBasedIndex":
+                        self.custom_header.add("from CodernityDB.tree_index import MultiTreeBasedIndex\n")
+                    elif d[2][1] == "MultiHashIndex":
+                        self.custom_header.add("from CodernityDB.hash_index import MultiHashIndex\n")
+                    self.tokens_head.insert(2, tk)
                     self.index_type = tk
                 else:
                     self.index_name = tk
 
     def generate_func(self, t, tk, pos_start, pos_end, line, hdata, stage):
         if self.last_line[stage] != -1 and pos_start[0] > self.last_line[stage] and line != '':
-            raise IndexCreatorFunctionException("This line will never be executed!",self.cnt_line_nr(line,stage))
+            raise IndexCreatorFunctionException("This line will never be executed!", self.cnt_line_nr(line, stage))
         if t == 0:
             return
 
             self.line_cons[stage][pos_start[0] - 1] -= 1
 
         if tk in self.logic2:
-            #print tk
+            # print tk
             if line[pos_start[1] - 1] != tk and line[pos_start[1] + 1] != tk:
                 self.tokens += [tk]
             if line[pos_start[1] - 1] != tk and line[pos_start[1] + 1] == tk:
                 if tk in self.funcs_with_body:
                     self.funcs_with_body[tk] = (
                         self.funcs_with_body[tk][0], True)
+                self.custom_header.add(self.handle_int_imports.get(tk))
                 self.funcs_stack += [(tk, self.cur_brackets)]
         else:
             self.tokens += [tk]
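
To see what the generated infix helper produces, its body can be run standalone (reproduction with invented arguments; s appears to be the key, m/l the minimum/maximum infix length, f the fixed key width):

    from itertools import izip


    def infix(s, m, l, f):
        t = len(s)
        o = set()
        for x in xrange(m - 1, l):
            t = (s, )
            for y in xrange(0, x):
                t += (s[y + 1:],)
            o.update(set(''.join(x).rjust(f, '_').lower() for x in izip(*t)))
        return o


    print sorted(infix('abc', 2, 3, 5))
    # prints ['___ab', '___bc', '__abc']: every substring of length 2..3,
    # left-padded with '_' to the fixed width of 5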

CodernityDB/migrate.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011-2012 Codernity (http://codernity.com)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from CodernityDB.database import Database
+import shutil
+import os
+
+
+def migrate(source, destination):
+    """
+    Very basic for now
+    """
+    dbs = Database(source)
+    dbt = Database(destination)
+    dbs.open()
+    dbt.create()
+    dbt.close()
+    for curr in os.listdir(os.path.join(dbs.path, '_indexes')):
+        if curr != '00id.py':
+            shutil.copyfile(os.path.join(dbs.path, '_indexes', curr),
+                            os.path.join(dbt.path, '_indexes', curr))
+    dbt.open()
+    for c in dbs.all('id'):
+        del c['_rev']
+        dbt.insert(c)
+    return True
+
+
+if __name__ == '__main__':
+    import sys
+    migrate(sys.argv[1], sys.argv[2])
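
Typical use of the new script, e.g. for databases created before the 8-byte rev format (paths below are placeholders):

    from CodernityDB.migrate import migrate

    # copies every index definition except 00id.py into a freshly created
    # database, then re-inserts all records (each gets a new _rev)
    migrate('/path/to/old_db', '/path/to/new_db')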

CodernityDB/rr_cache.py

                         del cache1lvl[choice(cache1lvl.keys())]
                 cache1lvl[key] = user_function(key, *args, **kwargs)
                 result = cache1lvl[key]
-                # result = user_function(obj, key, *args, **kwargs)
+#                result = user_function(obj, key, *args, **kwargs)
             return result
 
         def clear():

CodernityDB/sharded_hash.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011-2012 Codernity (http://codernity.com)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from CodernityDB.hash_index import UniqueHashIndex, HashIndex
+from CodernityDB.sharded_index import ShardedIndex
+from CodernityDB.index import IndexPreconditionsException
+
+from random import getrandbits
+import uuid
+
+
+class IU_ShardedUniqueHashIndex(ShardedIndex):
+
+    custom_header = """import uuid
+from random import getrandbits
+from CodernityDB.sharded_index import ShardedIndex
+"""
+
+    def __init__(self, db_path, name, *args, **kwargs):
+        if kwargs.get('sh_nums', 0) > 255:
+            raise IndexPreconditionsException("Too many shards")
+        kwargs['ind_class'] = UniqueHashIndex
+        super(IU_ShardedUniqueHashIndex, self).__init__(db_path, name, *args, **kwargs)
+        self.patchers.append(ShardedUniqueHashIndex.wrap_insert_id_index)
+
+    @staticmethod
+    def wrap_insert_id_index(db_obj, clean=False):
+        def _insert_id_index(_rev, data):
+            """
+            Performs insert on **id** index.
+            """
+            _id, value = db_obj.id_ind.make_key_value(data)  # may be improved
+            trg_shard = _id[:2]
+            storage = db_obj.id_ind.shards_r[trg_shard].storage
+            start, size = storage.insert(value)
+            db_obj.id_ind.insert(_id, _rev, start, size)
+            return _id
+        if not clean:
+            if hasattr(db_obj, '_insert_id_index_orig'):
+                raise IndexPreconditionsException("Already patched, something went wrong")
+            setattr(db_obj, "_insert_id_index_orig", db_obj._insert_id_index)
+            setattr(db_obj, "_insert_id_index", _insert_id_index)
+        else:
+            setattr(db_obj, "_insert_id_index", db_obj._insert_id_index_orig)
+            delattr(db_obj, "_insert_id_index_orig")
+
+    def create_key(self):
+        h = uuid.UUID(int=getrandbits(128), version=4).hex
+        trg = self.last_used + 1
+        if trg >= self.sh_nums:
+            trg = 0
+        self.last_used = trg
+        h = '%02x%30s' % (trg, h[2:])
+        return h
+
+    def delete(self, key, *args, **kwargs):
+        trg_shard = key[:2]
+        op = self.shards_r[trg_shard]
+        return op.delete(key, *args, **kwargs)
+
+    def update(self, key, *args, **kwargs):
+        trg_shard = key[:2]
+        self.last_used = int(trg_shard, 16)
+        op = self.shards_r[trg_shard]
+        return op.update(key, *args, **kwargs)
+
+    def insert(self, key, *args, **kwargs):
+        trg_shard = key[:2]  # in most cases it's in create_key BUT not always
+        self.last_used = int(key[:2], 16)
+        op = self.shards_r[trg_shard]
+        return op.insert(key, *args, **kwargs)
+
+    def get(self, key, *args, **kwargs):
+        trg_shard = key[:2]
+        self.last_used = int(trg_shard, 16)
+        op = self.shards_r[trg_shard]
+        return op.get(key, *args, **kwargs)
+
+
+class ShardedUniqueHashIndex(IU_ShardedUniqueHashIndex):
+
+    # allow unique hash to be used directly
+    custom_header = 'from CodernityDB.sharded_hash import IU_ShardedUniqueHashIndex'
+
+    pass
+
+
+class IU_ShardedHashIndex(ShardedIndex):
+
+    custom_header = """from CodernityDB.sharded_index import ShardedIndex"""
+
+    def __init__(self, db_path, name, *args, **kwargs):
+        kwargs['ind_class'] = HashIndex
+        super(IU_ShardedHashIndex, self).__init__(db_path, name, *args, **kwargs)
+
+    def calculate_shard(self, key):
+        """
+        Must be implemented. It has to return shard to be used by key
+
+        :param key: key
+        :returns: target shard
+        :rtype: int
+        """
+        raise NotImplementedError()
+
+    def delete(self, doc_id, key, *args, **kwargs):
+        trg_shard = self.calculate_shard(key)
+        op = self.shards_r[trg_shard]
+        return op.delete(doc_id, key, *args, **kwargs)
+
+    def insert(self, doc_id, key, *args, **kwargs):
+        trg_shard = self.calculate_shard(key)
+        op = self.shards_r[trg_shard]
+        return op.insert(doc_id, key, *args, **kwargs)
+
+    def update(self, doc_id, key, *args, **kwargs):
+        trg_shard = self.calculate_shard(key)
+        op = self.shards_r[trg_shard]
+        return op.update(doc_id, key, *args, **kwargs)
+
+    def get(self, key, *args, **kwargs):
+        trg_shard = self.calculate_shard(key)
+        op = self.shards_r[trg_shard]
+        return op.get(key, *args, **kwargs)
+
+
+class ShardedHashIndex(IU_ShardedHashIndex):
+    pass
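
The routing convention used throughout this file, in one snippet (illustrative values): create_key() embeds the target shard in the first two hex digits of the 32-char id, so every later operation can recover the shard from the key alone.

    key = '03' + 30 * 'f'     # a 32-char id; the leading '03' names shard 3
    trg_shard = key[:2]       # '03', used to look up self.shards_r['03']
    print int(trg_shard, 16)  # 3, also remembered as self.last_used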

CodernityDB/sharded_index.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011-2012 Codernity (http://codernity.com)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from CodernityDB.index import Index
+# from CodernityDB.env import cdb_environment
+# import warnings
+
+
+class ShardedIndex(Index):
+
+    def __init__(self, db_path, name, *args, **kwargs):
+        """
+        There are 3 additional parameters. You have to hardcode them in your custom class; **NEVER** pass them directly.
+
+        :param int sh_nums: how many shards there should be
+        :param class ind_class: Index class to use (HashIndex or your custom one)
+        :param bool use_make_keys: if True, `make_key` and `make_key_value` will be overridden with those from the first shard
+
+        The remaining parameters are passed straight to `ind_class` shards.
+
+        """
+        super(ShardedIndex, self).__init__(db_path, name)
+        try:
+            self.sh_nums = kwargs.pop('sh_nums')
+        except KeyError:
+            self.sh_nums = 5
+        try:
+            ind_class = kwargs.pop('ind_class')
+        except KeyError:
+            raise Exception("ind_class must be given")
+        else:
+            # if not isinstance(ind_class, basestring):
+            #     ind_class = ind_class.__name__
+            self.ind_class = ind_class
+        if 'use_make_keys' in kwargs:
+            self.use_make_keys = kwargs.pop('use_make_keys')
+        else:
+            self.use_make_keys = False
+        self._set_shard_datas(*args, **kwargs)
+        self.patchers = []  # database object patchers
+
+    def _set_shard_datas(self, *args, **kwargs):
+        self.shards = {}
+        self.shards_r = {}
+#        ind_class = globals()[self.ind_class]
+        ind_class = self.ind_class
+        i = 0
+        for sh_name in [self.name + str(x) for x in xrange(self.sh_nums)]:
+            # dict is better than list in that case
+            self.shards[i] = ind_class(self.db_path, sh_name, *args, **kwargs)
+            self.shards_r['%02x' % i] = self.shards[i]
+            self.shards_r[i] = self.shards[i]
+            i += 1
+
+        if not self.use_make_keys:
+            self.make_key = self.shards[0].make_key
+            self.make_key_value = self.shards[0].make_key_value
+
+        self.last_used = 0
+
+    @property
+    def storage(self):
+        st = self.shards[self.last_used].storage
+        return st
+
+    def __getattr__(self, name):
+        return getattr(self.shards[self.last_used], name)
+
+    def open_index(self):
+        for curr in self.shards.itervalues():
+            curr.open_index()
+
+    def create_index(self):
+        for curr in self.shards.itervalues():
+            curr.create_index()
+
+    def destroy(self):
+        for curr in self.shards.itervalues():
+            curr.destroy()
+
+    def compact(self):
+        for curr in self.shards.itervalues():
+            curr.compact()
+
+    def reindex(self):
+        for curr in self.shards.itervalues():
+            curr.reindex()
+
+    def all(self, *args, **kwargs):
+        for curr in self.shards.itervalues():
+            for now in curr.all(*args, **kwargs):
+                yield now
+
+    def get_many(self, *args, **kwargs):
+        for curr in self.shards.itervalues():
+            for now in curr.get_many(*args, **kwargs):
+                yield now
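
The storage property and last_used above are also why database.py now reads ind.storage only after ind.get(key). A free-standing sketch of that call order (names mirror Database.get; the function itself is invented):

    def fetch(ind, key):
        # ind.get() must run first: on a sharded index it records which
        # shard served the key (last_used), and only then does ind.storage
        # resolve to that shard's storage file
        l_key, _unk, start, size, status = ind.get(key)
        storage = ind.storage
        return storage.get(start, size, status)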

CodernityDB/tree_index.py

 import io
 import shutil
 from storage import IU_Storage
-#from ipdb import set_trace
+# from ipdb import set_trace
 
 from CodernityDB.env import cdb_environment
 from CodernityDB.index import TryReindexException
         twolvl_cache = cache2lvl(150)
         self._find_key = cache(self._find_key)
         self._match_doc_id = cache(self._match_doc_id)
-#        self._read_single_leaf_record = twolvl_cache(self._read_single_leaf_record)
+# self._read_single_leaf_record =
+# twolvl_cache(self._read_single_leaf_record)
         self._find_key_in_leaf = twolvl_cache(self._find_key_in_leaf)
         self._read_single_node_key = twolvl_cache(self._read_single_node_key)
         self._find_first_key_occurence_in_node = twolvl_cache(
         right_leaf_start_position = self.data_start + \
             self.node_size + self.leaf_size
         self.buckets.seek(self.data_start + self.leaf_heading_size)
-        #read old root
+        # read old root
         data = self.buckets.read(
             self.single_leaf_record_size * self.node_capacity)
         leaf_data = struct.unpack('<' + self.
                                   single_leaf_record_format * self.node_capacity, data)
-        #remove deleted records, if succeded abort spliting
+        # remove deleted records; if that succeeded, abort splitting
         if self._update_if_has_deleted(self.data_start, leaf_data, 0, new_data):
             return None
-        #find out key which goes to parent node
+        # find out key which goes to parent node
         if nr_of_records_to_rewrite > new_leaf_size - 1:
             key_moved_to_parent_node = leaf_data[(old_leaf_size - 1) * 5]
         elif nr_of_records_to_rewrite == new_leaf_size - 1:
                                                     right_leaf_start_position,
                                                     'l')
         if nr_of_records_to_rewrite > half_size:
-                #key goes to first half
-                #prepare left leaf data
+                # key goes to first half
+                # prepare left leaf data
             left_leaf_data = struct.pack('<' + self.leaf_heading_format + self.single_leaf_record_format
                                          * (self.node_capacity - nr_of_records_to_rewrite),
                                          old_leaf_size,
                 new_data[3],
                 new_data[4],
                 *leaf_data[-nr_of_records_to_rewrite * 5:(old_leaf_size - 1) * 5])
-                #prepare right leaf_data
+                # prepare right leaf_data
             right_leaf_data = struct.pack('<' + self.elements_counter_format + 2 * self.pointer_format +
                                           self.single_leaf_record_format *
                                           new_leaf_size,
                                           0,
                                           *leaf_data[-new_leaf_size * 5:])
         else:
-                #key goes to second half
+                # key goes to second half
             if nr_of_records_to_rewrite:
                 records_before = leaf_data[old_leaf_size *
                                            5:-nr_of_records_to_rewrite * 5]
                 0,
                 right_leaf_start_position,
                 *leaf_data[:old_leaf_size * 5])
-                #prepare right leaf_data
+                # prepare right leaf_data
             right_leaf_data = struct.pack('<' + self.elements_counter_format + 2 * self.pointer_format +
                                           self.single_leaf_record_format * (new_leaf_size -
                                                                             nr_of_records_to_rewrite - 1),
                 self.buckets.seek(self._calculate_key_position(leaf_start,
                                                                self.node_capacity - nr_of_records_to_rewrite,
                                                                'l'))
-                #read all records with key>new_key
+                # read all records with key>new_key
                 data = self.buckets.read(
                     nr_of_records_to_rewrite * self.single_leaf_record_size)
                 records_to_rewrite = struct.unpack(
                     '<' + nr_of_records_to_rewrite * self.single_leaf_record_format, data)
-                #remove deleted records, if succeded abort spliting
+                # remove deleted records; if that succeeded, abort splitting
                 if self._update_if_has_deleted(leaf_start,
                                                records_to_rewrite,
                                                self.node_capacity -
                     return None
                 key_moved_to_parent_node = records_to_rewrite[
                     -new_leaf_size * 5]
-                #write new leaf at end of file
+                # write new leaf at end of file
                 self.buckets.seek(0, 2)  # end of file
                 new_leaf_start = self.buckets.tell()
-                #prepare new leaf_data
+                # prepare new leaf_data
                 new_leaf = struct.pack('<' + self.elements_counter_format + 2 * self.pointer_format +
                                        self.single_leaf_record_format *
                                        new_leaf_size,
                                        next_l,
                                        *records_to_rewrite[-new_leaf_size * 5:])
                 new_leaf += blanks
-                #write new leaf
+                # write new leaf
                 self.buckets.write(new_leaf)
-                #update old leaf heading
+                # update old leaf heading
                 self._update_leaf_size_and_pointers(leaf_start,
                                                     old_leaf_size,
                                                     prev_l,
                 self.buckets.seek(self._calculate_key_position(leaf_start,
                                                                self.node_capacity - nr_of_records_to_rewrite,
                                                                'l'))
-                #write new key and keys after
+                # write new key and keys after
                 self.buckets.write(
                     struct.pack(
                         '<' + self.single_leaf_record_format *
 
                 return new_leaf_start, key_moved_to_parent_node
             else:  # key goes into second half of leaf     '
-                #seek half of the leaf
+                # seek half of the leaf
                 self.buckets.seek(self._calculate_key_position(
                     leaf_start, old_leaf_size, 'l'))
                 data = self.buckets.read(
                     self.single_leaf_record_size * (new_leaf_size - 1))
                 records_to_rewrite = struct.unpack('<' + (new_leaf_size - 1) *
                                                    self.single_leaf_record_format, data)
-                #remove deleted records, if succeded abort spliting
+                # remove deleted records; if that succeeded, abort splitting
                 if self._update_if_has_deleted(leaf_start,
                                                records_to_rewrite,
                                                old_leaf_size,
                     key_moved_to_parent_node = new_key
                 self.buckets.seek(0, 2)  # end of file
                 new_leaf_start = self.buckets.tell()
-                #prepare new leaf data
+                # prepare new leaf data
                 index_of_records_split = nr_of_records_to_rewrite * 5
                 if index_of_records_split:
                     records_before = records_to_rewrite[
                 nr_of_elements -= 1
             else:
                 curr_index += 1
-        #if were deleted dont have to split, just update leaf
+        # if records were deleted we don't have to split, just update the leaf
         if nr_of_elements < self.node_capacity:
             data_split_index = 0
             for key in records_to_rewrite[0::5]:
         return new_root
 
     def _create_new_root_from_node(self, node_start, children_flag, nr_of_keys_to_rewrite, new_node_size, old_node_size, new_key, new_pointer):
-            #reading second half of node
+            # reading second half of node
             self.buckets.seek(self.data_start + self.node_heading_size)
-            #read all keys with key>new_key
+            # read all keys with key>new_key
             data = self.buckets.read(self.pointer_size + self.
                                      node_capacity * (self.key_size + self.pointer_size))
             old_node_data = struct.unpack('<' + self.pointer_format + self.node_capacity *
             new_node_start = self.buckets.tell()
             if nr_of_keys_to_rewrite == new_node_size:
                 key_moved_to_root = new_key
-                #prepare new nodes data
+                # prepare new nodes data
                 left_node = struct.pack('<' + self.node_heading_format + self.pointer_format +
                                         old_node_size * (self.
                                                          key_format + self.pointer_format),
                                          *old_node_data[old_node_size * 2 + 1:])
             elif nr_of_keys_to_rewrite > new_node_size:
                 key_moved_to_root = old_node_data[old_node_size * 2 - 1]
-                #prepare new nodes data
+                # prepare new nodes data
                 if nr_of_keys_to_rewrite == self.node_capacity:
                     keys_before = old_node_data[:1]
                     keys_after = old_node_data[1:old_node_size * 2 - 1]
             else:
 #               'inserting key into second half of node and creating new root'
                 key_moved_to_root = old_node_data[old_node_size * 2 + 1]
-                #prepare new nodes data
+                # prepare new nodes data
                 left_node = struct.pack('<' + self.node_heading_format + self.pointer_format +
                                         old_node_size * (self.
                                                          key_format + self.pointer_format),
                                                    new_node_start + self.node_size)
             left_node += (self.node_capacity - old_node_size) * \
                 (self.key_size + self.pointer_size) * '\x00'
-            #adding blanks after new node
+            # adding blanks after new node
             right_node += (self.node_capacity - new_node_size) * \
                 (self.key_size + self.pointer_size) * '\x00'
             self.buckets.seek(0, 2)
             blanks = (self.node_capacity - new_node_size) * (
                 self.key_size + self.pointer_size) * '\x00'
             if nr_of_keys_to_rewrite == new_node_size:  # insert key into first half of node
-                #reading second half of node
+                # reading second half of node
                 self.buckets.seek(self._calculate_key_position(node_start,
                                                                old_node_size,
                                                                'n') + self.pointer_size)
-                #read all keys with key>new_key
+                # read all keys with key>new_key
                 data = self.buckets.read(nr_of_keys_to_rewrite *
                                          (self.key_size + self.pointer_size))
                 old_node_data = struct.unpack('<' + nr_of_keys_to_rewrite *
                                               (self.key_format + self.pointer_format), data)
-                #write new node at end of file
+                # write new node at end of file
                 self.buckets.seek(0, 2)
                 new_node_start = self.buckets.tell()
-                #prepare new node_data
+                # prepare new node_data
                 new_node = struct.pack('<' + self.node_heading_format + self.pointer_format +
                                        (self.key_format +
                                         self.pointer_format) * new_node_size,
                                        new_pointer,
                                        *old_node_data)
                 new_node += blanks
-                #write new node
+                # write new node
                 self.buckets.write(new_node)
-                #update old node data
+                # update old node data
                 self._update_size(
                     node_start, old_node_size)
 
 
                 return new_node_start, new_key
             elif nr_of_keys_to_rewrite > half_size:  # insert key into first half of node
-                #seek for first key to rewrite
+                # seek for first key to rewrite
                 self.buckets.seek(self._calculate_key_position(node_start, self.node_capacity - nr_of_keys_to_rewrite, 'n')
                                   + self.pointer_size)
-                #read all keys with key>new_key
+                # read all keys with key>new_key
                 data = self.buckets.read(
                     nr_of_keys_to_rewrite * (self.key_size + self.pointer_size))
                 old_node_data = struct.unpack(
                     new_node_size + 1) * 2]
                 self.buckets.seek(0, 2)
                 new_node_start = self.buckets.tell()
-                #prepare new node_data
+                # prepare new node_data
                 new_node = struct.pack('<' + self.node_heading_format +
                                        self.pointer_format + (self.key_format +
                                                               self.pointer_format) * new_node_size,
                                        old_node_data[-new_node_size * 2 - 1],
                                        *old_node_data[-new_node_size * 2:])
                 new_node += blanks
-                #write new node
+                # write new node
                 self.buckets.write(new_node)
                 self._update_size(
                     node_start, old_node_size)
                 # seek position of new key in first half
                 self.buckets.seek(self._calculate_key_position(node_start, self.node_capacity - nr_of_keys_to_rewrite, 'n')
                                   + self.pointer_size)
-                #write new key and keys after
+                # write new key and keys after
                 self.buckets.write(
                     struct.pack(
                         '<' + (self.key_format + self.pointer_format) *
 
                 return new_node_start, key_moved_to_parent_node
             else:  # key goes into second half
-                #reading second half of node
+                # reading second half of node
                 self.buckets.seek(self._calculate_key_position(node_start,
                                                                old_node_size,
                                                                'n')
                     new_node_size * (self.key_size + self.pointer_size))
                 old_node_data = struct.unpack('<' + new_node_size *
                                               (self.key_format + self.pointer_format), data)
-                #find key which goes to parent node
+                # find key which goes to parent node
                 key_moved_to_parent_node = old_node_data[0]
                 self.buckets.seek(0, 2)  # end of file
                 new_node_start = self.buckets.tell()
                 index_of_records_split = nr_of_keys_to_rewrite * 2
-                #prepare new node_data
+                # prepare new node_data
                 first_leaf_pointer = old_node_data[1]
                 old_node_data = old_node_data[2:]
                 if index_of_records_split:
                                         new_pointer,
                                         *keys_after)
                 new_node += blanks
-                #write new node
+                # write new node
                 self.buckets.write(new_node)
                 self._update_size(node_start, old_node_size)
 
                 nodes_stack.append(curr_pointer)
                 indexes.append(curr_index)
             return nodes_stack, indexes
-        #nodes stack contains start addreses of nodes directly above leaf with key, indexes match keys adjacent nodes_stack values (as pointers)
-        #required when inserting new keys in upper tree levels
+        # nodes stack contains start addresses of the nodes directly above the leaf with key; indexes match keys adjacent to nodes_stack values (as pointers)
+        # required when inserting new keys in upper tree levels
 
     def _find_leaf_with_last_key_occurence(self, key):
         if self.root_flag == 'l':
                     return
 
     def make_key(self, key):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def make_key_value(self, data):
-        raise NotImplemented
+        raise NotImplementedError()
 
     def _open_storage(self):
         s = globals()[self.storage_class]
         return super(IU_MultiTreeBasedIndex, self).get(key)
 
     def make_key_value(self, data):
-        raise NotImplemented
+        raise NotImplementedError()
 
 
 # classes for public use, done in this way because of

docs/CodernityDB_HTTP_id_all.png

(binary image updated)

docs/CodernityDB_HTTP_new_doc.png

(binary image updated)

docs/codes/shard_demo.py

+from CodernityDB.database import Database
+from CodernityDB.sharded_hash import ShardedUniqueHashIndex, ShardedHashIndex
+from CodernityDB.hash_index import HashIndex
+
+import random
+
+
+class CustomIdSharded(ShardedUniqueHashIndex):
+
+    custom_header = 'from CodernityDB.sharded_hash import ShardedUniqueHashIndex'
+
+    def __init__(self, *args, **kwargs):
+        kwargs['sh_nums'] = 10
+        super(CustomIdSharded, self).__init__(*args, **kwargs)
+
+
+class MySharded(ShardedHashIndex):
+
+    custom_header = 'from CodernityDB.sharded_hash import ShardedHashIndex'
+
+    def __init__(self, *args, **kwargs):
+        kwargs['sh_nums'] = 10
+        kwargs['key_format'] = 'I'
+        kwargs['use_make_keys'] = True
+        super(MySharded, self).__init__(*args, **kwargs)
+
+    def make_key_value(self, data):
+        return data['x'], None
+
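+    # decide which shard a given key belongs to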
+    def calculate_shard(self, key):
+        return key % self.sh_nums
+
+
+y = 1500 * 'y'
+
+db = Database('/tmp/shard')
+
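+# no default id index; a sharded id index is added right below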
+db.create(with_id_index=False)
+db.add_index(CustomIdSharded(db.path, 'id'))
+db.add_index(MySharded(db.path, 'x'))
+
+
+# it makes no sense to use sharding with such a small number of records; this is just a demo
+for x in xrange(10 ** 4):
+    db.insert({'x': x, 'y': y})
+
+
+print db.get('x', random.randint(0, 10 ** 4 - 1))['_id']

docs/codes/tables_like_indexes.py

 
     def __init__(self, *args, **kwargs):
         kwargs['key_format'] = '16s'
-        super(AllUsers, self).__init__(*args, **kwargs)
+        super(AllItems, self).__init__(*args, **kwargs)
 
     def make_key(self, key):
         return md5(key).digest()
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
+# sys.path.insert(0, os.path.abspath('.'))
 
-# -- General configuration -----------------------------------------------------
+# -- General configuration -----------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
-#needs_sphinx = '1.0'
+# needs_sphinx = '1.0'
 
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 source_suffix = '.rst'
 
 # The encoding of source files.
-#source_encoding = 'utf-8-sig'
+# source_encoding = 'utf-8-sig'
 
 # The master toctree document.
 master_doc = 'index'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
-#language = None
+# language = None
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
-#today = ''
+# today = ''
 # Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 exclude_patterns = ['_build', '_themes']
 
 # The reST default role (used for this markup: `text`) to use for all documents.
-#default_role = None
+# default_role = None
 
 # If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
 
 # If true, the current module name will be prepended to all description
 # unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
 
 # If true, sectionauthor and moduleauthor directives will be shown in the
 # output. They are ignored by default.
-#show_authors = False
+# show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'
 
 # A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
 
 
-# -- Options for HTML output ---------------------------------------------------
+# -- Options for HTML output ---------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
-#html_theme_options = {}
+# html_theme_options = {}
 
 # Add any paths that contain custom themes here, relative to this directory.
 html_theme_path = ['_themes']
 html_title = 'CodernityDB'
 
 # A shorter title for the navigation bar.  Default is the same as html_title.
-#html_short_title = None
+# html_short_title = None
 
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+# html_last_updated_fmt = '%b %d, %Y'
 
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
-#html_use_smartypants = True
+# html_use_smartypants = True
 
 # Custom sidebar templates, maps document names to template names.
 html_sidebars = {
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
-#html_additional_pages = {}
+# html_additional_pages = {}
 
 # If false, no module index is generated.
-#html_domain_indices = True
+# html_domain_indices = True
 
 # If false, no index is generated.
-#html_use_index = True
+# html_use_index = True
 
 # If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
 
 # If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
+# html_show_sphinx = True
 
 # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
+# html_show_copyright = True
 
 # If true, an OpenSearch description file will be output, and all pages will
 # contain a <link> tag referring to it.  The value of this option must be the
 # base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
 
 # This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
+# html_file_suffix = None
 
 # Output file base name for HTML help builder.
 htmlhelp_basename = 'CodernityDBdoc'
 
 
-# -- Options for LaTeX output --------------------------------------------------
+# -- Options for LaTeX output --------------------------------------------
 
 latex_elements = {
     # The paper size ('letterpaper' or 'a4paper').
 
 # The name of an image file (relative to this directory) to place at the top of
 # the title page.
-#latex_logo = None
+# latex_logo = None
 
 # For "manual" documents, if this is true, then toplevel headings are parts,
 # not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
 
 # If true, show page references after internal links.
-#latex_show_pagerefs = False
+# latex_show_pagerefs = False
 
 # If true, show URL addresses after external links.
-#latex_show_urls = False
+# latex_show_urls = False
 
 # Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
 
 # If false, no module index is generated.
-#latex_domain_indices = True
+# latex_domain_indices = True
 
 
-# -- Options for manual page output --------------------------------------------
+# -- Options for manual page output --------------------------------------
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 ]
 
 # If true, show URL addresses after external links.
-#man_show_urls = False
+# man_show_urls = False
 
 
-# -- Options for Texinfo output ------------------------------------------------
+# -- Options for Texinfo output ------------------------------------------
 
 # Grouping the document tree into Texinfo files. List of tuples
 # (source start file, target name, title, author,
 ]
 
 # Documents to append as an appendix to all manuals.
-#texinfo_appendices = []
+# texinfo_appendices = []
 
 # If false, no module index is generated.
-#texinfo_domain_indices = True
+# texinfo_domain_indices = True
 
 # How to display URL addresses: 'footnote', 'no', or 'inline'.
-#texinfo_show_urls = 'footnote'
+# texinfo_show_urls = 'footnote'
 
 
 rst_prolog = """

docs/database_indexes.rst

 custom_header
     It's string that will be inserted to final index file. It's
     useful to pass the custom imports there. You will find an example
-    in :ref:`Examples - secure storage <secure_storage_example>`
+    in :ref:`Examples - secure storage <secure_storage_example>`.
 
 storage_class
-    It defines what storage to use. By default all indexes will use :py:class:`CodernityDB.storage.Storage`
+    It defines what storage to use. By default all indexes will use :py:class:`CodernityDB.storage.Storage`. If your Storage needs to be initialized in a custom way, please look at :ref:`Examples - secure storage <secure_storage_example>`.
 
 
 .. _internal_hash_index:
 
 .. seealso::
 
-   :ref:`multiple_keys_index`
-      for Multiindex hash based implementation (more than one key per database data).
+    :ref:`multiple_keys_index`
+       for Multiindex hash based implementation (more than one key per database data).
 
 
 Below you will find explained in details parameters for that index
     :py:class:`CodernityDB.tree_index.TreeBasedIndex`
         For documentation
 
-   :ref:`multiple_keys_index`
-      for Multiindex tree based implementation (more than one key per database data).
+    :ref:`multiple_keys_index`
+       for Multiindex tree based implementation (more than one key per database data).
 
 
 duplicate keys
     :returns: fixed size string
     :rtype: string
 
+.. method:: infix(value, min_len, max_len, fixed_len)
+
+    it will generate all possible infixes of ``value`` not shorter than ``min_len``
+    and not longer than ``max_len``; all of them will have the fixed length
+    defined in ``fixed_len`` (which works exactly like ``fix_r``)
+
+    :param string value: the string to generate infixes from
+    :param integer min_len: minimal length of an infix
+    :param integer max_len: maximal length of an infix
+    :param integer fixed_len: fixed size of all infixes
+    :returns: set containing fixed size infixes
+    :rtype: set
+
+.. method:: prefix(value, min_len, max_len, fixed_len)
+
+    it will generate all possible prefixes of ``value`` not shorter than ``min_len``
+    and not longer than ``max_len``; all of them will have the fixed length
+    defined in ``fixed_len`` (which works exactly like ``fix_r``)
+
+    :param string value: the string to generate prefixes from
+    :param integer min_len: minimal length of a prefix
+    :param integer max_len: maximal length of a prefix
+    :param integer fixed_len: fixed size of all prefixes
+    :returns: set containing fixed size prefixes
+    :rtype: set
+
+.. method:: suffix(value, min_len, max_len, fixed_len)
+
+    it will generate all possible suffixes of ``value`` not shorter than ``min_len``
+    and not longer than ``max_len``; all of them will have the fixed length
+    defined in ``fixed_len`` (which works exactly like ``fix_r``)
+
+    :param string value: the string to generate suffixes from
+    :param integer min_len: minimal length of a suffix
+    :param integer max_len: maximal length of a suffix
+    :param integer fixed_len: fixed size of all suffixes
+    :returns: set containing fixed size suffixes
+    :rtype: set
+
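+For illustration only, here is a rough pure-Python sketch of the ``prefix``
+semantics described above (``infix`` and ``suffix`` work analogously on inner
+substrings and endings); the padding character used to reach ``fixed_len`` is
+an assumption here:
+
+.. code-block:: python
+
+    def prefix_sketch(value, min_len, max_len, fixed_len):
+        # collect every prefix with min_len <= length <= max_len
+        result = set()
+        for length in range(min_len, min(max_len, len(value)) + 1):
+            p = value[:length]
+            # pad (or cut) to the fixed size, as fix_r would
+            result.add(p[:fixed_len].ljust(fixed_len, '_'))
+        return result
+
+    # prefix_sketch('codernity', 2, 4, 5)
+    # -> set(['co___', 'cod__', 'code_'])
+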
 .. note::
     Obviously you can use that simple indexes in |CodernityDB-HTTP-link| without any problem.
 
     Error reporting / handling system in that mode will tell you exactly what's wrong with your code.
 
 
-.. _tables_colections_q:
+.. _tables_collections_q:
 
 Tables, collections...?
 -------------------------
         def make_key(self,key):
                 return key
 
-Even if now class is in proper scope, the example won't work, because variable *a* isn't known to CodernityDB.
+Even though the class is now in the proper scope, the example won't work, because the variable ``a`` isn't known to CodernityDB.
+
+
+.. _sharding_in_indexes:
+
+Sharding in indexes
+-------------------
+
+For advanced users we have sharded indexes.
+
+All you need to do to use sharded indexes is:
+
+.. literalinclude:: codes/shard_demo.py
+   :linenos:
+
+
+.. warning::
+
+   Just remember that you have to **hardcode** ShardIndex parameters (unlike for other indexes), so you **really** should derive from its class.
+
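+For example (a sketch mirroring ``CustomIdSharded`` from the demo above; the
+class name and the shard count of 5 are illustrative only):
+
+.. code-block:: python
+
+    class MyIdSharded(ShardedUniqueHashIndex):
+
+        custom_header = 'from CodernityDB.sharded_hash import ShardedUniqueHashIndex'
+
+        def __init__(self, *args, **kwargs):
+            # the shard count is hardcoded in the subclass, not passed in from outside
+            kwargs['sh_nums'] = 5
+            super(MyIdSharded, self).__init__(*args, **kwargs)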
+
+
+.. _sharding_performance:
+
+
+Performance
+~~~~~~~~~~~
+
+Consider the following script:
+
+.. literalinclude:: codes/shard_vs_noshard.py
+   :linenos:
+
+
+.. list-table::
+   :header-rows: 1
+
+   * - Number of inserts
+     - Time with sharding
+     - Time without sharding
+   * - 5 000 000
+     - 65.405 seconds
+     - 74.699 seconds
+   * - 10 000 000
+     - 148.095 seconds
+     - 186.383 seconds
+
+
+As you can see, sharding **does matter**: it gives you almost a **25%** performance boost, totally free. A similar performance boost applies to get operations.
+
+
+.. note::
+
+    What's even more important about sharding: as you probably already know, CodernityDB index metadata stores data position and size by using the ``struct`` module. By default those fields use the ``I`` format (``unsigned int``), which can hold values up to ``4294967295``, so about 4GB. So instead of switching that format to ``Q`` when your data outgrows 4GB, you can switch to sharding and keep the ``I`` format: having 100 shards means you can index up to ``4GB * 100`` of data.
+
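+For a quick illustration of the format sizes involved (a sketch using the
+standard-size ``<`` prefix of ``struct``):
+
+.. code-block:: python
+
+    import struct
+
+    # 'I' is a 4-byte unsigned int: offsets up to 2**32 - 1 (~4GB)
+    print struct.calcsize('<I'), 2 ** (8 * struct.calcsize('<I')) - 1
+    # 'Q' is an 8-byte unsigned long long, at the cost of bigger metadata
+    print struct.calcsize('<Q'), 2 ** (8 * struct.calcsize('<Q')) - 1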
+
+.. note::
+
+   Currently one index can have up to 255 shards.

docs/examples.rst

             return key
 
 
+Example sharded hash
+""""""""""""""""""""
+
+An example sharded index; it will shard records on ``key`` into 10 shards (see :ref:`sharding_in_indexes`).
+
+.. code-block:: python
+
+    class MySharded(ShardedHashIndex):
+
+        custom_header = """from CodernityDB.sharded_hash import ShardedHashIndex"""
+
+        def __init__(self, *args, **kwargs):
+            kwargs['sh_nums'] = 10
+            kwargs['key_format'] = 'I'
+            kwargs['use_make_keys'] = True
+            super(MySharded, self).__init__(*args, **kwargs)
+
+        def make_key_value(self, data):
+            return data['x'] % 10, None
+
+        def calculate_shard(self, key):
+            return key % self.sh_nums
+
+
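+A minimal usage sketch (assuming a database already created with a sharded id
+index, as in :ref:`sharding_in_indexes`); lookups are routed to the right
+shard transparently:
+
+.. code-block:: python
+
+    db.add_index(MySharded(db.path, 'x'))
+    db.insert({'x': 27})
+    # make_key_value stored 27 % 10 == 7, so calculate_shard routes this to shard 7
+    print db.get('x', 7)
+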
 
 .. _example_storage:
 
 Can I add index to existing DB ?
     Yes you can, but you will need to reindex that index to have in it data that were in database already before you add that index. (see :ref:`database_indexes` for details)
 
+Can I do prefix/infix/suffix search in CodernityDB ?
+    Sure! Please refer to :ref:`multiple_keys_index`. Using that method you will get a very fast prefix/infix/suffix search mechanism.
+
 What about tables or collections ?
-    Everything can be done through our Index mechanism see :ref:`tables_colections_q`.
+    Everything can be done through our Index mechanism, see :ref:`tables_collections_q`.
 
 How does it compare to MongoDB, CouchDB and other "big" NoSQL databases ?
     Different purposes + different design. CodernityDB doesn't have yet any replication engine (yet?). However we are sure that there is a place for CodernityDB. Nothing is impossible in CodernityDB, because Index IS a Python class where you can do anything (if you're not a Python user we created :ref:`simple_index`). Don't try make CodernityDB relational database, it will work but its not *that*. It can act as a simple key-value database or as a database with secondary indexes (ordering / selecting etc). You can optimize IO performance by moving indexes data on different partitions. Generally the CodernityDB index mechanism is really powerful, its much more than in other databases (it's more similar to CouchDB views).
 
+How does it compare to Redis, Kyoto Cabinet and other Key/Value databases ?
+    First of all it has to be said: CodernityDB is **NOT** a typical Key/Value database. It has support for multiple indexes, it can perform range queries, and it can index more than one value per incoming record. It's much more than a typical Key/Value database.
+
+Is it daybreak (the simple Ruby Key/Value database) but for Python ?
+    Not really, CodernityDB is much more. CodernityDB always stores a Python dict as the data value. It has multiple index support and can use TreeIndex as well as HashIndex. CodernityDB can work like daybreak, but by default it's designed to be more than a "simple key/value" store. As you can see, they are generally not the same thing.
+
 Why Python 3 is not supported ?
     Python 3 introduced many incompatible changes. In case of CodernityDB having working version for 2.x and 3.x series in the same code base without ugly hacks (big monkey-patching etc.) is almost impossible. If you're interested Python 3 version of CodernityDB contact us. Porting CodernityDB to Python 3.x is not hard. Python 3.x support in fact was never needed. That's why there is no support for it (yet?).
 
 I want to have a bit customized CodernityDB
     No problem, just contact us to get more details about that.
 
-Can I do prefix/infix/suffix search in CodernityDB ?
-    Sure! Please refer to :ref:`multiple_keys_index`. By using such method you will get very fast prefix/infix/suffix search mechanism.
-
 What If I want to implement my own Index ?
     At first you have to remember that implementing custom index shouldn't require changes in Database itself. Because of CodernityDB design, database object tries to perform operations on particular index as fast as it's possible. Good example of such method is :ref:`multiple_keys_index`.
 
+Is there any built-in sharding mechanism ?
+    Yes, there is a sharding mechanism built in, see :ref:`sharding_in_indexes`.
+
 I want to use CodernityDB in commercial project, do I have to pay for it?
-    CodernityDB is released on `Apache 2.0 license`_, it allows you to freely use it even for commercial purposes without any need to pay for it. IT'S FREE FOR COMMERCIAL USE. 
+    CodernityDB is released on `Apache 2.0 license`_, it allows you to freely use it even for commercial purposes without any need to pay for it.
 
 
 .. _Apache 2.0 license: http://www.apache.org/licenses/LICENSE-2.0.html
-CodernityDB pure python, NoSQL, fast database
+CodernityDB pure Python, fast, NoSQL database
 =============================================
 
-CodernityDB is opensource, pure Python (no 3rd party dependency), fast (really fast check :ref:`speed` if you don't believe in words), multi platform, schema-less, NoSQL_ database. It has optional support for HTTP server version (|CodernityDB-HTTP-link|), and also Python client library (|CodernityDB-PyClient-link|) that aims to be 100% compatible with embedded version.
+CodernityDB is an open source, pure Python (no 3rd party dependencies), fast (50 000 insert and 100 000 get operations per second, check :ref:`speed` if you don't believe in words), multiplatform, schema-less, NoSQL_ database.
 
 .. image:: CodernityDB.png
     :align: center
 
 
-You can call it a more advanced key-value database. With multiple key-values indexes in the same engine. Also CodernityDB supports functions that are executed inside database.
+You can also call it a more advanced key-value database, with multiple key-value indexes in the same engine (it's certainly not a "simple key/value store"). CodernityDB also supports functions that are executed inside the database. It has optional support for an HTTP server version (|CodernityDB-HTTP-link|), and also a Python client library (|CodernityDB-PyClient-link|) that aims to be 100% compatible with the embedded version.
+
+**And it's** `Apache 2.0`_ **licensed!**