codernity committed 0647083

First public release

Files changed (84)

+.+\.pyc
+.*~
+\.ropeproject
+.*eproject.cfg
+\#.*\#
+.tox
+.*test_db.*
+build/
+htmlcov/
+.*egg-info.*
+dist/
+.coverage/
+.project
+.coverage
+.settings/
+htmlcov_.*
+coverage.*\.xml
+junit-.*\.xml
+.*\.orig

CodernityDB/__init__.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011-2012 Codernity (http://codernity.com)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+__version__ = '0.3.50'
+__license__ = "Apache 2.0"

CodernityDB/database.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011-2012 Codernity (http://codernity.com)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import io
+from inspect import getsource
+
+
+# for custom indexes
+from CodernityDB.storage import Storage, IU_Storage
+from CodernityDB.hash_index import (IU_UniqueHashIndex,
+                                    IU_HashIndex,
+                                    HashIndex,
+                                    UniqueHashIndex)
+# normal imports
+
+from CodernityDB.index import (ElemNotFound,
+                               DocIdNotFound,
+                               IndexException,
+                               Index,
+                               TryReindexException,
+                               ReindexException,
+                               IndexNotFoundException,
+                               IndexConflict)
+
+from CodernityDB.misc import NONE, random_hex_4
+
+from CodernityDB.env import cdb_environment
+
+
+def header_for_indexes(index_name, index_class, db_custom="", ind_custom="", classes_code=""):
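+    # Returns the module header written at the top of every generated index
+    # file in ``_indexes``: the first two comment lines carry the index name
+    # and class (read back by ``_read_index_single``), followed by imports and
+    # any custom db / index / classes code.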
+    return """# %s
+# %s
+
+# inserted automatically
+import os
+import marshal
+
+import struct
+import shutil
+
+from hashlib import md5
+
+# custom db code start
+# db_custom
+%s
+
+# custom index code start
+# ind_custom
+%s
+
+# source of classes in index.classes_code
+# classes_code
+%s
+
+# index code start
+
+""" % (index_name, index_class, db_custom, ind_custom, classes_code)
+
+
+class DatabaseException(Exception):
+    pass
+
+
+class PreconditionsException(DatabaseException):
+    pass
+
+
+class RecordDeleted(DatabaseException):
+    pass
+
+
+class RecordNotFound(DatabaseException):
+    pass
+
+
+class RevConflict(DatabaseException):
+    pass
+
+
+class DatabaseConflict(DatabaseException):
+    pass
+
+
+class DatabasePathException(DatabaseException):
+    pass
+
+
+class DatabaseIsNotOpened(PreconditionsException):
+    pass
+
+
+class Database(object):
+    """
+    A default single-threaded database object.
+    """
+
+    custom_header = ""  #: use it for imports required by your database
+
+    def __init__(self, path):
+        self.path = path
+        self.storage = None
+        self.indexes = []
+        self.id_ind = None
+        self.indexes_names = {}
+        self.opened = False
+
+    def __not_opened(self):
+        if not self.opened:
+            raise DatabaseIsNotOpened("Database is not opened")
+
+    def set_indexes(self, indexes=[]):
+        """
+        Set the database indexes from the ``indexes`` param
+
+        :param indexes: indexes to set in the db
+        :type indexes: iterable of :py:class:`CodernityDB.index.Index` objects.
+
+        """
+        for ind in indexes:
+            self.add_index(ind, create=False)
+
+    def _add_single_index(self, p, i, index):
+        """
+        Adds a single index to the database.
+        It uses :py:func:`inspect.getsource` to get the class source.
+        Then it builds the real index file and saves it in the ``_indexes`` directory.
+        """
+        code = getsource(index.__class__)
+        index._order = i
+        cls_code = getattr(index, 'classes_code', [])
+        classes_code = ""
+        for curr in cls_code:
+            classes_code += getsource(curr) + '\n\n'
+        with io.FileIO(os.path.join(p, "%.2d%s" % (i, index.name) + '.py'), 'w') as f:
+            f.write(header_for_indexes(index.name,
+                                       index.__class__.__name__,
+                                       getattr(self, 'custom_header', ''),
+                                       getattr(index, 'custom_header', ''),
+                                       classes_code))
+            f.write(code)
+        return True
+
+    def _read_index_single(self, p, ind, ind_kwargs={}):
+        """
+        Reads a single index from an index file (i.e. one generated by :py:meth:`._add_single_index`), then performs ``exec`` on that code.
+
+        :param p: path
+        :param ind: index name (will be joined with *p*)
+        :returns: new index object
+        """
+        with io.FileIO(os.path.join(p, ind), 'r') as f:
+            name = f.readline()[2:].strip()
+            _class = f.readline()[2:].strip()
+            code = f.read()
+        obj = compile(code, '<Index: %s>' % os.path.join(p, ind), 'exec')
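+        # Python 2 ``exec`` statement below runs the compiled index module in
+        # this module's globals, making the index class available by name.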
+        exec obj in globals()
+        ind_obj = globals()[_class](self.path, name, **ind_kwargs)
+        ind_obj._order = int(ind[:2])
+        return ind_obj
+
+    def __write_index(self, new_index, number=0, edit=False, ind_kwargs=None):
+        if ind_kwargs is None:
+            ind_kwargs = {}
+        p = os.path.join(self.path, '_indexes')
+        if isinstance(new_index, basestring) and not new_index.startswith("path:"):
+            name = new_index.splitlines()[0][2:]
+            name = name.strip()
+
+            if name in self.indexes_names and not edit:
+                raise IndexConflict("Already exists")
+            if edit:
+                previous_index = filter(lambda x: x.endswith(
+                    '.py') and x[2:-3] == name, os.listdir(p))
+                if not previous_index:
+                    raise PreconditionsException(
+                        "Can't edit index that's not yet in database")
+                number = int(previous_index[0][:2])
+            if number == 0 and not edit and not name == 'id':
+                raise PreconditionsException(
+                    "Id index must be the first added")
+            ind_path = "%.2d%s" % (number, name)
+            with io.FileIO(os.path.join(p, ind_path + '.py'), 'w') as f:
+                f.write(new_index)
+
+            ind_obj = self._read_index_single(p, ind_path + '.py')
+        elif isinstance(new_index, basestring) and new_index.startswith("path:"):
+            path = new_index[5:]
+            if not path.endswith('.py'):
+                path += '.py'
+            ind_obj = self._read_index_single(p, path, ind_kwargs)
+            name = ind_obj.name
+            if name in self.indexes_names and not edit:
+                raise IndexConflict("Already exists")
+        elif isinstance(new_index, Index):
+            # it will first save index as a string, and then compile it
+            # it will allow to control the index object on the DB side
+            ind = new_index
+            init_arguments = new_index.__class__.__init__.im_func.func_code.co_varnames[
+                3:]  # ignore self, path and name
+            for curr in init_arguments:
+                if curr not in ('args', 'kwargs'):
+                    v = getattr(ind, curr, NONE())
+                    if not isinstance(v, NONE):
+                        ind_kwargs[curr] = v
+            if edit:
+                # code duplication...
+                previous_index = filter(lambda x: x.endswith(
+                    '.py') and x[2:-3] == ind.name, os.listdir(p))
+                if not previous_index:
+                    raise PreconditionsException(
+                        "Can't edit index that's not yet in database")
+                number = int(previous_index[0][:2])
+            if ind.name in self.indexes_names and not edit:
+                raise IndexConflict("Already exists")
+            if number == 0 and not edit and not ind.name == 'id':
+                raise PreconditionsException(
+                    "Id index must be the first added")
+            self._add_single_index(p, number, ind)
+            ind_path = "%.2d%s" % (number, ind.name)
+            ind_obj = self._read_index_single(p, ind_path + '.py', ind_kwargs)
+            name = ind_obj.name
+        else:
+            raise PreconditionsException("Argument must be Index instance, path to index_file or valid string index format")
+        return ind_obj, name
+
+    def add_index(self, new_index, create=True, ind_kwargs=None):
+        """
+
+        :param new_index: the new index to add; can be an Index instance, a valid index source string, or a ``path:`` string pointing to a file with index code
+        :type new_index: :py:class:`CodernityDB.index.Index` instance or string
+        :param create: whether to create the index after adding it
+        :type create: bool
+
+        :returns: new index name
+        """
+        if ind_kwargs is None:
+            ind_kwargs = {}
+        p = os.path.join(self.path, '_indexes')
+        if not os.path.exists(p):
+            self.initialize()
+        current = sorted(filter(lambda x: x.endswith('.py'), os.listdir(p)))
+        if current:
+            last = int(current[-1][:2])  # may crash... ignore
+            _next = last + 1
+        else:
+            _next = 0
+        ind_obj, name = self.__write_index(new_index, _next, edit=False)
+        # add the new index to objects
+        self.indexes.append(ind_obj)
+        self.indexes_names[name] = ind_obj
+        if create:
+            if self.exists():  # no need to create if the database doesn't exist
+                ind_obj.create_index()
+        if name == 'id':
+            self.__set_main_storage()
+        return name
+
+    def edit_index(self, index, reindex=False, ind_kwargs=None):
+        if ind_kwargs is None:
+            ind_kwargs = {}
+        ind_obj, name = self.__write_index(index, -1, edit=True)
+        old = next(x for x in self.indexes if x.name == name)
+        index_of_index = self.indexes.index(old)
+        ind_obj.open_index()
+        self.indexes[index_of_index] = ind_obj
+        self.indexes_names[name] = ind_obj
+        if reindex:
+            self.reindex_index(name)
+        return name
+
+    def get_index_code(self, index_name):
+        """
+        Returns the full index code from the index file.
+
+        :param index_name: name of the index to get the code for
+        """
+        if index_name not in self.indexes_names:
+            self.__not_opened()
+            raise IndexNotFoundException(
+                "Index `%s` doesn't exist" % index_name)
+        ind = self.indexes_names[index_name]
+        name = "%.2d%s" % (ind._order, index_name)
+        name += '.py'
+        with io.FileIO(os.path.join(self.path, '_indexes', name), 'r') as f:
+            return f.read()
+        return ""  # shouldn't happen
+
+    def __set_main_storage(self):
+        """
+        Sets database main storage (from the **id** index)
+        """
+        try:
+            self.storage = self.indexes_names['id'].storage
+            self.id_ind = self.indexes_names['id']
+        except KeyError:
+            # when opening / initializing DB without `id` index
+            # happens mostly on server side
+            pass
+
+    def initialize(self, path=None, makedir=True):
+        """
+        Initializes a new database (has to be called before opening a new database)
+
+        :param path: path to the database (allows delayed path configuration); if not provided, ``self.path`` will be used
+        :param makedir: whether to make the ``_indexes`` directory
+
+        :returns: the database path
+        """
+        if self.opened:
+            raise DatabaseConflict("Already opened")
+        if not path:
+            path = self.path
+        else:
+            self.path = path
+        if makedir:
+            if not self.path:
+                raise PreconditionsException("No path specified")
+            p = os.path.join(self.path, '_indexes')
+            if os.path.exists(p):
+                raise DatabaseConflict("Can't create because it already exists")
+            os.makedirs(p)
+
+        return self.path
+
+    def __open_new(self, with_id_index=True, index_kwargs={}):
+        """
+        Opens a new database (works like create);
+        if ``self.path`` does not exist yet, it calls :py:meth:`.initialize`
+        """
+        if self.path:
+            if not os.path.exists(self.path):
+                self.initialize(self.path)
+        if not 'id' in self.indexes_names and with_id_index:
+            import CodernityDB.hash_index
+            if not 'db_path' in index_kwargs:
+                index_kwargs['db_path'] = self.path
+            index_kwargs['name'] = 'id'
+            id_ind = CodernityDB.hash_index.UniqueHashIndex(**index_kwargs)
+            self.add_index(id_ind, create=False)
+            # del CodernityDB.index
+        for index in self.indexes:
+            index.create_index()
+        return True
+
+    def _read_indexes(self):
+        """
+        Read all known indexes from ``_indexes``
+        """
+        p = os.path.join(self.path, '_indexes')
+        for ind in os.listdir(p):
+            if ind.endswith('.py'):
+                self.add_index('path:' + ind, create=False)
+
+    def create(self, path=None, **kwargs):
+        """
+        Create database
+
+        :param path: path where to create the database
+
+        :returns: database path
+        """
+        if path:
+            self.initialize(path)
+        if not self.path:
+            raise PreconditionsException("No path specified")
+        if self.opened:
+            raise DatabaseConflict("Already opened")
+        self.__open_new(**kwargs)
+        self.__set_main_storage()
+        self.opened = True
+        return self.path
+
+    def exists(self, path=None):
+        """
+        Checks if a database exists in the given path
+
+        :param path: path to look for the database
+        """
+        if not path:
+            path = self.path
+        if not path:
+            return False
+        if os.path.exists(path):
+            return os.path.exists(os.path.join(path, '_indexes'))
+        return False
+
+    def open(self, path=None):
+        """
+        Opens an already existing database
+
+        :param path: path with database to open
+        """
+        if self.opened:
+            raise DatabaseConflict("Already opened")
+        if path:
+            self.path = path
+        if not self.path:
+            raise PreconditionsException("No path specified")
+        if not os.path.exists(self.path):
+            raise DatabasePathException("Can't open database")
+        self.indexes = []
+        self.id_ind = None
+        self.indexes_names = {}
+        self._read_indexes()
+        if not 'id' in self.indexes_names:
+            raise PreconditionsException("There must be `id` index!")
+        for index in self.indexes:
+            index.open_index()
+        self.indexes.sort(key=lambda ind: ind._order)
+        self.__set_main_storage()
+        self.opened = True
+        return True
+
+    def close(self):
+        """
+        Closes the database
+        """
+        if not self.opened:
+            raise DatabaseConflict("Not opened")
+        self.id_ind = None
+        self.indexes_names = {}
+        self.storage = None
+        for index in self.indexes:
+            index.close_index()
+        self.indexes = []
+        self.opened = False
+        return True
+
+    def destroy(self):
+        """
+        Destroys the database.
+
+        This operation is **not reversible**!
+        """
+        # destroy all but *id*
+        if not self.exists():
+            raise DatabaseConflict("Doesn't exist")
+        for index in reversed(self.indexes[1:]):
+            try:
+                self.destroy_index(index)
+            except:
+                pass
+        if getattr(self, 'id_ind', None) is not None:
+            self.id_ind.destroy()  # now destroy id index
+        # remove all files in db directory
+        for root, dirs, files in os.walk(self.path, topdown=False):
+            for name in files:
+                os.remove(os.path.join(root, name))
+            for name in dirs:
+                os.rmdir(os.path.join(root, name))
+        os.rmdir(self.path)
+        self.close()
+        return True
+
+    def _single_update_index(self, index, data, db_data, doc_id):
+        """
+        Performs an update operation on a single index
+
+        :param index: the index to perform the operation on
+        :param data: new data
+        :param db_data: current data in the database
+        :param doc_id: the id of the document
+        """
+        try:
+            old_should_index = index.make_key_value(db_data)
+        except:
+            old_should_index = None
+        if old_should_index:
+            old_key, old_value = old_should_index
+            try:
+                new_should_index = index.make_key_value(data)
+            except:
+                new_should_index = None
+            if new_should_index:
+                new_key, new_value = new_should_index
+                if new_key != old_key:
+                    index.delete(doc_id, old_key)
+                    if new_value:
+                        storage = index.storage
+                        start, size = storage.insert(new_value)
+                    else:
+                        start = 1
+                        size = 0
+                    index.insert(doc_id, new_key, start, size)
+                elif new_value != old_value:
+                    if new_value:
+                        storage = index.storage
+                        start, size = storage.insert(new_value)
+                    else:
+                        start = 1
+                        size = 0
+                    try:
+                        index.update(doc_id, new_key, start, size)
+                    except (ElemNotFound, DocIdNotFound):
+                        # element should be in the index but isn't
+                        # (probably a new index was added without reindexing)
+                        raise TryReindexException()
+            else:
+                index.delete(doc_id, old_key)
+        else:  # not previously indexed
+            self._single_insert_index(index, data, doc_id)
+
+    def _update_id_index(self, _rev, data):
+        """
+        Performs update on **id** index
+        """
+        _id, value = self.id_ind.make_key_value(data)
+        db_data = self.get('id', _id)
+        if db_data['_rev'] != _rev:
+            raise RevConflict()
+        new_rev = random_hex_4()
+        storage = self.storage
+        start, size = storage.update(value)
+        self.id_ind.update(_id, new_rev, start, size)
+        return _id, new_rev, db_data
+
+    def _update_indexes(self, _rev, data):
+        """
+        Performs update operation on all indexes in order
+        """
+        _id, new_rev, db_data = self._update_id_index(_rev, data)
+        for index in self.indexes[1:]:
+            self._single_update_index(index, data, db_data, _id)
+        return _id, new_rev
+
+    def _single_insert_index(self, index, data, doc_id):
+        """
+        Performs an insert operation on a single index
+
+        :param index: the index to perform the operation on
+        :param data: new data
+        :param doc_id: document id
+        """
+        try:
+            should_index = index.make_key_value(data)
+        except:
+            should_index = None
+        if should_index:
+            key, value = should_index
+            if value:
+                storage = index.storage
+                start, size = storage.insert(value)
+            else:
+                start = 1
+                size = 0
+            index.insert(doc_id, key, start, size)
+
+    def _insert_id_index(self, _rev, data):
+        """
+        Performs insert on **id** index.
+        """
+        _id, value = self.id_ind.make_key_value(data)  # may be improved
+        storage = self.storage
+        start, size = storage.insert(value)
+        self.id_ind.insert(_id, _rev, start, size)
+        return _id
+
+    def _insert_indexes(self, _rev, data):
+        """
+        Performs insert operation on all indexes in order
+        """
+        _id = self._insert_id_index(_rev, data)
+        for index in self.indexes[1:]:
+            self._single_insert_index(index, data, _id)
+
+    def _single_delete_index(self, index, data, doc_id, old_data):
+        """
+        Performs a delete operation on a single index.
+        It's very similar to the update functions (which is why ``data`` is among the arguments)
+
+        :param index: the index to perform the operation on
+        :param data: unused; kept for signature compatibility with the update operations
+        :param doc_id: document id
+        :param old_data: current data in the database
+        """
+        index_data = index.make_key_value(old_data)
+        if not index_data:
+            return
+        key, value = index_data
+        try:
+            index.delete(doc_id, key)
+        except TryReindexException:
+            return
+
+    def _delete_id_index(self, _id, _rev, data):
+        """
+        Performs delete from **id** index
+        """
+        #key, value = self.id_ind.make_key_value(data)
+        # key = data['_id']
+        key = self.id_ind.make_key(_id)
+        self.id_ind.delete(key)
+
+    def _delete_indexes(self, _id, _rev, data):
+        """
+        Performs delete operation on all indexes in order
+        """
+        old_data = self.get('id', _id)
+        if old_data['_rev'] != _rev:
+            raise RevConflict()
+        for index in self.indexes[1:]:
+            self._single_delete_index(index, data, _id, old_data)
+        self._delete_id_index(_id, _rev, data)
+
+    def destroy_index(self, index):
+        """
+        Destroys the index
+
+        :param index: the index to destroy
+        :type index: :py:class:`CodernityDB.index.Index` instance, or string
+        """
+        if isinstance(index, basestring):
+            if not index in self.indexes_names:
+                raise PreconditionsException("No index named %s" % index)
+            index = self.indexes_names[index]
+        elif not index in self.indexes:
+            self.__not_opened()
+            raise PreconditionsException("Argument must be Index instance or valid string index format")
+        if index.name == 'id':
+            self.__not_opened()
+            raise PreconditionsException("Id index cannot be destroyed")
+        full_file = "%.2d%s" % (index._order, index.name) + '.py'
+        p = os.path.join(self.path, '_indexes', full_file)
+        os.unlink(p)
+        index.destroy()
+        del self.indexes_names[index.name]
+        self.indexes.remove(index)
+
+    def compact_index(self, index):
+        """
+        Compacts the index.
+        Used for better utilization of index metadata: deleted documents will no longer be present in the structure.
+
+        :param index: the index to compact
+        :type index: :py:class:`CodernityDB.index.Index` instance, or string
+        """
+        if isinstance(index, basestring):
+            if not index in self.indexes_names:
+                raise PreconditionsException("No index named %s" % index)
+            index = self.indexes_names[index]
+        elif not index in self.indexes:
+            self.__not_opened()
+            raise PreconditionsException("Argument must be Index instance or valid string index format")
+        if getattr(index, 'compacting', False):
+            raise ReindexException(
+                "The index=%s is still compacting" % index.name)
+        index.compacting = True
+        index.compact()
+        del index.compacting
+
+    def _compact_indexes(self):
+        """
+        Runs compact on all indexes
+        """
+        for index in self.indexes:
+            self.compact_index(index)
+
+    def _single_reindex_index(self, index, data):
+        doc_id, rev, start, size, status = self.id_ind.get(
+            data['_id'])  # it's cached so it's ok
+        if status != 'd' and status != 'u':
+            self._single_insert_index(index, data, doc_id)
+
+    def reindex_index(self, index):
+        """
+        Performs a reindex on the index. Optimizes metadata and storage information for the given index.
+
+        You can't reindex the **id** index.
+
+        :param index: the index to reindex
+        :type index: :py:class:`CodernityDB.index.Index` instance, or string
+        """
+        if isinstance(index, basestring):
+            if not index in self.indexes_names:
+                raise PreconditionsException("No index named %s" % index)
+            index = self.indexes_names[index]
+        elif not index in self.indexes:
+            self.__not_opened()
+            raise PreconditionsException("Argument must be Index instance or valid string index format")
+        if index.name == 'id':
+            self.__not_opened()
+            raise PreconditionsException("Id index cannot be reindexed")
+        if getattr(index, 'reindexing', False):
+            raise ReindexException(
+                "The index=%s is still reindexing" % index.name)
+
+        all_iter = self.all('id')
+        index.reindexing = True
+        index.destroy()
+        index.create_index()
+
+        while True:
+            try:
+                curr = all_iter.next()
+            except StopIteration:
+                break
+            else:
+                self._single_reindex_index(index, curr)
+        del index.reindexing
+
+    def _reindex_indexes(self):
+        for index in self.indexes[1:]:
+            self.reindex_index(index)
+
+    def insert(self, data):
+        """
+        It operates on a **reference** to the given ``data`` dict object;
+        to avoid having it modified, copy it before inserting!
+
+        If ``data`` does **not** have an ``_id`` field, one will be generated (a random 32-character string)
+
+        :param data: data to insert
+        """
+        if '_rev' in data:
+            self.__not_opened()
+            raise PreconditionsException(
+                "Can't add record with forbidden fields")
+        _rev = random_hex_4()
+        if not '_id' in data:
+            try:
+                _id = self.id_ind.create_key()
+            except:
+                self.__not_opened()
+                raise DatabaseException("No id?")
+        else:
+            _id = data['_id']
+        assert _id is not None
+        data['_rev'] = _rev  # for make_key_value compat with update / delete
+        data['_id'] = _id
+        self._insert_indexes(_rev, data)
+        ret = {'_id': _id, '_rev': _rev}
+        data.update(ret)
+        return ret
+
+    def update(self, data):
+        """
+        It operates on a **reference** to the given ``data`` dict object;
+        to avoid having it modified, copy it before updating!
+
+        ``data`` **must** contain ``_id`` and ``_rev`` fields.
+
+        :param data: data to update
+        """
+        if not '_rev' in data or not '_id' in data:
+            self.__not_opened()
+            raise PreconditionsException("Can't update without _rev or _id")
+        _rev = data['_rev']
+        try:
+            _rev = bytes(_rev)
+        except:
+            self.__not_opened()
+            raise PreconditionsException(
+                "`_rev` must be valid bytes object")
+        _id, new_rev = self._update_indexes(_rev, data)
+        ret = {'_id': _id, '_rev': new_rev}
+        data.update(ret)
+        return ret
+
+    def get(self, index_name, key, with_doc=False, with_storage=True):
+        """
+        Get a single record from the database by ``key``.
+
+        :param index_name: index to get data from
+        :param key: key to get
+        :param with_doc: if ``True`` data from **id** index will be included in output
+        :param with_storage: if ``True`` data from index storage will be included, otherwise just metadata.
+        """
+        # if not self.indexes_names.has_key(index_name):
+        #     raise DatabaseException, "Invalid index name"
+        try:
+            ind = self.indexes_names[index_name]
+        except KeyError:
+            self.__not_opened()
+            raise IndexNotFoundException(
+                "Index `%s` doesn't exists" % index_name)
+        storage = ind.storage
+        try:
+            l_key, _unk, start, size, status = ind.get(key)
+        except ElemNotFound as ex:
+            raise RecordNotFound(ex)
+        if not start and not size:
+            raise RecordNotFound("Not found")
+        elif status == 'd':
+            raise RecordDeleted("Deleted")
+        if with_storage and size:
+            data = storage.get(start, size, status)
+        else:
+            data = {}
+        if with_doc and index_name != 'id':
+            doc = self.get('id', l_key, False)
+            if data:
+                data['doc'] = doc
+            else:
+                data = dict(doc=doc)
+        data['_id'] = l_key
+        if index_name == 'id':
+            data['_rev'] = _unk
+        else:
+            data['key'] = _unk
+        return data
+
+    def get_many(self, index_name, key=None, limit=1, offset=0, with_doc=False, with_storage=True, start=None, end=None, **kwargs):
+        """
+        Gets **multiple** records for the given ``key`` from *hash-based indexes*. Also supports **range** queries on *tree-based indexes* via the ``start`` and ``end`` arguments.
+
+        :param index_name: the index to perform the operation on
+        :param key: key to look for (has to be ``None`` to use range queries)
+        :param limit: defines the limit for the query
+        :param offset: defines the offset (how many records to skip from the start)
+        :param with_doc: if ``True`` data from **id** index will be included in output
+        :param with_storage: if ``True`` data from index storage will be included, otherwise just metadata.
+        :param start: ``start`` parameter for range queries
+        :param end: ``end`` parameter for range queries
+
+        :returns: iterator over records
+        """
+        if index_name == 'id':
+            self.__not_opened()
+            raise PreconditionsException("Can't get many from `id`")
+        try:
+            ind = self.indexes_names[index_name]
+        except KeyError:
+            self.__not_opened()
+            raise IndexNotFoundException(
+                "Index `%s` doesn't exists" % index_name)
+        storage = ind.storage
+        if start is None and end is None:
+            gen = ind.get_many(key, limit, offset)
+        else:
+            gen = ind.get_between(start, end, limit, offset, **kwargs)
+        while True:
+            try:
+#                l_key, start, size, status = gen.next()
+                ind_data = gen.next()
+            except StopIteration:
+                break
+            else:
+                if with_storage and ind_data[-2]:
+                    data = storage.get(*ind_data[-3:])
+                else:
+                    data = {}
+                doc_id = ind_data[0]
+                if with_doc:
+                    doc = self.get('id', doc_id, False)
+                    if data:
+                        data['doc'] = doc
+                    else:
+                        data = dict(doc=doc)
+                data['_id'] = doc_id
+                if key is None:
+                    data['key'] = ind_data[1]
+                yield data
+
+    def all(self, index_name, limit=-1, offset=0, with_doc=False, with_storage=True):
+        """
+        Allows getting all records for the given index
+
+        :param index_name: the index to perform the operation on
+        :param limit: defines the limit for the query
+        :param offset: defines the offset (how many records to skip from the start)
+        :param with_doc: if ``True`` data from **id** index will be included in output
+        :param with_storage: if ``True`` data from index storage will be included, otherwise just metadata
+        """
+        try:
+            ind = self.indexes_names[index_name]
+        except KeyError:
+            self.__not_opened()
+            raise IndexNotFoundException(
+                "Index `%s` doesn't exists" % index_name)
+        storage = ind.storage
+        gen = ind.all(limit, offset)
+        while True:
+            try:
+                doc_id, unk, start, size, status = gen.next()
+            except StopIteration:
+                break
+            else:
+                if index_name == 'id':
+                    if with_storage and size:
+                        data = storage.get(start, size, status)
+                    else:
+                        data = {}
+                    data['_id'] = doc_id
+                    data['_rev'] = unk
+                else:
+                    data = {}
+                    if with_storage and size:
+                        data['value'] = storage.get(start, size, status)
+                    data['key'] = unk
+                    data['_id'] = doc_id
+                    if with_doc:
+                        doc = self.get('id', doc_id, False)
+                        data['doc'] = doc
+                yield data
+
+    def run(self, index_name, target_funct, *args, **kwargs):
+        """
+        Allows executing a given function on the database side (important for server mode)
+
+        If ``target_funct == 'sum'``, the given index must have a ``run_sum`` method.
+
+        :param index_name: name of the index to perform the action on.
+        :param target_funct: target function name (without the *run* prefix)
+        :param \*args: ``*args`` for function
+        :param \*\*kwargs: ``**kwargs`` for function
+
+        """
+        try:
+            ind = self.indexes_names[index_name]
+        except KeyError:
+            self.__not_opened()
+            raise IndexNotFoundException(
+                "Index `%s` doesn't exists" % index_name)
+        try:
+            funct = getattr(ind, "run_" + target_funct)
+        except AttributeError:
+            raise IndexException("Invalid function to run")
+        return funct(self, *args, **kwargs)
+
+    def count(self, target_funct, *args, **kwargs):
+        """
+        Counter. Allows executing, for example:
+
+        .. code-block:: python
+
+            db.count(db.all, 'id')
+
+        It will then return how many records are in your ``id`` index.
+
+        .. warning::
+            It sets ``kwargs['with_storage'] = False`` and ``kwargs['with_doc'] = False``
+
+
+        """
+        kwargs['with_storage'] = False
+        kwargs['with_doc'] = False
+        iter_ = target_funct(*args, **kwargs)
+        i = 0
+        while True:
+            try:
+                iter_.next()
+                i += 1
+            except StopIteration:
+                break
+        return i
+
+    def delete(self, data):
+        """
+        Delete data from database.
+
+        ``data`` has to contain ``_id`` and ``_rev`` fields.
+
+        :param data: data to delete
+        """
+        if not '_rev' in data or not '_id' in data:
+            raise PreconditionsException("Can't delete without _rev or _id")
+        _id = data['_id']
+        _rev = data['_rev']
+        try:
+            _id = bytes(_id)
+            _rev = bytes(_rev)
+        except:
+            raise PreconditionsException(
+                "`_id` and `_rev` must be valid bytes object")
+        data['_deleted'] = True
+        self._delete_indexes(_id, _rev, data)
+        return True
+
+    def compact(self):
+        """
+        Compact all indexes. Runs :py:meth:`._compact_indexes` under the hood.
+        """
+        self.__not_opened()
+        self._compact_indexes()
+
+    def reindex(self):
+        """
+        Reindex all indexes. Runs :py:meth:`._reindex_indexes` under the hood.
+        """
+        self.__not_opened()
+        self._reindex_indexes()
+
+    def flush_indexes(self):
+        """
+        Flushes all indexes
+        """
+        self.__not_opened()
+        for index in self.indexes:
+            index.flush()
+
+    def flush(self):
+        """
+        Flushes all indexes. Runs :py:meth:`.flush_indexes` under the hood.
+        """
+        return self.flush_indexes()
+
+    def fsync(self):
+        """
+        Forces the kernel buffer to be written to disk. Use it only when you're sure you need to.
+        """
+        self.__not_opened()
+        for index in self.indexes:
+            index.flush()
+            index.fsync()
+
+    def __get_size(self):
+        """
+        :returns: total size of database.
+        """
+        if not self.path:
+            return 0
+        return sum(
+            os.path.getsize(os.path.join(dirpath, filename)) for dirpath, dirnames,
+            filenames in os.walk(self.path) for filename in filenames)
+
+    def get_index_details(self, name):
+        """
+        Will return index properties.
+
+        :returns: index details
+        """
+        self.__not_opened()
+        try:
+            db_index = self.indexes_names[name]
+        except KeyError:
+            self.__not_opened()
+            raise IndexNotFoundException("Index doesn't exist")
+
+        props = {}
+        for key, value in db_index.__dict__.iteritems():
+            if not callable(value):  # not using inspect etc...
+                props[key] = value
+        return props
+
+    def get_db_details(self):
+        """
+        Gets database details: size, indexes, environment etc.
+
+        :returns: database details
+        """
+        props = {}
+        props['path'] = self.path
+        props['size'] = self.__get_size()
+        props['indexes'] = self.indexes_names.keys()
+        props['cdb_environment'] = cdb_environment
+        return props
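
A minimal usage sketch of the ``Database`` API added above (a sketch, not part of the commit: it assumes the package is importable and ``/tmp/cdb_example`` is a writable scratch path; ``WithXIndex`` is a hypothetical custom index, and the ``key_format`` keyword is an assumption about the ``HashIndex`` constructor, which is not shown in this diff):

```python
# -*- coding: utf-8 -*-
from CodernityDB.database import Database
from CodernityDB.hash_index import HashIndex


class WithXIndex(HashIndex):
    """Hypothetical index: indexes documents by their 'x' field."""

    def __init__(self, *args, **kwargs):
        kwargs['key_format'] = 'I'  # assumed: keys stored as unsigned ints
        super(WithXIndex, self).__init__(*args, **kwargs)

    def make_key_value(self, data):
        x = data.get('x')
        if x is not None:
            return x, None  # key only; nothing extra goes to index storage
        return None         # document is not indexed

    def make_key(self, key):
        return key


db = Database('/tmp/cdb_example')
db.create()                             # creates the `id` index implicitly
db.add_index(WithXIndex(db.path, 'x'))  # add a custom index by instance

rec = db.insert({'x': 5})               # -> {'_id': ..., '_rev': ...}
doc = db.get('id', rec['_id'])          # fetch the document back by id
doc['x'] = 6
db.update(doc)                          # requires _id and _rev in the dict
print db.count(db.all, 'id')            # Python 2 print statement; -> 1
db.delete(db.get('id', rec['_id']))     # delete needs _id and _rev too
db.close()
```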

CodernityDB/database_gevent.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011-2012 Codernity (http://codernity.com)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from gevent.coros import RLock
+
+from CodernityDB.env import cdb_environment
+from CodernityDB.database import PreconditionsException
+
+cdb_environment['mode'] = "gevent"
+cdb_environment['rlock_obj'] = RLock
+
+
+from CodernityDB.database import Database
+
+
+class GeventDatabase(Database):
+    """
+    A database that works with Gevent
+    """
+
+    def __init__(self, path):
+        super(GeventDatabase, self).__init__(path)
+        self.main_lock = RLock()
+        self.close_open_lock = RLock()
+        self.indexes_locks = {}
+
+    def initialize(self, *args, **kwargs):
+        res = None
+        try:
+            self.close_open_lock.acquire()
+            res = super(GeventDatabase, self).initialize(*args, **kwargs)
+            for name in self.indexes_names.iterkeys():
+                self.indexes_locks[name] = RLock()
+        finally:
+            self.close_open_lock.release()
+        return res
+
+    def open(self, *args, **kwargs):
+        res = None
+        try:
+            self.close_open_lock.acquire()
+            res = super(GeventDatabase, self).open(*args, **kwargs)
+            for name in self.indexes_names.iterkeys():
+                self.indexes_locks[name] = RLock()
+        finally:
+            self.close_open_lock.release()
+        return res
+
+    def create(self, *args, **kwargs):
+        res = None
+        try:
+            self.close_open_lock.acquire()
+            res = super(GeventDatabase, self).create(*args, **kwargs)
+            for name in self.indexes_names.iterkeys():
+                self.indexes_locks[name] = RLock()
+        finally:
+            self.close_open_lock.release()
+        return res
+
+    def close(self):
+        res = None
+        try:
+            self.close_open_lock.acquire()
+            res = super(GeventDatabase, self).close()
+        finally:
+            self.close_open_lock.release()
+        return res
+
+    def destroy(self):
+        res = None
+        try:
+            self.close_open_lock.acquire()
+            res = super(GeventDatabase, self).destroy()
+        finally:
+            self.close_open_lock.release()
+        return res
+
+    def add_index(self, *args, **kwargs):
+        res = None
+        try:
+            self.main_lock.acquire()
+            res = super(GeventDatabase, self).add_index(*args, **kwargs)
+        finally:
+            if self.opened:
+                self.indexes_locks[res] = RLock()
+            self.main_lock.release()
+        return res
+
+    def edit_index(self, *args, **kwargs):
+        res = None
+        try:
+            self.main_lock.acquire()
+            res = super(GeventDatabase, self).edit_index(*args, **kwargs)
+        finally:
+            if self.opened:
+                self.indexes_locks[res] = RLock()
+            self.main_lock.release()
+        return res
+
+    def set_indexes(self, *args, **kwargs):
+        try:
+            self.main_lock.acquire()
+            super(GeventDatabase, self).set_indexes(*args, **kwargs)
+        finally:
+            self.main_lock.release()
+
+    def _read_indexes(self, *args, **kwargs):
+        try:
+            self.main_lock.acquire()
+            super(GeventDatabase, self)._read_indexes(*args, **kwargs)
+        finally:
+            self.main_lock.release()
+
+    def _insert_id_index(self, *args, **kwargs):
+        lock = self.indexes_locks['id']
+        res = None
+        try:
+            lock.acquire()
+            res = super(GeventDatabase, self)._insert_id_index(*args, **kwargs)
+        finally:
+            lock.release()
+        return res
+
+    def _update_id_index(self, *args, **kwargs):
+        lock = self.indexes_locks['id']
+        res = None
+        try:
+            lock.acquire()
+            res = super(GeventDatabase, self)._update_id_index(*args, **kwargs)
+        finally:
+            lock.release()
+        return res
+
+    def _delete_id_index(self, *args, **kwargs):
+        lock = self.indexes_locks['id']
+        res = None
+        try:
+            lock.acquire()
+            res = super(GeventDatabase, self)._delete_id_index(*args, **kwargs)
+        finally:
+            lock.release()
+        return res
+
+    def _single_update_index(self, index, *args, **kwargs):
+        lock = self.indexes_locks[index.name]
+        try:
+            lock.acquire()
+            super(GeventDatabase, self)._single_update_index(
+                index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def _single_insert_index(self, index, *args, **kwargs):
+        lock = self.indexes_locks[index.name]
+        try:
+            lock.acquire()
+            super(GeventDatabase, self)._single_insert_index(
+                index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def _single_delete_index(self, index, *args, **kwargs):
+        lock = self.indexes_locks[index.name]
+        try:
+            lock.acquire()
+            super(GeventDatabase, self)._single_delete_index(
+                index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def _single_compact_index(self, index, *args, **kwargs):
+        lock = self.indexes_locks[index.name]
+        try:
+            lock.acquire()
+            super(GeventDatabase, self).compact_index(index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def reindex_index(self, index, *args, **kwargs):
+        if isinstance(index, basestring):
+            if not index in self.indexes_names:
+                raise PreconditionsException("No index named %s" % index)
+            index = self.indexes_names[index]
+        key = index.name + "reind"
+        self.main_lock.acquire()
+        if key in self.indexes_locks:
+            lock = self.indexes_locks[index.name + "reind"]
+        else:
+            self.indexes_locks[index.name + "reind"] = RLock()
+            lock = self.indexes_locks[index.name + "reind"]
+        self.main_lock.release()
+        try:
+            lock.acquire()
+            super(GeventDatabase, self).reindex_index(index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def destroy_index(self, index, *args, **kwargs):
+        if isinstance(index, basestring):
+            if not index in self.indexes_names:
+                raise PreconditionsException("No index named %s" % index)
+            index = self.indexes_names[index]
+        lock = self.indexes_locks[index.name]
+        try:
+            lock.acquire()
+            super(GeventDatabase, self).destroy_index(index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def flush(self):
+        try:
+            self.main_lock.acquire()
+            super(GeventDatabase, self).flush()
+        finally:
+            self.main_lock.release()
+
+    def fsync(self):
+        try:
+            self.main_lock.acquire()
+            super(GeventDatabase, self).fsync()
+        finally:
+            self.main_lock.release()
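
A minimal concurrency sketch for ``GeventDatabase`` (assuming gevent is installed and ``/tmp/cdb_gevent`` is a writable scratch path). Each operation that touches an index first takes that index's ``RLock``, so the single-threaded core above stays consistent under cooperative concurrency:

```python
import gevent
from CodernityDB.database_gevent import GeventDatabase

db = GeventDatabase('/tmp/cdb_gevent')
db.create()

def writer(n):
    # inserts go through the per-index locks taken by GeventDatabase
    db.insert({'n': n})

jobs = [gevent.spawn(writer, n) for n in range(10)]
gevent.joinall(jobs)
print db.count(db.all, 'id')  # -> 10
db.close()
```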

CodernityDB/database_super_thread_safe.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011-2012 Codernity (http://codernity.com)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from threading import RLock
+
+from CodernityDB.env import cdb_environment
+
+cdb_environment['mode'] = "threads"
+cdb_environment['rlock_obj'] = RLock
+
+from CodernityDB.database import Database
+
+from functools import wraps
+from types import FunctionType, MethodType
+
+
+class SuperLock(type):
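+    # Metaclass that wraps every public method of the class (and of its
+    # bases) so each call holds the shared ``super_lock`` for its whole
+    # duration, flushing the database before the lock is released.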
+
+    @staticmethod
+    def wrapper(f):
+        @wraps(f)
+        def _inner(*args, **kwargs):
+            db = args[0]
+            with db.super_lock:
+                res = f(*args, **kwargs)
+                if db.opened:
+                    db.flush()
+                return res
+        return _inner
+
+    def __new__(cls, classname, bases, attr):
+        new_attr = {}
+        for base in bases:
+            for b_attr in dir(base):
+                a = getattr(base, b_attr, None)
+                if isinstance(a, MethodType) and not b_attr.startswith('_'):
+                    if b_attr == 'flush' or b_attr == 'flush_indexes':
+                        pass
+                    else:
+                        #setattr(base, b_attr, SuperLock.wrapper(a))
+                        new_attr[b_attr] = SuperLock.wrapper(a)
+        for attr_name, attr_value in attr.iteritems():
+            if isinstance(attr_value, FunctionType) and not attr_name.startswith('_'):
+                attr_value = SuperLock.wrapper(attr_value)
+            new_attr[attr_name] = attr_value
+        new_attr['super_lock'] = RLock()
+        return type.__new__(cls, classname, bases, new_attr)
+
+
+class SuperThreadSafeDatabase(Database):
+
+    __metaclass__ = SuperLock
+
+    def __init__(self, *args, **kwargs):
+        super(SuperThreadSafeDatabase, self).__init__(*args, **kwargs)
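
A minimal sketch of ``SuperThreadSafeDatabase`` usage (assuming ``/tmp/cdb_super`` is a writable scratch path). The ``SuperLock`` metaclass wraps every public method, so each call below holds the single ``super_lock`` for its whole duration:

```python
from threading import Thread
from CodernityDB.database_super_thread_safe import SuperThreadSafeDatabase

db = SuperThreadSafeDatabase('/tmp/cdb_super')
db.create()

def writer(n):
    db.insert({'n': n})  # serialized with every other call by super_lock

threads = [Thread(target=writer, args=(n,)) for n in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print db.count(db.all, 'id')  # -> 10
db.close()
```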

CodernityDB/database_thread_safe.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011-2012 Codernity (http://codernity.com)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from threading import RLock
+
+from CodernityDB.env import cdb_environment
+from CodernityDB.database import PreconditionsException
+
+cdb_environment['mode'] = "threads"
+cdb_environment['rlock_obj'] = RLock
+
+
+from CodernityDB.database import Database
+
+
+class ThreadSafeDatabase(Database):
+
+    def __init__(self, path):
+        super(ThreadSafeDatabase, self).__init__(path)
+        self.main_lock = RLock()
+        self.close_open_lock = RLock()
+        self.indexes_locks = {}
+
+    def initialize(self, *args, **kwargs):
+        res = None
+        try:
+            self.close_open_lock.acquire()
+            res = super(ThreadSafeDatabase, self).initialize(*args, **kwargs)
+            for name in self.indexes_names.iterkeys():
+                self.indexes_locks[name] = RLock()
+        finally:
+            self.close_open_lock.release()
+        return res
+
+    def open(self, *args, **kwargs):
+        res = None
+        try:
+            self.close_open_lock.acquire()
+            res = super(ThreadSafeDatabase, self).open(*args, **kwargs)
+            for name in self.indexes_names.iterkeys():
+                self.indexes_locks[name] = RLock()
+        finally:
+            self.close_open_lock.release()
+        return res
+
+    def create(self, *args, **kwargs):
+        res = None
+        try:
+            self.close_open_lock.acquire()
+            res = super(ThreadSafeDatabase, self).create(*args, **kwargs)
+            for name in self.indexes_names.iterkeys():
+                self.indexes_locks[name] = RLock()
+        finally:
+            self.close_open_lock.release()
+        return res
+
+    def close(self):
+        res = None
+        try:
+            self.close_open_lock.acquire()
+            res = super(ThreadSafeDatabase, self).close()
+        finally:
+            self.close_open_lock.release()
+        return res
+
+    def destroy(self):
+        res = None
+        try:
+            self.close_open_lock.acquire()
+            res = super(ThreadSafeDatabase, self).destroy()
+        finally:
+            self.close_open_lock.release()
+        return res
+
+    def add_index(self, *args, **kwargs):
+        res = None
+        try:
+            self.main_lock.acquire()
+            res = super(ThreadSafeDatabase, self).add_index(*args, **kwargs)
+        finally:
+            if self.opened:
+                self.indexes_locks[res] = RLock()
+            self.main_lock.release()
+        return res
+
+    def edit_index(self, *args, **kwargs):
+        res = None
+        try:
+            self.main_lock.acquire()
+            res = super(ThreadSafeDatabase, self).edit_index(*args, **kwargs)
+        finally:
+            if self.opened:
+                self.indexes_locks[res] = RLock()
+            self.main_lock.release()
+        return res
+
+    def set_indexes(self, *args, **kwargs):
+        try:
+            self.main_lock.acquire()
+            super(ThreadSafeDatabase, self).set_indexes(*args, **kwargs)
+        finally:
+            self.main_lock.release()
+
+    def _read_indexes(self, *args, **kwargs):
+        try:
+            self.main_lock.acquire()
+            super(ThreadSafeDatabase, self)._read_indexes(*args, **kwargs)
+        finally:
+            self.main_lock.release()
+
+    def _insert_id_index(self, *args, **kwargs):
+        lock = self.indexes_locks['id']
+        try:
+            lock.acquire()
+            res = super(ThreadSafeDatabase,
+                        self)._insert_id_index(*args, **kwargs)
+        finally:
+            lock.release()
+        return res
+
+    def _update_id_index(self, *args, **kwargs):
+        lock = self.indexes_locks['id']
+        res = None
+        try:
+            lock.acquire()
+            res = super(
+                ThreadSafeDatabase, self)._update_id_index(*args, **kwargs)
+        finally:
+            lock.release()
+        return res
+
+    def _delete_id_index(self, *args, **kwargs):
+        lock = self.indexes_locks['id']
+        res = None
+        try:
+            lock.acquire()
+            res = super(
+                ThreadSafeDatabase, self)._delete_id_index(*args, **kwargs)
+        finally:
+            lock.release()
+        return res
+
+    def _single_update_index(self, index, *args, **kwargs):
+        lock = self.indexes_locks[index.name]
+        try:
+            lock.acquire()
+            super(ThreadSafeDatabase,
+                  self)._single_update_index(index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def _single_insert_index(self, index, *args, **kwargs):
+        lock = self.indexes_locks[index.name]
+        try:
+            lock.acquire()
+            super(ThreadSafeDatabase,
+                  self)._single_insert_index(index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def _single_delete_index(self, index, *args, **kwargs):
+        lock = self.indexes_locks[index.name]
+        try:
+            lock.acquire()
+            super(ThreadSafeDatabase,
+                  self)._single_delete_index(index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def _single_compact_index(self, index, *args, **kwargs):
+        lock = self.indexes_locks[index.name]
+        try:
+            lock.acquire()
+            super(ThreadSafeDatabase, self).compact_index(
+                index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def reindex_index(self, index, *args, **kwargs):
+        if isinstance(index, basestring):
+            if not index in self.indexes_names:
+                raise PreconditionsException("No index named %s" % index)
+            index = self.indexes_names[index]
+        key = index.name + "reind"
+        self.main_lock.acquire()
+        if key in self.indexes_locks:
+            lock = self.indexes_locks[index.name + "reind"]
+        else:
+            self.indexes_locks[index.name + "reind"] = RLock()
+            lock = self.indexes_locks[index.name + "reind"]
+        self.main_lock.release()
+        try:
+            lock.acquire()
+            super(ThreadSafeDatabase, self).reindex_index(
+                index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def destroy_index(self, index, *args, **kwargs):
+        if isinstance(index, basestring):
+            if not index in self.indexes_names:
+                raise PreconditionsException("No index named %s" % index)
+            index = self.indexes_names[index]
+        lock = self.indexes_locks[index.name]
+        try:
+            lock.acquire()
+            super(ThreadSafeDatabase, self).destroy_index(
+                index, *args, **kwargs)
+        finally:
+            lock.release()
+
+    def flush(self):
+        try:
+            self.main_lock.acquire()
+            super(ThreadSafeDatabase, self).flush()
+        finally:
+            self.main_lock.release()
+
+    def fsync(self):
+        try:
+            self.main_lock.acquire()
+            super(ThreadSafeDatabase, self).fsync()
+        finally:
+            self.main_lock.release()
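
``ThreadSafeDatabase`` is a drop-in replacement for ``Database`` with finer-grained locking than the "super" variant: instead of one global lock, each index gets its own ``RLock``. A minimal sketch (assuming ``/tmp/cdb_threads`` is a writable scratch path):

```python
from threading import Thread
from CodernityDB.database_thread_safe import ThreadSafeDatabase

db = ThreadSafeDatabase('/tmp/cdb_threads')
db.create()

def writer(n):
    db.insert({'n': n})  # takes the `id` index lock only for this insert

threads = [Thread(target=writer, args=(n,)) for n in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print db.count(db.all, 'id')  # -> 10
db.close()
```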

CodernityDB/debug_stuff.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011-2012 Codernity (http://codernity.com)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from CodernityDB.tree_index import TreeBasedIndex
+import struct
+import os
+
+import inspect
+from functools import wraps
+import json
+
+
+class DebugTreeBasedIndex(TreeBasedIndex):
+
+    def __init__(self, *args, **kwargs):
+        super(DebugTreeBasedIndex, self).__init__(*args, **kwargs)
+
+    def print_tree(self):
+        print '-----CURRENT TREE-----'
+        print self.root_flag
+
+        if self.root_flag == 'l':
+            print '---ROOT---'
+            self._print_leaf_data(self.data_start)
+            return
+        else:
+            print '---ROOT---'
+            self._print_node_data(self.data_start)
+            nr_of_el, children_flag = self._read_node_nr_of_elements_and_children_flag(
+                self.data_start)
+            nodes = []
+            for index in range(nr_of_el):
+                l_pointer, key, r_pointer = self._read_single_node_key(
+                    self.data_start, index)
+                nodes.append(l_pointer)
+            nodes.append(r_pointer)
+            print 'ROOT NODES', nodes
+            while children_flag == 'n':
+                self._print_level(nodes, 'n')
+                new_nodes = []
+                for node in nodes:
+                    nr_of_el, children_flag = \
+                        self._read_node_nr_of_elements_and_children_flag(node)
+                    for index in range(nr_of_el):
+                        l_pointer, key, r_pointer = self._read_single_node_key(
+                            node, index)
+                        new_nodes.append(l_pointer)
+                    new_nodes.append(r_pointer)
+                nodes = new_nodes
+            self._print_level(nodes, 'l')
+
+    def _print_level(self, nodes, flag):
+        print '---NEXT LVL---'
+        if flag == 'n':
+            for node in nodes:
+                self._print_node_data(node)
+        elif flag == 'l':
+            for node in nodes:
+                self._print_leaf_data(node)
+
+    def _print_leaf_data(self, leaf_start_position):
+        print 'printing data of leaf at', leaf_start_position
+        nr_of_elements = self._read_leaf_nr_of_elements(leaf_start_position)
+        self.buckets.seek(leaf_start_position)
+        data = self.buckets.read(self.leaf_heading_size +
+                                 nr_of_elements * self.single_leaf_record_size)
+        leaf = struct.unpack('<' + self.leaf_heading_format +
+                             nr_of_elements * self.single_leaf_record_format, data)
+        print leaf
+        print
+
+    def _print_node_data(self, node_start_position):
+        print 'printing data of node at', node_start_position
+        nr_of_elements = self._read_node_nr_of_elements_and_children_flag(
+            node_start_position)[0]
+        self.buckets.seek(node_start_position)
+        data = self.buckets.read(self.node_heading_size + self.pointer_size
+                                 + nr_of_elements * (self.key_size + self.pointer_size))
+        node = struct.unpack('<' + self.node_heading_format + self.pointer_format
+                             + nr_of_elements * (
+                             self.key_format + self.pointer_format),
+                             data)
+        print node
+        print
+# ------------------>
+
+
+def database_step_by_step(db_obj, path=None):
+
+    if not path:
+        # ugly, but needed for multiplatform support
+        p = db_obj.path
+        p1 = os.path.split(p)
+        p2 = os.path.split(p1[0])
+        p3 = '_'.join([p2[1], 'operation_logger.log'])
+        path = os.path.join(os.path.split(p2[0])[0], p3)
+    f_obj = open(path, 'wb')
+
+    __stack = []  # inspect.stack() does not work under pytest etc.
+
+    def remove_from_stack(name):
+        # remove the most recent occurrence of `name` from the call stack
+        for i in range(len(__stack) - 1, -1, -1):
+            if __stack[i] == name:
+                __stack.pop(i)
+                break
+
+    def __dumper(f):
+        @wraps(f)
+        def __inner(*args, **kwargs):
+            funct_name = f.__name__
+            if funct_name == 'count':
+                name = args[0].__name__
+                meth_args = (name,) + args[1:]
+            elif funct_name in ('reindex_index', 'compact_index'):
+                name = args[0].name
+                meth_args = (name,) + args[1:]
+            else:
+                meth_args = args
+            kwargs_copy = kwargs.copy()
+            res = None
+            __stack.append(funct_name)
+            if funct_name == 'insert':
+                try:
+                    res = f(*args, **kwargs)
+                except:
+                    packed = json.dumps((funct_name,
+                                         meth_args, kwargs_copy, None))
+                    f_obj.write('%s\n' % packed)
+                    f_obj.flush()
+                    raise
+                else:
+                    packed = json.dumps((funct_name,
+                                         meth_args, kwargs_copy, res))
+                f_obj.write('%s\n' % packed)
+                f_obj.flush()
+            else:
+                if funct_name == 'get':
+                    for curr in __stack:
+                        if ('delete' in curr or 'update' in curr) and not curr.startswith('test'):
+                            remove_from_stack(funct_name)
+                            return f(*args, **kwargs)
+                packed = json.dumps((funct_name, meth_args, kwargs_copy))
+                f_obj.write('%s\n' % packed)
+                f_obj.flush()
+                res = f(*args, **kwargs)
+            remove_from_stack(funct_name)
+            return res
+        return __inner
+
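+    # monkey-patch every public method of db_obj with the logging wrapper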
+    for meth_name, meth_f in inspect.getmembers(db_obj, predicate=inspect.ismethod):
+        if not meth_name.startswith('_'):
+            setattr(db_obj, meth_name, __dumper(meth_f))
+
+    setattr(db_obj, 'operation_logger', f_obj)
+
+
+def database_from_steps(db_obj, path):
+    # db_obj.insert=lambda data : insert_for_debug(db_obj, data)
+    with open(path, 'rb') as f_obj:
+        for current in f_obj:
+            line = json.loads(current[:-1])
+            if line[0] == 'count':
+                obj = getattr(db_obj, line[1][0])
+                line[1] = [obj] + line[1][1:]
+            name = line[0]
+            if name == 'insert':
+                try:
+                    line[1][0].pop('_rev')
+                except KeyError:
+                    pass
+            elif name in ('delete', 'update'):
+                el = db_obj.get('id', line[1][0]['_id'])
+                line[1][0]['_rev'] = el['_rev']
+#                print 'FROM STEPS doing', line
+            meth = getattr(db_obj, line[0], None)
+            if not meth:
+                raise Exception("Method = `%s` not found" % line[0])
+
+            meth(*line[1], **line[2])
+
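+# Example round-trip (a sketch; the log path below is hypothetical):
+#
+#     database_step_by_step(db)    # wrap db's public methods with logging
+#     db.insert(dict(x=1))         # the call is appended to the log file
+#     database_from_steps(other_db, '/tmp/db_operation_logger.log')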
+
+#def insert_for_debug(self, data):
+#
+#    _rev = data['_rev']
+#
+#    if not '_id' in data:
+#        _id = uuid4().hex
+#    else:
+#        _id = data['_id']
+#    data['_id'] = _id
+#    try:
+#        _id = bytes(_id)
+#    except:
+#        raise DatabaseException("`_id` must be valid bytes object")
+#    self._insert_indexes(_id, _rev, data)
+#    ret = {'_id': _id, '_rev': _rev}
+#    data.update(ret)
+#    return ret

CodernityDB/env.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011-2012 Codernity (http://codernity.com)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+It's CodernityDB environment.
+Handles internal informations.'
+"""
+
+cdb_environment = {
+    'mode': 'normal'
+}
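+
+# Other parts of the library read and extend this mapping at runtime; for
+# example, hash_index.py checks cdb_environment.get('rlock_obj') to decide
+# whether to patch its record cache for thread safety.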

CodernityDB/hash_index.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011-2012 Codernity (http://codernity.com)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from CodernityDB.index import (Index,
+                               IndexException,
+                               DocIdNotFound,
+                               ElemNotFound,
+                               TryReindexException,
+                               IndexPreconditionsException)
+
+import os
+import marshal
+import io
+import struct
+import shutil
+
+from CodernityDB.storage import IU_Storage, DummyStorage
+
+from CodernityDB.env import cdb_environment
+
+if cdb_environment.get('rlock_obj'):
+    from CodernityDB import patch
+    patch.patch_cache_rr(cdb_environment['rlock_obj'])
+
+from CodernityDB.rr_cache import cache1lvl
+
+
+from CodernityDB.misc import random_hex_32
+
+try:
+    from CodernityDB import __version__
+except ImportError:
+    from __init__ import __version__
+
+
+class IU_HashIndex(Index):
+    """
+    That class is for Internal Use only, if you want to use HashIndex just subclass the :py:class:`HashIndex` instead this one.
+
+    That design is because main index logic should be always in database not in custom user indexes.
+    """
+
+    def __init__(self, db_path, name, entry_line_format='<32s{key}IIcI', hash_lim=0xfffff, storage_class=None, key_format='c'):
+        """
+        The index is capable to solve conflicts by `Separate chaining`
+        :param db_path: database path
+        :type db_path: string
+        :param name: index name
+        :type name: ascii string
+        :param line_format: line format, `key_format` parameter value will replace `{key}` if present.
+        :type line_format: string (32s{key}IIcI by default) {doc_id}{hash_key}{start}{size}{status}{next}
+        :param hash_lim: maximum hash functon results (remember about birthday problem) count from 0
+        :type hash_lim: integer
+        :param storage_class: Storage class by default it will open standard :py:class:`CodernityDB.storage.Storage` (if string has to be accesible by globals()[storage_class])
+        :type storage_class: class name which will be instance of CodernityDB.storage.Storage instance or None
+        :param key_format: a index key format
+        """
+        if key_format and '{key}' in entry_line_format:
+            entry_line_format = entry_line_format.replace('{key}', key_format)
+        super(IU_HashIndex, self).__init__(db_path, name)
+        self.hash_lim = hash_lim
+        if not storage_class:
+            storage_class = IU_Storage
+        if storage_class and not isinstance(storage_class, basestring):
+            storage_class = storage_class.__name__
+        self.storage_class = storage_class
+        self.storage = None
+
+        self.bucket_line_format = "<I"
+        self.bucket_line_size = struct.calcsize(self.bucket_line_format)
+        self.entry_line_format = entry_line_format
+        self.entry_line_size = struct.calcsize(self.entry_line_format)
+
+        cache = cache1lvl(100)
+        self._find_key = cache(self._find_key)
+        self._locate_doc_id = cache(self._locate_doc_id)
+        self.bucket_struct = struct.Struct(self.bucket_line_format)
+        self.entry_struct = struct.Struct(entry_line_format)
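+        # entry records begin after the metadata block (_start_ind bytes),
+        # the (hash_lim + 1) bucket pointer slots, and 2 spare bytes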
+        self.data_start = (
+            self.hash_lim + 1) * self.bucket_line_size + self._start_ind + 2
+
+    def _fix_params(self):
+        super(IU_HashIndex, self)._fix_params()
+        self.bucket_line_size = struct.calcsize(self.bucket_line_format)
+        self.entry_line_size = struct.calcsize(self.entry_line_format)
+        self.data_start = (
+            self.hash_lim + 1) * self.bucket_line_size + self._start_ind + 2
+
+    def open_index(self):
+        if not os.path.isfile(os.path.join(self.db_path, self.name + '_buck')):
+            raise IndexException("Doesn't exists")
+        self.buckets = io.open(
+            os.path.join(self.db_path, self.name + "_buck"), 'r+b', buffering=0)
+        self._fix_params()
+        self._open_storage()
+
+    def create_index(self):
+        if os.path.isfile(os.path.join(self.db_path, self.name + '_buck')):
+            raise IndexException('Already exists')
+        with io.open(os.path.join(self.db_path, self.name + "_buck"), 'w+b') as f:
+            props = dict(name=self.name,
+                         bucket_line_format=self.bucket_line_format,
+                         entry_line_format=self.entry_line_format,
+                         hash_lim=self.hash_lim,
+                         version=self.__version__,
+                         storage_class=self.storage_class)
+            f.write(marshal.dumps(props))
+        self.buckets = io.open(
+            os.path.join(self.db_path, self.name + "_buck"), 'r+b', buffering=0)
+        self._create_storage()
+
+    def destroy(self):
+        super(IU_HashIndex, self).destroy()
+        self._clear_cache()
+
+    def _open_storage(self):
+        s = globals()[self.storage_class]
+        if not self.storage:
+            self.storage = s(self.db_path, self.name)
+        self.storage.open()
+
+    def _create_storage(self):
+        s = globals()[self.storage_class]
+        if not self.storage:
+            self.storage = s(self.db_path, self.name)
+        self.storage.create()
+
+    # def close_index(self):
+    #     self.buckets.flush()
+    #     self.buckets.close()
+    #     self.storage.close()
+#    @lfu_cache(100)
+    def _find_key(self, key):
+        """
+        Find the key position
+
+        :param key: the key to find
+        """
+        start_position = self._calculate_position(key)
+        self.buckets.seek(start_position)
+        curr_data = self.buckets.read(self.bucket_line_size)
+        if curr_data:
+            location = self.bucket_struct.unpack(curr_data)[0]
+            if not location:
+                return None, None, 0, 0, 'u'
+            found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
+                key, location)
+            if status == 'd':  # when first record from many is deleted
+                while True:
+                    found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
+                        key, _next)
+                    if status != 'd':
+                        break
+            return doc_id, l_key, start, size, status
+        else:
+            return None, None, 0, 0, 'u'
+
+    def _find_key_many(self, key, limit=1, offset=0):
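+        # walk the separate-chaining list for this bucket: first skip
+        # `offset` live entries whose key really matches, then yield up
+        # to `limit` further matches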
+        location = None
+        start_position = self._calculate_position(key)
+        self.buckets.seek(start_position)
+        curr_data = self.buckets.read(self.bucket_line_size)
+        if curr_data:
+            location = self.bucket_struct.unpack(curr_data)[0]
+        while offset:
+            if not location:
+                break
+            try:
+                found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
+                    key, location)
+            except IndexException:
+                break
+            else:
+                if status != 'd':
+                    if l_key == key:  # in case of hash function conflicts
+                        offset -= 1
+                location = _next
+        while limit:
+            if not location:
+                break
+            try:
+                found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
+                    key, location)
+            except IndexException:
+                break
+            else:
+                if status != 'd':
+                    if l_key == key:  # in case of hash function conflicts
+                        yield doc_id, start, size, status
+                        limit -= 1
+                location = _next
+
+    def _calculate_position(self, key):
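+        # map the key's hash onto one of the hash_lim + 1 buckets and
+        # return the byte offset of that bucket's pointer slot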
+        return abs(hash(key) & self.hash_lim) * self.bucket_line_size + self._start_ind
+
+    #TODO add cache!
+    def _locate_key(self, key, start):
+        """
+        Locate position of the key, it will iterate using `next` field in record
+