Source

DiffDB / diffdb1.py

Full commit
'''
 DiffDB
========

DiffDB is a proof of concept for creating a key/value database
interface where, instead of always persisting the entire "value", a
patch is committed and then applied by the database.

It is assumed that the user or client of the database functions
similarly to a session where the value is loaded, work is done,
updating the session as needed, then persisting it at the end of the
request. This is similar to a VCS system in terms of workflow.

Since this is a proof of concept, it does not support things like
multiple mutations. Only the end state is tested for differences and
the patch does not get updated on each change. For example: ::

  db['foo'] = {'x': 1}
  # add y
  db['foo'].update({'y': 2})
  # remove y
  del db['foo']['y']

  # the only operation that happens is the {'x': 1} gets sent as a patch.


Dict Patch Format
-----------------

The dict patch format is a known dictionary format that explains how
to update a source document. Here is an example: ::

  patch = {
    '+': { 'x': 1 },
    '-': [ ['y'], ['z'] ],
  }
 

The '+' or 'update' operation defines a dictionary that will be used
to update the original. This works just like the dict update
function. The '-' or 'removal' operation defines where keys should be
removed. This effectively only has to consider root level keys b/c in
computing the diff, child dicts can be updated to remove values. For
example, take the patch above and suppose the source is: ::

  {
    'a': 1,
    'y': 2,
    'z': 3,
  }

The result from applying the removal operation is: ::

  {
    'a': 1
  }

For clarity, applying the 'update' operation would produce: ::

  {
    'a': 1,
    'x': 1,
  }
'''


import os
import cherrypy
import simplejson
import time
import httplib2
import requests

from dictpatch import DictDiff, DictPatch

from pprint import pprint


class PatchError(Exception):
    '''Exception type for patch-related failures in DiffDB.'''


class Conn(object):
    '''A simple helper object to provide a URL to the database endpoint'''

    def __init__(self, base):
        '''
        Arguments:
        - `base`: root URL of the database endpoint, for example
          'http://localhost:9998'. It is stored unchanged; a trailing
          '/' is tolerated and handled in `url`.
        '''
        self.base = base

    def url(self, key):
        '''Return the URL for `key`.

        The base URL and the key are joined with exactly one '/'.
        Trailing slashes on the base are dropped first so that a base
        of 'http://host/' does not yield 'http://host//key'. The key
        itself is appended as given (it may contain '/' separators).
        '''
        return self.base.rstrip('/') + '/' + key


class DBase(object):
    '''
    This is our DBM like object.

    Values are read with ``db[key]`` and written with ``db[key] = value``.
    A private snapshot of the last known stored document is kept per key
    in `cache`; on assignment the new value is diffed against that
    snapshot and only the resulting patch is sent. When there is no
    (non-empty) previous document the whole value is sent instead.
    '''

    def __init__(self, conn):
        '''
        Arguments:
        - `conn`: a Conn object used to communicate with the db endpoint
        '''
        self.conn = conn
        self.http = httplib2.Http()
        # key -> snapshot of the document as last read from / written to
        # the endpoint. Snapshots are never handed out to callers.
        self.cache = {}

    def clear(self, key):
        '''Clears a value to be sure it isn't cached. This is helpful
        to be sure when starting a set of changes you use a current
        version of the document from the source.

        Clearing a key that is not cached is a no-op.'''
        # pop() rather than a truthiness check so that cached falsy
        # documents (e.g. an empty dict) are dropped as well.
        self.cache.pop(key, None)

    def __setitem__(self, key, value):
        '''Update the value via a patch or set a new value.

        Arguments:
        - `key`: key of the document, appended to the endpoint URL
        - `value`: the complete new document (must be JSON serialisable)

        Raises PatchError when the endpoint answers with a non-2xx
        status; the cached snapshot for `key` is discarded in that case.
        '''
        url = self.conn.url(key)

        # Serialise once up front: this is the PUT body and, after a
        # successful write, the source of the new cache snapshot.
        snapshot = simplejson.dumps(value)

        # first try our cache, then the db, or else we know we need to
        # just add a new value
        val = self.cache.get(key)
        if not val:
            try:
                val = self[key]
            except KeyError:
                val = None

        if not val:
            # no (or an empty) previous document, so PUT the whole value
            ct = 'application/json'
            body = snapshot
            method = 'PUT'
        else:
            # find our diff and send the patch
            patch = DictDiff(val).diff(value)
            body = simplejson.dumps(patch)
            ct = 'application/json+patch'
            method = 'PATCH'

        # make the request
        res, _content = self.http.request(
            url, method=method, body=body, headers={'Content-Type': ct})

        if not 200 <= res.status < 300:
            # We no longer know what the endpoint holds for this key.
            self.cache.pop(key, None)
            raise PatchError('%s %s failed with HTTP status %s'
                             % (method, url, res.status))

        # Remember what was just written so the next assignment is
        # diffed against the current document rather than a stale one.
        # Decoding the JSON text yields an object independent of `value`,
        # so later in-place edits by the caller cannot leak into it.
        self.cache[key] = simplejson.loads(snapshot)

    def __getitem__(self, key):
        '''Get the value. This will always reset our cache value.

        Returns the decoded document on a 200 response and raises
        KeyError(key) on a 404, i.e. when the value has not been set.
        Any other status yields None.
        '''
        res, content = self.http.request(self.conn.url(key))
        if res.status == 200:
            self.cache[key] = simplejson.loads(content)
            # Decode a second time so the caller gets its own object.
            # If the cached snapshot were the returned object, mutating
            # it in place and assigning it back would diff the object
            # against itself and produce an empty patch.
            return simplejson.loads(content)
        if res.status == 404:
            raise KeyError(key)
        # NOTE: other statuses (e.g. server errors) are not treated as
        # errors here; callers see None.
        return None


class TestDBase(object):
    '''Round-trip check for DBase; talks to http://localhost:9998.'''

    def test_set_op(self):
        '''Store a document, read it back and compare the two.'''
        client = DBase(Conn('http://localhost:9998'))
        document = {'x': 1, 'y': 2}
        client['hello/world'] = document
        assert client['hello/world'] == document
        

class Server(object):
    '''This provides our API. In theory this would wrap our actual
    session database (Postgres, MongoDB, etc.)

    Documents live in `index_db`, an in-memory dict keyed by the tuple
    of URL path segments (``/hello/world`` -> ``('hello', 'world')``).
    '''

    # HTTP verbs that `default` may dispatch to. Anything else is
    # answered with 405 instead of being looked up with getattr().
    _methods = ('GET', 'PATCH', 'PUT')

    def __init__(self):
        self.index_db = {('status', 'check'): {'all': 'ok'}}

    @cherrypy.tools.json_in(content_type=['application/json+patch',
                                          'application/json',
                                          'text/javascript'])
    @cherrypy.tools.json_out()
    def default(self, *args, **kw):
        '''
        This is a simple dispatcher method to our actual HTTP
        methods. I tried to use the MethodDispatcher in cherrypy but
        most tools do not play nice with any other dispatcher other
        than the default.

        The positional arguments are the URL path segments; they are
        passed on to the handler named after the request method.
        Raises HTTPError(405) for a method that has no handler.
        '''
        meth = cherrypy.request.method.upper()
        if meth not in self._methods:
            # 405 Method Not Allowed; a bare getattr() would instead
            # fail with AttributeError and surface as a 500.
            raise cherrypy.HTTPError(405)
        return getattr(self, meth)(*args, **kw)
    default.exposed = True

    def GET(self, *key):
        '''
        Return the value. We could (should) do some caching here if we
        used another db store.

        The special key '__all__' allows us to get our entire DB. This
        is simply here for debugging.

        Raises HTTPError(404) when the key is unknown or its stored
        value is None.
        '''
        # `key` is empty for a request to the root path, so guard the
        # index before looking at the first segment.
        if key and key[0] == '__all__':
            return dict(('/'.join(k), v) for k, v in self.index_db.items())

        out = self.index_db.get(tuple(key))
        if out is None:
            raise cherrypy.HTTPError(404)
        return out

    def PATCH(self, *key):
        '''
        Take the patch and apply it to the source.

        The patch is the decoded JSON request body. Raises
        HTTPError(404) when there is no document stored under the key
        (or it is None); an existing but empty document can be patched.
        '''
        key = tuple(key)
        current = self.index_db.get(key)
        if current is None:
            raise cherrypy.HTTPError(404)
        patch = DictPatch(cherrypy.request.json)
        self.index_db[key] = patch.apply(current)

    def PUT(self, *key, **kw):
        '''
        Create a new document with the given key and document.

        The document is the decoded JSON request body; an existing
        document under the same key is replaced.
        '''
        self.index_db[tuple(key)] = cherrypy.request.json


def run_server():
    '''Start CherryPy on port 9998 and block until the engine stops.

    Two applications are mounted: the static 'okapi' directory that
    sits next to this file (under /okapi) and the Server API (at the
    root).
    '''
    cherrypy.config.update({'server.socket_port': 9998})

    # using okapi: http://aminus.net/wiki/Okapi
    module_dir = os.path.dirname(os.path.abspath(__file__))
    static_conf = {
        '/': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': os.path.join(module_dir, 'okapi'),
        },
    }
    cherrypy.tree.mount(None, '/okapi', config=static_conf)
    cherrypy.tree.mount(Server())

    engine = cherrypy.engine
    engine.start()
    engine.block()

# Start the server only when this file is executed as a script; importing
# the module (e.g. to use DBase as a client) has no side effects.
if __name__ == '__main__':
    run_server()