Source: dbtest/test.py

#!/usr/bin/env python
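#
# Benchmark of NUMBER write and NUMBER read operations against MySQL,
# MongoDB, Tokyo Cabinet and (optionally) Postgres. Expects the sample
# page data/mara.html to exist and stores the Tokyo Cabinet file in
# var/tc.db.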
from random import randint, shuffle
import time
from hashlib import sha1
import os
import marshal
import zlib
from bson import Binary
import sys

# Number of read/write operations
NUMBER = 2000

# Size of data of one record in database
#DATA_SIZE = 1000

# Content to save into database
# About 70K original and 17K gzipped
DATA = open('data/mara.html').read()

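# Backends to benchmark. For each enabled entry, main() looks up the
# functions <key>_setup_database, <key>_write and <key>_read by name.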
CONFIG = [
    {'key': 'mysql', 'enable': 1, 'name': 'MySQL'},
    {'key': 'mongo', 'enable': 1, 'name': 'MongoDB'},
    {'key': 'tc', 'enable': 1, 'name': 'Tokyo Cabinet'},
    {'key': 'postgres', 'enable': 0, 'name': 'Postgres'},
]


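# Build one record: the key is the sha1 hex digest of the record number,
# the value is a zlib-compressed marshal dump of a page dict. With
# only_hash=True the payload is skipped (used when building the key list
# for the read benchmark).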
def random_data(number, only_hash=False):
    _hash = sha1(str(number)).hexdigest()
    if only_hash:
        zdata = None
    else:
        page = {
            'url': 'http://enc-dic.com/enc_rus/Mara-705.html',
            'body': DATA,
        }
        data = marshal.dumps(page)
        zdata = zlib.compress(data)
    return _hash, zdata


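# Inverse of the payload encoding in random_data(): decompress, then
# unmarshal the page dict.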
def parse_page(data):
    dump = zlib.decompress(data)
    return marshal.loads(dump)


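# Precompute the keys of all NUMBER records and shuffle them, so the read
# benchmarks fetch records in random order.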
def build_hash_list():
    print 'Building hash list'
    hash_list = []
    for x in xrange(NUMBER):
        _hash, data = random_data(x, only_hash=True)
        hash_list.append(_hash)
    shuffle(hash_list)
    return hash_list


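# Drive a generator-style benchmark function: each yield marks one
# finished operation, and roughly once a second the number of operations
# completed in that second is printed as a progress ticker. Returns the
# total elapsed time in seconds.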
def timeit(func, *args):
    ts = time.time()
    second_ts = ts
    count = 0
    for x in func(*args):
        count += 1
        now = time.time()
        if now - second_ts > 1:
            second_ts = now
            print str(count),
            sys.stdout.flush()
            count = 0
    if count:
        print str(count),
    print
    return time.time() - ts


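# MySQL backend: a MyISAM table keyed by the 40-character sha1 hex digest,
# with the compressed page stored in a blob column.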
def mysql_setup_database():
    import MySQLdb

    conn = MySQLdb.connect(user='web', passwd='web-**')
    cur = conn.cursor()
    cur.execute('drop database if exists dbtest')
    cur.execute('create database dbtest charset utf8')
    cur.execute('use dbtest')
    # Alternative schema, not used:
    #     id int auto_increment primary key,
    #     unique (hash)
    cur.execute('''
        create table cache (
            id varchar(40) primary key,
            data blob not null
        ) engine = myisam
    ''')


def mysql_write():
    import MySQLdb

    conn = MySQLdb.connect(user='web', passwd='web-**')
    cur = conn.cursor()
    cur.execute('use dbtest')
    for x in xrange(NUMBER):
        _hash, data = random_data(x)
        cur.execute('''
            insert into cache (id, data) values(%s, %s)
        ''', (_hash, data))
        yield 1


def mysql_read(hash_list):
    import MySQLdb

    conn = MySQLdb.connect(user='web', passwd='web-**')
    cur = conn.cursor()
    cur.execute('use dbtest')
    #cur.execute('load index into cache `cache`')
    for _hash in hash_list:
        cur.execute('''
            select id, data from cache
            where id = %s
        ''', (_hash,))
        _hash, data = cur.fetchone()
        page = parse_page(data)
        assert len(_hash) == 40
        assert len(page['body']) == len(DATA)
        assert page['body'].startswith(DATA[:100])
        yield 1


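# Postgres backend. Note that postgres_write() commits after every insert,
# while the MySQL writer never commits (MyISAM is not transactional), so
# the write timings are not directly comparable.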
def postgres_setup_database():
    import psycopg2

    conn = psycopg2.connect(user='lorien', database='dbtest')
    cur = conn.cursor()
    cur.execute('drop table if exists cache')
    cur.execute('''
        create table cache (
            id bytea not null,
            data bytea not null,
            primary key (id)
        );
    ''')
    conn.commit()


def postgres_write():
    import psycopg2

    conn = psycopg2.connect(user='lorien', database='dbtest')
    cur = conn.cursor()
    for x in xrange(NUMBER):
        _hash, data = random_data(x)
        cur.execute('''
            insert into cache (id, data) values(%s, %s)
        ''', (_hash, psycopg2.Binary(data)))
        conn.commit()
        yield 1


def postgres_read(hash_list):
    import psycopg2

    conn = psycopg2.connect(user='lorien', database='dbtest')
    cur = conn.cursor()
    for _hash in hash_list:
        cur.execute('''
            select id, data from cache
            where id = %s
        ''', (_hash,))
        _hash, data = cur.fetchone()
        page = parse_page(data)
        assert len(_hash) == 40
        assert len(page['body']) == len(DATA)
        assert page['body'].startswith(DATA[:100])
        yield 1


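# MongoDB backend, using the old pymongo Connection API. save() with
# safe=True waits for the server to acknowledge each write, keeping the
# comparison with the synchronous SQL drivers fair.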
def mongo_setup_database():
    import pymongo

    db = pymongo.Connection()['dbtest']
    db.cache.drop()


def mongo_write():
    import pymongo

    db = pymongo.Connection()['dbtest']
    for x in xrange(NUMBER):
        _hash, data = random_data(x)
        db.cache.save({'_id': _hash, 'data': Binary(data)}, safe=True)
        yield 1


def mongo_read(hash_list):
    import pymongo

    db = pymongo.Connection()['dbtest']
    for _hash in hash_list:
        item = db.cache.find_one({'_id': _hash})
        page = parse_page(item['data'])
        assert len(_hash) == 40
        assert len(page['body']) == len(DATA)
        assert page['body'].startswith(DATA[:100])
        yield 1


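# Tokyo Cabinet backend: a hash database stored in var/tc.db. The
# commented-out tune() call would enable the built-in deflate compression.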
def tc_setup_database():
    import tc

    if os.path.exists('var/tc.db'):
        os.unlink('var/tc.db')


def tc_write():
    import tc

    db = tc.HDB()
    #db.tune(-1, -1, -1, tc.HDBTDEFLATE)
    db.open('var/tc.db', tc.HDBOWRITER | tc.HDBOCREAT)

    for x in xrange(NUMBER):
        _hash, data = random_data(x)
        db[_hash] = data
        yield 1


def tc_read(hash_list):
    import tc

    db = tc.HDB()
    #db.tune(-1, -1, -1, tc.HDBTDEFLATE)
    db.open('var/tc.db', tc.HDBOWRITER | tc.HDBOCREAT)

    for _hash in hash_list:
        data = db[_hash]
        page = parse_page(data)
        assert len(_hash) == 40
        assert len(page['body']) == len(DATA)
        assert page['body'].startswith(DATA[:100])
        yield 1


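# For every enabled backend: recreate the storage, time NUMBER writes,
# then time NUMBER reads in shuffled key order, and print a summary.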
def main():
    messages = []
    hash_list = build_hash_list()
    print 'Number of operations: %d' % NUMBER
    print 'Size of data: %d' % len(DATA)

    for config in CONFIG:
        if config['enable']:
            setup_func = globals()['%s_setup_database' % config['key']]
            write_func = globals()['%s_write' % config['key']]
            read_func = globals()['%s_read' % config['key']]
            setup_func()

            print '%s: writing' % config['name']
            total = timeit(write_func)
            msg = '%s write: %0.2f sec.' % (config['name'], total)
            messages.append(msg)
            print msg

            print '%s: reading' % config['name']
            total = timeit(read_func, hash_list)
            msg = '%s read: %0.2f sec.' % (config['name'], total)
            messages.append(msg)
            print msg

    print 'Result:'
    print 'Number of operations: %d' % NUMBER
    print 'Size of data: %d' % len(DATA)
    for msg in messages:
        print msg


if __name__ == '__main__':
    main()
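
# A typical run (assuming the MySQL/Postgres credentials above, a local
# mongod, and a writable var/ directory alongside data/mara.html):
#
#   $ python test.py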