# dbtest/test.py

#!/usr/bin/env python
import MySQLdb
from random import randint, shuffle
import time
from hashlib import sha1
import pymongo
import os
import tc
import marshal
import zlib
from bson import Binary
import sys

# Mysql connection
# NOTE(review): credentials are hard-coded in source; consider moving to config/env.
conn = MySQLdb.connect(user='web', passwd='web-**')

# Mongodb connection -- selects the 'dbtest' database on a default localhost server
mongo = pymongo.Connection()['dbtest']

# Number of read/write operations
NUMBER = 100

# Size of data of one record in database
#DATA_SIZE = 1000

# Content to save into database
# About 70K original and 17K gzipped
DATA = open('data/mara.html').read()

# Backends to benchmark; set 'enable' to 0 to skip one.
# 'key' selects the <key>_setup_database / <key>_write / <key>_read functions
# via globals() lookup in main().
CONFIG = [
    {'key': 'mysql', 'enable': 1, 'name': 'MySQL'},
    {'key': 'mongo', 'enable': 1, 'name': 'MongoDB'},
    {'key': 'tc', 'enable': 1, 'name': 'Tokyo Cabinet'},
]


def mysql_setup_database():
    """Recreate the MySQL benchmark database and its `cache` table.

    Drops any existing `dbtest` database, then creates a fresh one with a
    MyISAM `cache` table: auto-increment id, unique 40-char SHA-1 hash key,
    and a blob payload column.
    """
    cur = conn.cursor()
    # `if exists` keeps the very first run (no database yet) from raising.
    cur.execute('drop database if exists dbtest')
    cur.execute('create database dbtest charset utf8')
    cur.execute('use dbtest')
    cur.execute('''
        create table cache (
            id int auto_increment primary key,
            hash varchar(40),
            data blob,
            unique (hash)
        ) engine = myisam
    ''')
    cur.close()


def random_data(number, only_hash=False):
    """Build one benchmark record for *number*.

    Returns a (hash, payload) pair where the hash is the hex SHA-1 of the
    number and the payload is a zlib-compressed marshal dump of a fake
    page dict.  With only_hash=True the payload is skipped (None) -- used
    when only the lookup key is needed.
    """
    digest = sha1(str(number)).hexdigest()
    payload = None
    if not only_hash:
        document = marshal.dumps({
            'url': 'http://enc-dic.com/enc_rus/Mara-705.html',
            'body': DATA,
        })
        payload = zlib.compress(document)
    return digest, payload


def parse_page(data):
    """Undo the packing done in random_data: decompress, then unmarshal."""
    return marshal.loads(zlib.decompress(data))


def build_hash_list():
    print 'Building hash list'
    hash_list =[]
    for x in xrange(NUMBER):
        _hash, data = random_data(x, only_hash=True)
        hash_list.append(_hash)
    shuffle(hash_list)
    return hash_list


def timeit(func, *args):
    """Exhaust generator func(*args) and return the elapsed wall time in seconds.

    While consuming the generator, prints the number of items produced in
    each ~1 second window as progress feedback, then the count of the final
    partial window (if any).
    """
    ts = time.time()
    second_ts = ts          # start of the current one-second progress window
    count = 0               # items yielded inside the current window
    for x in func(*args):
        count += 1
        now = time.time()
        if now - second_ts > 1:
            second_ts = now
            # trailing comma keeps progress counts on one line (Python 2 print)
            print (str(count)),
            sys.stdout.flush()
            count = 0
    if count:
        print str(count),
    print
    return time.time() - ts


def mysql_write():
    """Generator: insert NUMBER (hash, payload) records into the MySQL cache.

    Yields 1 after each insert so timeit() can report progress.
    """
    cur = conn.cursor()
    for n in xrange(NUMBER):
        key, payload = random_data(n)
        cur.execute('''
            insert into cache (hash, data) values(%s, %s)
        ''', (key, payload))
        yield 1


def mysql_read(hash_list):
    """Generator: fetch each hash from the MySQL cache and sanity-check it.

    Yields 1 per successful lookup so timeit() can report progress.
    """
    cur = conn.cursor()
    #cur.execute('load index into cache `cache`')
    for key in hash_list:
        cur.execute('''
            select id, hash, data from cache
            where hash = %s
        ''', (key,))
        row_id, row_hash, row_data = cur.fetchone()
        page = parse_page(row_data)
        assert len(row_hash) == 40
        assert len(page['body']) == len(DATA)
        assert page['body'].startswith(DATA[:100])
        yield 1


def mongo_setup_database():
    """Drop the MongoDB cache collection so the write benchmark starts empty."""
    cache = mongo.cache
    cache.drop()


def mongo_write():
    """Generator: save NUMBER documents into MongoDB, keyed by hash.

    Uses the hash as _id and wraps the payload in bson.Binary; safe=True
    forces an acknowledged write per document.  Yields 1 per save.
    """
    for n in xrange(NUMBER):
        key, payload = random_data(n)
        document = {'_id': key, 'data': Binary(payload)}
        mongo.cache.save(document, safe=True)
        yield 1


def mongo_read(hash_list):
    """Generator: fetch each document by _id and sanity-check its payload.

    Yields 1 per successful lookup so timeit() can report progress.
    """
    for key in hash_list:
        document = mongo.cache.find_one({'_id': key})
        page = parse_page(document['data'])
        assert len(key) == 40
        assert len(page['body']) == len(DATA)
        assert page['body'].startswith(DATA[:100])
        yield 1


def tc_setup_database():
    """Delete any leftover Tokyo Cabinet database file from a previous run."""
    path = 'var/tc.db'
    if os.path.exists(path):
        os.unlink(path)


def tc_write():
    """Generator: write NUMBER records into a Tokyo Cabinet hash database.

    Creates var/tc.db if missing; yields 1 per stored record.
    """
    hdb = tc.HDB()
    #hdb.tune(-1, -1, -1, tc.HDBTDEFLATE)
    hdb.open('var/tc.db', tc.HDBOWRITER | tc.HDBOCREAT)

    for n in xrange(NUMBER):
        key, payload = random_data(n)
        hdb[key] = payload
        yield 1


def tc_read(hash_list):
    """Generator: read each hash back from Tokyo Cabinet and sanity-check it.

    Opens the database read-only: this benchmark path never writes, and
    HDBOREADER avoids taking the exclusive writer lock that HDBOWRITER
    would acquire (the original opened with HDBOWRITER | HDBOCREAT).
    Yields 1 per successful lookup.
    """
    db = tc.HDB()
    #db.tune(-1, -1, -1, tc.HDBTDEFLATE)
    db.open('var/tc.db', tc.HDBOREADER)

    for _hash in hash_list:
        data = db[_hash]
        page = parse_page(data)
        assert len(_hash) == 40
        assert len(page['body']) == len(DATA)
        assert page['body'].startswith(DATA[:100])
        yield 1


def main():
    messages = []
    hash_list = build_hash_list()
    print 'Number of operations: %d' % NUMBER
    print 'Size of data: %d' % len(DATA)

    for config in CONFIG:
        if config['enable']:
            setup_func = globals()['%s_setup_database' % config['key']]
            write_func = globals()['%s_write' % config['key']]
            read_func = globals()['%s_read' % config['key']]
            setup_func()

            print '%s: writing' % config['name']
            total = timeit(write_func)
            msg = '%s write: %0.2f sec.' % (config['name'], total)
            messages.append(msg)
            print msg

            print '%s: reading' % config['name']
            total = timeit(read_func, hash_list)
            msg = '%s read: %0.2f sec.' % (config['name'], total)
            messages.append(msg)
            print msg

    print 'Result:'
    print 'Number of operations: %d' % NUMBER
    print 'Size of data: %d' % len(DATA)
    for msg in messages:
        print msg
    


if __name__ == '__main__':
    # Run the benchmark suite only when executed as a script, not on import.
    main()