# Source code for biolite.database

# BioLite - Tools for processing gene sequence data and automating workflows
# Copyright (c) 2012-2013 Brown University. All rights reserved.
# 
# This file is part of BioLite.
# 
# BioLite is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# BioLite is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with BioLite.  If not, see <http://www.gnu.org/licenses/>.

"""
Provides an interface to the underlying SQLite database that stores the BioLite
**catalog**, **runs**, **diagnostics**, and **programs** tables.

.. _catalog-table:

catalog Table
-------------

::

%s

.. _runs-table:

runs Table
----------

::

%s

diagnostics Table
-----------------

::

%s

.. _programs-table:

programs Table
--------------

::

%s
"""

import os
import sqlite3
import textwrap

import config
import utils


# Internal data and helper functions.

# Global SQLite connection shared by the functions in this module: connect()
# assigns it, and disconnect() closes it and resets it to None.
_db = None


def _create_sql(name, schema, index):
	"""
	Build the SQL script that creates table `name` and its indices.

	`schema` is a non-empty sequence of (column, type) pairs, listed in the
	order the columns should appear in the table. `index` is an iterable of
	column names; each one gets its own single-column index called
	`<name>_<column>`.

	Returns the statements as one string, one line per column or index,
	joined with newlines and without a trailing newline. Every line starts
	with at least one space.
	"""
	columns = ["   %s %s" % column for column in schema]
	# Column definitions are comma-separated; the last one closes the
	# CREATE TABLE statement instead.
	body = [line + "," for line in columns[:-1]] + [columns[-1] + ");"]
	header = [" CREATE TABLE %s (" % name]
	indices = [
		" CREATE INDEX {0}_{1} ON {0}({1});".format(name, column)
		for column in index]
	return '\n'.join(header + body + indices)


def _create_db():
	"""
	Create the BioLite tables and their indices in the open database.

	Runs the catalog, runs, diagnostics and programs schema scripts, in that
	order, as a single script on the global connection. The connection must
	already be open.
	"""
	schema_scripts = (catalog_sql, runs_sql, diagnostics_sql, programs_sql)
	_db.executescript(''.join(schema_scripts))


# External data.
# Columns of the runs table as (name, SQL type) pairs, in table order.
runs_schema = (
	('run_id', 'INTEGER PRIMARY KEY AUTOINCREMENT'),
	('id', 'VARCHAR(256)'),
	('name', 'VARCHAR(32)'),
	('hostname', 'VARCHAR(32)'),
	('username', 'VARCHAR(32)'),
	('timestamp', 'DATETIME'),
	('hidden', "INTEGER DEFAULT '0'"),
)

# Columns of the runs table that each get a single-column index.
runs_index = ('id', 'name')

# SQL script that creates the runs table and its indices.
runs_sql = _create_sql(name='runs', schema=runs_schema, index=runs_index)

# Columns of the diagnostics table as (name, SQL type) pairs, in table order.
diagnostics_schema = (
	('id', 'VARCHAR(256)'),
	('run_id', 'INTEGER'),
	('entity', 'VARCHAR(128)'),
	('attribute', 'VARCHAR(32)'),
	('value', 'TEXT'),
	('timestamp', 'DATETIME'))

# Columns of the diagnostics table that each get a single-column index.
diagnostics_index = ('id', 'run_id', 'entity', 'attribute', 'timestamp')

# _create_sql() only emits single-column indices, so the composite unique
# index on (run_id, entity, attribute) is appended by hand. It goes on its own
# line, with the same leading space as the generated statements, so that the
# schema substituted into the module documentation keeps one statement per
# line instead of fusing two statements together.
diagnostics_sql = '\n'.join((
	_create_sql('diagnostics', diagnostics_schema, diagnostics_index),
	" CREATE UNIQUE INDEX entry ON diagnostics(run_id,entity,attribute);"))

# Columns of the catalog table as (name, SQL type) pairs, in table order.
catalog_schema = (
	('id', 'VARCHAR(256) PRIMARY KEY NOT NULL'),
	('paths', 'TEXT'),
	('species', 'VARCHAR(256)'),
	('ncbi_id', 'INTEGER'),
	('itis_id', 'INTEGER'),
	('extraction_id', 'VARCHAR(256)'),
	('library_id', 'VARCHAR(256)'),
	('library_type', 'VARCHAR(256)'),
	('tissue', 'VARCHAR(256)'),
	('sequencer', 'VARCHAR(256)'),
	('seq_center', 'VARCHAR(256)'),
	('note', 'TEXT'),
	('sample_prep', 'TEXT'),
	('timestamp', 'DATETIME'),
)

# Every catalog column is indexed except the first two: id is already the
# primary key, and paths is left unindexed.
catalog_index = [column for column, _sql_type in catalog_schema[2:]]

# SQL script that creates the catalog table and its indices.
catalog_sql = _create_sql(
	name='catalog', schema=catalog_schema, index=catalog_index)

# Columns of the programs table as (name, SQL type) pairs, in table order.
programs_schema = (
	('binary', 'CHAR(32) PRIMARY KEY NOT NULL'),
	('name', 'VARCHAR(256)'),
	('version', 'TEXT'),
)

# Columns of the programs table that each get a single-column index.
programs_index = ('name',)

# SQL script that creates the programs table and its index.
programs_sql = _create_sql(
	name='programs', schema=programs_schema, index=programs_index)

# Add the SQL schema for each table to the documentation. When Python runs
# with -OO the module docstring is stripped and __doc__ is None; there is then
# nothing to fill in, and applying %= to None would make the import fail.
if __doc__ is not None:
	__doc__ %= (catalog_sql, runs_sql, diagnostics_sql, programs_sql)


# Public functions. ###

def connect():
	"""
	Establish a global database connection.

	Does nothing if a connection is already open. Otherwise the database path
	is looked up with `config.get_resource('database')` and opened with a
	60 second lock timeout and `isolation_level=None`. If no file exists at
	that path yet, its parent directory is created first and the tables are
	created right after connecting.
	"""
	global _db
	if _db is None:
		path = config.get_resource('database')
		# Remember whether the file was there *before* connecting, because
		# sqlite3.connect() creates it.
		exists = os.path.isfile(path)
		if not exists:
			utils.safe_mkdir(os.path.dirname(path))
		_db = sqlite3.connect(path, timeout=60.0, isolation_level=None)
		if not exists:
			_create_db()
def disconnect():
	"""
	Close the global database connection, set it to None.

	Does nothing if no connection is open, so it is safe to call repeatedly.
	"""
	global _db
	if _db is not None:
		_db.close()
		_db = None
def execute(*args, **kwargs):
	"""
	Run a statement on the global database connection.

	The connection is opened first if necessary. All arguments are passed
	unchanged to the connection's `execute` method, so the first positional
	argument is the SQL text and the optional second one holds the values to
	bind. Returns whatever that method returns.

	If sqlite3 raises an InterfaceError, the error, the SQL text and the bound
	values are reported through `utils.die`.
	"""
	if _db is None:
		connect()
	try:
		return _db.execute(*args, **kwargs)
	except sqlite3.InterfaceError as e:
		# The bound values are optional; indexing args[1] unconditionally
		# would raise IndexError here and hide the actual sqlite error.
		values = args[1] if len(args) > 1 else None
		utils.die(
			"sqlite error:\n", e, '\n',
			"SQL:", textwrap.dedent(args[0]), '\n',
			"Values:", values, '\n')