Commits

James Taylor  committed 3eea6c8

New motif/matrix classes. Includes a reader for TRANSFAC style databases
(which can be easily extended), and position specific weight/frequency and
scoring matrix code using numpy. Fast string scoring is provided through
a Cython extension.

This doesn't do everything the bx.pwm module does, but I think it is a little
better organized, and includes more documentation and tests. It would be
nice to see what we can move from bx.pwm into here.

  • Participants
  • Parent commits b4d901b

Comments (0)

Files changed (9)

File lib/bx/motif/__init__.py

Empty file added.

File lib/bx/motif/_pwm.c

+/* Generated by Cython 0.9.6.14 on Sat May 31 16:30:00 2008 */
+
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+#include "structmember.h"
+#ifndef PY_LONG_LONG
+  #define PY_LONG_LONG LONG_LONG
+#endif
+#if PY_VERSION_HEX < 0x02050000
+  typedef int Py_ssize_t;
+  #define PY_SSIZE_T_MAX INT_MAX
+  #define PY_SSIZE_T_MIN INT_MIN
+  #define PyInt_FromSsize_t(z) PyInt_FromLong(z)
+  #define PyInt_AsSsize_t(o)   PyInt_AsLong(o)
+  #define PyNumber_Index(o)    PyNumber_Int(o)
+  #define PyIndex_Check(o)     PyNumber_Check(o)
+#endif
+#if PY_VERSION_HEX < 0x02040000
+  #define METH_COEXIST 0
+#endif
+#ifndef __stdcall
+  #define __stdcall
+#endif
+#ifndef __cdecl
+  #define __cdecl
+#endif
+#ifdef __cplusplus
+#define __PYX_EXTERN_C extern "C"
+#else
+#define __PYX_EXTERN_C extern
+#endif
+#include <math.h>
+#include "numpy/arrayobject.h"
+
+
+#ifdef __GNUC__
+#define INLINE __inline__
+#elif _WIN32
+#define INLINE __inline
+#else
+#define INLINE 
+#endif
+
+typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/
+typedef struct {PyObject **p; char *s; long n; int is_unicode;} __Pyx_StringTabEntry; /*proto*/
+
+
+
+static int __pyx_skip_dispatch = 0;
+
+
+/* Type Conversion Predeclarations */
+
+#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
+static INLINE int __Pyx_PyObject_IsTrue(PyObject* x);
+static INLINE PY_LONG_LONG __pyx_PyInt_AsLongLong(PyObject* x);
+static INLINE unsigned PY_LONG_LONG __pyx_PyInt_AsUnsignedLongLong(PyObject* x);
+static INLINE Py_ssize_t __pyx_PyIndex_AsSsize_t(PyObject* b);
+
+#define __pyx_PyInt_AsLong(x) (PyInt_CheckExact(x) ? PyInt_AS_LONG(x) : PyInt_AsLong(x))
+#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
+
+static INLINE unsigned char __pyx_PyInt_unsigned_char(PyObject* x);
+static INLINE unsigned short __pyx_PyInt_unsigned_short(PyObject* x);
+static INLINE char __pyx_PyInt_char(PyObject* x);
+static INLINE short __pyx_PyInt_short(PyObject* x);
+static INLINE int __pyx_PyInt_int(PyObject* x);
+static INLINE long __pyx_PyInt_long(PyObject* x);
+static INLINE signed char __pyx_PyInt_signed_char(PyObject* x);
+static INLINE signed short __pyx_PyInt_signed_short(PyObject* x);
+static INLINE signed int __pyx_PyInt_signed_int(PyObject* x);
+static INLINE signed long __pyx_PyInt_signed_long(PyObject* x);
+static INLINE long double __pyx_PyInt_long_double(PyObject* x);
+#ifdef __GNUC__
+/* Test for GCC > 2.95 */
+#if __GNUC__ > 2 ||               (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)) 
+#define likely(x)   __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else /* __GNUC__ > 2 ... */
+#define likely(x)   (x)
+#define unlikely(x) (x)
+#endif /* __GNUC__ > 2 ... */
+#else /* __GNUC__ */
+#define likely(x)   (x)
+#define unlikely(x) (x)
+#endif /* __GNUC__ */
+    
+static PyObject *__pyx_m;
+static PyObject *__pyx_b;
+static PyObject *__pyx_empty_tuple;
+static int __pyx_lineno;
+static int __pyx_clineno = 0;
+static char * __pyx_cfilenm= __FILE__;
+static char *__pyx_filename;
+static char **__pyx_f;
+
+static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name, int exact); /*proto*/
+
+static INLINE void __Pyx_RaiseArgtupleTooLong(Py_ssize_t num_expected, Py_ssize_t num_found); /*proto*/
+
+static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, long size);  /*proto*/
+
+static PyObject *__Pyx_ImportModule(char *name); /*proto*/
+
+static void __Pyx_AddTraceback(char *funcname); /*proto*/
+
+/* Declarations */
+
+
+static PyTypeObject *__pyx_ptype_2bx_5motif_4_pwm_ndarray = 0;
+
+
+/* Implementation of bx.motif._pwm */
+
+/* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":16
+ *     ctypedef float npy_float32
+ * 
+ * def score_string( ndarray matrix, ndarray char_to_index, object string, ndarray rval ):             # <<<<<<<<<<<<<< 
+ *     """
+ *     matrix *must* be a 2d array of type float32
+ */
+
+static PyObject *__pyx_pf_2bx_5motif_4_pwm_score_string(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_2bx_5motif_4_pwm_score_string[] = "\n    matrix *must* be a 2d array of type float32\n    char_to_index *must* be a 1d array of type int16\n    rval *must* be a 1d array of type float32 and the same length as string\n    ";
+static PyObject *__pyx_pf_2bx_5motif_4_pwm_score_string(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyArrayObject *__pyx_v_matrix = 0;
+  PyArrayObject *__pyx_v_char_to_index = 0;
+  PyObject *__pyx_v_string = 0;
+  PyArrayObject *__pyx_v_rval = 0;
+  char *__pyx_v_buffer;
+  Py_ssize_t __pyx_v_len;
+  float __pyx_v_score;
+  int __pyx_v_i;
+  int __pyx_v_j;
+  int __pyx_v_matrix_width;
+  npy_int16 __pyx_v_char_index;
+  int __pyx_v_stop;
+  PyObject *__pyx_r;
+  int __pyx_1;
+  int __pyx_2;
+  static char *__pyx_argnames[] = {"matrix","char_to_index","string","rval",0};
+  if (likely(!__pyx_kwds) && likely(PyTuple_GET_SIZE(__pyx_args) == 4)) {
+    __pyx_v_matrix = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 0));
+    __pyx_v_char_to_index = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 1));
+    __pyx_v_string = PyTuple_GET_ITEM(__pyx_args, 2);
+    __pyx_v_rval = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 3));
+  }
+  else {
+    if (unlikely(!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "OOOO", __pyx_argnames, &__pyx_v_matrix, &__pyx_v_char_to_index, &__pyx_v_string, &__pyx_v_rval))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L2;}
+  }
+  goto __pyx_L3;
+  __pyx_L2:;
+  __Pyx_AddTraceback("bx.motif._pwm.score_string");
+  return NULL;
+  __pyx_L3:;
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_matrix), __pyx_ptype_2bx_5motif_4_pwm_ndarray, 1, "matrix", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1;}
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_char_to_index), __pyx_ptype_2bx_5motif_4_pwm_ndarray, 1, "char_to_index", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1;}
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_rval), __pyx_ptype_2bx_5motif_4_pwm_ndarray, 1, "rval", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1;}
+
+  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":26
+ *     cdef float score
+ *     cdef int i, j
+ *     cdef int matrix_width = matrix.dimensions[0]             # <<<<<<<<<<<<<< 
+ *     cdef npy_int16 char_index
+ *     # Get input string as character pointer
+ */
+  __pyx_v_matrix_width = (__pyx_v_matrix->dimensions[0]);
+
+
+  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":29
+ *     cdef npy_int16 char_index
+ *     # Get input string as character pointer
+ *     PyString_AsStringAndSize( string, &buffer, &len )             # <<<<<<<<<<<<<< 
+ *     # Loop over each position in the string 
+ *     cdef int stop = len - matrix.dimensions[0] + 1
+ */
+  __pyx_1 = PyString_AsStringAndSize(__pyx_v_string, (&__pyx_v_buffer), (&__pyx_v_len)); if (unlikely(__pyx_1 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1;}
+
+  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":31
+ *     PyString_AsStringAndSize( string, &buffer, &len )
+ *     # Loop over each position in the string 
+ *     cdef int stop = len - matrix.dimensions[0] + 1             # <<<<<<<<<<<<<< 
+ *     for i from 0 <= i < stop:
+ *         score = 0.0
+ */
+  __pyx_v_stop = ((__pyx_v_len - (__pyx_v_matrix->dimensions[0])) + 1);
+
+
+  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":32
+ *     # Loop over each position in the string 
+ *     cdef int stop = len - matrix.dimensions[0] + 1
+ *     for i from 0 <= i < stop:             # <<<<<<<<<<<<<< 
+ *         score = 0.0
+ *         for j from 0 <= j < matrix_width:
+ */
+  for (__pyx_v_i = 0; __pyx_v_i < __pyx_v_stop; __pyx_v_i++) {
+
+    /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":33
+ *     cdef int stop = len - matrix.dimensions[0] + 1
+ *     for i from 0 <= i < stop:
+ *         score = 0.0             # <<<<<<<<<<<<<< 
+ *         for j from 0 <= j < matrix_width:
+ *             char_index = ( <npy_int16 *> ( char_to_index.data + buffer[i+j] * char_to_index.strides[0] ) )[0]
+ */
+    __pyx_v_score = 0.0;
+
+    /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":34
+ *     for i from 0 <= i < stop:
+ *         score = 0.0
+ *         for j from 0 <= j < matrix_width:             # <<<<<<<<<<<<<< 
+ *             char_index = ( <npy_int16 *> ( char_to_index.data + buffer[i+j] * char_to_index.strides[0] ) )[0]
+ *             if char_index < 0: 
+ */
+    for (__pyx_v_j = 0; __pyx_v_j < __pyx_v_matrix_width; __pyx_v_j++) {
+
+      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":35
+ *         score = 0.0
+ *         for j from 0 <= j < matrix_width:
+ *             char_index = ( <npy_int16 *> ( char_to_index.data + buffer[i+j] * char_to_index.strides[0] ) )[0]             # <<<<<<<<<<<<<< 
+ *             if char_index < 0: 
+ *                 break
+ */
+      __pyx_v_char_index = (((npy_int16 *)(__pyx_v_char_to_index->data + ((__pyx_v_buffer[(__pyx_v_i + __pyx_v_j)]) * (__pyx_v_char_to_index->strides[0]))))[0]);
+
+      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":36
+ *         for j from 0 <= j < matrix_width:
+ *             char_index = ( <npy_int16 *> ( char_to_index.data + buffer[i+j] * char_to_index.strides[0] ) )[0]
+ *             if char_index < 0:             # <<<<<<<<<<<<<< 
+ *                 break
+ *             score += ( <npy_float32*> ( matrix.data + j * matrix.strides[0] + char_index * matrix.strides[1] ) )[0]
+ */
+      __pyx_2 = (__pyx_v_char_index < 0);
+      if (__pyx_2) {
+        goto __pyx_L7;
+        goto __pyx_L8;
+      }
+      __pyx_L8:;
+
+      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":38
+ *             if char_index < 0: 
+ *                 break
+ *             score += ( <npy_float32*> ( matrix.data + j * matrix.strides[0] + char_index * matrix.strides[1] ) )[0]             # <<<<<<<<<<<<<< 
+ *         else:
+ *             ( <npy_float32*> ( rval.data + i * rval.strides[0] ) )[0] = score */
+      __pyx_v_score += (((npy_float32 *)((__pyx_v_matrix->data + (__pyx_v_j * (__pyx_v_matrix->strides[0]))) + (__pyx_v_char_index * (__pyx_v_matrix->strides[1]))))[0]);
+    }
+    /*else*/ {
+
+      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":40
+ *             score += ( <npy_float32*> ( matrix.data + j * matrix.strides[0] + char_index * matrix.strides[1] ) )[0]
+ *         else:
+ *             ( <npy_float32*> ( rval.data + i * rval.strides[0] ) )[0] = score             # <<<<<<<<<<<<<< 
+ */
+      (((npy_float32 *)(__pyx_v_rval->data + (__pyx_v_i * (__pyx_v_rval->strides[0]))))[0]) = __pyx_v_score;
+    }
+    __pyx_L7:;
+  }
+
+  __pyx_r = Py_None; Py_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1:;
+  __Pyx_AddTraceback("bx.motif._pwm.score_string");
+  __pyx_r = NULL;
+  __pyx_L0:;
+  return __pyx_r;
+}
+
+static struct PyMethodDef __pyx_methods[] = {
+  {"score_string", (PyCFunction)__pyx_pf_2bx_5motif_4_pwm_score_string, METH_VARARGS|METH_KEYWORDS, __pyx_doc_2bx_5motif_4_pwm_score_string},
+  {0, 0, 0, 0}
+};
+
+static void __pyx_init_filenames(void); /*proto*/
+
+PyMODINIT_FUNC init_pwm(void); /*proto*/
+PyMODINIT_FUNC init_pwm(void) {
+  /*--- Libary function declarations ---*/
+  __pyx_init_filenames();
+  /*--- Module creation code ---*/
+  __pyx_m = Py_InitModule4("_pwm", __pyx_methods, 0, 0, PYTHON_API_VERSION);
+  if (!__pyx_m) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1;};
+  __pyx_b = PyImport_AddModule("__builtin__");
+  if (!__pyx_b) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1;};
+  if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1;};
+  /*--- Intern code ---*/
+  /*--- String init code ---*/
+  /*--- Builtin init code ---*/
+  __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1;}
+  __pyx_skip_dispatch = 0;
+  /*--- Global init code ---*/
+  /*--- Function export code ---*/
+  /*--- Type init code ---*/
+  __pyx_ptype_2bx_5motif_4_pwm_ndarray = __Pyx_ImportType("numpy", "ndarray", sizeof(PyArrayObject)); if (unlikely(!__pyx_ptype_2bx_5motif_4_pwm_ndarray)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1;}
+  /*--- Type import code ---*/
+  /*--- Function import code ---*/
+  /*--- Execution code ---*/
+
+  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":16
+ *     ctypedef float npy_float32
+ * 
+ * def score_string( ndarray matrix, ndarray char_to_index, object string, ndarray rval ):             # <<<<<<<<<<<<<< 
+ *     """
+ *     matrix *must* be a 2d array of type float32
+ */
+  return;
+  __pyx_L1:;
+  __Pyx_AddTraceback("bx.motif._pwm");
+}
+
+static char *__pyx_filenames[] = {
+  "_pwm.pyx",
+};
+
+/* Runtime support code */
+
+static void __pyx_init_filenames(void) {
+  __pyx_f = __pyx_filenames;
+}
+
+static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name, int exact) {
+    if (!type) {
+        PyErr_Format(PyExc_SystemError, "Missing type object");
+        return 0;
+    }
+    if (none_allowed && obj == Py_None) return 1;
+    else if (exact) {
+        if (obj->ob_type == type) return 1;
+    }
+    else {
+        if (PyObject_TypeCheck(obj, type)) return 1;
+    }
+    PyErr_Format(PyExc_TypeError,
+        "Argument '%s' has incorrect type (expected %s, got %s)",
+        name, type->tp_name, obj->ob_type->tp_name);
+    return 0;
+}
+
+static INLINE void __Pyx_RaiseArgtupleTooLong(
+    Py_ssize_t num_expected,
+    Py_ssize_t num_found)
+{
+    const char* error_message =
+    #if PY_VERSION_HEX < 0x02050000
+        "function takes at most %d positional arguments (%d given)";
+    #else
+        "function takes at most %zd positional arguments (%zd given)";
+    #endif
+    PyErr_Format(PyExc_TypeError, error_message, num_expected, num_found);
+}
+
+#ifndef __PYX_HAVE_RT_ImportType
+#define __PYX_HAVE_RT_ImportType
+static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name,
+    long size)
+{
+    PyObject *py_module = 0;
+    PyObject *result = 0;
+    PyObject *py_name = 0;
+    
+    py_name = PyString_FromString(module_name);
+    if (!py_name)
+        goto bad;
+
+    py_module = __Pyx_ImportModule(module_name);
+    if (!py_module)
+        goto bad;
+    result = PyObject_GetAttrString(py_module, class_name);
+    if (!result)
+        goto bad;
+    if (!PyType_Check(result)) {
+        PyErr_Format(PyExc_TypeError, 
+            "%s.%s is not a type object",
+            module_name, class_name);
+        goto bad;
+    }
+    if (((PyTypeObject *)result)->tp_basicsize != size) {
+        PyErr_Format(PyExc_ValueError, 
+            "%s.%s does not appear to be the correct type object",
+            module_name, class_name);
+        goto bad;
+    }
+    return (PyTypeObject *)result;
+bad:
+    Py_XDECREF(py_name);
+    Py_XDECREF(result);
+    return 0;
+}
+#endif
+
+#ifndef __PYX_HAVE_RT_ImportModule
+#define __PYX_HAVE_RT_ImportModule
+static PyObject *__Pyx_ImportModule(char *name) {
+    PyObject *py_name = 0;
+    PyObject *py_module = 0;
+    
+    py_name = PyString_FromString(name);
+    if (!py_name)
+        goto bad;
+    py_module = PyImport_Import(py_name);
+    Py_DECREF(py_name);
+    return py_module;
+bad:
+    Py_XDECREF(py_name);
+    return 0;
+}
+#endif
+
+#include "compile.h"
+#include "frameobject.h"
+#include "traceback.h"
+
+static void __Pyx_AddTraceback(char *funcname) {
+    PyObject *py_srcfile = 0;
+    PyObject *py_funcname = 0;
+    PyObject *py_globals = 0;
+    PyObject *empty_string = 0;
+    PyCodeObject *py_code = 0;
+    PyFrameObject *py_frame = 0;
+    
+    py_srcfile = PyString_FromString(__pyx_filename);
+    if (!py_srcfile) goto bad;
+    if (__pyx_clineno) {
+        py_funcname = PyString_FromFormat( "%s (%s:%u)", funcname, __pyx_cfilenm, __pyx_clineno);
+    }
+    else {
+        py_funcname = PyString_FromString(funcname);
+    }
+    if (!py_funcname) goto bad;
+    py_globals = PyModule_GetDict(__pyx_m);
+    if (!py_globals) goto bad;
+    empty_string = PyString_FromString("");
+    if (!empty_string) goto bad;
+    py_code = PyCode_New(
+        0,            /*int argcount,*/
+        0,            /*int nlocals,*/
+        0,            /*int stacksize,*/
+        0,            /*int flags,*/
+        empty_string, /*PyObject *code,*/
+        __pyx_empty_tuple,  /*PyObject *consts,*/
+        __pyx_empty_tuple,  /*PyObject *names,*/
+        __pyx_empty_tuple,  /*PyObject *varnames,*/
+        __pyx_empty_tuple,  /*PyObject *freevars,*/
+        __pyx_empty_tuple,  /*PyObject *cellvars,*/
+        py_srcfile,   /*PyObject *filename,*/
+        py_funcname,  /*PyObject *name,*/
+        __pyx_lineno,   /*int firstlineno,*/
+        empty_string  /*PyObject *lnotab*/
+    );
+    if (!py_code) goto bad;
+    py_frame = PyFrame_New(
+        PyThreadState_Get(), /*PyThreadState *tstate,*/
+        py_code,             /*PyCodeObject *code,*/
+        py_globals,          /*PyObject *globals,*/
+        0                    /*PyObject *locals*/
+    );
+    if (!py_frame) goto bad;
+    py_frame->f_lineno = __pyx_lineno;
+    PyTraceBack_Here(py_frame);
+bad:
+    Py_XDECREF(py_srcfile);
+    Py_XDECREF(py_funcname);
+    Py_XDECREF(empty_string);
+    Py_XDECREF(py_code);
+    Py_XDECREF(py_frame);
+}
+
+/* Type Conversion Functions */
+
+static INLINE Py_ssize_t __pyx_PyIndex_AsSsize_t(PyObject* b) {
+  Py_ssize_t ival;
+  PyObject* x = PyNumber_Index(b);
+  if (!x) return -1;
+  ival = PyInt_AsSsize_t(x);
+  Py_DECREF(x);
+  return ival;
+}
+
+static INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
+   if (x == Py_True) return 1;
+   else if (x == Py_False) return 0;
+   else return PyObject_IsTrue(x);
+}
+
+static INLINE PY_LONG_LONG __pyx_PyInt_AsLongLong(PyObject* x) {
+    if (PyInt_CheckExact(x)) {
+        return PyInt_AS_LONG(x);
+    }
+    else if (PyLong_CheckExact(x)) {
+        return PyLong_AsLongLong(x);
+    }
+    else {
+        PY_LONG_LONG val;
+        PyObject* tmp = PyNumber_Int(x); if (!tmp) return (PY_LONG_LONG)-1;
+        val = __pyx_PyInt_AsLongLong(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+}
+
+static INLINE unsigned PY_LONG_LONG __pyx_PyInt_AsUnsignedLongLong(PyObject* x) {
+    if (PyInt_CheckExact(x)) {
+        long val = PyInt_AS_LONG(x);
+        if (unlikely(val < 0)) {
+            PyErr_SetString(PyExc_TypeError, "Negative assignment to unsigned type.");
+            return (unsigned PY_LONG_LONG)-1;
+        }
+        return val;
+    }
+    else if (PyLong_CheckExact(x)) {
+        return PyLong_AsUnsignedLongLong(x);
+    }
+    else {
+        PY_LONG_LONG val;
+        PyObject* tmp = PyNumber_Int(x); if (!tmp) return (PY_LONG_LONG)-1;
+        val = __pyx_PyInt_AsUnsignedLongLong(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+}
+
+
+static INLINE unsigned char __pyx_PyInt_unsigned_char(PyObject* x) {
+    if (sizeof(unsigned char) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        unsigned char val = (unsigned char)long_val;
+        if (unlikely((val != long_val)  || (long_val < 0))) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to unsigned char");
+            return (unsigned char)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE unsigned short __pyx_PyInt_unsigned_short(PyObject* x) {
+    if (sizeof(unsigned short) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        unsigned short val = (unsigned short)long_val;
+        if (unlikely((val != long_val)  || (long_val < 0))) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to unsigned short");
+            return (unsigned short)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE char __pyx_PyInt_char(PyObject* x) {
+    if (sizeof(char) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        char val = (char)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to char");
+            return (char)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE short __pyx_PyInt_short(PyObject* x) {
+    if (sizeof(short) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        short val = (short)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to short");
+            return (short)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE int __pyx_PyInt_int(PyObject* x) {
+    if (sizeof(int) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        int val = (int)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to int");
+            return (int)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE long __pyx_PyInt_long(PyObject* x) {
+    if (sizeof(long) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        long val = (long)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to long");
+            return (long)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE signed char __pyx_PyInt_signed_char(PyObject* x) {
+    if (sizeof(signed char) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        signed char val = (signed char)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to signed char");
+            return (signed char)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE signed short __pyx_PyInt_signed_short(PyObject* x) {
+    if (sizeof(signed short) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        signed short val = (signed short)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to signed short");
+            return (signed short)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE signed int __pyx_PyInt_signed_int(PyObject* x) {
+    if (sizeof(signed int) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        signed int val = (signed int)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to signed int");
+            return (signed int)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE signed long __pyx_PyInt_signed_long(PyObject* x) {
+    if (sizeof(signed long) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        signed long val = (signed long)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to signed long");
+            return (signed long)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE long double __pyx_PyInt_long_double(PyObject* x) {
+    if (sizeof(long double) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        long double val = (long double)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to long double");
+            return (long double)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}

File lib/bx/motif/_pwm.pyx

+"""
+Extensions used by the `pwm` module.
+"""
+
+cdef extern from "Python.h":
+    int PyString_AsStringAndSize(object obj, char **buffer, Py_ssize_t* length) except -1
+
+cdef extern from "numpy/arrayobject.h":
+    ctypedef int intp
+    ctypedef extern class numpy.ndarray [object PyArrayObject]:
+        cdef char *data
+        cdef int nd
+        cdef intp *dimensions
+        cdef intp *strides
+        cdef int flags
+    # These might be other types in the actual header depending on platform 
+    ctypedef int npy_int16
+    ctypedef float npy_float32
+
+def score_string( ndarray matrix, ndarray char_to_index, object string, ndarray rval ):
+    """
+    Score each position in string `string` using the scoring matrix `matrix`.
+    Characters in the string are mapped to columns in the matrix by `char_to_index`
+    and the score for each position is stored in `rval`.
+    
+    matrix *must* be a 2d array of type float32
+    char_to_index *must* be a 1d array of type int16
+    rval *must* be a 1d array of type float32 and the same length as string
+    """
+    cdef char *buffer
+    cdef Py_ssize_t len
+    cdef float score
+    cdef int i, j
+    cdef int matrix_width = matrix.dimensions[0]
+    cdef npy_int16 char_index
+    # Get input string as character pointer
+    PyString_AsStringAndSize( string, &buffer, &len )
+    # Loop over each position in the string 
+    cdef int stop = len - matrix.dimensions[0] + 1
+    for i from 0 <= i < stop:
+        score = 0.0
+        for j from 0 <= j < matrix_width:
+            char_index = ( <npy_int16 *> ( char_to_index.data + buffer[i+j] * char_to_index.strides[0] ) )[0]
+            if char_index < 0: 
+                break
+            score += ( <npy_float32*> ( matrix.data + j * matrix.strides[0] + char_index * matrix.strides[1] ) )[0]
+        else:
+            ( <npy_float32*> ( rval.data + i * rval.strides[0] ) )[0] = score

File lib/bx/motif/io/__init__.py

Empty file added.

File lib/bx/motif/io/transfac.py

+"""
+Classes for reading and writing motif data.
+"""
+
+from bx.motif.pwm import FrequencyMatrix
+
+class TransfacMotif( object ):
+    
+    def __init__( self ):
+        self.accession = None
+        self.id = None
+        self.dates = None
+        self.name = None
+        self.description = None
+        self.binding_factors = None
+        self.basis = None
+        self.comment = None
+        self.matrix = None
+        
+class TransfacReader( object ):
+    """
+    Reads motifs in TRANSFAC format.
+    """
+    
+    def __init__( self, input ):
+        self.input = iter( input )
+        self.input_exhausted = False
+    
+    def __iter__( self ):
+        return self
+    
+    def next( self ):
+        rval = self.next_motif()
+        while rval is None:
+            rval = self.next_motif()
+        return rval
+    
+    def next_motif( self ):
+        if self.input_exhausted:
+            raise StopIteration
+        # Accumulate lines until either the end of record indicator "//" is
+        # encounted or the input is exhausted.   
+        lines = []
+        while 1:
+            try:
+                line = self.input.next()
+            except StopIteration, e:
+                self.input_exhausted = True
+                break
+            if line.startswith( "//" ):
+                break
+            if not line.isspace():
+                lines.append( line )
+        if lines:
+            return self.parse_record( lines )
+    
+    parse_actions = {
+        "AC": ( "store_single", "accession" ),
+        "ID": ( "store_single", "id" ),
+        "DT": ( "store_single_list", "dates" ),
+        "NA": ( "store_single", "name" ),
+        "DE": ( "store_block", "description" ),
+        "BF": ( "store_single_list", "binding_factors" ),
+        "BA": ( "store_block", "basis" ),
+        "CC": ( "store_block", "comment" ),
+        "P0": ( "store_matrix", "matrix" )
+    }
+    
+    def parse_record( self, lines ):
+        """
+        Parse a TRANSFAC record out of `lines` and return a motif.
+        """
+        # Break lines up
+        temp_lines = []
+        for line in lines:
+            fields = line.rstrip( "\r\n" ).split( None, 1 )
+            if len( fields ) == 1:
+                fields.append( "" )
+            temp_lines.append( fields )
+        lines = temp_lines
+        # Fill in motif from lines
+        motif = TransfacMotif()
+        current_line = 0
+        while 1:
+            # Done parsing if no more lines to consume
+            if current_line >= len( lines ):
+                break
+            # Remove prefix and first separator from line
+            prefix, rest = lines[ current_line ]
+            # No action for this prefix, just ignore the line
+            if prefix not in self.parse_actions:
+                current_line += 1
+                continue
+            # Get action for line
+            action = self.parse_actions[ prefix ]
+            # Store a single line value
+            if action[0] == "store_single":
+                key = action[1]
+                setattr( motif, key, rest )
+                current_line += 1
+            # Add a single line value to a list
+            if action[0] == "store_single_list":
+                key = action[1]
+                if not getattr( motif, key ):
+                    setattr( motif, key, [] )
+                getattr( motif, key ).append( rest )
+                current_line += 1
+            # Store a block of text
+            if action[0] == "store_block":
+                key = action[1]
+                value = []
+                while current_line < len( lines ) and lines[ current_line ][0] == prefix:
+                    value.append( lines[current_line][1] )
+                    current_line += 1
+                setattr( motif, key, str.join( "\n", value ) )
+            # Store a matrix
+            if action[0] == "store_matrix":
+                # First line is alphabet
+                alphabet = rest.split()
+                alphabet_size = len( alphabet )
+                current_row = 0
+                rows = []
+                pattern = ""
+                current_line += 1
+                # Next lines are the rows of the matrix (we allow 0 rows)
+                while current_line < len( lines ):
+                    prefix, rest = lines[ current_line ]
+                    # Prefix should be a two digit 0 padded row number
+                    if prefix != ( "%02d" % ( current_row + 1 ) ):
+                        break
+                    # The first `alphabet_size` fields are the row values
+                    values = rest.split()
+                    rows.append( map( float, values[:alphabet_size] ) )
+                    # TRANSFAC includes an extra column with the IUPAC code
+                    if len( values ) > alphabet_size:
+                        pattern += values[alphabet_size]
+                    current_line += 1
+                    current_row += 1
+                # Only store the pattern if it is the correct length (meaning
+                # that every row had an extra field)
+                if len( pattern ) != len( rows ):
+                    pattern = None
+                matrix = FrequencyMatrix.from_rows( alphabet, rows )
+                setattr( motif, action[1], matrix )
+        # Only return a motif if we saw at least ID or AC or NA
+        if motif.id or motif.accession or motif.name:
+            return motif

File lib/bx/motif/io/transfac_tests.py

+from StringIO import StringIO
+import transfac
+from numpy import allclose
+
+sample = """
+VV  TRANSFAC MATRIX TABLE, Rel.3.2 26-06-1997
+XX
+//
+AC  a
+XX
+ID  V$MYOD_01
+XX
+DT  19.10.92 (created); ewi.
+DT  16.10.95 (updated); ewi.
+XX
+NA  MyoD
+XX
+DE  myoblast determination gene product
+XX
+BF  T00526; MyoD; Species: mouse, Mus musculus.
+XX
+P0      A      C      G      T
+01     100     200     200     0     S
+02     200     100     200     0     R
+03     300     0     100     100     A
+04     0     500     0     0     C
+05     500     0     0     0     A
+06     0     0     400     100     G
+07     0     100     400     0     G
+08     0     0     0     500     T
+09     0     0     500     0     G
+10     0     100     200     200     K
+11     0     200     0     300     Y
+12     100     0     300     100     G
+XX
+BA  5 functional elements in 3 genes
+XX
+//
+AC  M00002
+XX
+ID  V$E47_01
+XX
+DT  19.10.92 (created); ewi.
+DT  16.10.95 (updated); ewi.
+XX
+NA  E47
+XX
+DE  E47
+XX
+BF  T00207; E47; Species: human, Homo sapiens.
+XX
+P0      A      C      G      T
+01     400     400     300     0     N
+02     200     500     400     0     S
+03     300     200     400     200     N
+04     200     0     900     0     G
+05     0     1100     0     0     C
+06     1100     0     0     0     A
+07     0     0     1100     0     G
+08     100     200     800     0     G
+09     0     0     0     1100     T
+10     0     0     1100     0     G
+11     0     0     400     700     K
+12     100     400     300     300     N
+13     100     600     200     200     C
+14     100     400     400     200     N
+15     100     400     200     300     N
+XX
+BA  11 selected strong binding sites for E47, E47-MyoD, E12+MyoD 
+BA  and (weak) for E12
+XX
+CC  Group I in [903]; 5 sites selected in vitro for binding to E12N 
+CC  (=N-terminally truncated E12); matrix corrected according to 
+CC  the published sequences
+XX
+RN  [1]
+RA  Sun X.-H., Baltimore D.
+RT  An inhibitory domain of E12 transcription factor prevents 
+RT  DNA binding in E12 homodimers but not in E12 heterodimers
+RL  Cell 64:459-470 (1991).
+XX
+"""
+
+def test_reader():
+    input = StringIO( sample )
+    motifs = list( transfac.TransfacReader( input ) )
+    assert len( motifs ) == 2
+    # Single value parse
+    assert motifs[1].accession == "M00002"
+    # Value list parse
+    assert motifs[1].dates == [ '19.10.92 (created); ewi.', '16.10.95 (updated); ewi.' ]
+    # Matrix parse
+    assert motifs[1].matrix.sorted_alphabet == ['A','C','G','T']
+    assert allclose( motifs[1].matrix.values[0], [400,400,300,0] )

File lib/bx/motif/pwm.py

+"""
+Classes for working with position specific matrices.
+"""
+
+from numpy import *
+from copy import copy
+
+import _pwm
+
+class BaseMatrix( object ):
+    """
+    Base class for position specific matrices. 
+    """
+    def __init__( self ):
+        self.alphabet = None
+        self.sorted_alphabet = None
+        self.char_to_index = None
+        self.values = None
+
+    @classmethod
+    def from_rows( Class, alphabet, rows ):
+        """
+        Create a new matrix for a sequence over alphabet `alphabet` taking 
+        values from `rows` which is a list whose length is the width of the
+        matrix, and whose elements are lists of values associated with each
+        character (in the order those characters appear in alphabet). 
+        """
+        # Sorted alphabet
+        sorted_alphabet = sorted( alphabet )
+        # Character to index mapping (initialized to -1)
+        char_to_index = zeros( (256), int16 ) - 1
+        for i, ch  in enumerate( sorted_alphabet ):
+            char_to_index[ ord(ch) ] = i
+        # Array
+        values = zeros( ( len( rows) , len( alphabet ) ), float32 )
+        for i, row in enumerate( rows ):
+            for ch, val in zip( alphabet, row ):
+                values[i, char_to_index[ord(ch)]] = val
+        # Matrix
+        matrix = Class()
+        matrix.alphabet = alphabet
+        matrix.sorted_alphabet = sorted_alphabet
+        matrix.char_to_index = char_to_index
+        matrix.values = values
+        return matrix
+      
+    @classmethod
+    def create_from_other( Class, other, values=None ):
+        """
+        Create a new Matrix with attributes taken from `other` but with the 
+        values taken from `values` if provided
+        """
+        m = Class()
+        m.alphabet = other.alphabet
+        m.sorted_alphabet = other.sorted_alphabet
+        m.char_to_index = other.char_to_index
+        if values is not None:
+            m.values = values
+        else:
+            m.values = other.values
+        return m
+            
+    @property
+    def width( self ):
+        """
+        Return the width (size along the sequence axis) of this matrix.
+        """
+        return self.values.shape[0]
+
+    def reverse_complement( self ):
+        """
+        Create the reverse complement of this matrix. The result probably
+        only makese sense if the alphabet is that of DNA ('A','C','G','T').
+        """
+        rval = copy( self )
+        # Conveniently enough, reversing rows and columns is exactly what we
+        # want, since this results in A swapping with T and C swapping with G.
+        rval.values = self.values[::-1,::-1].copy()
+        return rval
+
+class FrequencyMatrix( BaseMatrix ):
+    """
+    A position specific count/frequency matrix.
+    """
+        
+    DEFAULT_CORRECTION = 0.0000000001
+    """
+    Default value to use for correcting when dealing with counts of zero,
+    chosen to produce scoring matrices that are the same as produced by CREAD.
+    """
+        
+    def to_logodds_scoring_matrix( self, background=None, correction=DEFAULT_CORRECTION ):
+        """
+        Create a standard logodds scoring matrix.
+        """
+        alphabet_size = len( self.alphabet )
+        if background is None:
+            background = ones( alphabet_size, float32 ) / alphabet_size
+        # Row totals as a one column array
+        totals = sum( self.values, 1 )[:,newaxis]
+        values = log2( maximum( self.values, correction ) ) \
+               - log2( totals ) \
+               - log2( maximum( background, correction ) )
+        return ScoringMatrix.create_from_other( self, values.astype( float32 ) )
+
+    def to_stormo_scoring_matrix( self, background=None ):
+        """
+        Create a scoring matrix from this count matrix using the method from:
+
+        Hertz, G.Z. and G.D. Stormo (1999). Identifying DNA and protein patterns with statistically 
+        significant alignments of multiple sequences. Bioinformatics 15(7): 563-577.
+        """
+        alphabet_size = len( self.alphabet )
+        if background is None:
+            background = ones( alphabet_size, float32 ) / alphabet_size
+        # Row totals as a one column array
+        totals = sum( self.values, 1 )[:,newaxis]
+        values = log2( self.values + background ) \
+               - log2( totals + 1 ) - log2( background )
+        return ScoringMatrix.create_from_other( self, values.astype( float32 ) )
+        
+class ScoringMatrix( BaseMatrix ):
+    """
+    A position specific matrix containing values that are suitable for
+    scoring a sequence.
+    """
+        
+    def score_string( self, string ):
+        """
+        Score each valid position in `string` using this scoring matrix. 
+        """
+        rval = zeros( len( string ), float32 )
+        _pwm.score_string( self.values, self.char_to_index, string, rval )
+        return rval

File lib/bx/motif/pwm_tests.py

+import pwm
+from numpy import allclose
+
+def test_create():
+    m = pwm.FrequencyMatrix.from_rows( ['A','C','G','T'], get_ctcf_rows() )
+    # Alphabet sort
+    assert m.sorted_alphabet == [ 'A', 'C', 'G', 'T' ]
+    # Character to index mapping
+    assert m.char_to_index[ ord('A') ] == 0
+    assert m.char_to_index[ ord('C') ] == 1
+    assert m.char_to_index[ ord('G') ] == 2
+    assert m.char_to_index[ ord('T') ] == 3
+    assert m.char_to_index[ ord('Q') ] == -1
+    # Values
+    assert allclose( m.values[0],  [ 2620, 2052, 3013, 2314 ] )
+    assert allclose( m.values[19],  [ 3144, 3231, 3056, 567 ] )
+    
+def test_scoring():
+    m = pwm.FrequencyMatrix.from_rows( ['A','C','G','T'], get_ctcf_rows() )
+    # Stormo method
+    sm = m.to_stormo_scoring_matrix()
+    # Forward matches
+    assert allclose( sm.score_string( "AATCACCACCTCCTGGCAGG" )[0], -156.8261261 )
+    assert allclose( sm.score_string( "TGCCTGCCTCTGTAGGCTCC" )[0], -128.8106842 )
+    assert allclose( sm.score_string( "GTTGCCAGTTGGGGGAAGCA" )[0], 4.65049839 )
+    assert allclose( sm.score_string( "GCAGACACCAGGTGGTTCAG" )[0], 1.60168743 )
+    # Reverse matches
+    rc = sm.reverse_complement()
+    assert allclose( rc.score_string( "AATCACCACCTCCTGGCAGG" )[0], 0.014178276062 )
+    assert allclose( rc.score_string( "TGCCTGCCTCTGTAGGCTCC" )[0], 0.723828315735 )
+    assert allclose( rc.score_string( "GTTGCCAGTTGGGGGAAGCA" )[0], -126.99407196 )
+    assert allclose( rc.score_string( "GCAGACACCAGGTGGTTCAG" )[0], -86.9560623169 )
+
+def get_ctcf_rows():
+    """
+    The CTCF primary site motif
+    """
+    return [
+         [   2620 ,  2052  ,  3013  ,  2314   ],
+         [      0 ,  3580  ,  1746  ,  4672   ],
+         [   2008 ,  1790  ,  4497  ,  1703   ],
+         [   3362 ,     0  ,  6637  ,     0   ],
+         [      0 , 10000  ,     0  ,     0   ],
+         [      0 , 10000  ,     0  ,     0   ],
+         [   7467 ,     0  ,  1310  ,  1222   ],
+         [    786 ,  4890  ,  4323  ,     0   ],
+         [   1179 ,  6288  ,   829  ,  1703   ],
+         [  10000 ,     0  ,     0  ,     0   ],
+         [      0 ,     0  , 10000  ,     0   ],
+         [   4847 ,     0  ,  5152  ,     0   ],
+         [      0 ,     0  ,  6200  ,  3799   ],
+         [      0 ,     0  , 10000  ,     0   ],
+         [      0 ,     0  , 10000  ,     0   ],
+         [   1572 ,  7467  ,     0  ,   960   ],
+         [   3842 ,     0  ,  5545  ,   611   ],
+         [      0 ,  5895  ,  4104  ,     0   ],
+         [   1615 ,  4192  ,  1397  ,  2794   ],
+         [   3144 ,  3231  ,  3056  ,   567   ]
+    ]
 
 from setuptools import *
 from glob import glob
+
+import numpy
        
 def main():                       
     setup(  name = "bx-python",
     extensions.append( Extension( "bx.pwm._position_weight_matrix",
                                   [ "lib/bx/pwm/_position_weight_matrix.pyx", "src/pwm_utils.c" ],
                                   include_dirs=["src"]  ) )
+    extensions.append( Extension( "bx.motif._pwm", [ "lib/bx/motif/_pwm.pyx" ], 
+                                  include_dirs=[numpy.get_include()] ) )
     # CpG masking
     extensions.append( Extension( "bx.align.sitemask._cpg", \
                                   [ "lib/bx/align/sitemask/_cpg.pyx",