James Taylor committed 522567c

Transfac Reader: requiring the matrix prefixes to always start at "01" was
apparently too strict.

Comments (0)

Files changed (3)

lib/bx/motif/_pwm.c

-/* Generated by Cython 0.9.6.14 on Sat May 31 16:30:00 2008 */
+/* Generated by Cython 0.9.6.14 on Sat May 31 17:10:21 2008 */
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
 static char *__pyx_filename;
 static char **__pyx_f;
 
+static char __pyx_mdoc[] = "\nExtensions used by the `pwm` module.\n";
+
 static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name, int exact); /*proto*/
 
 static INLINE void __Pyx_RaiseArgtupleTooLong(Py_ssize_t num_expected, Py_ssize_t num_found); /*proto*/
 
 /* Implementation of bx.motif._pwm */
 
-/* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":16
+/* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":20
  *     ctypedef float npy_float32
  * 
  * def score_string( ndarray matrix, ndarray char_to_index, object string, ndarray rval ):             # <<<<<<<<<<<<<< 
  *     """
- *     matrix *must* be a 2d array of type float32
+ *     Score each position in string `string` using the scoring matrix `matrix`.
  */
 
 static PyObject *__pyx_pf_2bx_5motif_4_pwm_score_string(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
-static char __pyx_doc_2bx_5motif_4_pwm_score_string[] = "\n    matrix *must* be a 2d array of type float32\n    char_to_index *must* be a 1d array of type int16\n    rval *must* be a 1d array of type float32 and the same length as string\n    ";
+static char __pyx_doc_2bx_5motif_4_pwm_score_string[] = "\n    Score each position in string `string` using the scoring matrix `matrix`.\n    Characters in the string are mapped to columns in the matrix by `char_to_index`\n    and the score for each position is stored in `rval`.\n    \n    matrix *must* be a 2d array of type float32\n    char_to_index *must* be a 1d array of type int16\n    rval *must* be a 1d array of type float32 and the same length as string\n    ";
 static PyObject *__pyx_pf_2bx_5motif_4_pwm_score_string(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
   PyArrayObject *__pyx_v_matrix = 0;
   PyArrayObject *__pyx_v_char_to_index = 0;
     __pyx_v_rval = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 3));
   }
   else {
-    if (unlikely(!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "OOOO", __pyx_argnames, &__pyx_v_matrix, &__pyx_v_char_to_index, &__pyx_v_string, &__pyx_v_rval))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L2;}
+    if (unlikely(!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "OOOO", __pyx_argnames, &__pyx_v_matrix, &__pyx_v_char_to_index, &__pyx_v_string, &__pyx_v_rval))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L2;}
   }
   goto __pyx_L3;
   __pyx_L2:;
   __Pyx_AddTraceback("bx.motif._pwm.score_string");
   return NULL;
   __pyx_L3:;
-  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_matrix), __pyx_ptype_2bx_5motif_4_pwm_ndarray, 1, "matrix", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1;}
-  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_char_to_index), __pyx_ptype_2bx_5motif_4_pwm_ndarray, 1, "char_to_index", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1;}
-  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_rval), __pyx_ptype_2bx_5motif_4_pwm_ndarray, 1, "rval", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1;}
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_matrix), __pyx_ptype_2bx_5motif_4_pwm_ndarray, 1, "matrix", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1;}
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_char_to_index), __pyx_ptype_2bx_5motif_4_pwm_ndarray, 1, "char_to_index", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1;}
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_rval), __pyx_ptype_2bx_5motif_4_pwm_ndarray, 1, "rval", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1;}
 
-  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":26
+  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":34
  *     cdef float score
  *     cdef int i, j
  *     cdef int matrix_width = matrix.dimensions[0]             # <<<<<<<<<<<<<< 
   __pyx_v_matrix_width = (__pyx_v_matrix->dimensions[0]);
 
 
-  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":29
+  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":37
  *     cdef npy_int16 char_index
  *     # Get input string as character pointer
  *     PyString_AsStringAndSize( string, &buffer, &len )             # <<<<<<<<<<<<<< 
  *     # Loop over each position in the string 
  *     cdef int stop = len - matrix.dimensions[0] + 1
  */
-  __pyx_1 = PyString_AsStringAndSize(__pyx_v_string, (&__pyx_v_buffer), (&__pyx_v_len)); if (unlikely(__pyx_1 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1;}
+  __pyx_1 = PyString_AsStringAndSize(__pyx_v_string, (&__pyx_v_buffer), (&__pyx_v_len)); if (unlikely(__pyx_1 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1;}
 
-  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":31
+  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":39
  *     PyString_AsStringAndSize( string, &buffer, &len )
  *     # Loop over each position in the string 
  *     cdef int stop = len - matrix.dimensions[0] + 1             # <<<<<<<<<<<<<< 
   __pyx_v_stop = ((__pyx_v_len - (__pyx_v_matrix->dimensions[0])) + 1);
 
 
-  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":32
+  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":40
  *     # Loop over each position in the string 
  *     cdef int stop = len - matrix.dimensions[0] + 1
  *     for i from 0 <= i < stop:             # <<<<<<<<<<<<<< 
  */
   for (__pyx_v_i = 0; __pyx_v_i < __pyx_v_stop; __pyx_v_i++) {
 
-    /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":33
+    /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":41
  *     cdef int stop = len - matrix.dimensions[0] + 1
  *     for i from 0 <= i < stop:
  *         score = 0.0             # <<<<<<<<<<<<<< 
  */
     __pyx_v_score = 0.0;
 
-    /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":34
+    /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":42
  *     for i from 0 <= i < stop:
  *         score = 0.0
  *         for j from 0 <= j < matrix_width:             # <<<<<<<<<<<<<< 
  */
     for (__pyx_v_j = 0; __pyx_v_j < __pyx_v_matrix_width; __pyx_v_j++) {
 
-      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":35
+      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":43
  *         score = 0.0
  *         for j from 0 <= j < matrix_width:
  *             char_index = ( <npy_int16 *> ( char_to_index.data + buffer[i+j] * char_to_index.strides[0] ) )[0]             # <<<<<<<<<<<<<< 
  */
       __pyx_v_char_index = (((npy_int16 *)(__pyx_v_char_to_index->data + ((__pyx_v_buffer[(__pyx_v_i + __pyx_v_j)]) * (__pyx_v_char_to_index->strides[0]))))[0]);
 
-      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":36
+      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":44
  *         for j from 0 <= j < matrix_width:
  *             char_index = ( <npy_int16 *> ( char_to_index.data + buffer[i+j] * char_to_index.strides[0] ) )[0]
  *             if char_index < 0:             # <<<<<<<<<<<<<< 
       }
       __pyx_L8:;
 
-      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":38
+      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":46
  *             if char_index < 0: 
  *                 break
  *             score += ( <npy_float32*> ( matrix.data + j * matrix.strides[0] + char_index * matrix.strides[1] ) )[0]             # <<<<<<<<<<<<<< 
     }
     /*else*/ {
 
-      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":40
+      /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":48
  *             score += ( <npy_float32*> ( matrix.data + j * matrix.strides[0] + char_index * matrix.strides[1] ) )[0]
  *         else:
  *             ( <npy_float32*> ( rval.data + i * rval.strides[0] ) )[0] = score             # <<<<<<<<<<<<<< 
   /*--- Library function declarations ---*/
   __pyx_init_filenames();
   /*--- Module creation code ---*/
-  __pyx_m = Py_InitModule4("_pwm", __pyx_methods, 0, 0, PYTHON_API_VERSION);
+  __pyx_m = Py_InitModule4("_pwm", __pyx_methods, __pyx_mdoc, 0, PYTHON_API_VERSION);
   if (!__pyx_m) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1;};
   __pyx_b = PyImport_AddModule("__builtin__");
   if (!__pyx_b) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1;};
   /*--- Global init code ---*/
   /*--- Function export code ---*/
   /*--- Type init code ---*/
-  __pyx_ptype_2bx_5motif_4_pwm_ndarray = __Pyx_ImportType("numpy", "ndarray", sizeof(PyArrayObject)); if (unlikely(!__pyx_ptype_2bx_5motif_4_pwm_ndarray)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1;}
+  __pyx_ptype_2bx_5motif_4_pwm_ndarray = __Pyx_ImportType("numpy", "ndarray", sizeof(PyArrayObject)); if (unlikely(!__pyx_ptype_2bx_5motif_4_pwm_ndarray)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1;}
   /*--- Type import code ---*/
   /*--- Function import code ---*/
   /*--- Execution code ---*/
 
-  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":16
+  /* "/Users/james/projects/bx-python/code/trunk/lib/bx/motif/_pwm.pyx":20
  *     ctypedef float npy_float32
  * 
  * def score_string( ndarray matrix, ndarray char_to_index, object string, ndarray rval ):             # <<<<<<<<<<<<<< 
  *     """
- *     matrix *must* be a 2d array of type float32
+ *     Score each position in string `string` using the scoring matrix `matrix`.
  */
   return;
   __pyx_L1:;

lib/bx/motif/io/transfac.py

         self.input = iter( input )
         self.input_exhausted = False
     
+    def as_dict( self, key="id" ):
+        """
+        Return a dictionary containing all remaining motifs, using `key`
+        as the dictionary key.
+        """
+        rval = {}
+        for motif in self:
+            rval[ getattr( motif, key ) ] = motif
+        return rval
+    
     def __iter__( self ):
         return self
     
                 # First line is alphabet
                 alphabet = rest.split()
                 alphabet_size = len( alphabet )
-                current_row = 0
                 rows = []
                 pattern = ""
                 current_line += 1
                 while current_line < len( lines ):
                     prefix, rest = lines[ current_line ]
                     # Prefix should be a two-digit, zero-padded row number
-                    if prefix != ( "%02d" % ( current_row + 1 ) ):
+                    if not prefix.isdigit():
                         break
                     # The first `alphabet_size` fields are the row values
                     values = rest.split()
                     if len( values ) > alphabet_size:
                         pattern += values[alphabet_size]
                     current_line += 1
-                    current_row += 1
                 # Only store the pattern if it is the correct length (meaning
                 # that every row had an extra field)
                 if len( pattern ) != len( rows ):

lib/bx/motif/io/transfac_tests.py

 BF  T00207; E47; Species: human, Homo sapiens.
 XX
 P0      A      C      G      T
-01     400     400     300     0     N
+00     400     400     300     0     N
 02     200     500     400     0     S
 03     300     200     400     200     N
 04     200     0     900     0     G
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.