# dmfs-code / src /

#! /usr/bin/python
from __future__ import with_statement
import numpy as np
class motifRec(object):
    """Obtain the motifs from a wordspy output file.

    Attributes:
        dupMotifs: list of 5-tuples of strings
            ``(motif, Zscore, NZscore, Freq, Occur)``, one per matching
            line in file order; the same motif may appear more than once.
        uniMotifs: NumPy structured array with the fields ``motif``
            (byte string), ``Zscore``, ``NZscore``, ``Freq`` (float64) and
            ``Occur`` (integer).  It holds one row per distinct motif, filled
            from the last matching line for that motif.  It is empty when
            no line of the file matches.
    """

    def __init__(self, flname, protein):
        """Read ``flname`` and extract every motif line it contains.

        Args:
            flname: path of the wordspy output file.
            protein: when false, motifs are matched against the DNA
                alphabet (ACGT); otherwise against the protein alphabet
                used by the second pattern below.

        Raises:
            IOError/OSError: if ``flname`` cannot be opened or read.
        """
        import re

        # Keep the file open only for the read; parsing works on the text.
        with open(flname, 'r') as f:
            strings = f.read()

        # parsing the wordspy output file to obtain relevant information
        refloat = r'\d+\.\d+'
        if not protein:
            motpat = re.compile(r'^\s*([ACGT]+)\s+(%s)\s+(-?%s)\s+null\s+(0\.\d+)\s+(\d+)\s+\d+\s+\{[acgt,]+\}'% (refloat,refloat),re.M|re.I)
        else:
            # NOTE(review): this alphabet contains 'O' and has no 'Y' --
            # confirm that this is intended before changing the pattern.
            motpat = re.compile(r'^\s*([ACDEFGHIKLMNOPQRSTVW]+)\s+(%s)\s+(-?%s)\s+null\s+(0\.\d+)\s+(\d+)\s+\d+\s+\{[ACDEFGHIKLMNOPQRSTVW,]+\}'% (refloat,refloat),re.M|re.I)
        self.dupMotifs = motpat.findall(strings)

        # Only retain the latest stats for identical motifs: a later line
        # for the same motif overwrites the earlier entry.
        mtfsgns = dict()
        for rec in self.dupMotifs:
            mtfsgns[rec[0]] = rec

        # Size the byte-string field for the longest motif.  Fall back to 1
        # when nothing matched so that an empty array can still be built.
        if mtfsgns:
            strlen = max(len(motif) for motif in mtfsgns)
        else:
            strlen = 1
        rectype = np.dtype([
            ('motif', 'S%d' % strlen),
            ('Zscore', np.float64),
            ('NZscore', np.float64),
            ('Freq', np.float64),
            ('Occur', np.int_),
        ])

        # Convert the captured strings explicitly, and materialise a real
        # list: on Python 3 dict.values() is a view that np.array cannot
        # turn into a structured array.
        rows = [
            (motif, float(zscore), float(nzscore), float(freq), int(occur))
            for motif, zscore, nzscore, freq, occur in mtfsgns.values()
        ]
        # convert it to a recarray
        self.uniMotifs = np.array(rows, dtype=rectype)