Commits

Anonymous committed 78c1018

Program for projecting pwm maf hits onto reference coordinates

Comments (0)

Files changed (2)

pwm/pwm_score_maf.py

         pwm[ wm.id ] = wm
 
     fbunch = {}
-    for scoremax,index in MafScorer(pwm, species, inmaf):
+    for scoremax,index,headers in MafScorer(pwm, species, inmaf):
         print >>sys.stderr, index
         for k,matrix in scoremax.items():
             fname = k + '.mx'
 def MafScorer(pwm,species,inmaf):
 
     index = 0
+    for maf in align.maf.Reader( inmaf ):
+        try:
+            scoremax,width,headers = MafBlockScorer(pwm,species,maf)
+        except:
+            print >>sys.stderr, "Failed on:"
+            syserr = align.maf.Writer( sys.stderr )
+            syserr.write( maf )
+            #print >>sys.stderr,headers
+            print >>sys.stderr,width
+            print >>sys.stderr,len(scoremax)
+            syserr.close()
+            sys.exit(1)
+        index += width
+        yield scoremax,index,headers
 
-    for maf in align.maf.Reader( inmaf ):
-        width = len(maf.components[0].text)
+def MafBlockScorer(pwm,species,maf):
+    width = len(maf.components[0].text)
+    headers = [ (c.src,c.start,c.end) for c in maf.components]
 
-        # expand block rows to full
-        mafBlockSpecies = [specName.src.split('.')[0] for specName in maf.components]
-        alignlist = []
-        for sp in species:
-            try:
-                i = mafBlockSpecies.index( sp )
-                alignlist.append( maf.components[i].text )
-            except ValueError:
-                alignlist.append( [ NaN for n in range( width ) ] )
-        alignrows = pwmx.Align( alignlist )
-        scoremax = {}
-        # record gap positions
-        filter = pwmx.score_align_gaps( alignrows )
-        # score pwm models
-        for model in pwm.keys():
-            scoremax[model] = pwm[model].score_align( alignrows, filter )
-        index += width
-        yield scoremax,index
+    # expand block rows to full
+    mafBlockSpecies = [specName.src.split('.')[0] for specName in maf.components]
+    alignlist = []
+    for sp in species:
+        try:
+            i = mafBlockSpecies.index( sp )
+            alignlist.append( maf.components[i].text )
+        except ValueError:
+            alignlist.append( [ NaN for n in range( width ) ] )
+    alignrows = pwmx.Align( alignlist )
+    scoremax = {}
+    # record gap positions
+    filter = pwmx.score_align_gaps( alignrows )
+    # score pwm models
+    for model in pwm.keys():
+        #print >>sys.stderr,"%s_%d_%d" % headers[0],width,model
+        scoremax[model] = pwm[model].score_align( alignrows, filter )
+    yield scoremax,width,headers
 
 if __name__ == '__main__': main()

pwm/pwm_score_positions.py

+#!/usr/bin/env python2.4
+"""
+Returns all positions of a maf with any pwm score > threshold
+The positions are projected onto human coordinates
+"""
+
+import psyco_full
+import align.maf
+import position_weight_matrix as pwmx
+from pwm_score_maf import MafBlockScorer
+from numarray import *
+import sys
+import intervals
+
+def isnan(x):
+    return not x==x
+
+def main():
+
+    if len(sys.argv) < 6:
+        print >>sys.stderr, "%s transfac|basic pwmfile inmaf threshold spec1,spec2 " % sys.argv[0]
+        sys.exit(0)
+
+    pwm = {}
+    format = sys.argv[1]
+    for wm in pwmx.Reader(open(sys.argv[2]),format=format):
+        pwm[ wm.id ] = wm
+
+    
+    inmaf = open(sys.argv[3])
+    threshold = float(sys.argv[4])
+
+    species = []
+
+    for sp in sys.argv[5].split(','):
+        species.append( sp )
+
+    for maf in align.maf.Reader(inmaf):
+        mafchrom = maf.components[0].src.split('.')[1]
+        mafstart = maf.components[0].start
+        mafend = maf.components[0].end
+        reftext = maf.components[0].text
+
+        # maf block scores for each matrix
+        for scoremax,width,headers in MafBlockScorer(pwm, species, maf):
+            blocklength = width
+            mafsrc,mafstart,mafend = headers[0]
+            mafchrom = mafsrc.split('.')[1]
+
+            # lists of scores for each position in scoremax
+            for id,mx in scoremax.items():
+                for offset in range(blocklength):
+
+                    # scan all species with threshold
+                    for i in range(len(species)):
+                        if mx[i][offset] > threshold:
+                            refstart = mafstart + offset - reftext.count('-',0,offset)
+                            refend = refstart + len(pwm[id])
+                            data = " ".join([ "%.2f" % mx[x][offset] for x in range(len(species))])
+                            # underscore spaces in the name
+                            print mafchrom,refstart,refend,id.replace(' ','_'),data
+                            break
+
+if __name__ == '__main__': main()