Commits

James Taylor committed d9ae19f

Added a script for extracting the regions covered (in the reference species)
by all other species in an alignment.

  • Participants
  • Parent commits cbf40d1

Comments (0)

Files changed (1)

File maf_covered_regions.py

+#!/usr/bin/env python2.3
+
+"""
+Read a maf file and print the regions covered to a set of bed files (one for each source)
+
+usage: %prog bed_outfile_prefix < maf
+"""
+
+from __future__ import division
+
+import psyco_full
+import bx.align.maf
+import sys
+
+def block_pid( comp1, comp2 ):
+    match = 0
+    total = 0
+    t1 = comp1.text.lower()
+    t2 = comp2.text.lower()
+    for i in range( 0, len(t1) ):
+        a, b = t1[i], t2[i]
+        if a == '-' or b == '-': 
+            continue
+        elif a == b:
+            match += 1
+        total += 1
+    if total == 0: return None
+    return ( match / total )
+
+def main():
+    out_prefix = sys.argv[1]
+    print out_prefix
+    out_files = dict()
+    for block in bx.align.maf.Reader( sys.stdin ):
+        ref_comp = block.components[0]
+        ref_chrom = ref_comp.src.split('.')[1]
+        for comp in block.components[1:]:
+            comp_species, comp_chrom = comp.src.split('.')
+            if comp_species not in out_files:
+                f = open( "%s%s.bed" % ( out_prefix, comp_species ), "w" )
+                out_files[comp_species] = f
+            pid = block_pid( ref_comp, comp )
+            if pid:
+                out_files[comp_species].write( "%s\t%d\t%d\t%s:%d-%d,%s\t%f\n" %
+                                 ( ref_chrom, ref_comp.forward_strand_start, ref_comp.forward_strand_end, \
+                                   comp_chrom, comp.start, comp.end, comp.strand, pid ) )
+
+    for f in out_files.values():
+        f.close()
+    
+
+if __name__ == "__main__": main()