Commits

James Taylor  committed 3a05d48

Couple of scripts, might or might not be any good, wrote these during ENCODE and forgot about them until now.

  • Participants
  • Parent commits b8bc17f

Comments (0)

Files changed (2)

File bed_diff_basewise_summary.py

+#!/usr/bin/env python
+
+"""
+Find regions of first bed file that overlap regions in a second bed file
+
+usage: %prog bed_file_1 bed_file_2
+"""
+import sys
+from warnings import warn
+from bx.bitset import BinnedBitSet
+from bx.bitset_builders import *
+import cookbook.doc_optparse
+
+def coverage( bitsets ):
+    total = 0
+    for chrom in bitsets:
+        total += bitsets[chrom].count_range( 0, bitsets[chrom].size )
+    return total    
+
+options, args = cookbook.doc_optparse.parse( __doc__ )
+try:
+    in_fname, in2_fname = args
+except:
+    cookbook.doc_optparse.exit()
+
+bits1 = binned_bitsets_from_file( open( in_fname ) )
+bits2 = binned_bitsets_from_file( open( in2_fname ) )
+
+bits1_covered = coverage( bits1 )
+bits2_covered = coverage( bits2 )
+
+bitsets = dict()
+
+for key in bits1:
+    if key in bits2:
+        bits1[key].iand( bits2[key] )
+        bitsets[key] = bits1[key]
+
+both_covered = coverage( bitsets )
+
+print "in both:  \t%d" % both_covered
+print "only in %s:\t%d" % ( in_fname, bits1_covered - both_covered )
+print "only in %s:\t%d" % ( in2_fname, bits2_covered - both_covered )

File get_scores_in_intervals.py

+#!/usr/bin/env python
+
+"""
+usage: %prog score_file interval_file [out_file] 
+"""
+
+from __future__ import division
+
+import sys
+import psyco_full
+import bx.wiggle
+from bx.binned_array import BinnedArray
+from fpconst import isNaN
+import cookbook.doc_optparse
+import misc
+
+def read_scores( f ):
+    scores_by_chrom = dict()
+    for chrom, pos, val in bx.wiggle.Reader( f ):
+        if chrom not in scores_by_chrom:
+            scores_by_chrom[chrom] = BinnedArray()
+        scores_by_chrom[chrom][pos] = val
+    return scores_by_chrom
+
+def main():
+
+    # Parse command line
+    options, args = cookbook.doc_optparse.parse( __doc__ )
+    try:
+        score_file = open( args[0] )
+        interval_file = open( args[1] )
+        if len( args ) > 2:
+            out_file = open( args[2], 'w' )
+        else:
+            out_file = sys.stdout
+    except:
+        cookbook.doc_optparse.exit()
+
+    scores_by_chrom = read_scores( misc.open_compressed( sys.argv[1] ) )
+    for line in open( sys.argv[2] ):
+        fields = line.split()
+        chrom, start, stop = fields[0], int( fields[1] ), int( fields[2] )
+        if chrom in scores_by_chrom:
+            ba = scores_by_chrom[chrom]
+            scores = [ ba[i] for i in range( start, stop ) ]
+        else:
+            scores = []
+        print >> out_file, " ".join( fields ), " ".join( map( str, scores ) )
+
+    score_file.close()
+    interval_file.close()
+    out_file.close()
+
+if __name__ == "__main__": main()