bx-python / scripts / bed_coverage_by_interval.py

#!/usr/bin/env python

"""
For each interval in `bed1` print the fraction of bases covered by `bed2`.

usage: %prog bed1 bed2 [mask]
"""

from __future__ import division

import psyco_full
import sys
from bx.bitset import BinnedBitSet
from bx.bitset_builders import *
from itertools import *

bed1_fname, bed2_fname = sys.argv[1:3]

bitsets = binned_bitsets_from_file( open( bed2_fname ) )

def clone( bits ):
    b = BinnedBitSet( bits.size )
    b.ior( bits )
    return b

if len( sys.argv ) > 3:
    mask_fname = sys.argv[3]
    mask = binned_bitsets_from_file( open( mask_fname ) )
    new_bitsets = dict()
    for key in bitsets:
        if key in mask:
            b = clone( mask[key] )
            b.invert()
            b.iand( bitsets[key] )
            new_bitsets[key] = b
    bitsets = new_bitsets
else:
    mask = None

for line in open( bed1_fname ):
    fields = line.split()
    chr, start, end = fields[0], int( fields[1] ), int( fields[2] )
    bases_covered = 0
    if chr in bitsets:
         bases_covered = bitsets[ chr ].count_range( start, end-start )
    length = end - start
    if mask and chr in mask:
        bases_masked = mask[ chr ].count_range( start, end-start )
        length -= bases_masked
    assert bases_covered <= length, "%r, %r, %r" % ( bases_covered, bases_masked, length )
    if length == 0:
        print 0.0
    else:
        print bases_covered / length
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.