Source

bx-python / scripts / interval_count_intersections.py

#!/usr/bin/env python

"""
Read two lists of intervals (with chromosomes) and count the number of entries
in the second set that intersect any entry in the first set.

TODO: This could use bitsets rather than the intervals package, would it be
      faster?

usage: %prog bed1 bed2 > out
"""

from __future__ import division

import psyco_full

from bx import intervals
from bx import misc
import string
import sys

def main():

    intersecters = {}

    # Read ranges

    for chr, start, end in read_intervals( misc.open_compressed( sys.argv[1] ) ):
        if not intersecters.has_key( chr ): intersecters[ chr ] = intervals.Intersecter()
        intersecters[ chr ].add_interval( intervals.Interval( start, end ) )

    # Count intersection

    total = 0

    for chr, start, end in read_intervals( misc.open_compressed( sys.argv[2] ) ):
        if intersecters.has_key( chr ):
            intersection = intersecters[ chr ].find( start, end )
            if intersection: 
                #print chr, intersection
                total += 1

    print total

def read_intervals( input ):
    for line in input:
        fields = line.split()
        yield fields[0], int( fields[1] ), int( fields[2] )

main()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.