Source

bx-python / scripts / bed_complement.py

#!/usr/bin/env python2.4

"""
Complement the regions of a bed file. Requires a file that maps source names
to sizes. This should be in the simple LEN file format (each line contains
a source name followed by a size, separated by whitespace).

usage: %prog bed_file chrom_length_file
"""

import sys

from bx.bitset import *
from bx.bitset_builders import *

from bx.cookbook import doc_optparse

def read_len( f ):
    """Read a 'LEN' file and return a mapping from chromosome to length"""
    mapping = dict()
    for line in f:
        fields = line.split()
        mapping[ fields[0] ] = int( fields[1] )
    return mapping

options, args = doc_optparse.parse( __doc__ )
try:
    in_fname, len_fname = args
except:
    doc_optparse.exit()

bitsets = binned_bitsets_from_file( open( in_fname ) )

lens = read_len( open( len_fname ) )

for chrom in lens:
    if chrom in bitsets:
        bits = bitsets[chrom]
        bits.invert()
        len = lens[chrom]
        end = 0
        while 1:
            start = bits.next_set( end )
            if start == bits.size: break
            end = bits.next_clear( start )
            if end > len: end = len
            print "%s\t%d\t%d" % ( chrom, start, end )
            if end == len: break
    else:
        print "%s\t%d\t%d" % ( chrom, 0, lens[chrom] )
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.