1. James Taylor
  2. bx-python

Source

bx-python / scripts / bed_complement.py

#!/usr/bin/env python2.4

"""
Complement the regions of a bed file. Requires a file that maps source names
to sizes. This should be in the simple LEN file format (each line contains
a source name followed by a size, separated by whitespace).

usage: %prog bed_file chrom_length_file
"""

import sys

from bx.bitset import *
from bx.bitset_builders import *

from bx.cookbook import doc_optparse

def read_len( f ):
    """Read a 'LEN' file and return a mapping from chromosome to length"""
    mapping = dict()
    for line in f:
        fields = line.split()
        mapping[ fields[0] ] = int( fields[1] )
    return mapping

options, args = doc_optparse.parse( __doc__ )
try:
    in_fname, len_fname = args
except:
    doc_optparse.exit()

bitsets = binned_bitsets_from_file( open( in_fname ) )

lens = read_len( open( len_fname ) )

for chrom in lens:
    if chrom in bitsets:
        bits = bitsets[chrom]
        bits.invert()
        len = lens[chrom]
        end = 0
        while 1:
            start = bits.next_set( end )
            if start == bits.size: break
            end = bits.next_clear( start )
            if end > len: end = len
            print "%s\t%d\t%d" % ( chrom, start, end )
            if end == len: break
    else:
        print "%s\t%d\t%d" % ( chrom, 0, lens[chrom] )