Commits

James Taylor committed 91e4dcc

Added "bed_subtract_basewise" tool and fixed some bugs in and/or for
binned bitsets (one case that occurs only after complementing the
second set were not being handled correctly)

  • Participants
  • Parent commits 0ce4bd3

Comments (0)

Files changed (4)

File bed_subtract_basewise.py

+#!/usr/bin/env python
+
+"""
+Find regions of first bed that are not in second bed file (subtract
+second from first)
+
+usage: %prog bed_file_1 bed_file_2
+
+"""
+import sys
+from warnings import warn
+from bx.bitset import binned_bitsets_from_file
+import cookbook.doc_optparse
+
+def read_len( f ):
+    """Read a 'LEN' file and return a mapping from chromosome to length"""
+    mapping = dict()
+    for line in f:
+        fields = line.split()
+        mapping[ fields[0] ] = int( fields[1] )
+    return mapping
+
+def print_bits_as_bed( bits ):
+    end = 0
+    while 1:
+        start = bits.next_set( end )
+        if start == bits.size: break
+        end = bits.next_clear( start )
+        print "%s\t%d\t%d" % ( chrom, start, end )
+
+options, args = cookbook.doc_optparse.parse( __doc__ )
+try:
+    in_fname, in2_fname, len_fname = args
+except:
+    cookbook.doc_optparse.exit()
+
+lens = read_len( open( len_fname ) )
+
+# Read first bed into some bitsets
+
+bitsets1 = binned_bitsets_from_file( open( in_fname ), lens=lens )
+bitsets2 = binned_bitsets_from_file( open( in2_fname ), lens=lens )
+
+for chrom in bitsets1:  
+    if chrom not in bitsets1:
+        continue
+    bits1 = bitsets1[chrom]
+    if chrom in bitsets2:
+        bits2 = bitsets2[chrom]
+        bits2.invert()
+        bits1.iand( bits2 )
+    print_bits_as_bed( bits1 )
+    

File bx/bitset.pyx

     property size:
         def __get__( self ):
             return self.bb.size
+    property bin_size:
+        def __get__( self ):
+            return self.bb.bin_size
     def iand( self, BinnedBitSet other ):
         binBitsAnd( self.bb, other.bb )
     def ior( self, BinnedBitSet other ):
 
 from warnings import warn
 
-def binned_bitsets_from_file( f, chrom_col=0, start_col=1, end_col=2, upstream_pad=0, downstream_pad=0 ):
+def binned_bitsets_from_file( f, chrom_col=0, start_col=1, end_col=2, upstream_pad=0, downstream_pad=0, lens={} ):
     """Read a file into a dictionary of bitsets"""
     last_chrom = None
     last_bitset = None
         chrom = fields[chrom_col]
         if chrom != last_chrom:
             if chrom not in bitsets:
-                bitsets[chrom] = BinnedBitSet() 
+                if chrom in lens:
+                    size = lens[chrom]
+                else:
+                    size = MAX
+                bitsets[chrom] = BinnedBitSet( size ) 
             last_chrom = chrom
             last_bitset = bitsets[chrom]
         start, end = int( fields[start_col] ), int( fields[end_col] )

File scripts.list

 tfloc_summary.py
 ucsc_gene_table_to_intervals.py
 wiggle_to_simple.py
+wiggle_to_binned_array.py

File src/binBits.c

         }
         else if ( bb2->bins[i] == ALL_ZERO )
         {
-            bitFree( &bb1->bins[i] );
+            if ( bb1->bins[i] != ALL_ONE )
+            {
+                bitFree( &bb1->bins[i] );
+            }
             bb1->bins[i] = ALL_ZERO;
         }
+        else if ( bb2->bins[i] == ALL_ONE )
+        {
+            // Do nothing
+        }
         else if ( bb1->bins[i] == ALL_ONE )
         {
-            if ( bb2->bins[i] == ALL_ONE )
-            {
-                // Do nothing
-            }
-            else if ( bb2->bins[i] == ALL_ZERO )
-            {
-                bb1->bins[i] = ALL_ZERO;
-            }
-            else
-            {
-                bb1->bins[i] = bitClone( bb2->bins[i], bb1->bin_size );
-            }
+            bb1->bins[i] = bitClone( bb2->bins[i], bb1->bin_size );
         }
         else
-        {
+        {            
             bitAnd( bb1->bins[i], bb2->bins[i], bb1->bin_size );
         }
     }
         }
         else if ( bb2->bins[i] == ALL_ONE )
         {
-            bitFree( &bb1->bins[i] );
+            if ( bb1->bins[i] != ALL_ZERO )
+            {
+                bitFree( &bb1->bins[i] );
+            }
             bb1->bins[i] = ALL_ONE;
         }
+        else if ( bb2->bins[i] == ALL_ZERO )
+        {
+            // Do nothing
+        }
         else if ( bb1->bins[i] == ALL_ZERO )
         {
-            if ( bb2->bins[i] == ALL_ZERO )
-            {
-                // Do nothing
-            }
-            else if ( bb2->bins[i] == ALL_ONE )
-            {
-                bb1->bins[i] = ALL_ONE;
-            }
-            else
-            {
-                bb1->bins[i] = bitClone( bb2->bins[i], bb1->bin_size );
-            }
+            bb1->bins[i] = bitClone( bb2->bins[i], bb1->bin_size );
         }
         else
         {