Commits

James Taylor committed 2b8119f

add bed_extend_to.py, extend each record to a minimum size

Comments (0)

Files changed (1)

scripts/bed_extend_to.py

+#!/usr/bin/env python
+
+"""
+Read a BED file and extend each record to the specified minimum length. If
+chromosome size information is provided, trim extended intervals to it.
+
+usage: %prog min_length [ chrom_file ] < bed_file
+"""
+
+import sys
+from bx.intervals.io import GenomicIntervalReader
+
+length = int( sys.argv[1] )
+chrom_len = None
+if len( sys.argv ) > 2:
+    chrom_len = dict( ( fields[0], int( fields[1] ) ) for fields in map( str.split, open( sys.argv[2] ) ) )
+
+for interval in GenomicIntervalReader( sys.stdin ):
+    if interval.end - interval.start < length:
+        start = interval.start
+        end = interval.end
+        # Extend in positive direction on strand
+        if interval.strand == "+":
+            end = start + length
+        else:
+            start = end - length
+        # Trim 
+        if start < 0:
+            start = 0
+        if chrom_len and end > chrom_len[interval.chrom]:
+            end = chrom_len[interval.chrom]
+        # Set new start and end
+        interval.start = start
+        interval.end = end
+    # Output possibly adjusted interval
+    print interval