Commits

Greg Von Kuster committed b74040a

Added exception handling related to integer overflow for interval operations.

  • Participants
  • Parent commits 9b0f570

Comments (0)

Files changed (8)

lib/bx/intervals/io.py

                             size = lens[chrom]
                         else:
                             size = MAX
-                        bitsets[chrom] = BinnedBitSet( size )
+                        try:
+                            bbs = BinnedBitSet( size )
+                        except ValueError, e:
+                            continue
+                        bitsets[chrom] = bbs
                     last_chrom = chrom
                     last_bitset = bitsets[chrom]
                 start = max(int( interval[self.start_col]), 0 )
                 end = min(int( interval[self.end_col]), size)
-                last_bitset.set_range( start, end-start )
+                try:
+                    last_bitset.set_range( start, end-start )
+                except OverflowError, e:
+                    continue
         return bitsets
 
 class NiceReaderWrapper( GenomicIntervalReader ):
         GenomicIntervalReader.__init__( self, reader, **kwargs )
         self.outstream = kwargs.get("outstream", None)
         self.print_delegate = kwargs.get("print_delegate", None)
+        self.input_wrapper = iter( self.input )
+        self.input_iter = self.iterwrapper()
         self.skipped = 0
         self.skipped_lines = []
-        self.input_wrapper = iter( self.input )
-        self.input_iter = self.iterwrapper()
     def __iter__( self ):
         return self
     def next( self ):
                 self.skipped += 1
                 # no reason to stuff an entire bad file into memory
                 if self.skipped < 10:
-                    self.skipped_lines.append( (self.linenum, self.current_line) )
+                    self.skipped_lines.append( ( self.linenum, self.current_line, str( e ) ) )
     def iterwrapper( self ):
         while 1:
             self.current_line = self.input_wrapper.next()

lib/bx/intervals/operations/base_coverage.py

     bitsets = reader.binned_bitsets()
     coverage = 0
     for chrom in bitsets:
-        coverage += bitsets[chrom].count_range(0, MAX_END)
+        try:
+            coverage += bitsets[chrom].count_range(0, MAX_END)
+        except IndexError, e:
+            try:
+                # This will work only if reader is a NiceReaderWrapper
+                reader.skipped += 1
+                # no reason to stuff an entire bad file into memory
+                if reader.skipped < 10:
+                    reader.skipped_lines.append( ( reader.linenum, reader.current_line, str( e ) ) )
+            except:
+                pass
+            continue
     return coverage

lib/bx/intervals/operations/complement.py

     for chrom in bitsets:
         bitset = bitsets[chrom]
         out_intervals = bits_set_in_range( bitset, 0, lens.get(chrom, 512*1024*1024) )
-        # Write the intervals
-        for start, end in out_intervals:
-            fields = ["."  for x in range(max(reader.chrom_col, reader.start_col, reader.end_col)+1)]
-            # default the column to a + if it exists
-            if reader.strand_col < len( fields ) and reader.strand_col >= 0:
-                fields[reader.strand_col] = "+"
-            fields[reader.chrom_col] = chrom
-            fields[reader.start_col] = start
-            fields[reader.end_col] = end
-            new_interval = GenomicInterval(reader, fields, reader.chrom_col, reader.start_col, reader.end_col, reader.strand_col, "+")
-            yield new_interval
+        try:
+            # Write the intervals
+            for start, end in out_intervals:
+                fields = ["."  for x in range(max(reader.chrom_col, reader.start_col, reader.end_col)+1)]
+                # default the column to a + if it exists
+                if reader.strand_col < len( fields ) and reader.strand_col >= 0:
+                    fields[reader.strand_col] = "+"
+                fields[reader.chrom_col] = chrom
+                fields[reader.start_col] = start
+                fields[reader.end_col] = end
+                new_interval = GenomicInterval(reader, fields, reader.chrom_col, reader.start_col, reader.end_col, reader.strand_col, "+")
+                yield new_interval
+        except IndexError, e:
+            try:
+                # This will work only if reader is a NiceReaderWrapper
+                reader.skipped += 1
+                # no reason to stuff an entire bad file into memory
+                if reader.skipped < 10:
+                    reader.skipped_lines.append( ( reader.linenum, reader.current_line, str( e ) ) )
+            except:
+                pass
+            continue
 
 
 # def main():

lib/bx/intervals/operations/coverage.py

             chrom = interval.chrom
             start = int(interval.start)
             end = int(interval.end)
-            if start > end: warn( "Interval start after end!" )
+            if start > end:
+                try:
+                    # This will only work if primary is a NiceReaderWrapper
+                    primary.skipped += 1
+                    # no reason to stuff an entire bad file into memory
+                    if primary.skipped < 10:
+                        primary.skipped_lines.append( ( primary.linenum, primary.current_line, "Interval start after end!" ) )
+                except:
+                    pass
+                continue
             if chrom not in bitsets:
                 bases_covered = 0
                 percent = 0.0
             else:
-                bases_covered = bitsets[ chrom ].count_range( start, end-start )
-                if (end - start) == 0: percent = 0
-                else: percent = float(bases_covered) / float(end - start)
+                try:
+                    bases_covered = bitsets[ chrom ].count_range( start, end-start )
+                except IndexError, e:
+                    try:
+                        # This will only work if primary is a NiceReaderWrapper
+                        primary.skipped += 1
+                        # no reason to stuff an entire bad file into memory
+                        if primary.skipped < 10:
+                            primary.skipped_lines.append( ( primary.linenum, primary.current_line, str( e ) ) )
+                    except:
+                        pass
+                    continue
+                if (end - start) == 0:
+                    percent = 0
+                else:
+                    percent = float(bases_covered) / float(end - start)
             interval.fields.append(str(bases_covered))
             interval.fields.append(str(percent))
             yield interval

lib/bx/intervals/operations/find_clusters.py

         else:
             if interval.chrom not in chroms:
                 chroms[interval.chrom] = ClusterTree( mincols, minregions )
-            chroms[interval.chrom].insert(interval.start, interval.end, linenum)
+            try:
+                chroms[interval.chrom].insert( interval.start, interval.end, linenum )
+            except OverflowError, e:
+                try:
+                    # This will work only if reader is a NiceReaderWrapper
+                    reader.skipped += 1
+                    if reader.skipped < 10:
+                        reader.skipped_lines.append( ( reader.linenum, reader.current_line, str( e ) ) )
+                except:
+                    pass
+                continue
     return chroms, extra
 
 

lib/bx/intervals/operations/intersect.py

             yield interval
         elif type( interval ) == GenomicInterval:
             chrom = interval.chrom
-            start = int(interval.start)
-            end = int(interval.end)
-            if chrom not in bitsets: continue
-            if start > end: warn( "Interval start after end!" )
+            start = int( interval.start )
+            end = int( interval.end )
+            if chrom not in bitsets:
+                continue
+            if start > end:
+                try:
+                    # This will only work if primary is a NiceReaderWrapper
+                    primary.skipped += 1
+                    # no reason to stuff an entire bad file into memory
+                    if primary.skipped < 10:
+                        primary.skipped_lines.append( ( primary.linenum, primary.current_line, "Interval start after end!" ) )
+                except:
+                    pass
+                continue
             out_intervals = []
             # Intersect or Overlap
-            if bitsets[ chrom ].count_range( start, end-start ) >= mincols:                
-                if pieces:
-                    out_intervals = bits_set_in_range( bitsets[chrom], start, end )
-                else:
-                    out_intervals = [ ( start, end ) ]
+            try:
+                if bitsets[ chrom ].count_range( start, end-start ) >= mincols:                
+                    if pieces:
+                        out_intervals = bits_set_in_range( bitsets[chrom], start, end )
+                    else:
+                        out_intervals = [ ( start, end ) ]
+            except IndexError, e:
+                try:
+                    # This will only work if primary is a NiceReaderWrapper
+                    primary.skipped += 1
+                    # no reason to stuff an entire bad file into memory
+                    if primary.skipped < 10:
+                        primary.skipped_lines.append( ( primary.linenum, primary.current_line, str( e ) ) )
+                except:
+                    pass
+                continue
             # Write the intervals
             for start, end in out_intervals:
                 new_interval = interval.copy()

lib/bx/intervals/operations/merge.py

         bitset = bitsets[chrom]
         output = ["."] * (max(interval.chrom_col, interval.start_col, interval.end_col) + 1)
         output[interval.chrom_col] = chrom
-        for start, end in bits_set_in_range(bitset,0, MAX_END):
-            output[interval.start_col] = str(start)
-            output[interval.end_col] = str(end)
-            yield output
+        try:
+            for start, end in bits_set_in_range(bitset,0, MAX_END):
+                output[interval.start_col] = str(start)
+                output[interval.end_col] = str(end)
+                yield output
+        except IndexError, e:
+            try:
+                # This will work only if interval is a NiceReaderWrapper
+                interval.skipped += 1
+                # no reason to stuff an entire bad file into memory
+                if interval.skipped < 10:
+                    interval.skipped_lines.append( ( interval.linenum, interval.current_line, str( e ) ) )
+            except:
+                pass
+            continue

lib/bx/intervals/operations/subtract.py

                 out_intervals = []
                 # Find the intervals that meet the criteria (for the three sensible
                 # permutations of reverse and pieces)
-                if bitsets[ chrom ].count_range( start, end-start ) >= mincols:                
-                    if pieces:
-                        out_intervals = bits_clear_in_range( bitsets[chrom], start, end )
-                else:
-                    out_intervals = [ ( start, end ) ]
-                # Write the intervals
-                for start, end in out_intervals:
-                    new_interval = interval.copy()
-                    new_interval.start = start
-                    new_interval.end = end
-                    yield new_interval
+                try:
+                    if bitsets[ chrom ].count_range( start, end-start ) >= mincols:                
+                        if pieces:
+                            out_intervals = bits_clear_in_range( bitsets[chrom], start, end )
+                    else:
+                        out_intervals = [ ( start, end ) ]
+                    # Write the intervals
+                    for start, end in out_intervals:
+                        new_interval = interval.copy()
+                        new_interval.start = start
+                        new_interval.end = end
+                        yield new_interval
+                except IndexError, e:
+                    try:
+                        # This will work only if primary is a NiceReaderWrapper
+                        primary.skipped += 1
+                        # no reason to stuff an entire bad file into memory
+                        if primary.skipped < 10:
+                            primary.skipped_lines.append( ( primary.linenum, primary.current_line, str( e ) ) )
+                    except:
+                        pass
+                    continue