Commits

Daniel Blankenberg committed 0e55736

Fix memory issues when dealing with alignments.

Make component.alignment a weak reference (weakref) and rename it to component._alignment. Add __deepcopy__ methods to both Component and Alignment (used when fusing alignments), because weakref objects are not compatible with Python's default deepcopy behavior.

Add an iterator version of index.get(), named get_as_iterator(), that yields blocks one at a time (index offsets are still processed as an array, to allow sorting).

  • Participants
  • Parent commits 2388ec8

Comments (0)

Files changed (2)

lib/bx/align/core.py

 
 import random
 import string
+import weakref
 from bx.misc.readlengths import read_lengths_file
 
 # DNA reverse complement table
         self.components = []
 
     def add_component( self, component ):
-        component.alignment = self
+        component._alignment = weakref.ref( self )
         self.components.append( component )
         if component.text is not None:
             if self.text_size == 0: 
     def __ne__( self, other ):
         return not( self.__eq__( other ) )
     
+    def __deepcopy__( self, memo ):
+        from copy import deepcopy
+        new = Alignment( score=self.score, attributes=deepcopy( self.attributes ), species_to_lengths=deepcopy( self.species_to_lengths ) )
+        for component in self.components:
+            new.add_component( deepcopy( component ) )
+        return new
+    
 class Component( object ):
 
     def __init__( self, src='', start=0, size=0, strand=None, src_size=None, text='' ):
-        self.alignment = None
+        self._alignment = None
         self.src = src
         self.start = start          # Nota Bene:  start,size,strand are as they
         self.size = size            # .. appear in a MAF file-- origin-zero, end
 
     def get_src_size( self ):
         if self._src_size == None:
-            if self.alignment == None: raise "component has no src_size"
-            self._src_size = self.alignment.src_size( self.src )
+            if self._alignment == None: raise "component has no src_size"
+            self._src_size = self._alignment().src_size( self.src )
         return self._src_size
     def set_src_size( self,src_size ):
         self._src_size = src_size
         comp.reverse()
         text = "".join(comp)
         new = Component( self.src, start, self.size, strand, self._src_size, text )
-        new.alignment = self.alignment
+        new._alignment = self._alignment
         return new
 
     def slice( self, start, end ):
         new = Component( src=self.src, start=self.start, strand=self.strand, src_size=self._src_size )
-        new.alignment = self.alignment
+        new._alignment = self._alignment
         new.text = self.text[start:end]
 
         #for i in range( 0, start ):
         
     def __ne__( self, other ):
         return not( self.__eq__( other ) )
+    
+    def __deepcopy__( self, memo ):
+        new = Component( src=self.src, start=self.start, size=self.size, strand=self.strand, src_size=self._src_size, text=self.text )
+        new._alignment = self._alignment
+        new.quality = self.quality
+        new.synteny_left = self.synteny_left
+        new.synteny_right = self.synteny_right
+        new.synteny_empty = self.synteny_empty
+        new.empty = self.empty
+        new.index = self.index
+        return new
 
 def get_reader( format, infile, species_to_lengths=None ):
     import bx.align.maf, bx.align.axt, bx.align.lav

lib/bx/interval_index_file.py

         return self.indexed_access_class( data_filename, index_filename, keep_open, **kwargs )
     def get( self, src, start, end ):
         blocks = []
-        for index in self.indexes: blocks.extend( index.get( src, start, end ) )
+        for block in self.get_as_iterator( src, start, end ): blocks.extend( block )
         return blocks
+    def get_as_iterator( self, src, start, end ):
+        for index in self.indexes:
+            for block in index.get_as_iterator( src, start, end ):
+                yield block
     def close( self ):
         for index in self.indexes:
             index.close()
             return f
 
     def get( self, src, start, end ):
-        intersections = self.indexes.find( src, start, end )
-        return map( self.get_at_offset, [ val for start, end, val in intersections ] )
+        return [ val for val in self.get_as_iterator( src, start, end ) ]
+    def get_as_iterator( self, src, start, end ):
+        for val_start, val_end, val in self.indexes.find( src, start, end ):
+            yield self.get_at_offset( val )
 
     def get_at_offset( self, offset ):
         if self.f: