1. James Taylor
  2. bx-python

Commits

Bob Harris  committed 6eb590e

many fixes related to negative strand components

  • Participants
  • Parent commits 7c61b47
  • Branches default

Comments (0)

Files changed (3)

File lib/bx/align/core.py

View file
         pos is relative to the + strand, regardless of the component's strand.
 
         """
-        assert (self.strand != '-')
-        if not self.index:
-            self.index = list()
-            for x in range( len(self.text) ):
-                if not self.text[x] == '-':
-                    self.index.append(x)
-            self.index.append( len(self.text) )
         start,end = self.get_forward_strand_start(),self.get_forward_strand_end()
         if pos < start or pos > end:
             raise "Range error: %d not in %d-%d" % ( pos, start, end )
+        if not self.index:
+            self.index = list()
+            if (self.strand == '-'):
+                for x in range( len(self.text)-1,-1,-1 ):
+                    if not self.text[x] == '-':
+                        self.index.append(len(self.text)-1-x)
+                self.index.append( len(self.text) )
+            else:
+                for x in range( len(self.text) ):
+                    if not self.text[x] == '-':
+                        self.index.append(x)
+                self.index.append( len(self.text) )
         x = None
         try:
             x = self.index[ pos - start ]

File scripts/maf_extract_ranges.py

View file
     # Iterate over input MAF
 
     for maf in bx.align.maf.Reader( sys.stdin ):
-        ref_component = maf.components[ refindex ]
-        if ref_component.strand == '-':
-            maf = maf.reverse_complement()
-            ref_component = maf.components[ refindex ]
+        ref = maf.components[ refindex ]
         # Find overlap with reference component
-        intersections = intersecter.find( ref_component.start, ref_component.end )
+        intersections = intersecter.find( ref.get_forward_strand_start(), ref.get_forward_strand_end() )
         # Keep output maf ordered
         intersections.sort()
         # Write each intersecting block
         for interval in intersections: 
-            start = max( interval.start, ref_component.start )
-            end = min( interval.end, ref_component.end )
+            start = max( interval.start, ref.get_forward_strand_start() )
+            end = min( interval.end, ref.get_forward_strand_end() )
             sliced = maf.slice_by_component( refindex, start, end ) 
             good = True
             for c in sliced.components: 

File scripts/maf_extract_ranges_indexed.py

View file
 NOTE: If two intervals overlap the same block it will be written twice. With
       non-overlapping intervals and --chop this is never a problem. 
       
+NOTE: Intervals are relative to the + strand, regardless of the strands in
+      the alignments.
+
+      
 WARNING: bz2/bz2t support and file cache support are new and not as well
          tested. 
 
    -s, --src=s:      Use this src for all intervals
    -p, --prefix=p:   Prepend this to each src before lookup
    -d, --dir=d:      Write each interval as a separate file in this directory
-   -S, --strand:     Strand is included as an additional column, and the blocks are reverse complemented so that they are always on the plus strand w/r/t the src species.
+   -S, --strand:     Strand is included as an additional column, and the blocks are reverse complemented (if necessary) so that they are always on that strand w/r/t the src species.
    -C, --usecache:   Use a cache that keeps blocks of the MAF files in memory (requires ~20MB per MAF)
 """
 
         out = bx.align.maf.Writer( sys.stdout )
     # Iterate over input ranges 
     for line in sys.stdin:
-        strand = "+"
+        strand = None
         fields = line.split()
         if fixed_src:
             src, start, end = fixed_src, int( fields[0] ), int( fields[1] )
         if chop:
             for block in blocks: 
                 ref = block.get_component_by_src( src )
-                if ref.strand == '-':
-                    block = block.reverse_complement()
-                    ref = block.get_component_by_src( src )
-                slice_start = max( start, ref.start )
-                slice_end = min( end, ref.end )
+                slice_start = max( start, ref.get_forward_strand_start() )
+                slice_end = min( end, ref.get_forward_strand_end() )
                 sliced = block.slice_by_component( ref, slice_start, slice_end ) 
                 # If the block is shorter than the minimum allowed size, stop
                 if mincols and ( sliced.text_size < mincols ):
                 # Keep only components that are not empty
                 sliced.components = [ c for c in sliced.components if c.size > 0 ]
                 # Reverse complement if needed
-                if strand != ref.strand: 
+                if ( strand != None ) and ( ref.strand != strand ): 
                     sliced = sliced.reverse_complement()
                 # Write the block
                 out.write( sliced )