James Taylor avatar James Taylor committed f452c9d

NibFile now checks for negative lengths (+ tests and some doc cleanup)

Comments (0)

Files changed (3)

lib/bx/seq/nib.py

         self.length = struct.unpack("%sL" % self.byte_order, file.read(NIB_LENGTH_SIZE))[0]
 
     def raw_fetch(self, start, length):
+        # Check parameters
+        assert start >= 0, "Start must be greater than 0"
+        assert length >= 0, "Length must be greater than 0"
+        assert start + length <= self.length, "Interval beyond end of sequence"
         # Read block of bytes containing sequence
         block_start = int(math.floor(start / 2))
         block_end = int(math.floor((start + length - 1) / 2))
         block_len = block_end + 1 - block_start
         self.file.seek(NIB_MAGIC_SIZE + NIB_LENGTH_SIZE + block_start)
         raw = self.file.read(block_len)
+        # Unpack compressed block into a character string and return
         return _nib.translate_raw_data( raw, start, length  )
 
 class NibReader(SeqReader):

lib/bx/seq/nib_tests.py

         # Test near end of file also
         check_get( nibfile, valid_seq_len - 10, 10 )
         check_get( nibfile, valid_seq_len - 11, 11 )
+        # Test really short gets
+        check_get( nibfile, 0, 0 )
+        check_get( nibfile, 1, 0 )
+        check_get( nibfile, 0, 1 )
+        check_get( nibfile, 1, 1 )
+        # Test negative length
+        self.assertRaises( AssertionError, nibfile.get, 20, -1 )
 
 def check_get( nibfile, start, len ):
     ## print "expect: |%r|" % valid_seq[start:start+len]

lib/bx/seq/seq.py

 Classes to support "biological sequence" files.
 
 :Author: Bob Harris (rsharris@bx.psu.edu)
-
-A biological sequence is a sequence of bytes or characters.  Usually these
-represent DNA (A,C,G,T), proteins, or some variation of those.
-
-class attributes:
-
-  file:    file object containing the sequence
-  revcomp: whether gets from this sequence should be reverse-complemented
-             False => no reverse complement
-             True  => (same as "-5'")
-             "maf" => (same as "-5'")
-             "+5'" => minus strand is from plus strand's 5' end (same as "-3'")
-             "+3'" => minus strand is from plus strand's 3' end (same as "-5'")
-             "-5'" => minus strand is from its 5' end (as per MAF file format)
-             "-3'" => minus strand is from its 3' end (as per genome browser,
-                      but with origin-zero)
-  name:    usually a species and/or chromosome name (e.g. "mule.chr5");  if
-           the file contains a name, that overrides this one
-  gap:     gap character that aligners should use for gaps in this sequence
 """
 
 # DNA reverse complement table
            "                                                                " \
            "                                                                "
 
+class SeqFile(object):
+    """
+    A biological sequence is a sequence of bytes or characters.  Usually these
+    represent DNA (A,C,G,T), proteins, or some variation of those.
 
-class SeqFile(object):
+    class attributes:
+
+        file:    file object containing the sequence
+        revcomp: whether gets from this sequence should be reverse-complemented
+                 False => no reverse complement
+                 True  => (same as "-5'")
+                 "maf" => (same as "-5'")
+                 "+5'" => minus strand is from plus strand's 5' end (same as "-3'")
+                 "+3'" => minus strand is from plus strand's 3' end (same as "-5'")
+                 "-5'" => minus strand is from its 5' end (as per MAF file format)
+                 "-3'" => minus strand is from its 3' end (as per genome browser,
+                          but with origin-zero)
+        name:    usually a species and/or chromosome name (e.g. "mule.chr5");  if
+                 the file contains a name, that overrides this one
+        gap:     gap character that aligners should use for gaps in this sequence
+    """
 
     def __init__(self, file=None, revcomp=False, name="", gap=None):
+        
+        
         self.file = file
         if   (revcomp == True):  self.revcomp = "-5'"
         elif (revcomp == "+3'"): self.revcomp = "-5'"
         return text
 
     def get(self, start, length):
-        assert (start >= 0), \
-            "attempt to fetch from sequence location %s" % start
-        assert (start + length - 1 < self.length), \
-            "attempt to fetch from beyond sequence end (%s..%s > %s)" \
-          % (start,start+length,self.length)
-        if (not self.revcomp):
-            return self.raw_fetch(start,length)
-        if (self.revcomp == "-3'"):
+        """
+        Fetch subsequence starting at position `start` with length `length`. 
+        This method is picky about parameters, the requested interval must 
+        have non-negative length and fit entirely inside the NIB sequence,
+        the returned string will contain exactly 'length' characters, or an
+        AssertionError will be generated.
+        """
+        # Check parameters
+        assert length >= 0, "Length must be non-negative (got %d)" % length 
+        assert start >= 0,"Start must be greater than 0 (got %d)" % start
+        assert start + length <= self.length, \
+            "Interval beyond end of sequence (%s..%s > %s)" % ( start, start + length, self.length )
+        # Fetch sequence and reverse complement if necesary
+        if not self.revcomp:
+            return self.raw_fetch( start, length )
+        if self.revcomp == "-3'":
             return self.reverse_complement(self.raw_fetch(start,length))
-        assert (self.revcomp == "-5'"), "unrecognized reverse complement scheme"
+        assert self.revcomp == "-5'", "unrecognized reverse complement scheme"
         start = self.length - (start+length)
         return self.reverse_complement(self.raw_fetch(start,length))
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.