Commits

sontek  committed ac9fa7a

Change raised exceptions from strings to ValueErrors

  • Participants
  • Parent commits 8600c70

Comments (0)

Files changed (11)

File lib/bx/align/axt.py

 """
-Support for reading and writing the `AXT`_ format used for pairwise 
+Support for reading and writing the `AXT`_ format used for pairwise
 alignments.
 
 .. _AXT: http://genome.ucsc.edu/goldenPath/help/axt.html
 
 class Reader( object ):
     """Iterate over all axt blocks in a file in order"""
-    
+
     def __init__( self, file, species1 = None, species2=None, species_to_lengths=None, support_ids=False ):
         self.file = file
         # nota bene: (self.species1 = species1 or "species1") is incorrect if species1=""
 class ReaderIter( object ):
     def __init__( self, reader ):
         self.reader = reader
-    def __iter__( self ): 
+    def __iter__( self ):
         return self
     def next( self ):
         v = self.reader.next()
 
     def write( self, alignment ):
         if (len(alignment.components) != 2):
-            raise "%d-component alignment is not compatible with axt" % \
-                   len(alignment.components)
+            raise ValueError("%d-component alignment is not compatible with axt" % \
+                   len(alignment.components))
         c1 = alignment.components[0]
         c2 = alignment.components[1]
 
     if not line: return
     fields = line.split()
     if (len(fields) < 9) or ((not support_ids) and (len(fields) > 9)):
-        raise "bad axt-block header: %s" % line
+        raise ValueError("bad axt-block header: %s" % line)
     attributes = {}
     if (len(fields) > 9):
         attributes["id"] = "_".join(fields[9:])
     seq1 = readline( file )
-    if not line or line.isspace(): raise "incomplete axt-block; header: %s" % line
+    if not line or line.isspace(): raise ValueError("incomplete axt-block; header: %s" % line)
     seq2 = readline( file )
-    if not line or line.isspace(): raise "incomplete axt-block; header: %s" % line
+    if not line or line.isspace(): raise ValueError("incomplete axt-block; header: %s" % line)
     # Build 2 component alignment
     alignment = Alignment(attributes=attributes,species_to_lengths=species_to_lengths)
     # Build component for species 1

File lib/bx/align/core.py

         component._alignment = weakref.ref( self )
         self.components.append( component )
         if component.text is not None:
-            if self.text_size == 0: 
+            if self.text_size == 0:
                 self.text_size = len( component.text )
-            elif self.text_size != len( component.text ): 
+            elif self.text_size != len( component.text ):
                 raise Exception( "Components must have same text length" )
 
     def get_score( self ):
 
     def __str__( self ):
         s = "a score=" + str( self.score )
-        for key in self.attributes: 
+        for key in self.attributes:
             s += " %s=%s" % ( key, self.attributes[key] )
         s += "\n"
         # Components
-        for c in self.components: 
+        for c in self.components:
             s += str( c )
             s += "\n"
         return s
         elif chrom in self.species_to_lengths:
             chrom_to_length = self.species_to_lengths
         else:
-            raise "no src_size (no length file for %s)" % species
+            raise ValueError("no src_size (no length file for %s)" % species)
         if type( chrom_to_length ) == int:         # (if it's a single length)
             return chrom_to_length
         if type( chrom_to_length ) == type( "" ):  # (if it's a file name)
             chrom_to_length = read_lengths_file( chrom_to_length )
             self.species_to_lengths[species] = chrom_to_length
-        if chrom not in chrom_to_length: raise "no src_size (%s has no length for %s)" % ( species, chrom )
+        if chrom not in chrom_to_length: raise ValueError("no src_size (%s has no length for %s)" % ( species, chrom ))
         return chrom_to_length[chrom]
 
     def get_component_by_src( self, src ):
     def get_component_by_src_start( self, src ):
         for c in self.components:
             if c.src.startswith( src ): return c
-        return None    
+        return None
 
     def slice( self, start, end ):
         new = Alignment( score=self.score, attributes=self.attributes )
             new.components.append( component.slice( start, end ) )
         new.text_size = end - start
         return new
-    
+
     def reverse_complement( self ):
         new = Alignment( score=self.score, attributes=self.attributes )
         for component in self.components:
             new.components.append( component.reverse_complement() )
         new.text_size = self.text_size
         return new
-    
+
     def slice_by_component( self, component_index, start, end ):
         """
         Return a slice of the alignment, corresponding to a coordinate interval in a specific component.
             ref = component_index
         else:
             raise ValueError( "can't figure out what to do" )
-        start_col = ref.coord_to_col( start )  
-        end_col = ref.coord_to_col( end )  
+        start_col = ref.coord_to_col( start )
+        end_col = ref.coord_to_col( end )
         if (ref.strand == '-'):
             (start_col,end_col) = (end_col,start_col)
         return self.slice( start_col, end_col )
-        
+
     def column_iter( self ):
         for i in range( self.text_size ):
             yield [ c.text[i] for c in self.components ]
             if seqs[i] is None: continue
             self.components[i].text = ''.join( seqs[i] )
         self.text_size = text_size
-        
+
     def __eq__( self, other ):
         if other is None or type( other ) != type( self ):
             return False
             if c1 != c2:
                 return False
         return True
-        
+
     def __ne__( self, other ):
         return not( self.__eq__( other ) )
-    
+
     def __deepcopy__( self, memo ):
         from copy import deepcopy
         new = Alignment( score=self.score, attributes=deepcopy( self.attributes ), species_to_lengths=deepcopy( self.species_to_lengths ) )
         for component in self.components:
             new.add_component( deepcopy( component ) )
         return new
-    
+
 class Component( object ):
 
     def __init__( self, src='', start=0, size=0, strand=None, src_size=None, text='' ):
 
     def __str__( self ):
         if self.empty:
-            rval = "e %s %d %d %s %d %s" % ( self.src, self.start, 
-                                             self.size, self.strand, 
+            rval = "e %s %d %d %s %d %s" % ( self.src, self.start,
+                                             self.size, self.strand,
                                              self.src_size, self.synteny_empty )
         else:
-            rval = "s %s %d %d %s %d %s" % ( self.src, self.start, 
-                                             self.size, self.strand, 
+            rval = "s %s %d %d %s %d %s" % ( self.src, self.start,
+                                             self.size, self.strand,
                                              self.src_size, self.text )
             if self.synteny_left and self.synteny_right:
-                rval += "\ni %s %s %d %s %d" % ( self.src, 
+                rval += "\ni %s %s %d %s %d" % ( self.src,
                                                  self.synteny_left[0], self.synteny_left[1],
                                                  self.synteny_right[0], self.synteny_right[1] )
         return rval
         if self.strand == '-': return self.src_size - self.end
         else: return self.start
     forward_strand_start = property( fget=get_forward_strand_start )
-        
+
     def get_forward_strand_end( self ):
         if self.strand == '-': return self.src_size - self.start
         else: return self.end
     forward_strand_end = property( fget=get_forward_strand_end)
 
     def reverse_complement( self ):
-        start = self.src_size - self.end 
+        start = self.src_size - self.end
         if self.strand == "+": strand = "-"
         else: strand = "+"
         comp = [ch for ch in self.text.translate(DNA_COMP)]
         start and end are relative to the + strand, regardless of the component's strand.
 
         """
-        start_col = self.coord_to_col( start )  
-        end_col = self.coord_to_col( end )  
+        start_col = self.coord_to_col( start )
+        end_col = self.coord_to_col( end )
         if (self.strand == '-'):
             (start_col,end_col) = (end_col,start_col)
         return self.slice( start_col, end_col )
-    
+
     def coord_to_col( self, pos ):
         """
         Return the alignment column index corresponding to coordinate pos.
         """
         start,end = self.get_forward_strand_start(),self.get_forward_strand_end()
         if pos < start or pos > end:
-            raise "Range error: %d not in %d-%d" % ( pos, start, end )
+            raise ValueError("Range error: %d not in %d-%d" % ( pos, start, end ))
         if not self.index:
             self.index = list()
             if (self.strand == '-'):
         except:
             raise Exception("Error in index.")
         return x
-    
-    
+
+
     def __eq__( self, other ):
         if other is None or type( other ) != type( self ):
             return False
         return ( self.src == other.src
                  and self.start == other.start
-                 and self.size == other.size            
-                 and self.strand == other.strand        
-                 and self._src_size == other._src_size   
+                 and self.size == other.size
+                 and self.strand == other.strand
+                 and self._src_size == other._src_size
                  and self.text == other.text
                  and self.synteny_left == other.synteny_left
                  and self.synteny_right == other.synteny_right
                  and self.synteny_empty == other.synteny_empty
                  and self.empty == other.empty )
-        
+
     def __ne__( self, other ):
         return not( self.__eq__( other ) )
-    
+
     def __deepcopy__( self, memo ):
         new = Component( src=self.src, start=self.start, size=self.size, strand=self.strand, src_size=self._src_size, text=self.text )
         new._alignment = self._alignment
     if format == "maf": return bx.align.maf.Reader( infile, species_to_lengths )
     elif format == "axt": return bx.align.axt.Reader( infile, species_to_lengths )
     elif format == "lav": return bx.align.lav.Reader( infile )
-    else: raise "Unknown alignment format %s" % format
+    else: raise ValueError("Unknown alignment format %s" % format)
 
 def get_writer( format, outfile, attributes={} ):
     import bx.align.maf, bx.align.axt, bx.align.lav
     if format == "maf": return bx.align.maf.Writer( outfile, attributes )
     elif format == "axt": return bx.align.axt.Writer( outfile, attributes )
     elif format == "lav": return bx.align.lav.Writer( outfile, attributes )
-    else: raise "Unknown alignment format %s" % format
+    else: raise ValueError("Unknown alignment format %s" % format)
 
 def get_indexed( format, filename, index_filename=None, keep_open=False, species_to_lengths=None ):
     import bx.align.maf, bx.align.axt, bx.align.lav
     if format == "maf": return bx.align.maf.Indexed( filename, index_filename, keep_open, species_to_lengths )
     elif format == "axt": return bx.align.axt.Indexed( filename, index_filename, keep_open, species_to_lengths )
     elif format == "lav": raise Exception("LAV support for Indexed has not been implemented")
-    else: raise "Unknown alignment format %s" % format
+    else: raise ValueError("Unknown alignment format %s" % format)
 
 def shuffle_columns( a ):
     """Randomize the columns of an alignment"""
     def coord_to_col( start, text, pos ):
         col = 0
         while start < pos:
-            if text[col] != '-': 
+            if text[col] != '-':
                 start += 1
-            col += 1 
+            col += 1
         return col

File lib/bx/align/lav.py

 """
 Support for reading and writing the LAV format produced by the `blastz`_
-pairwise aligner. 
+pairwise aligner.
 
 .. _blastz: http://www.bx.psu.edu/miller_lab/
 """
 
 	def write(self,alignment):
 		if (len(alignment.components) != 2):
-			raise "%d-component alignment is not compatible with lav" % \
-				   len(alignment.components)
+			raise ValueError("%d-component alignment is not compatible with lav" % \
+				   len(alignment.components))
 
 		c1 = alignment.components[0]
 		c2 = alignment.components[1]

File lib/bx/misc/readlengths.py

     """
     Returns a hash from sequence name to length.
     """
-    
+
     chrom_to_length = {}
     f = file ( name, "rt" )
     for line in f:
             chrom = fields[0]
             length = int( fields[1] )
         except:
-            raise "bad length file line: %s" % line
+            raise ValueError("bad length file line: %s" % line)
         if chrom in chrom_to_length and length != chrom_to_length[chrom]:
-            raise "%s has more than one length!" % chrom
+            raise ValueError("%s has more than one length!" % chrom)
         chrom_to_length[chrom] = length
     f.close()
     return chrom_to_length

File lib/bx/pwm/position_weight_matrix.py

             try:
                 if ncol == None: ncol = len(row)
                 elif ncol != len(row):
-                    raise "Align: __init__:alignment block:row %d does not have %d columns, it has %d" % (rownum,ncol,len(row))
+                    raise ValueError("Align: __init__:alignment block:row %d does not have %d columns, it has %d" % (rownum,ncol,len(row)))
             except:
                 print row
                 raise Exception('')
         nan = float('nan')
 
         matrix = zeros((align.nrows,align.ncols),float32)
-        
+
         # set to nans
         for ir in range( len(matrix) ):
             for ic in range(len( matrix[ir] )):
                 try:
                     (w,s) = self.parse_weight(count)
                 except ValueError:
-                    raise "pwm row %s has bad weight %s" % (" ".join(fields),t)
+                    raise ValueError("pwm row %s has bad weight %s" % (" ".join(fields),t))
 
                 # replace row counts with (values,scale)
                 rows[i][x] = (w,s)
     def score_seq(self,seq):
         if (type(seq[0]) == dict):
             return self.score_quantum_seq(seq)
- 
+
         scores = []
         for start in range( len(seq)):
             if start + len(self) > len(seq): break
         elif self.format == 'transfac':
             return self.read_as_transfac()
         else:
-            raise "unknown weight matrix file format: '%s'" % self.format
+            raise ValueError("unknown weight matrix file format: '%s'" % self.format)
 
     def read_as_basic(self):
         tfId    = None
         pwmRows = None
-    
+
         alphabet = ['A','C','G','T']
         while (True):
             line = self.file.readline()
                 pwmRows.append( tokens )
         if pwmRows != None: # we've finished collecting a desired matrix
             yield PositionWeightMatrix(tfId,pwmRows,alphabet,background=self.background,score_correction=self.score_correction)
-    
+
     def read_as_transfac(self):
         self.tfToPwm = {}
         tfId    = None
         pwmRows = None
-    
+
         while (True):
             line = self.file.readline()
             if (not line): break
                         print >>sys.stderr, "Failed to read", tfId
                     tfId    = None
                     pwmRows = None
-    
+
                 tokens = line.split (None, 2)
                 if len(tokens) != 2:
                     raise ValueError, "bad line, need two fields (%s)" % self.where()
                         % (tfId,self.where())
                 pwmRows = []          # start collecting a desired matrix
                 continue
-    
+
             # if we're not collecting, skip this line
             if pwmRows == None: continue
             if len(line) < 1:   continue
             if line.startswith('NA'):
                 words = line.strip().split()
                 tfId =  tfId + "\t" + " ".join(words[1:])
-    
+
             # handle a P0 line
             if line.startswith("P0"):
                 alphabet = line.split()[1:]
                 if len(alphabet) < 2:
                     raise ValueError, "bad line, need more dna (%s)" % self.where()
                 continue
-    
+
             # handle a 01,02,etc. line
             if line[0].isdigit():
                 tokens = line.split ()

File lib/bx/seq/core.py

 def seq_file (file, format=None, revcomp=False, name="", gap=None, contig=None):
     if (format == None): format = infer_format(file)
     if (contig != None) and (format not in ["fasta",None]):
-        raise "Contigs are not supported for format %s" % format
+        raise ValueError("Contigs are not supported for format %s" % format)
     if   (format == "fasta"): return fasta.FastaFile (file, revcomp=revcomp, name=name, gap=gap, contig=contig)
     elif (format == "nib"):   return nib.NibFile     (file, revcomp=revcomp, name=name, gap=gap)
     elif (format == "qdna"):  return qdna.QdnaFile   (file, revcomp=revcomp, name=name, gap=gap)
     else:
         if (format == None): format = ""
         else:                format = " " + format
-        raise "Unknown sequence format%s in %s" % (format,file.name)
+        raise ValueError("Unknown sequence format%s in %s" % (format,file.name))
 
 
 def seq_reader (file, format=None, revcomp=False, name="", gap=None):
     if   (format == "fasta"): return fasta.FastaReader (file, revcomp=revcomp, name=name, gap=gap)
     elif (format == "nib"):   return nib.NibReader     (file, revcomp=revcomp, name=name, gap=gap)
     elif (format == "qdna"):  return qdna.QdnaReader   (file, revcomp=revcomp, name=name, gap=gap)
-    else: raise "Unknown sequence format %s" % format
+    else: raise ValueError("Unknown sequence format %s" % format)
 
 
 def seq_writer (outfile, format=None, name=""):
     if   (format == "fasta"): return fasta.FastaWriter (outfile)
     elif (format == "nib"):   return nib.NibWriter     (outfile)
     elif (format == "qdna"):  return qdna.QdnaWriter   (outfile)
-    else: raise "Unknown sequence format %s" % format
+    else: raise ValueError("Unknown sequence format %s" % format)
 
 
 def infer_format (file):

File lib/bx/seq/qdna.py

             if (magic == qdnaMagicSwap):
                 self.byte_order = "<"
             else:
-                raise "not a quantum-dna file (magic=%08X)" % magic
+                raise ValueError("not a quantum-dna file (magic=%08X)" % magic)
 
         self.magic = magic
 
         self.version = struct.unpack("%sL" % self.byte_order,
                                      self.file.read(4))[0]
         if (self.version not in [0x100,0x200]):
-            raise "unsupported quantum-dna (version=%08X)" % self.version
+            raise ValueError("unsupported quantum-dna (version=%08X)" % self.version)
 
         self.headerLength = struct.unpack("%sL" % self.byte_order,
                                           self.file.read(4))[0]
         if (self.headerLength < 0x10):
-            raise "unsupported quantum-dna (header len=%08X)" % self.headerLength
+            raise ValueError("unsupported quantum-dna (header len=%08X)" % self.headerLength)
         if (self.version == 0x100) and (self.headerLength != 0x10):
-            raise "unsupported quantum-dna (version 1.0 header len=%08X)" % self.headerLength
+            raise ValueError("unsupported quantum-dna (version 1.0 header len=%08X)" % self.headerLength)
 
         self.seqOffset  = struct.unpack("%sL" % self.byte_order,
                                         self.file.read(4))[0]
 
             fields = line.split(None)
             if (len(fields) != 5):
-                raise "wrong vector size (line %d)" % lineNum
+                raise ValueError("wrong vector size (line %d)" % lineNum)
 
             try:
                 codeNum = int(fields[0],16)
             except:
-                raise "bad character code %s (line %d)" \
-                    % (fields[0],lineNum)
+                raise ValueError("bad character code %s (line %d)" \
+                    % (fields[0],lineNum))
 
             if (not 0 <= codeNum <= 255):
-                raise "character code %s is outside the valid range (line %d)" \
-                     % (fields[0],lineNum)
+                raise ValueError("character code %s is outside the valid range (line %d)" \
+                     % (fields[0],lineNum))
 
             if (chr(codeNum) in codeToProbs):
-                raise "character code %s appears more than once (line %d)" \
-                     % (fields[0],lineNum)
+                raise ValueError("character code %s appears more than once (line %d)" \
+                     % (fields[0],lineNum))
 
             try:
                 vec = {}
                     if (p < 0) or (p > 1): raise ValueError
                     vec[alphabet[ix-1]] = p
             except:
-                raise "%s is a bad probability value (line %d)" \
-                     % (fields[ix],lineNum)
+                raise ValueError("%s is a bad probability value (line %d)" \
+                     % (fields[ix],lineNum))
 
             codeToProbs[chr(codeNum)] = vec
 

File lib/bx/wiggle.py

 """
-Support for scores in the `wiggle`_ file format used by the UCSC Genome 
+Support for scores in the `wiggle`_ file format used by the UCSC Genome
 Browser.
 
 The positions in the wiggle format are 1-relative, however,
                     yield fields[0], int( fields[1] ), int( fields[2] ), fields[5], float( fields[3] )
                 else:
                     yield fields[0], int( fields[1] ), int( fields[2] ), strand, float( fields[3] )
-        elif mode == "variableStep": 
+        elif mode == "variableStep":
             fields = line.split()
             pos = int( fields[0] ) - 1
             yield current_chrom, pos, pos + current_span, strand, float( fields[1] )
             yield current_chrom, current_pos, current_pos + current_span, strand, float( line.split()[0] )
             current_pos += current_step
         else:
-            raise "Unexpected input line: %s" % line.strip()
+            raise ValueError("Unexpected input line: %s" % line.strip())
 
 
 class Reader( object ):
     """
     def __init__( self, f ):
         self.file = f
-        
+
     def __iter__( self ):
         for chrom, start, end, strand, val in IntervalReader( self.file ):
             for pos in xrange( start, end ):

File scripts/axt_to_lav.py

 #!/usr/bin/env python
 
 """
-Application to convert AXT file to LAV file. Reads an AXT file from standard 
-input and writes a LAV file to standard out; some statistics are written to 
+Application to convert AXT file to LAV file. Reads an AXT file from standard
+input and writes a LAV file to standard out; some statistics are written to
 standard error.
 
 usage: %prog primary_spec secondary_spec [--silent] < axt_file > lav_file
 Each spec is of the form seq_file[:species_name]:lengths_file.
 
 - seq_file should be a format string for the file names for the individual
-  sequences, with %s to be replaced by the alignment's src field.  For 
-  example, "hg18/%s.nib" would prescribe files named "hg18/chr1.nib", 
+  sequences, with %s to be replaced by the alignment's src field.  For
+  example, "hg18/%s.nib" would prescribe files named "hg18/chr1.nib",
   "hg18/chr2.nib", etc.
 
-- species_name is optional.  If present, it is prepended to the alignment's 
+- species_name is optional.  If present, it is prepended to the alignment's
   src field.
 
 - Lengths files provide the length of each chromosome (lav format needs this
 
 		fields = line.split ()
 		if (len(fields) != 2):
-			raise "bad lengths line (%s:%d): %s" % (fileName,lineNumber,line)
+			raise ValueError("bad lengths line (%s:%d): %s" % (fileName,lineNumber,line))
 
 		chrom = fields[0]
 		try:
 			length = int(fields[1])
 		except:
-			raise "bad lengths line (%s:%d): %s" % (fileName,lineNumber,line)
+			raise ValueError("bad lengths line (%s:%d): %s" % (fileName,lineNumber,line))
 
 		if (chrom in chromToLength):
-			raise "%s appears more than once (%s:%d): %s" \
-			    % (chrom,fileName,lineNumber)
+			raise ValueError("%s appears more than once (%s:%d): %s" \
+			    % (chrom,fileName,lineNumber))
 
 		chromToLength[chrom] = length
 

File scripts/axt_to_maf.py

 #!/usr/bin/env python
 
 """
-Application to convert AXT file to MAF file. Reads an AXT file from standard 
-input and writes a MAF file to standard out;  some statistics are written to 
+Application to convert AXT file to MAF file. Reads an AXT file from standard
+input and writes a MAF file to standard out;  some statistics are written to
 standard error.
 
 axt_to_maf primary:lengths_file secondary:lengths_file < axt_file > maf_file
   --silent: prevents stats report
-  
+
   Lengths files provide the length of each chromosome (maf format needs this
   information but axt file does not contain it).  The format is a series of
   lines of the form:
-  
+
     <chromosome name> <length>
-  
+
   The chromosome field in each axt block must match some <chromosome name> in
   the lengths file.
 """
 
 		fields = line.split ()
 		if (len(fields) != 2):
-			raise "bad lengths line (%s:%d): %s" % (fileName,lineNumber,line)
+			raise ValueError("bad lengths line (%s:%d): %s" % (fileName,lineNumber,line))
 
 		chrom = fields[0]
 		try:
 			length = int(fields[1])
 		except:
-			raise "bad lengths line (%s:%d): %s" % (fileName,lineNumber,line)
+			raise ValueError("bad lengths line (%s:%d): %s" % (fileName,lineNumber,line))
 
 		if (chrom in chromToLength):
-			raise "%s appears more than once (%s:%d): %s" \
-			    % (chrom,fileName,lineNumber)
+			raise ValueError("%s appears more than once (%s:%d): %s" \
+			    % (chrom,fileName,lineNumber))
 
 		chromToLength[chrom] = length
 

File scripts/tfloc_summary.py

 #!/usr/bin/env python2.3
 
 """
-Read TFLOC output from stdin and write out a summary in which the nth line 
+Read TFLOC output from stdin and write out a summary in which the nth line
 contains the number of sites found in the nth alignment of the input.
 
 TODO: This is a very special case; should it be here?
 for line in sys.stdin:
     if line[0].isdigit():
         current_index = int( line )
-        max_index = max( current_index, max_index )        
+        max_index = max( current_index, max_index )
     elif line[0] == "'":
         try: counts[ current_index ] += 1
         except: counts[ current_index ] = 1
     else:
-        raise "Invalid input line " + line
+        raise ValueError("Invalid input line " + line)
 
 for i in range( max_index + 1 ):
     print counts.get( i, 0 )