Commits

Daniel Blankenberg committed 810f672

Rework wiggle readers. These now return zero-based (half-open) coordinates.

Add tests for wiggle readers.

  • Participants
  • Parent commits 13d4871

Comments (0)

Files changed (2)

File lib/bx/wiggle.py

 Support for scores in the `wiggle`_ file format used by the UCSC Genome 
 Browser.
 
+The positions in the wiggle format are 1-relative, however,
+the positions returned match the BED/interval format which is zero-based, half-open.
+
 .. _wiggle: http://genome.ucsc.edu/goldenPath/help/wiggle.html
 """
 
     return dict( [ field.split( '=' ) for field in line.split()[1:] ] )
 
 def IntervalReader( f ):
-
+    """
+    Iterator yielding chrom, start, end, strand, value.
+    Values are zero-based, half-open.
+    Regions which lack a score are ignored.
+    """
     current_chrom = None
     current_pos = None
     current_step = None
     mode = "bed"
 
     for line in f:
-        if line.startswith( "track" ) or line.startswith( "#" ) or line.isspace():
+        if line.isspace() or line.startswith( "track" ) or line.startswith( "#" ) or line.startswith( "browser" ):
             continue
         elif line.startswith( "variableStep" ):
             header = parse_header( line )
         elif line.startswith( "fixedStep" ):
             header = parse_header( line )
             current_chrom = header['chrom']
-            current_pos = int( header['start'] )
+            current_pos = int( header['start'] ) - 1
             current_step = int( header['step'] )
             if 'span' in header: current_span = int( header['span'] )
             else: current_span = 1
         elif mode == "bed":
             fields = line.split()
             if len( fields ) > 3:
-                chrom = fields[0]
-                start = int( fields[1] )
-                end = int( fields[2] )
-                val = float( fields[3] )
-                yield chrom, start, end, strand, val
-            else:
-                yield fields[0], fields[1], fields[2], strand, fields[3]
+                if len( fields ) > 5:
+                    yield fields[0], int( fields[1] ), int( fields[2] ), fields[5], float( fields[3] )
+                else:
+                    yield fields[0], int( fields[1] ), int( fields[2] ), strand, float( fields[3] )
         elif mode == "variableStep": 
             fields = line.split()
-            pos = int( fields[0] )
-            val = float( fields[1] )
-            yield current_chrom, pos, pos+current_span, strand, val
+            pos = int( fields[0] ) - 1
+            yield current_chrom, pos, pos + current_span, strand, float( fields[1] )
         elif mode == "fixedStep":
-            val = float( line.split()[0] )
+            yield current_chrom, current_pos, current_pos + current_span, strand, float( line.split()[0] )
             current_pos += current_step
-            yield current_chrom, current_pos, current_pos+current_span, strand, val
         else:
             raise "Unexpected input line: %s" % line.strip()
 
 
-
 class Reader( object ):
-
+    """
+    Iterator yielding chrom, position, value.
+    Values are zero-based.
+    Regions which lack a score are ignored.
+    """
     def __init__( self, f ):
-        self.file=f
-        self.current_chrom = None
-        self.current_pos = None
-        self.current_step = None
-        self.current_span = None
-        self.mode = "bed"
+        self.file = f
         
     def __iter__( self ):
-        return self
-
-    def next( self ):
-        line = self.file.readline()
-        if not line: raise StopIteration
-        if line.startswith( "track" ) or line.startswith( "#" ) or line.isspace():
-            return self.next()
-        elif line.startswith( "variableStep" ):
-            header = parse_header( line )
-            self.current_chrom = header['chrom']
-            self.current_pos = None
-            self.current_step = None
-            if 'span' in header: self.current_span = int( header['span'] )
-            else: self.current_span = 1
-            self.mode = "variableStep"
-            return self.next()
-        elif line.startswith( "fixedStep" ):
-            header = parse_header( line )
-            self.current_chrom = header['chrom']
-            self.current_pos = int( header['start'] )
-            self.current_step = int( header['step'] )
-            if 'span' in header: self.current_span = int( header['span'] )
-            else: self.current_span = 1
-            self.mode = "fixedStep"
-            return self.next()
-        elif self.mode == "bed":
-            fields = line.split()
-            if len( fields ) > 3:
-                chrom = fields[0]
-                start = int( fields[1] )
-                end = int( fields[2] )
-                val = float( fields[3] )
-                for i in range( start, end ):
-                    return chrom, i, val
-            else:
-                return fields[0], fields[1], fields[2]
-        elif self.mode == "variableStep": 
-            fields = line.split()
-            pos = int( fields[0] )
-            val = float( fields[1] )
-            for i in range( self.current_span ):
-                return self.current_chrom, pos+i, val
-        elif mode == "fixedStep":
-            val = float( line.split()[0] )
-            pos = self.current_pos
-            for i in range( self.current_span ):
-                return self.current_chrom, pos+i, val
-            self.current_pos += self.current_span
-        else:
-            raise "Unexpected input line: %s" % line.strip()
+        for chrom, start, end, strand, val in IntervalReader( self.file ):
+            for pos in xrange( start, end ):
+                yield chrom, pos, val

File lib/bx/wiggle_tests.py

+"""
+Tests for `bx.wiggle`.
+"""
+
+import unittest
+from bx import wiggle
+from StringIO import StringIO
+
+# A modified version of UCSC's example wiggle, taken from http://genome.ucsc.edu/goldenPath/help/wiggleExample.txt
+test_wig = """browser position chr19:59302001-59311000
+browser hide all
+browser pack refGene encodeRegions
+browser full altGraph
+#	5 base wide bar graph, autoScale is on by default == graphing
+#	limits will dynamically change to always show full range of data
+#	in viewing window, priority = 20 positions this as the second graph
+#	Note, zero-relative, half-open coordinate system in use for bed format
+track type=wiggle_0 name="Bed Format" description="BED format" visibility=full color=200,100,0 altColor=0,100,200 priority=20
+chr19 59302000 59302005 -1.0
+chr19 59302300 59302305 -0.75
+#	4 base wide bar graph at arbitrarily spaced positions,
+#	threshold line drawn at y=11.76
+#	autoScale off viewing range set to [0:25]
+#	priority = 10 positions this as the first graph
+#	Note, one-relative coordinate system in use for this format
+track type=wiggle_0 name="variableStep" description="variableStep format" visibility=full autoScale=off viewLimits=0.0:25.0 color=255,200,0 yLineMark=11.76 yLineOnOff=on priority=10
+variableStep chrom=chr19 span=4
+59304701 10.0
+59304901 12.5
+#	3 base wide points graph at every 300 bases, 50 pixel high graph
+#	autoScale off and viewing range set to [0:1000]
+#	priority = 30 positions this as the third graph
+#	Note, one-relative coordinate system in use for this format
+track type=wiggle_0 name="fixedStep" description="fixed step" visibility=full autoScale=off viewLimits=0:1000 color=0,200,100 maxHeightPixels=100:50:20 graphType=points priority=30
+fixedStep chrom=chr19 start=59307401 step=300 span=3
+1000
+ 900
+ 800
+"""
+
+interval_reader_result = [
+"chr19,59302000,59302005,+,-1.0",
+"chr19,59302300,59302305,+,-0.75",
+"chr19,59304700,59304704,+,10.0",
+"chr19,59304900,59304904,+,12.5",
+"chr19,59307400,59307403,+,1000.0",
+"chr19,59307700,59307703,+,900.0",
+"chr19,59308000,59308003,+,800.0"
+]
+
+position_reader_result = [
+"chr19,59302000,-1.0",
+"chr19,59302001,-1.0",
+"chr19,59302002,-1.0",
+"chr19,59302003,-1.0",
+"chr19,59302004,-1.0",
+"chr19,59302300,-0.75",
+"chr19,59302301,-0.75",
+"chr19,59302302,-0.75",
+"chr19,59302303,-0.75",
+"chr19,59302304,-0.75",
+"chr19,59304700,10.0",
+"chr19,59304701,10.0",
+"chr19,59304702,10.0",
+"chr19,59304703,10.0",
+"chr19,59304900,12.5",
+"chr19,59304901,12.5",
+"chr19,59304902,12.5",
+"chr19,59304903,12.5",
+"chr19,59307400,1000.0",
+"chr19,59307401,1000.0",
+"chr19,59307402,1000.0",
+"chr19,59307700,900.0",
+"chr19,59307701,900.0",
+"chr19,59307702,900.0",
+"chr19,59308000,800.0",
+"chr19,59308001,800.0",
+"chr19,59308002,800.0"
+]
+
+def test_reader():
+    #Test position reader
+    assert position_reader_result == [ ",".join( map( str, value ) ) for value in wiggle.Reader( StringIO( test_wig ) ) ]
+
+def test_interval_reader():
+    #Test interval reader reader
+    assert interval_reader_result == [ ",".join( map( str, value ) ) for value in wiggle.IntervalReader( StringIO( test_wig ) ) ]