Commits

Anonymous committed bbda064

Add automatic conversions for SAM to BAM and SAM to summary tree. Enable SAM datasets to be visualized in Trackster.

  • Participants
  • Parent commits c28e9ef

Comments (0)

Files changed (8)

datatypes_conf.xml.sample

     <datatype extension="qualsolid" type="galaxy.datatypes.qualityscore:QualityScoreSOLiD" display_in_upload="true"/>
     <datatype extension="qual454" type="galaxy.datatypes.qualityscore:QualityScore454" display_in_upload="true"/>
     <datatype extension="Roadmaps" type="galaxy.datatypes.assembly:Roadmaps" display_in_upload="false"/>
-    <datatype extension="sam" type="galaxy.datatypes.tabular:Sam" display_in_upload="true"/>
+    <datatype extension="sam" type="galaxy.datatypes.tabular:Sam" display_in_upload="true">
+        <converter file="sam_to_bam.xml" target_datatype="bam"/>
+        <converter file="sam_to_summary_tree_converter.xml" target_datatype="summary_tree"/>
+    </datatype>
     <datatype extension="scf" type="galaxy.datatypes.binary:Scf" mimetype="application/octet-stream" display_in_upload="true"/>
     <datatype extension="Sequences" type="galaxy.datatypes.assembly:Sequences" display_in_upload="false"/>
     <datatype extension="sff" type="galaxy.datatypes.binary:Sff" mimetype="application/octet-stream" display_in_upload="true"/>

lib/galaxy/datatypes/converters/bam_to_summary_tree_converter.py

-#!/usr/bin/env python
-
-from __future__ import division
-
-import sys, os
-sys.stderr = open(os.devnull, 'w')  # suppress stderr as cython produces warning on some systems:
-                                    # csamtools.so:6: RuntimeWarning: __builtin__.file size changed
-
-from galaxy import eggs
-import pkg_resources
-
-if sys.version_info[:2] == (2, 4):
-    pkg_resources.require( "ctypes" )
-pkg_resources.require( "pysam" )
-
-from pysam import csamtools
-from galaxy.visualization.tracks.summary import *
-
-def main():
-   
-    input_fname = sys.argv[1]
-    index_fname = sys.argv[2]
-    out_fname = sys.argv[3]
-    
-    bamfile = csamtools.Samfile( filename=input_fname, mode='rb', index_filename=index_fname )
-    
-    st = SummaryTree(block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30)
-    for read in bamfile.fetch():
-        st.insert_range(bamfile.getrname(read.rname), read.pos, read.pos + read.rlen)
-    
-    st.write(out_fname)
-
-if __name__ == "__main__": 
-    main()

lib/galaxy/datatypes/converters/bam_to_summary_tree_converter.xml

 <tool id="CONVERTER_bam_to_summary_tree_0" name="Convert BAM to Summary Tree" version="1.0.0" hidden="true">
 <!--  <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
-  <command interpreter="python">bam_to_summary_tree_converter.py $input1 $bai $output1</command>
+  <command interpreter="python">sam_or_bam_to_summary_tree_converter.py --bam $input1 $bai $output1</command>
   <inputs>
     <page>
         <param format="bam" name="input1" type="data" label="Choose BAM file"/>

lib/galaxy/datatypes/converters/sam_or_bam_to_summary_tree_converter.py

+#!/usr/bin/env python
+
+from __future__ import division
+
+import sys, os, optparse
+sys.stderr = open(os.devnull, 'w')  # suppress stderr as cython produces warning on some systems:
+                                    # csamtools.so:6: RuntimeWarning: __builtin__.file size changed
+
+from galaxy import eggs
+import pkg_resources
+
+if sys.version_info[:2] == (2, 4):
+    pkg_resources.require( "ctypes" )
+pkg_resources.require( "pysam" )
+
+from pysam import csamtools
+from galaxy.visualization.tracks.summary import *
+
+def main():
+    parser = optparse.OptionParser()
+    parser.add_option( '-S', '--sam', action="store_true", dest="is_sam" )
+    parser.add_option( '-B', '--bam', action="store_true", dest="is_bam" )
+    options, args = parser.parse_args()
+    
+    if options.is_bam:
+        input_fname = args[0]
+        index_fname = args[1]
+        out_fname = args[2]
+        samfile = csamtools.Samfile( filename=input_fname, mode='rb', index_filename=index_fname )
+    elif options.is_sam:
+        input_fname = args[0]
+        out_fname = args[1]
+        samfile = csamtools.Samfile( filename=input_fname, mode='r' )
+    
+    st = SummaryTree(block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30)
+    for read in samfile.fetch():
+        st.insert_range( samfile.getrname( read.rname ), read.pos, read.pos + read.rlen )
+    
+    st.write(out_fname)
+
+if __name__ == "__main__": 
+    main()

lib/galaxy/datatypes/converters/sam_to_bam.xml

+<tool id="CONVERTER_sam_to_bam" name="Convert SAM to BAM" version="1.0.0">
+    <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+    <!-- Used on the metadata edit page. -->
+    <!-- FIXME: conversion will only work if headers for reference sequences are in input file.
+         To fix this: (a) merge sam_to_bam tool in tools with this conversion (like fasta_to_len 
+         conversion); and (b) define a datatype-specific way to set converter parameters.
+      -->
+    <command>samtools view -bS $input1 > $output 2> /dev/null </command>
+    <inputs>
+        <param name="input1" type="data" format="sam" label="SAM file"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="bam"/>
+    </outputs>
+    <help>
+    </help>
+</tool>

lib/galaxy/datatypes/converters/sam_to_summary_tree_converter.xml

+<tool id="CONVERTER_sam_to_summary_tree_0" name="Convert SAM to Summary Tree" version="1.0.0" hidden="true">
+<!--  <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+  <command interpreter="python">sam_or_bam_to_summary_tree_converter.py --sam $input1 $output1</command>
+  <inputs>
+    <page>
+        <param format="sam" name="input1" type="data" label="Choose sam file"/>
+    </page>
+   </inputs>
+  <outputs>
+    <data format="summary_tree" name="output1"/>
+  </outputs>
+  <help>
+  </help>
+</tool>

lib/galaxy/datatypes/tabular.py

         if result != 0:
             raise Exception('Result %s from %s' % (result, cmd))
     merge = staticmethod(merge)
-
+    
+    def get_track_type( self ):
+        return "ReadTrack", {"data": "bam", "index": "summary_tree"}
+    
 class Pileup( Tabular ):
     """Tab delimited data in pileup (6- or 10-column) format"""
     file_ext = "pileup"

lib/galaxy/visualization/tracks/data_providers.py

         max_low, max_high = get_bounds( results, 1, 2 )
                 
         return { 'data': results, 'message': message, 'max_low': max_low, 'max_high': max_high }
+        
+class SamDataProvider( BamDataProvider ):
+    
+    def __init__( self, converted_dataset=None, original_dataset=None, dependencies=None ):
+        """ Create SamDataProvider. """
+        
+        # HACK: to use BamDataProvider, original dataset must be BAM and 
+        # converted dataset must be BAI. Use BAI from BAM metadata.
+        if converted_dataset:
+            self.converted_dataset = converted_dataset.metadata.bam_index
+            self.original_dataset = converted_dataset
+        self.dependencies = dependencies
 
 class BBIDataProvider( TracksDataProvider ):
     """
     "tabix": { Vcf: VcfTabixDataProvider, Bed: BedTabixDataProvider, "default" : TabixDataProvider },
     "interval_index": IntervalIndexDataProvider,
     "bai": BamDataProvider,
+    "bam": SamDataProvider,
     "summary_tree": SummaryTreeDataProvider,
     "bigwig": BigWigDataProvider,
     "bigbed": BigBedDataProvider