Commits

Anonymous committed db204d5 Merge

merge upstream

Comments (0)

Files changed (6)

tools/cga_tools/cgatools_map2bam.xml

+<tool id="map2bam" name="CGA Map2Bam">
+  <description>Converts CGI initial reference mappings into a sorted BAM format.</description>
+  <!-- Pipeline: the output of cgatools map2sam is piped into samtools view
+       (SAM in, uncompressed BAM out) and then into samtools sort; the stderr
+       of map2sam is captured in output2.
+       NOTE(review): older samtools releases treat the last argument of
+       "sort" as an output prefix and append ".bam" to it; confirm that the
+       sorted BAM really ends up at the path Galaxy expects for "output". -->
+  <command>
+cgatools map2sam --reads=$reads --mappings=$mappings --reference=$reference 2> $output2 | samtools view -uS - | samtools sort - $output
+  </command>
+
+  <inputs>
+    <!-- NOTE(review): format is "tsv,bz2" here but "tsv.bz2" on the mappings
+         param below; confirm which spelling is intended. -->
+    <param format="tsv,bz2" name="reads" type="data" label="Complete Genomics reads file"/>
+    <param format="tsv.bz2" name="mappings" type="data" label="Complete Genomics mappings file"/>
+    <param format="crr" name="reference" type="data" label="Complete Genomics reference file"/>
+  </inputs>
+
+  <outputs>
+    <!-- output: sorted BAM; output2: stderr text captured from cgatools map2sam. -->
+    <data format="bam" name="output"/>
+    <data format="txt" name="output2"/>
+  </outputs>
+
+
+  <help>
+    The Map2Bam converter takes as input Reads and Mappings files, a library 
+    structure file and a crr reference file and generates one sorted BAM file as an 
+    output.
+  </help>
+
+</tool>

tools/globus/cga_demuxer.xml

     </param>
    </inputs>
    <outputs>
-     <data format="dat" type="data" name="output1"/>
+     <data format="cga" type="data" name="output1"/>
      <change_format>
+       <!-- NOTE(review): the default format of output1 is now "cga", but the
+            "cga" branch below still selects format "dat"; confirm that this
+            is intended and not a leftover from the dat-to-cga rename. -->
-       <when input="out_format" value="dat" format="dat" />
+       <when input="out_format" value="cga" format="dat" />
        <when input="out_format" value="sam" format="sam" />
        <when input="out_format" value="bam" format="bam" />
      </change_format>

tools/globus/condor_run.py

 import os
 
 # Temporary location for stdout, stderr, and log files.
-SCRATCH_PATH="/nfs/scratch/condor_run"
+SCRATCH_PATH="/glusterfs/galaxy-data/condor_run"
 
 if not os.path.isdir(SCRATCH_PATH):
     os.mkdir(SCRATCH_PATH)

tools/globus/demuxer.py

     return lane_number, part_number
 
 
-def extract_file(data, key, lane_number, part_number):
-    parts = data[key]
-    for name in parts:
-        part = parts[name]
-        lane_no, part_no = parse_lane_and_part_numbers(part["reads"])
-        if lane_no == lane_number and part_no == part_number:
-            datapath = parts["reads"]
+def extract_file(data, extension, lane_number, part_number, outpath):
+    """Symlink the matching entry of data["files"] to outpath.
+
+    An entry matches when the lane and part numbers parsed from its path
+    equal lane_number and part_number and the path ends with extension.
+    Only the first match is linked; any existing file or link at outpath
+    is replaced.
+
+    Raises Exception("File not found!") when no entry matches.
+    """
+    filepaths = data["files"]
+    print "lane_number:", lane_number, "part_number:", part_number
+    for path in filepaths:
+        print "path:", path
+        lane_no, part_no = parse_lane_and_part_numbers(path)
+        print "lane_no, part_no:", lane_no, part_no
+        if lane_no == lane_number and part_no == part_number and path.endswith(extension):
+            datapath = path
             print "Creating a symbolic link to the data %s at outpath %s"%(
                 datapath, outpath)
-            os.link(datapath, outpath)
-
+            if not os.path.exists(datapath):
+                # The listed path is missing on disk: fall back to the same
+                # path with the extension appended once more.
+                print "Some of these tools add another extension (e.g.: samtools sam2bam) .bam extensions..."
+                datapath += ".%s"%(extension,)
+            print "os.path.exists(outpath):", os.path.exists(outpath)
+            # os.unlink raises OSError when the path is absent, so only
+            # remove outpath if something is there. lexists also sees a
+            # dangling symlink, which os.path.exists would report as missing.
+            if os.path.lexists(outpath):
+                os.unlink(outpath)
+            os.symlink(datapath, outpath)
+            return
+    raise Exception("File not found!")
 
 
 def convert_to_dataset(inpath, indir, outpath,
                        output_type, lane_number, part_number):
+    # Coerce the lane and part numbers to int. On a non-numeric value the
+    # argument is left unchanged and only a message is printed.
+    try:
+        lane_number = int(lane_number)
+    except ValueError:
+        print "Invalid lane number!"
+    try:
+        # Convert part_number itself; converting lane_number here would
+        # silently overwrite the requested part with the lane number.
+        part_number = int(part_number)
+    except ValueError:
+        print "Invalid part number!"
+
     with open(inpath, "r") as json_file:
         data = json.load(json_file)
         files = data["files"]
         #for f in files:
         #    print f
-        if output_type == "dat":
+        if output_type == "cga":
             parts = data["parts"]
             for name in parts:
                 part = parts[name]
                         datapath, outpath)
                     os.link(datapath, outpath)
         elif output_type == "sam":
-            extract_file(data, "sam", lane_number, part_number)
+            extract_file(data, "sam", lane_number, part_number, outpath)
         elif output_type == "bam":
-            extract_file(data, "bam", lane_number, part_number)
+            extract_file(data, "bam", lane_number, part_number, outpath)
 
                 
 if __name__ == "__main__":

tools/globus/map2sam_condor_run.py

     parts = input_dict["parts"]
     for name in parts:
         part = parts[name]
-        # parse lane number and part number out of:
-        # the reads file name reads_GS21910-FS3-L04_004.tsv.bz2
         basename = os.path.basename(part["reads"])
-        sans_extension = basename.split(".")[0]
-        lane_part_segment = sans_extension.split("-")[-1]
-        lane_string, part_string = lane_part_segment.split("_")
-        lane_number = int(lane_string.replace("L", ""))
-        part_number = int(part_string)
+        #if basename == "reads_GS21184-FS3-L05_012.tsv.bz2":
+        if basename:
+            # cgatools map2sam --reads=$reads --mappings=$mappings  --reference=$reference > $output
 
-        if part_number in part_numbers and lane_number in lane_numbers:
-            # cgatools map2sam --reads=$reads --mappings=$mappings  --reference=$reference > $output
-            args = ["map2sam", "--reads=%s" % part["reads"],
-                    "--mappings=%s" % part["mapping"],
-                    "--reference=%s" % reference_file]
-            out_path = os.path.join(output_dir, "output_%s.sam" % name)
-            out_paths.append(out_path)
-            items.append(CondorQueueItem(args, out_path))
+            # parse lane number and part number out of:
+            # the reads file name reads_GS21910-FS3-L04_004.tsv.bz2
+            basename = os.path.basename(part["reads"])
+            sans_extension = basename.split(".")[0]
+            lane_part_segment = sans_extension.split("-")[-1]
+            lane_string, part_string = lane_part_segment.split("_")
+            lane_number = int(lane_string.replace("L", ""))
+            part_number = int(part_string)
+
+            if part_number in part_numbers and lane_number in lane_numbers:
+                #  $ cgapipe.sh output.bam output.fastq --reads=reads.tsv \
+                #               --mappings=mappings.tsv --reference=ref.crr
+                bam_path = os.path.join(output_dir, "%s.bam" % name)
+                out_paths.append(bam_path)
+                fastq_path = os.path.join(output_dir, "%s.fastq" % name)
+                out_paths.append(fastq_path)
+                args = [bam_path, fastq_path,
+                        "--reads=%s" % part["reads"],
+                        "--mappings=%s" % part["mapping"],
+                        "--reference=%s" % reference_file]
+                out_path = os.path.join(output_dir, "%s.out"%(name,))
+                out_paths.append(out_path)
+                items.append(CondorQueueItem(args, out_path))
 
     print "Spawning job with %s items"%(len(items),)
-    job = CondorJob("/nfs/software/bin/cgatools_dd", items=items)
+    job = CondorJob("/nfs/software/bin/cgapipe.sh", items=items)
     print job.condor_job
 
     cluster_id = job.submit()

tools/globus/map2sam_condor_run.xml

       <option value="12">12</option>
     </param>
   </inputs>
-
   <outputs>
-    <data format="cgadata" name="output1" />
+    <data format="cga" name="output1" />
   </outputs>
-
-
   <help>
     The Map2Sam converter takes as input Reads and Mappings files, a library 
     structure file and a crr reference file and generates one SAM file as an 
     mappings and highlight them using the appropriate flag. The negative gaps 
     in CGI mappings are represented using GS/GQ/GC tags.
   </help>
-
 </tool>