Anonymous avatar Anonymous committed 7040384

QC swift scripts

Comments (0)

Files changed (39)

swift_scripts/qc/cf

+#site work=/glusterfs/davidk/swift_scripts/cga/swiftwork
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+#status.mode=provider
+use.provider.staging=false
+provider.staging.pin.swiftfiles=false

swift_scripts/qc/create_cga_dataset.py

+#!/usr/bin/env python
+"""
+"""
+
+import ConfigParser
+import glob
+from optparse import OptionParser
+import os
+import sys
+import json
+
+sys.path.append("/nfs/software/galaxy-globus/lib") 
+
+# begin: imported for side effects
+import galaxy
+from galaxy import eggs
+import galaxy.model
+from galaxy.datatypes import sniff
+# end: imported for side effects
+
+from galaxy.datatypes.genetics import CgaData
+
+# TODO: more reliable way of finding ini file?
+CONFIG_FILE=os.path.join(
+    os.path.dirname(
+    os.path.dirname(
+    os.path.dirname(
+    os.path.abspath(galaxy.__file__)))),
+    "universe_wsgi.ini")
+
+def get_genomes():
+    """Return option list of genome directories.
+    """
+    options = []
+
+    #dir = "/Users/steder/T2DTest" # TODO pull this from galaxy.app.config
+    genome_dir = None
+    cp = ConfigParser.SafeConfigParser()
+    with open(CONFIG_FILE, "r") as config_file:
+        cp.readfp(config_file)
+        genome_dir = cp.get("galaxy:tools", "complete_genomics_root")
+
+    listing = os.listdir(genome_dir) 
+    for path in listing:
+        options.append((path, os.path.join(
+            genome_dir, path), True))
+        
+    if len(options) >= 1:
+        options.insert(0,('None','None',True))
+    else:
+        options = [('None','no genomes found',False),]
+    return options
+
+
+def write_dataset(outfile=None, input_directory=None,
+                  output_directory=None):
+    """Create a primary dataset file by reading input_directory.
+
+    Also attempts to create the extra files directory expected
+    by composite datasets by symbolicly linking input_directory
+    to output_directory.
+    """
+    # instead of creating the output directory we'll
+    # create a symbolic link to the input directory.
+    # TODO: uncomment this when we know that galaxy won't follow the link
+    # and accidentally delete our real dataset
+    #os.symlink(input_directory, output_directory)
+
+    # now create the primary dataset file:
+    d = CgaData.dataset_dict_from_directory(input_directory)
+    with open(outfile, "w") as out:
+        json.dump(d, out)
+
+
+if __name__ == "__main__":
+    """
+    """
+    parser = OptionParser(usage=__doc__, version="%prog 0.01")
+    parser.add_option("-o","--outf",dest="outf",
+      help="Output file", default = 'fakeped')
+    parser.add_option("-p","--outpath",dest="outpath",
+      help="Path for output files", default = './')
+    parser.add_option("-i", "--inpath", dest="inpath",
+      help="Input data directory", default="./")
+    (options,args) = parser.parse_args()
+    write_dataset(outfile=options.outf,
+                  input_directory=options.inpath,
+                  output_directory=options.outpath)
+
+
+        

swift_scripts/qc/fs.alldirect

+rule .* DIRECT /nfs/software/galaxy-globus/swift_scripts/qc 

swift_scripts/qc/galaxy-qc-condor

+<config>
+   <pool handle="condor">
+     <execution provider="condor" url="none"/>
+     <gridftp url="local://localhost"/>
+     <workdirectory>/glusterfs/galaxy-data/tmp</workdirectory>
+     <profile namespace="karajan" key="jobThrottle">1000</profile>
+     <profile namespace="karajan" key="initialScore">10000</profile>
+   </pool>
+</config>

swift_scripts/qc/galaxy-qc-local

+<config>
+  <pool handle="localhost">
+    <filesystem provider="local" />
+    <execution provider="coaster" jobmanager="local:local"/>
+    <profile namespace="karajan"  key="jobthrottle">1000</profile>
+    <profile namespace="karajan"  key="initialScore">10000</profile>
+    <profile namespace="globus"   key="jobsPerNode">1</profile>
+    <profile namespace="globus"   key="slots">8</profile>
+    <profile namespace="globus"   key="maxTime">1000</profile>
+    <profile namespace="globus"   key="nodeGranularity">1</profile>
+    <profile namespace="globus"   key="maxNodes">2</profile>
+    <workdirectory>/glusterfs/galaxy-data/tmp</workdirectory>
+  </pool>
+</config>

swift_scripts/qc/json_mapper.sh

+#!/bin/bash
+
+# Verify an argument is not null
+verify_not_null()
+{
+   # Usage: verify_not_null <argname> <value>
+   # Returns 0 when <value> is non-empty; otherwise reports the missing
+   # value for <argname> and exits the script with status 1.
+   argname=$1; shift
+   # -n with a quoted operand also works for values containing whitespace
+   # or glob characters, which the unquoted "_$1 != _" comparison did not.
+   if [ -n "$1" ]; then
+      return 0
+   fi
+   # Report on stderr: stdout of this script carries the mapper output.
+   echo "$0: value for $argname can not be null" >&2
+   exit 1
+}
+
+# Keep the original command line for the log: the parsing loop below
+# shifts every argument away, so "$@" is empty once it has finished.
+ORIG_ARGS="$*"
+
+# -level, -filetype and -relative are consumed and exported here; every
+# other argument is collected in PASSARGS for read_cga_dataset.py.
+PASSARGS=""
+while [ $# -gt 0 ]
+do
+   case "$1" in
+      -level) export LEVEL=$2; verify_not_null level "$LEVEL"; shift ;;
+      -filetype) export FILETYPE=$2; verify_not_null filetype "$FILETYPE"; shift ;;
+      -relative) export RELATIVE=$2; verify_not_null relative "$RELATIVE"; shift ;;
+       *) PASSARGS="$PASSARGS $1";;
+   esac
+   shift
+done
+
+echo "$0 $ORIG_ARGS" > mapper.log
+
+# Scratch file that receives the list of dataset paths, one per line.
+TEMP_FILE=`mktemp -p /glusterfs/galaxy-data/tmp`
+if [ -z "$TEMP_FILE" ]; then
+   # Without a scratch file the commands below would run with a missing
+   # file name, so stop right away.
+   echo "$0: could not create a temporary file" >&2
+   exit 1
+fi
+echo ./read_cga_dataset.py $PASSARGS -o $TEMP_FILE > jargs
+# The helper's output is discarded because stdout of this script is the
+# mapper output.  A failure is noted in mapper.log so that an empty mapping
+# can be diagnosed; processing continues as before.
+if ! ./read_cga_dataset.py $PASSARGS -o "$TEMP_FILE" > /dev/null 2>&1; then
+   echo "read_cga_dataset.py failed; see jargs for its arguments" >> mapper.log
+fi
+COUNT=0
+
+# Print "[COUNT] <path>" for one selected path after sed has removed the
+# first match of $RELATIVE.  The text is quoted so that "[N]" is never
+# treated as a filename pattern.
+emit_entry()
+{
+   echo "[$COUNT] $1" | sed 's@'"$RELATIVE"'@@'
+}
+
+# Succeeds once the cap for the current level is reached: level 1 stops
+# after index 1 and level 2 after index 9; other levels are not capped.
+limit_reached()
+{
+   { [ "$LEVEL" == "1" ] && [ "$COUNT" == 1 ]; } ||
+   { [ "$LEVEL" == "2" ] && [ "$COUNT" == 9 ]; }
+}
+
+# Walk the sorted paths line by line (so a path is never split on
+# whitespace) and emit those that match the requested file type.
+while IFS= read -r line
+do
+   case "$FILETYPE" in
+      reads)    [[ "$line" == */reads* ]] || continue ;;
+      mappings) [[ "$line" == /*mapping* ]] || continue ;;
+      # Any other file type takes nothing from the listing.
+      *)        break ;;
+   esac
+
+   emit_entry "$line"
+   if limit_reached; then
+      break
+   fi
+   COUNT=$(($COUNT + 1))
+done < <(sort "$TEMP_FILE")
+
+# Print reference file: a fixed name, not taken from the dataset listing.
+if [ "$FILETYPE" == "reference" ]; then
+   echo "[0] build37.crr"
+fi
+
+rm "$TEMP_FILE"

swift_scripts/qc/qc-chrm.sh

+#!/bin/bash -x 
+#
+# Second, per chromosome phase of the QC pipeline.
+#
+# Merges bam files for a single chromosome, then generates fastq, stats,
+# and quality plots from the data.
+#
+# Assumes *chrm_N convention for chromosome files in input_dir. Outputs
+# files with a chrmN prefix.
+#
+
+
+if [ $# -ne 3 ]; then
+    echo "Usage: $0 chrm_number input_dir output_dir"
+    # Stop here: every path below is built from the three arguments.
+    exit 1
+fi
+
+# Run from the directory that contains this script; relative input_dir and
+# output_dir arguments are therefore resolved against that directory.
+cd "$(dirname "$0")"
+bindir=$(pwd)
+
+#echo "$0 $@" > qchrm.log
+export PYTHONPATH=/nfs/software/galaxy/lib
+
+export PATH=$bindir:/usr/local/bin:/usr/bin:/bin:/usr/local/tools/weblogo:/usr/local/tools/blat:/usr/local/tools/homer/bin:/nfs/software/bin
+
+chrm="$1"
+indir="$2"
+outdir="$3"
+
+# All outputs share the chrm<N> prefix inside output_dir.
+bam_out="$outdir/chrm$chrm.bam"
+fastq_out="$outdir/chrm$chrm.fastq"
+solexa_out="$outdir/chrm$chrm.solexa.fastq"
+stats_out="$outdir/chrm$chrm.stats"
+box_out="$outdir/chrm${chrm}_boxplot.png"
+dist_out="$outdir/chrm${chrm}_distribution.png"
+
+# Each step runs only if the previous one succeeded.
+echo "join chrm $(date)" \
+&& samtools merge "$bam_out" $indir/*chrm_${chrm}.bam \
+&& echo "sam2fastq $(date)" \
+&& picard_sam2fastq.sh QUIET=TRUE \
+     INPUT="$bam_out" FASTQ=/dev/stdout \
+     VALIDATION_STRINGENCY=LENIENT 2>/dev/null \
+     | dd bs=1M > "$fastq_out" \
+&& echo "solexa $(date)" \
+&& fastq_groomer.py "$fastq_out" sanger "$solexa_out" \
+     solexa ascii summarize_input \
+&& echo "stats $(date)" \
+&& fastx_quality_stats -i "$solexa_out" -o "$stats_out" \
+&& echo "graphs $(date)" \
+&& {
+    # Both graphs are started as background jobs of this shell.  A brace
+    # group is used instead of "( ... & )" because jobs started from a
+    # subshell are not children of this shell and "wait" would return
+    # before they have finished.
+    fastq_quality_boxplot_graph.sh -i "$stats_out" -o "$box_out" \
+        -t "chr Fastq Quality Boxplot" &
+    fastx_nucleotide_distribution_line_graph.sh -i "$stats_out" \
+        -o "$dist_out" -t "chr Fastq Nucleotide Distribution" &
+}
+
+# Block until both graph jobs have finished.
+wait
+echo "done $(date)"

swift_scripts/qc/qc-part.sh

+#!/bin/bash -x
+#
+# First, per part phase of QC pipeline.
+#
+# Generates BAM and index files from a cga part, and then splits the bam by
+# chromosome. The first argument must be the bam output file location without
+# the .bam extension, the second argument is the chromosome base file (_N.bam
+# is appended to each out file, for N 0 to 23), and the rest of the arguments
+# are passed to cgatools map2sam.  For example:
+#
+#  $ qc-part.sh output/L01_001 output/L01_001-chrm --reads=reads.tsv \
+#               --mappings=maps.tsv --reference=build37.crr
+#   
+
+if [ $# -lt 3 ]; then
+    echo "Usage: $0 bam_base_file chrm_base_file MAP2SAM_ARGS..."
+    exit 1
+fi
+
+# Without pipefail the status of the map2sam pipeline below is that of its
+# last command only, so a failing cgatools or "samtools view" would go
+# unnoticed and a truncated BAM would still be indexed and split.
+set -o pipefail
+
+# Run from the directory that contains this script; relative output
+# locations are therefore resolved against that directory.
+cd "$(dirname "$0")"
+bindir=$(pwd)
+
+# Record the complete command line, not only the second argument.
+echo "Command: $0 $*" > qcpart.log 2>&1
+export PATH=$bindir:/usr/local/bin:/usr/bin:/bin:/usr/local/tools/weblogo:/usr/local/tools/blat:/usr/local/tools/homer/bin:/nfs/software/bin
+
+bam_out="$1"
+chrm_out="$2"
+# What remains in "$@" is handed to cgatools map2sam unchanged.
+shift 2
+
+# Each step runs only if the previous one succeeded.  "samtools sort" is
+# given the base name, and the steps after it read "${bam_out}.bam".
+echo "cga to bam $(date)" \
+&& cgatools map2sam --add-mate-sequence "$@" | dd bs=1M \
+    | samtools view -uS - \
+    | samtools sort - "$bam_out" \
+&& echo "index $(date)" \
+&& samtools index "${bam_out}.bam" \
+&& echo "split $(date)" \
+&& bam splitChromosome --in "${bam_out}.bam" --out "$chrm_out" \
+    --bamIndex "${bam_out}.bam.bai" \
+&& echo "done $(date)"

swift_scripts/qc/qc.swift

+// QC workflow: a per-part phase (qcpart) followed by a per-chromosome
+// phase (qcchrm).
+
+// Type used for every mapped file below.
+type file;
+
+// Runs the "qcpart" executable for one part.  Its arguments are the bam
+// file name, the same name with "-chrm" appended, and the reads, mappings
+// and reference file names as --reads=/--mappings=/--reference= options.
+// stdout and stderr are both written to plog.
+app (file bam, file plog) qcpart (file read, file map, file ref) {
+   qcpart @bam @strcat(@bam, "-chrm") @strcat("--reads=", @read) @strcat("--mappings=", @map) @strcat("--reference=", @ref) stdout=@plog stderr=@plog;
+}
+
+// Runs the "qcchrm" executable with the chromosome number (converted to a
+// string by @strcat) and the literal arguments "output" and "output2".
+// NOTE(review): the declared output "out" is not referenced in the command
+// line (qcpart redirects stdout/stderr to its log) - confirm how the
+// mapped log file is expected to be produced.
+app (file out) qcchrm (int chrm) {
+   qcchrm @strcat("", chrm) "output" "output2";
+}
+
+// Input arrays filled by the external mapper json_mapper.sh, one call per
+// file type; the "i", "level" and "relative" values come from the command
+// line via @arg.
+file reads[] <ext; exec="json_mapper.sh", filetype="reads", i=@arg("i"),level=@arg("level"), relative=@arg("relative")>;
+file mappings[] <ext; exec="json_mapper.sh", filetype="mappings", i=@arg("i"), level=@arg("level"), relative=@arg("relative")>;
+file reference[] <ext; exec="json_mapper.sh", filetype="reference", i=@arg("i"), level=@arg("level"), relative=@arg("relative")>;
+
+// Outputs of the per-part phase.
+file partlogs[] <simple_mapper; location=".", prefix="partlog", suffix=".log">;
+file bams[] <simple_mapper; location="output", prefix="output">;
+// Not referenced again in this script.
+int level = @toint(@arg("level"));
+
+// Phase 1: one qcpart call per reads entry, paired with the mappings entry
+// at the same index; reference[0] is shared by all parts.
+foreach part,i in reads {
+  (bams[i], partlogs[i]) = qcpart(reads[i], mappings[i], reference[0]);
+}
+
+file logs[] <simple_mapper; location="output", prefix="log", suffix=".log">;
+// Arithmetically this is always 23.
+// NOTE(review): adding and subtracting the length of bams presumably makes
+// "end" depend on bams being completely assigned, so that phase 2 starts
+// only after phase 1 has finished - confirm.
+int end = 23 + @toint(@length(bams)) - @toint(@length(bams));
+
+// Phase 2: one qcchrm call per index from 0 to end.
+foreach j in [0:end] {
+   logs[j] = qcchrm(j);
+}
+

swift_scripts/qc/qc.xml

+<program xmlns="http://ci.uchicago.edu/swift/2009/02/swiftscript"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xmlns:xs="http://www.w3.org/2001/XMLSchema">
+  
+  <types>
+     	<type>
+     		<typename>file</typename>
+     		<typealias>string</typealias>
+     		<typestructure></typestructure>
+     	</type>   
+  </types>  
+  <procedure name="qcpart" src="line 3">
+
+      <output 
+    name="bam" type="file"
+
+     xsi:nil="true" />
+
+      <output 
+    name="plog" type="file"
+
+     xsi:nil="true" />
+
+      <input 
+    name="read" type="file"
+
+     xsi:nil="true" />
+
+      <input 
+    name="map" type="file"
+
+     xsi:nil="true" />
+
+      <input 
+    name="ref" type="file"
+
+     xsi:nil="true" />
+    <binding>
+      <application src="line 3">
+        <executable>qcpart</executable>
+        <stdout><function name="filename">
+          <variableReference>plog</variableReference></function></stdout><stderr><function name="filename">
+          <variableReference>plog</variableReference></function></stderr><function name="filename">
+          <variableReference>bam</variableReference></function><function name="strcat">
+          <function name="filename">
+            <variableReference>bam</variableReference></function><stringConstant>-chrm</stringConstant></function><function name="strcat">
+          <stringConstant>--reads=</stringConstant><function name="filename">
+            <variableReference>read</variableReference></function></function><function name="strcat">
+          <stringConstant>--mappings=</stringConstant><function name="filename">
+            <variableReference>map</variableReference></function></function><function name="strcat">
+          <stringConstant>--reference=</stringConstant><function name="filename">
+            <variableReference>ref</variableReference></function></function>  </application>
+    </binding>
+  </procedure>
+  <procedure name="qcchrm" src="line 7">
+
+      <output 
+    name="out" type="file"
+
+     xsi:nil="true" />
+
+      <input 
+    name="chrm" type="int"
+
+     xsi:nil="true" />
+    <binding>
+      <application src="line 7">
+        <executable>qcchrm</executable>
+        <function name="strcat">
+          <stringConstant></stringConstant><variableReference>chrm</variableReference></function><stringConstant>output</stringConstant><stringConstant>output2</stringConstant>  </application>
+    </binding>
+  </procedure>
+  <variable name="reads" type="file[]" isGlobal="false">
+    <mapping descriptor="ext">
+      <param name="exec"><stringConstant>json_mapper.sh</stringConstant></param>
+      <param name="filetype"><stringConstant>reads</stringConstant></param>
+      <param name="i"><function name="arg">
+        <stringConstant>i</stringConstant></function></param>
+      <param name="level"><function name="arg">
+        <stringConstant>level</stringConstant></function></param>
+      <param name="relative"><function name="arg">
+        <stringConstant>relative</stringConstant></function></param>
+    </mapping>
+  </variable>
+  <variable name="mappings" type="file[]" isGlobal="false">
+    <mapping descriptor="ext">
+      <param name="exec"><stringConstant>json_mapper.sh</stringConstant></param>
+      <param name="filetype"><stringConstant>mappings</stringConstant></param>
+      <param name="i"><function name="arg">
+        <stringConstant>i</stringConstant></function></param>
+      <param name="level"><function name="arg">
+        <stringConstant>level</stringConstant></function></param>
+      <param name="relative"><function name="arg">
+        <stringConstant>relative</stringConstant></function></param>
+    </mapping>
+  </variable>
+  <variable name="reference" type="file[]" isGlobal="false">
+    <mapping descriptor="ext">
+      <param name="exec"><stringConstant>json_mapper.sh</stringConstant></param>
+      <param name="filetype"><stringConstant>reference</stringConstant></param>
+      <param name="i"><function name="arg">
+        <stringConstant>i</stringConstant></function></param>
+      <param name="level"><function name="arg">
+        <stringConstant>level</stringConstant></function></param>
+      <param name="relative"><function name="arg">
+        <stringConstant>relative</stringConstant></function></param>
+    </mapping>
+  </variable>
+  <variable name="partlogs" type="file[]" isGlobal="false">
+    <mapping descriptor="simple_mapper">
+      <param name="location"><stringConstant>.</stringConstant></param>
+      <param name="prefix"><stringConstant>partlog</stringConstant></param>
+      <param name="suffix"><stringConstant>.log</stringConstant></param>
+    </mapping>
+  </variable>
+  <variable name="bams" type="file[]" isGlobal="false">
+    <mapping descriptor="simple_mapper">
+      <param name="location"><stringConstant>output</stringConstant></param>
+      <param name="prefix"><stringConstant>output</stringConstant></param>
+    </mapping>
+  </variable>
+  <variable name="level" type="int" isGlobal="false" xsi:nil="true"/>
+  <assign src="line 19">
+   <variableReference>level</variableReference>
+   <function name="toint">
+     <function name="arg">
+       <stringConstant>level</stringConstant></function></function>
+  </assign>
+  <foreach var="part"  indexVar="i" src="line 19">
+  <in><variableReference>reads</variableReference></in>
+  <body><call proc="qcpart" src="line 20">
+      <output><arraySubscript>
+       <variableReference>bams</variableReference>
+       <variableReference>i</variableReference>
+      </arraySubscript></output>
+      <output><arraySubscript>
+       <variableReference>partlogs</variableReference>
+       <variableReference>i</variableReference>
+      </arraySubscript></output>
+      <input><arraySubscript>
+       <variableReference>reads</variableReference>
+       <variableReference>i</variableReference>
+      </arraySubscript></input>
+      <input><arraySubscript>
+       <variableReference>mappings</variableReference>
+       <variableReference>i</variableReference>
+      </arraySubscript></input>
+      <input><arraySubscript>
+       <variableReference>reference</variableReference>
+       <integerConstant>0</integerConstant>
+      </arraySubscript></input>
+    </call></body>
+  </foreach>
+  <variable name="logs" type="file[]" isGlobal="false">
+    <mapping descriptor="simple_mapper">
+      <param name="location"><stringConstant>output</stringConstant></param>
+      <param name="prefix"><stringConstant>log</stringConstant></param>
+      <param name="suffix"><stringConstant>.log</stringConstant></param>
+    </mapping>
+  </variable>
+  <variable name="end" type="int" isGlobal="false" xsi:nil="true"/>
+  <assign src="line 26">
+   <variableReference>end</variableReference>
+   <arith op="-">
+     <arith op="+">
+       <integerConstant>23</integerConstant>
+       <function name="toint">
+         <function name="length">
+           <variableReference>bams</variableReference></function></function>
+     </arith>
+     <function name="toint">
+       <function name="length">
+         <variableReference>bams</variableReference></function></function>
+   </arith>
+  </assign>
+  <foreach var="j"  src="line 26">
+  <in>
+    <range>
+      <integerConstant>0</integerConstant>
+      <variableReference>end</variableReference>
+    </range>
+  </in>
+  <body><call proc="qcchrm" src="line 27">
+      <output><arraySubscript>
+       <variableReference>logs</variableReference>
+       <variableReference>j</variableReference>
+      </arraySubscript></output>
+      <input><variableReference>j</variableReference></input>
+    </call></body>
+  </foreach>
+</program>

swift_scripts/qc/read_cga_dataset.py

+#!/usr/bin/env python
+"""
+"""
+from optparse import OptionParser
+import json
+
+
+def convert_to_txt(inpath, indir, outpath):
+    with open(outpath, "w") as of:
+        with open(inpath, "r") as json_file:
+            data = json.load(json_file)
+            files = data["files"]
+            for f in files:
+                of.write("%s\n"%(f["path"]))
+                
+if __name__ == "__main__":
+    """
+    """
+    parser = OptionParser(usage=__doc__, version="%prog 0.01")
+    parser.add_option("-o","--outpath",dest="outpath",
+      help="output file path", default = 'fakeped')
+    parser.add_option("-p","--indir",dest="indir",
+      help="path for input files", default = './')
+    parser.add_option("-i", "--inpath", dest="inpath",
+      help="path for input primary file", default="./")
+    (options,args) = parser.parse_args()
+    convert_to_txt(options.inpath, options.indir, options.outpath)
+
+
+        

swift_scripts/qc/sites.xml

+<config>
+   <pool handle="condor">
+     <execution provider="condor" url="none"/>
+     <gridftp url="local://localhost"/>
+     <workdirectory>/glusterfs/galaxy-data/tmp</workdirectory>
+     <profile namespace="karajan" key="jobThrottle">1000</profile>
+     <profile namespace="karajan" key="initialScore">10000</profile>
+   </pool>
+</config>

swift_scripts/qc/swift-qc.sh

+#!/bin/bash
+
+# crash: Report a problem and exit
+crash()
+{
+    # Usage: crash <message>
+    # Prints <message> on stderr and exits the script with status 1.
+    MSG=$1
+    # Quoted so that the message is printed verbatim, without word
+    # splitting or filename expansion.
+    echo "${MSG}" >&2
+    exit 1
+}
+
+# Verify an argument is not null
+verify_not_null()
+{
+   # Usage: verify_not_null <argname> <value>
+   # Returns 0 when <value> is non-empty; otherwise reports the missing
+   # value for <argname> on stderr and exits the script with status 1.
+   argname=$1; shift
+   # -n with a quoted operand also works for values containing whitespace
+   # or glob characters, which the unquoted "_$1 != _" comparison did not.
+   if [ -n "$1" ]; then
+      return 0
+   fi
+   echo "$0: value for $argname can not be null" >&2
+   exit 1
+}
+
+# Process what we know and pass the rest to Swift
+# Recognised options are exported as environment variables; everything
+# else is appended to SWIFTARGS.
+SWIFTARGS=""
+while [ $# -gt 0 ]
+do
+   case "$1" in
+      -level) export LEVEL=$2; verify_not_null level "$LEVEL"; shift ;;
+      -template) export TEMPLATE=$2; verify_not_null template "$TEMPLATE"; shift;;
+      -work) export WORK=$2; verify_not_null work "$WORK"; shift;;
+      -project) export PROJECT=$2; verify_not_null project "$PROJECT"; shift;;
+      -queue) export QUEUE=$2; verify_not_null queue "$QUEUE"; shift;;
+      -jobthrottle) export JOBTHROTTLE=$2; verify_not_null jobthrottle "$JOBTHROTTLE"; shift;;
+      # Only the leading "-o=" is stripped, so a value that itself contains
+      # "=" is kept whole.  The option is still forwarded to Swift.
+      -o=*) export OUTPUT="${1#-o=}"; verify_not_null output "$OUTPUT"; SWIFTARGS="$SWIFTARGS $1" ;;
+       *) SWIFTARGS="$SWIFTARGS $1";;
+   esac
+   shift
+done
+
+# Verify level
+if [ -z "$LEVEL" ]; then
+   crash "Level not specified. Use -level <value>"
+fi
+
+case "$LEVEL" in
+   0) ;;
+   1) ;;
+   2) ;;
+   3) ;;
+   *) crash "Unknown level $LEVEL";;
+esac
+
+# Verify a sites template exists
+if [ -z "$TEMPLATE" ]; then
+   crash "Template not specified. Use -template <name>"
+fi
+
+# Create Swift configuration files
+# The file names used below are relative, so give up if the working
+# directory cannot be entered rather than running gensites and swift from
+# wherever the caller happened to be.
+pushd /glusterfs/davidk/swift_scripts/qc > /dev/null 2>&1 \
+   || crash "Cannot change directory to /glusterfs/davidk/swift_scripts/qc"
+export SWIFT_HOME=$HOME/swift-0.92.1
+gensites -L . "$TEMPLATE" -p cf > sites.xml
+
+# Run Swift
+#echo swift -sites.file sites.xml -tc.file tc.data -config cf -cdm.file fs.alldirect cga.swift -level=$LEVEL $SWIFTARGS >> $LOG 2>&1
+swift -sites.file sites.xml -tc.file tc.data -config cf -cdm.file fs.alldirect qc.swift -level=$LEVEL $SWIFTARGS 2>&1
+#echo ./create_cga_dataset.py -o $OUTPUT -p `dirname $OUTPUT` -i output >> $LOG 2>&1
+#./create_cga_dataset.py -o $OUTPUT -p `dirname $OUTPUT` -i output >> $LOG 2>&1
+#popd > /dev/null 2>&1
+                       

swift_scripts/qc/tc.data

+condor qcpart /nfs/software/galaxy-globus/swift_scripts/qc/qc-part.sh null null null
+condor qcchrm /nfs/software/galaxy-globus/swift_scripts/qc/qc-chrm.sh null null null

swift_scripts/qc2/cf

+#site work=/glusterfs/davidk/swift_scripts/cga/swiftwork
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+#status.mode=provider
+use.provider.staging=false
+provider.staging.pin.swiftfiles=false

swift_scripts/qc2/create_cga_dataset.py

+#!/usr/bin/env python
+"""
+"""
+
+import ConfigParser
+import glob
+from optparse import OptionParser
+import os
+import sys
+import json
+
+sys.path.append("/nfs/software/galaxy-globus/lib") 
+
+# begin: imported for side effects
+import galaxy
+from galaxy import eggs
+import galaxy.model
+from galaxy.datatypes import sniff
+# end: imported for side effects
+
+from galaxy.datatypes.genetics import CgaData
+
+# TODO: more reliable way of finding ini file?
+CONFIG_FILE=os.path.join(
+    os.path.dirname(
+    os.path.dirname(
+    os.path.dirname(
+    os.path.abspath(galaxy.__file__)))),
+    "universe_wsgi.ini")
+
+def get_genomes():
+    """Return option list of genome directories.
+    """
+    options = []
+
+    #dir = "/Users/steder/T2DTest" # TODO pull this from galaxy.app.config
+    genome_dir = None
+    cp = ConfigParser.SafeConfigParser()
+    with open(CONFIG_FILE, "r") as config_file:
+        cp.readfp(config_file)
+        genome_dir = cp.get("galaxy:tools", "complete_genomics_root")
+
+    listing = os.listdir(genome_dir) 
+    for path in listing:
+        options.append((path, os.path.join(
+            genome_dir, path), True))
+        
+    if len(options) >= 1:
+        options.insert(0,('None','None',True))
+    else:
+        options = [('None','no genomes found',False),]
+    return options
+
+
+def write_dataset(outfile=None, input_directory=None,
+                  output_directory=None):
+    """Create a primary dataset file by reading input_directory.
+
+    Also attempts to create the extra files directory expected
+    by composite datasets by symbolicly linking input_directory
+    to output_directory.
+    """
+    # instead of creating the output directory we'll
+    # create a symbolic link to the input directory.
+    # TODO: uncomment this when we know that galaxy won't follow the link
+    # and accidentally delete our real dataset
+    #os.symlink(input_directory, output_directory)
+
+    # now create the primary dataset file:
+    d = CgaData.dataset_dict_from_directory(input_directory)
+    with open(outfile, "w") as out:
+        json.dump(d, out)
+
+
+if __name__ == "__main__":
+    """
+    """
+    parser = OptionParser(usage=__doc__, version="%prog 0.01")
+    parser.add_option("-o","--outf",dest="outf",
+      help="Output file", default = 'fakeped')
+    parser.add_option("-p","--outpath",dest="outpath",
+      help="Path for output files", default = './')
+    parser.add_option("-i", "--inpath", dest="inpath",
+      help="Input data directory", default="./")
+    (options,args) = parser.parse_args()
+    write_dataset(outfile=options.outf,
+                  input_directory=options.inpath,
+                  output_directory=options.outpath)
+
+
+        

swift_scripts/qc2/fs.alldirect

+rule .* DIRECT /nfs/software/galaxy-globus/swift_scripts/qc2 

swift_scripts/qc2/galaxy-qc-condor

+<config>
+   <pool handle="condor">
+     <execution provider="condor" url="none"/>
+     <gridftp url="local://localhost"/>
+     <workdirectory>/glusterfs/galaxy-data/tmp</workdirectory>
+     <profile namespace="karajan" key="jobThrottle">1000</profile>
+     <profile namespace="karajan" key="initialScore">10000</profile>
+   </pool>
+</config>

swift_scripts/qc2/galaxy-qc-local

+<config>
+  <pool handle="localhost">
+    <filesystem provider="local" />
+    <execution provider="coaster" jobmanager="local:local"/>
+    <profile namespace="karajan"  key="jobthrottle">1000</profile>
+    <profile namespace="karajan"  key="initialScore">10000</profile>
+    <profile namespace="globus"   key="jobsPerNode">1</profile>
+    <profile namespace="globus"   key="slots">8</profile>
+    <profile namespace="globus"   key="maxTime">1000</profile>
+    <profile namespace="globus"   key="nodeGranularity">1</profile>
+    <profile namespace="globus"   key="maxNodes">2</profile>
+    <workdirectory>/glusterfs/galaxy-data/tmp</workdirectory>
+  </pool>
+</config>

swift_scripts/qc2/json_mapper.sh

+#!/bin/bash
+
+# Verify an argument is not null
+verify_not_null()
+{
+   # Usage: verify_not_null <argname> <value>
+   # Returns 0 when <value> is non-empty; otherwise reports the missing
+   # value for <argname> and exits the script with status 1.
+   argname=$1; shift
+   # -n with a quoted operand also works for values containing whitespace
+   # or glob characters, which the unquoted "_$1 != _" comparison did not.
+   if [ -n "$1" ]; then
+      return 0
+   fi
+   # Report on stderr: stdout of this script carries the mapper output.
+   echo "$0: value for $argname can not be null" >&2
+   exit 1
+}
+
+# Keep the original command line for the log: the parsing loop below
+# shifts every argument away, so "$@" is empty once it has finished.
+ORIG_ARGS="$*"
+
+# -level, -filetype and -relative are consumed and exported here; every
+# other argument is collected in PASSARGS for read_cga_dataset.py.
+PASSARGS=""
+while [ $# -gt 0 ]
+do
+   case "$1" in
+      -level) export LEVEL=$2; verify_not_null level "$LEVEL"; shift ;;
+      -filetype) export FILETYPE=$2; verify_not_null filetype "$FILETYPE"; shift ;;
+      -relative) export RELATIVE=$2; verify_not_null relative "$RELATIVE"; shift ;;
+       *) PASSARGS="$PASSARGS $1";;
+   esac
+   shift
+done
+
+echo "$0 $ORIG_ARGS" > mapper.log
+
+# Scratch file that receives the list of dataset paths, one per line.
+TEMP_FILE=`mktemp -p /glusterfs/galaxy-data/tmp`
+if [ -z "$TEMP_FILE" ]; then
+   # Without a scratch file the commands below would run with a missing
+   # file name, so stop right away.
+   echo "$0: could not create a temporary file" >&2
+   exit 1
+fi
+echo ./read_cga_dataset.py $PASSARGS -o $TEMP_FILE > jargs
+# The helper's output is discarded because stdout of this script is the
+# mapper output.  A failure is noted in mapper.log so that an empty mapping
+# can be diagnosed; processing continues as before.
+if ! ./read_cga_dataset.py $PASSARGS -o "$TEMP_FILE" > /dev/null 2>&1; then
+   echo "read_cga_dataset.py failed; see jargs for its arguments" >> mapper.log
+fi
+COUNT=0
+
+# Print "[COUNT] <path>" for one selected path after sed has removed the
+# first match of $RELATIVE.  The text is quoted so that "[N]" is never
+# treated as a filename pattern.
+emit_entry()
+{
+   echo "[$COUNT] $1" | sed 's@'"$RELATIVE"'@@'
+}
+
+# Succeeds once the cap for the current level is reached: level 1 stops
+# after index 1 and level 2 after index 9; other levels are not capped.
+limit_reached()
+{
+   { [ "$LEVEL" == "1" ] && [ "$COUNT" == 1 ]; } ||
+   { [ "$LEVEL" == "2" ] && [ "$COUNT" == 9 ]; }
+}
+
+# Walk the sorted paths line by line (so a path is never split on
+# whitespace) and emit those that match the requested file type.
+while IFS= read -r line
+do
+   case "$FILETYPE" in
+      reads)    [[ "$line" == */reads* ]] || continue ;;
+      mappings) [[ "$line" == /*mapping* ]] || continue ;;
+      # Any other file type takes nothing from the listing.
+      *)        break ;;
+   esac
+
+   emit_entry "$line"
+   if limit_reached; then
+      break
+   fi
+   COUNT=$(($COUNT + 1))
+done < <(sort "$TEMP_FILE")
+
+# Print reference file: a fixed name, not taken from the dataset listing.
+if [ "$FILETYPE" == "reference" ]; then
+   echo "[0] build37.crr"
+fi
+
+rm "$TEMP_FILE"

swift_scripts/qc2/qc-chrm.sh

+#!/bin/bash -x 
+#
+# Second, per chromosome phase of the QC pipeline.
+#
+# Merges bam files for a single chromosome, then generates fastq, stats,
+# and quality plots from the data.
+#
+# Assumes *chrm_N convention for chromosome files in input_dir. Outputs
+# files with a chrmN prefix.
+#
+
+
+if [ $# -ne 3 ]; then
+    echo "Usage: $0 chrm_number input_dir output_dir"
+    # Stop here: every path below is built from the three arguments.
+    exit 1
+fi
+
+# Run from the directory that contains this script; relative input_dir and
+# output_dir arguments are therefore resolved against that directory.
+cd "$(dirname "$0")"
+bindir=$(pwd)
+
+#echo "$0 $@" > qchrm.log
+export PYTHONPATH=/nfs/software/galaxy/lib
+
+export PATH=$bindir:/usr/local/bin:/usr/bin:/bin:/usr/local/tools/weblogo:/usr/local/tools/blat:/usr/local/tools/homer/bin:/nfs/software/bin
+
+chrm="$1"
+indir="$2"
+outdir="$3"
+
+# All outputs share the chrm<N> prefix inside output_dir.
+bam_out="$outdir/chrm$chrm.bam"
+fastq_out="$outdir/chrm$chrm.fastq"
+solexa_out="$outdir/chrm$chrm.solexa.fastq"
+stats_out="$outdir/chrm$chrm.stats"
+box_out="$outdir/chrm${chrm}_boxplot.png"
+dist_out="$outdir/chrm${chrm}_distribution.png"
+
+# Each step runs only if the previous one succeeded.
+echo "join chrm $(date)" \
+&& samtools merge "$bam_out" $indir/*chrm_${chrm}.bam \
+&& echo "sam2fastq $(date)" \
+&& picard_sam2fastq.sh QUIET=TRUE \
+     INPUT="$bam_out" FASTQ=/dev/stdout \
+     VALIDATION_STRINGENCY=LENIENT 2>/dev/null \
+     | dd bs=1M > "$fastq_out" \
+&& echo "solexa $(date)" \
+&& fastq_groomer.py "$fastq_out" sanger "$solexa_out" \
+     solexa ascii summarize_input \
+&& echo "stats $(date)" \
+&& fastx_quality_stats -i "$solexa_out" -o "$stats_out" \
+&& echo "graphs $(date)" \
+&& {
+    # Both graphs are started as background jobs of this shell.  A brace
+    # group is used instead of "( ... & )" because jobs started from a
+    # subshell are not children of this shell and "wait" would return
+    # before they have finished.
+    fastq_quality_boxplot_graph.sh -i "$stats_out" -o "$box_out" \
+        -t "chr Fastq Quality Boxplot" &
+    fastx_nucleotide_distribution_line_graph.sh -i "$stats_out" \
+        -o "$dist_out" -t "chr Fastq Nucleotide Distribution" &
+}
+
+# Block until both graph jobs have finished.
+wait
+echo "done $(date)"

swift_scripts/qc2/qc-part.sh

+#!/bin/bash -x
+#
+# First, per part phase of QC pipeline.
+#
+# Generates BAM and index files from a cga part, and then splits the bam by
+# chromosome. The first argument must be the bam output file location without
+# the .bam extension, the second argument is the chromosome base file (_N.bam
+# is appended to each out file, for N 0 to 23), and the rest of the arguments
+# are passed to cgatools map2sam.  For example:
+#
+#  $ qc-part.sh output/L01_001 output/L01_001-chrm --reads=reads.tsv \
+#               --mappings=maps.tsv --reference=build37.crr
+#   
+
+if [ $# -lt 3 ]; then
+    echo "Usage: $0 bam_base_file chrm_base_file MAP2SAM_ARGS..."
+    exit 1
+fi
+
+# Without pipefail the status of the map2sam pipeline below is that of its
+# last command only, so a failing cgatools or "samtools view" would go
+# unnoticed and a truncated BAM would still be indexed and split.
+set -o pipefail
+
+# Run from the directory that contains this script; relative output
+# locations are therefore resolved against that directory.
+cd "$(dirname "$0")"
+bindir=$(pwd)
+
+# Record the complete command line, not only the second argument.
+echo "Command: $0 $*" > qcpart.log 2>&1
+export PATH=$bindir:/usr/local/bin:/usr/bin:/bin:/usr/local/tools/weblogo:/usr/local/tools/blat:/usr/local/tools/homer/bin:/nfs/software/bin
+
+bam_out="$1"
+chrm_out="$2"
+# What remains in "$@" is handed to cgatools map2sam unchanged.
+shift 2
+
+# Each step runs only if the previous one succeeded.  "samtools sort" is
+# given the base name, and the steps after it read "${bam_out}.bam".
+echo "cga to bam $(date)" \
+&& cgatools map2sam --add-mate-sequence "$@" | dd bs=1M \
+    | samtools view -uS - \
+    | samtools sort - "$bam_out" \
+&& echo "index $(date)" \
+&& samtools index "${bam_out}.bam" \
+&& echo "split $(date)" \
+&& bam splitChromosome --in "${bam_out}.bam" --out "$chrm_out" \
+    --bamIndex "${bam_out}.bam.bai" \
+&& echo "done $(date)"

swift_scripts/qc2/qc.swift

+// QC workflow: a per-part phase (qcpart) followed by a per-chromosome
+// phase (qcchrm).
+
+// Type used for every mapped file below.
+type file;
+
+// Runs the "qcpart" executable for one part.  Its arguments are the bam
+// file name, the same name with "-chrm" appended, and the reads, mappings
+// and reference file names as --reads=/--mappings=/--reference= options.
+// stdout and stderr are both written to plog.
+app (file bam, file plog) qcpart (file read, file map, file ref) {
+   qcpart @bam @strcat(@bam, "-chrm") @strcat("--reads=", @read) @strcat("--mappings=", @map) @strcat("--reference=", @ref) stdout=@plog stderr=@plog;
+}
+
+// Runs the "qcchrm" executable with the chromosome number (converted to a
+// string by @strcat) and the literal arguments "output" and "output2".
+// NOTE(review): the declared output "out" is not referenced in the command
+// line (qcpart redirects stdout/stderr to its log) - confirm how the
+// mapped log file is expected to be produced.
+app (file out) qcchrm (int chrm) {
+   qcchrm @strcat("", chrm) "output" "output2";
+}
+
+// Input arrays filled by the external mapper json_mapper.sh, one call per
+// file type; the "i", "level" and "relative" values come from the command
+// line via @arg.
+file reads[] <ext; exec="json_mapper.sh", filetype="reads", i=@arg("i"),level=@arg("level"), relative=@arg("relative")>;
+file mappings[] <ext; exec="json_mapper.sh", filetype="mappings", i=@arg("i"), level=@arg("level"), relative=@arg("relative")>;
+file reference[] <ext; exec="json_mapper.sh", filetype="reference", i=@arg("i"), level=@arg("level"), relative=@arg("relative")>;
+
+// Outputs of the per-part phase.
+file partlogs[] <simple_mapper; location=".", prefix="partlog", suffix=".log">;
+file bams[] <simple_mapper; location="output", prefix="output">;
+// Not referenced again in this script.
+int level = @toint(@arg("level"));
+
+// Phase 1: one qcpart call per reads entry, paired with the mappings entry
+// at the same index; reference[0] is shared by all parts.
+foreach part,i in reads {
+  (bams[i], partlogs[i]) = qcpart(reads[i], mappings[i], reference[0]);
+}
+
+file logs[] <simple_mapper; location="output", prefix="log", suffix=".log">;
+// Arithmetically this is always 23.
+// NOTE(review): adding and subtracting the length of bams presumably makes
+// "end" depend on bams being completely assigned, so that phase 2 starts
+// only after phase 1 has finished - confirm.
+int end = 23 + @toint(@length(bams)) - @toint(@length(bams));
+
+// Phase 2: one qcchrm call per index from 0 to end.
+foreach j in [0:end] {
+   logs[j] = qcchrm(j);
+}
+

swift_scripts/qc2/qc.xml

+<program xmlns="http://ci.uchicago.edu/swift/2009/02/swiftscript"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xmlns:xs="http://www.w3.org/2001/XMLSchema">
+  <!-- XML form of the qc workflow: the "file" type, the qcpart and qcchrm procedures with their application bindings, the mapped variables (reads, mappings, reference, partlogs, bams, logs), and the two foreach loops that call the procedures. The src="line N" attributes point at lines of the script this was produced from. NOTE(review): this looks like compiler output for qc.swift; presumably it should be regenerated rather than edited by hand (confirm). -->
+  <types>
+     	<type>
+     		<typename>file</typename>
+     		<typealias>string</typealias>
+     		<typestructure></typestructure>
+     	</type>   
+  </types>  
+  <procedure name="qcpart" src="line 3">
+
+      <output 
+    name="bam" type="file"
+
+     xsi:nil="true" />
+
+      <output 
+    name="plog" type="file"
+
+     xsi:nil="true" />
+
+      <input 
+    name="read" type="file"
+
+     xsi:nil="true" />
+
+      <input 
+    name="map" type="file"
+
+     xsi:nil="true" />
+
+      <input 
+    name="ref" type="file"
+
+     xsi:nil="true" />
+    <binding>
+      <application src="line 3">
+        <executable>qcpart</executable>
+        <stdout><function name="filename">
+          <variableReference>plog</variableReference></function></stdout><stderr><function name="filename">
+          <variableReference>plog</variableReference></function></stderr><function name="filename">
+          <variableReference>bam</variableReference></function><function name="strcat">
+          <function name="filename">
+            <variableReference>bam</variableReference></function><stringConstant>-chrm</stringConstant></function><function name="strcat">
+          <stringConstant>--reads=</stringConstant><function name="filename">
+            <variableReference>read</variableReference></function></function><function name="strcat">
+          <stringConstant>--mappings=</stringConstant><function name="filename">
+            <variableReference>map</variableReference></function></function><function name="strcat">
+          <stringConstant>--reference=</stringConstant><function name="filename">
+            <variableReference>ref</variableReference></function></function>  </application>
+    </binding>
+  </procedure>
+  <procedure name="qcchrm" src="line 7">
+
+      <output 
+    name="out" type="file"
+
+     xsi:nil="true" />
+
+      <input 
+    name="chrm" type="int"
+
+     xsi:nil="true" />
+    <binding>
+      <application src="line 7">
+        <executable>qcchrm</executable>
+        <function name="strcat">
+          <stringConstant></stringConstant><variableReference>chrm</variableReference></function><stringConstant>output</stringConstant><stringConstant>output2</stringConstant>  </application>
+    </binding>
+  </procedure>
+  <variable name="reads" type="file[]" isGlobal="false">
+    <mapping descriptor="ext">
+      <param name="exec"><stringConstant>json_mapper.sh</stringConstant></param>
+      <param name="filetype"><stringConstant>reads</stringConstant></param>
+      <param name="i"><function name="arg">
+        <stringConstant>i</stringConstant></function></param>
+      <param name="level"><function name="arg">
+        <stringConstant>level</stringConstant></function></param>
+      <param name="relative"><function name="arg">
+        <stringConstant>relative</stringConstant></function></param>
+    </mapping>
+  </variable>
+  <variable name="mappings" type="file[]" isGlobal="false">
+    <mapping descriptor="ext">
+      <param name="exec"><stringConstant>json_mapper.sh</stringConstant></param>
+      <param name="filetype"><stringConstant>mappings</stringConstant></param>
+      <param name="i"><function name="arg">
+        <stringConstant>i</stringConstant></function></param>
+      <param name="level"><function name="arg">
+        <stringConstant>level</stringConstant></function></param>
+      <param name="relative"><function name="arg">
+        <stringConstant>relative</stringConstant></function></param>
+    </mapping>
+  </variable>
+  <variable name="reference" type="file[]" isGlobal="false">
+    <mapping descriptor="ext">
+      <param name="exec"><stringConstant>json_mapper.sh</stringConstant></param>
+      <param name="filetype"><stringConstant>reference</stringConstant></param>
+      <param name="i"><function name="arg">
+        <stringConstant>i</stringConstant></function></param>
+      <param name="level"><function name="arg">
+        <stringConstant>level</stringConstant></function></param>
+      <param name="relative"><function name="arg">
+        <stringConstant>relative</stringConstant></function></param>
+    </mapping>
+  </variable>
+  <variable name="partlogs" type="file[]" isGlobal="false">
+    <mapping descriptor="simple_mapper">
+      <param name="location"><stringConstant>.</stringConstant></param>
+      <param name="prefix"><stringConstant>partlog</stringConstant></param>
+      <param name="suffix"><stringConstant>.log</stringConstant></param>
+    </mapping>
+  </variable>
+  <variable name="bams" type="file[]" isGlobal="false">
+    <mapping descriptor="simple_mapper">
+      <param name="location"><stringConstant>output</stringConstant></param>
+      <param name="prefix"><stringConstant>output</stringConstant></param>
+    </mapping>
+  </variable>
+  <variable name="level" type="int" isGlobal="false" xsi:nil="true"/>
+  <assign src="line 19">
+   <variableReference>level</variableReference>
+   <function name="toint">
+     <function name="arg">
+       <stringConstant>level</stringConstant></function></function>
+  </assign>
+  <foreach var="part"  indexVar="i" src="line 19">
+  <in><variableReference>reads</variableReference></in>
+  <body><call proc="qcpart" src="line 20">
+      <output><arraySubscript>
+       <variableReference>bams</variableReference>
+       <variableReference>i</variableReference>
+      </arraySubscript></output>
+      <output><arraySubscript>
+       <variableReference>partlogs</variableReference>
+       <variableReference>i</variableReference>
+      </arraySubscript></output>
+      <input><arraySubscript>
+       <variableReference>reads</variableReference>
+       <variableReference>i</variableReference>
+      </arraySubscript></input>
+      <input><arraySubscript>
+       <variableReference>mappings</variableReference>
+       <variableReference>i</variableReference>
+      </arraySubscript></input>
+      <input><arraySubscript>
+       <variableReference>reference</variableReference>
+       <integerConstant>0</integerConstant>
+      </arraySubscript></input>
+    </call></body>
+  </foreach>
+  <variable name="logs" type="file[]" isGlobal="false">
+    <mapping descriptor="simple_mapper">
+      <param name="location"><stringConstant>output</stringConstant></param>
+      <param name="prefix"><stringConstant>log</stringConstant></param>
+      <param name="suffix"><stringConstant>.log</stringConstant></param>
+    </mapping>
+  </variable>
+  <variable name="end" type="int" isGlobal="false" xsi:nil="true"/>
+  <assign src="line 26">
+   <variableReference>end</variableReference>
+   <arith op="-">
+     <arith op="+">
+       <integerConstant>23</integerConstant>
+       <function name="toint">
+         <function name="length">
+           <variableReference>bams</variableReference></function></function>
+     </arith>
+     <function name="toint">
+       <function name="length">
+         <variableReference>bams</variableReference></function></function>
+   </arith>
+  </assign>
+  <foreach var="j"  src="line 26">
+  <in>
+    <range>
+      <integerConstant>0</integerConstant>
+      <variableReference>end</variableReference>
+    </range>
+  </in>
+  <body><call proc="qcchrm" src="line 27">
+      <output><arraySubscript>
+       <variableReference>logs</variableReference>
+       <variableReference>j</variableReference>
+      </arraySubscript></output>
+      <input><variableReference>j</variableReference></input>
+    </call></body>
+  </foreach>
+</program>

swift_scripts/qc2/qc2-chrm.sh

+#!/bin/bash
+#
+# Second, per chromosome phase of the QC pipeline, with the fastq conversion
+# done in the first per part phase to increase parallelism.
+#
+# Merges fastq files for a single chromosome, then generates stats and quality
+# plots from the data.
+#
+# Assumes *chrm_N convention for chromosome files in input_dir. Outputs files
+# with a chrmN prefix.
+#
+# Usage: qc2-chrm.sh chrm_number input_dir output_dir
+#
+# The script changes into its own directory before doing any work, so
+# relative input_dir/output_dir values are resolved against that directory.
+#
+# Exits non-zero if the argument count is wrong, if any sequential stage
+# fails, or if either graph job fails.
+#
+
+if [ $# -ne 3 ]; then
+    echo "Usage: $0 chrm_number input_dir output_dir"
+    exit 1
+fi
+
+# Make "cat ... | dd" fail when cat fails (e.g. no matching fastq files),
+# instead of reporting only the status of dd.
+set -o pipefail
+
+# Quote the path so a script directory containing spaces still works, and
+# stop if the directory cannot be entered.
+cd "$(dirname "$0")" || exit 1
+bindir=$(pwd)
+
+export PYTHONPATH=/nfs/software/galaxy/lib
+
+export PATH=$bindir:/usr/local/bin:/usr/bin:/bin:/usr/local/tools/weblogo:/usr/local/tools/blat:/usr/local/tools/homer/bin:/nfs/software/bin
+
+chrm="$1"
+indir="$2"
+outdir="$3"
+
+fastq_out="$outdir/chrm$chrm.fastq"
+solexa_out="$outdir/chrm$chrm.solexa.fastq"
+stats_out="$outdir/chrm$chrm.stats"
+box_out="$outdir/chrm${chrm}_boxplot.png"
+dist_out="$outdir/chrm${chrm}_distribution.png"
+
+# Sequential stages: each runs only if the previous one succeeded, and the
+# script stops with a failure status as soon as one of them fails.
+echo "join chrm $(date)" \
+&& cat "$indir"/*chrm_"${chrm}".fastq | dd bs=1M > "$fastq_out" \
+&& echo "solexa $(date)" \
+&& fastq_groomer.py "$fastq_out" sanger "$solexa_out" \
+     solexa ascii summarize_input \
+&& echo "stats $(date)" \
+&& fastx_quality_stats -i "$solexa_out" -o "$stats_out" \
+&& echo "graphs $(date)" \
+|| exit 1
+
+# Both graphs only read the stats file, so they run concurrently. They are
+# started as direct children of this shell: jobs launched as "(cmd &)" belong
+# to a subshell and cannot be waited for from here.
+fastq_quality_boxplot_graph.sh -i "$stats_out" -o "$box_out" \
+    -t "chr Fastq Quality Boxplot" &
+box_pid=$!
+fastx_nucleotide_distribution_line_graph.sh -i "$stats_out" \
+    -o "$dist_out" -t "chr Fastq Nucleotide Distribution" &
+dist_pid=$!
+
+# Wait for each graph job individually so a failure in either is reported.
+status=0
+wait "$box_pid" || status=1
+wait "$dist_pid" || status=1
+
+echo "done $(date)"
+exit $status

swift_scripts/qc2/qc2-part.sh

+#!/bin/bash
+#
+# First, per part phase of QC pipeline that includes fastq conversion, to
+# increase the parallelism.
+#
+# Generates BAM and index files from a cga part, splits the bam by chromosome,
+# and converts the chromosome bams to fastq. The first argument must be the bam
+# output file location without the .bam extension, the second argument is the
+# chromosome base file (_N.bam is appended to each out file, for N 0 to 23),
+# and the rest of the arguments are passed to cgatools map2sam.  For example:
+#
+#  $ qc2-part.sh output/L01_001 output/L01_001-chrm --reads=reads.tsv \
+#                --mappings=maps.tsv --reference=build37.crr
+#
+# The script changes into its own directory before doing any work, so
+# relative paths in the arguments are resolved against that directory.
+#
+# Exits non-zero if the argument count is wrong, if any sequential stage
+# fails, or if any of the per-chromosome fastq conversions fails.
+#
+
+if [ $# -lt 3 ]; then
+    echo "Usage: $0 bam_base_file chrm_base_file MAP2SAM_ARGS..."
+    exit 1
+fi
+
+# Without pipefail only the exit status of the last pipeline stage is
+# checked, so a failing cgatools, samtools view or picard would go unnoticed.
+set -o pipefail
+
+# Quote the path so a script directory containing spaces still works, and
+# stop if the directory cannot be entered.
+cd "$(dirname "$0")" || exit 1
+bindir=$(pwd)
+
+export PATH=$bindir:/usr/local/bin:/usr/bin:/bin:/usr/local/tools/weblogo:/usr/local/tools/blat:/usr/local/tools/homer/bin:/nfs/software/bin
+
+bam_out="$1"
+chrm_out="$2"
+shift 2
+
+# Sequential stages: each runs only if the previous one succeeded, and the
+# script stops with a failure status as soon as one of them fails.
+echo "cga to bam $(date)" \
+&& cgatools map2sam --add-mate-sequence "$@" | dd bs=1M \
+    | samtools view -uS - \
+    | samtools sort - "$bam_out" \
+&& echo "index $(date)" \
+&& samtools index "${bam_out}.bam" \
+&& echo "split $(date)" \
+&& bam splitChromosome --in "${bam_out}.bam" --out "$chrm_out" \
+    --bamIndex "${bam_out}.bam.bai" \
+&& echo "sam2fastq $(date)" \
+|| exit 1
+
+# Convert every per-chromosome bam concurrently. Each conversion is started
+# as a direct child of this shell ("( ... ) &", not "( ... &)") so that it
+# can be waited for and its exit status collected.
+pids=()
+for chrm in "$chrm_out"_*.bam; do
+    # An unmatched glob is left as the literal pattern; skip it.
+    [ -e "$chrm" ] || continue
+    fastq_out=${chrm%.bam}.fastq
+    (picard_sam2fastq.sh QUIET=TRUE \
+     INPUT="$chrm" FASTQ=/dev/stdout \
+     VALIDATION_STRINGENCY=LENIENT 2>/dev/null \
+     | dd bs=1M > "$fastq_out") &
+    pids+=("$!")
+done
+
+# Wait for each conversion individually so that any failure is reported.
+status=0
+for pid in "${pids[@]}"; do
+    wait "$pid" || status=1
+done
+
+echo "done $(date)"
+exit $status

swift_scripts/qc2/qc2.kml

+<project><!-- CACHE ID 362905f5-19df-410b-8f2d-c1f9d7121cbe -->
+  <import file="sys.xml"/>
+  <import file="scheduler.xml"/>
+  <import file="rlog.xml"/>
+  <import file="vdl.k"/>
+  <types>
+     <xs:schema targetNamespace="http://ci.uchicago.edu/swift/2009/02/swiftscript" xmlns="http://ci.uchicago.edu/swift/2009/02/swiftscript" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema">
+      	   <xs:simpleType name="file">
+      	   <xs:restriction base="string"/>
+      	   </xs:simpleType>  
+     </xs:schema>	 
+  </types>
+  <global name="swift#string#17004">
+    <vdl:new type="string" value="" />
+  </global>
+  <global name="swift#string#17003">
+    <vdl:new type="string" value="--reference=" />
+  </global>
+  <global name="swift#string#17001">
+    <vdl:new type="string" value="--reads=" />
+  </global>
+  <global name="swift#string#17030">
+    <vdl:new type="string" value="." />
+  </global>
+  <global name="swift#string#17039">
+    <vdl:new type="string" value="log" />
+  </global>
+  <global name="swift#string#17000">
+    <vdl:new type="string" value="-chrm" />
+  </global>
+  <global name="swift#string#17016">
+    <vdl:new type="string" value="relative" />
+  </global>
+  <global name="swift#string#17025">
+    <vdl:new type="string" value="reference" />
+  </global>
+  <global name="swift#string#17012">
+    <vdl:new type="string" value="i" />
+  </global>
+  <global name="swift#string#17010">
+    <vdl:new type="string" value="reads" />
+  </global>
+  <global name="swift#string#17006">
+    <vdl:new type="string" value="output2" />
+  </global>
+  <global name="swift#string#17014">
+    <vdl:new type="string" value="level" />
+  </global>
+  <global name="swift#string#17034">
+    <vdl:new type="string" value=".log" />
+  </global>
+  <global name="swift#string#17019">
+    <vdl:new type="string" value="mappings" />
+  </global>
+  <global name="swift#string#17002">
+    <vdl:new type="string" value="--mappings=" />
+  </global>
+  <global name="swift#string#17005">
+    <vdl:new type="string" value="output" />
+  </global>
+  <global name="swift#string#17032">
+    <vdl:new type="string" value="partlog" />
+  </global>
+  <global name="swift#string#17008">
+    <vdl:new type="string" value="json_mapper.sh" />
+  </global>
+  <global name="swift#int#0">
+    <vdl:new type="int" value="0" />
+  </global>
+  <global name="swift#int#23">
+    <vdl:new type="int" value="23" />
+  </global>
+  <element name="qcpart" arguments="bam,plog,read,map,ref">
+    <parameterlog>
+    <string>input</string>
+    <string>read</string>
+    <vdl:getdatasetprovenanceid var="{read}" />
+    <string>{#thread}</string>
+    </parameterlog>
+    <parameterlog>
+    <string>input</string>
+    <string>map</string>
+    <vdl:getdatasetprovenanceid var="{map}" />
+    <string>{#thread}</string>
+    </parameterlog>
+    <parameterlog>
+    <string>input</string>
+    <string>ref</string>
+    <vdl:getdatasetprovenanceid var="{ref}" />
+    <string>{#thread}</string>
+    </parameterlog>
+    <parameterlog>
+    <string>output</string>
+    <string>bam</string>
+    <vdl:getdatasetprovenanceid var="{bam}" />
+    <string>{#thread}</string>
+    </parameterlog>
+    <parameterlog>
+    <string>output</string>
+    <string>plog</string>
+    <vdl:getdatasetprovenanceid var="{plog}" />
+    <string>{#thread}</string>
+    </parameterlog>
+    <log level="debug" message="PROCEDURE line=3 thread={#thread} name=qcpart"/>
+    <vdl:execute>
+      <vdl:tr>qcpart</vdl:tr>
+      <vdl:stagein var="{read}"/>
+      <vdl:stagein var="{map}"/>
+      <vdl:stagein var="{ref}"/>
+      <vdl:stageout var="{bam}"/>
+      <vdl:stageout var="{plog}"/>
+      <vdl:arguments>
+        <swiftscript:filename>
+         <variable>bam</variable> 
+        </swiftscript:filename>
+        <swiftscript:strcat>
+         <swiftscript:filename>
+         <variable>bam</variable> 
+        </swiftscript:filename><variable>swift#string#17000</variable> 
+        </swiftscript:strcat>
+        <swiftscript:strcat>
+         <variable>swift#string#17001</variable><swiftscript:filename>
+         <variable>read</variable> 
+        </swiftscript:filename> 
+        </swiftscript:strcat>
+        <swiftscript:strcat>
+         <variable>swift#string#17002</variable><swiftscript:filename>
+         <variable>map</variable> 
+        </swiftscript:filename> 
+        </swiftscript:strcat>
+        <swiftscript:strcat>
+         <variable>swift#string#17003</variable><swiftscript:filename>
+         <variable>ref</variable> 
+        </swiftscript:filename> 
+        </swiftscript:strcat>
+      </vdl:arguments>
+      <vdl:stdout>
+        <swiftscript:filename>
+         <variable>plog</variable> 
+        </swiftscript:filename>
+      </vdl:stdout><vdl:stderr>
+        <swiftscript:filename>
+         <variable>plog</variable> 
+        </swiftscript:filename>
+      </vdl:stderr>
+    </vdl:execute>
+    <vdl:closedataset var="{bam}"/>
+    <vdl:closedataset var="{plog}"/>
+    <log level="debug" message="PROCEDURE_END line=3"/></element>
+
+  <element name="qcchrm" arguments="out,chrm">
+    <parameterlog>
+    <string>input</string>
+    <string>chrm</string>
+    <vdl:getdatasetprovenanceid var="{chrm}" />
+    <string>{#thread}</string>
+    </parameterlog>
+    <parameterlog>
+    <string>output</string>
+    <string>out</string>
+    <vdl:getdatasetprovenanceid var="{out}" />
+    <string>{#thread}</string>
+    </parameterlog>
+    <log level="debug" message="PROCEDURE line=7 thread={#thread} name=qcchrm"/>
+    <vdl:execute>
+      <vdl:tr>qcchrm</vdl:tr>
+      <vdl:stagein var="{chrm}"/>
+      <vdl:stageout var="{out}"/>
+      <vdl:arguments>
+        <swiftscript:strcat>
+         <variable>swift#string#17004</variable><variable>chrm</variable> 
+        </swiftscript:strcat>
+        <variable>swift#string#17005</variable>
+        <variable>swift#string#17006</variable>
+      </vdl:arguments>
+
+    </vdl:execute>
+    <vdl:closedataset var="{out}"/>
+    <log level="debug" message="PROCEDURE_END line=7"/></element>
+
+  <set name="swift#mapper#17007">
+        <vdl:new type="string" dbgname="swift#mapper#17007" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17007</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17007}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17009">
+        <vdl:new type="string" dbgname="swift#mapper#17009" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17009</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17009}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17011">
+        <vdl:new type="string" dbgname="swift#mapper#17011" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17011</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17011}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17013">
+        <vdl:new type="string" dbgname="swift#mapper#17013" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17013</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17013}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17015">
+        <vdl:new type="string" dbgname="swift#mapper#17015" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17015</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17015}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="reads">
+    <vdl:new type="file[]" dbgname="reads">
+      <vdl:mapping descriptor="ext">
+        <vdl:parameter name="exec"><vdl:new type="string" value="json_mapper.sh" /></vdl:parameter>
+        <vdl:parameter name="filetype"><vdl:new type="string" value="reads" /></vdl:parameter>
+        <vdl:parameter name="i"><variable>swift#mapper#17011</variable></vdl:parameter>
+        <vdl:parameter name="level"><variable>swift#mapper#17013</variable></vdl:parameter>
+        <vdl:parameter name="relative"><variable>swift#mapper#17015</variable></vdl:parameter>
+        <vdl:parameter name="input" value="true" />
+      </vdl:mapping>
+    </vdl:new>
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>reads</string>
+  <vdl:getdatasetprovenanceid var="{reads}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17017">
+        <vdl:new type="string" dbgname="swift#mapper#17017" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17017</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17017}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17018">
+        <vdl:new type="string" dbgname="swift#mapper#17018" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17018</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17018}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17020">
+        <vdl:new type="string" dbgname="swift#mapper#17020" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17020</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17020}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17021">
+        <vdl:new type="string" dbgname="swift#mapper#17021" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17021</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17021}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17022">
+        <vdl:new type="string" dbgname="swift#mapper#17022" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17022</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17022}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="mappings">
+    <vdl:new type="file[]" dbgname="mappings">
+      <vdl:mapping descriptor="ext">
+        <vdl:parameter name="exec"><vdl:new type="string" value="json_mapper.sh" /></vdl:parameter>
+        <vdl:parameter name="filetype"><vdl:new type="string" value="mappings" /></vdl:parameter>
+        <vdl:parameter name="i"><variable>swift#mapper#17020</variable></vdl:parameter>
+        <vdl:parameter name="level"><variable>swift#mapper#17021</variable></vdl:parameter>
+        <vdl:parameter name="relative"><variable>swift#mapper#17022</variable></vdl:parameter>
+        <vdl:parameter name="input" value="true" />
+      </vdl:mapping>
+    </vdl:new>
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>mappings</string>
+  <vdl:getdatasetprovenanceid var="{mappings}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17023">
+        <vdl:new type="string" dbgname="swift#mapper#17023" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17023</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17023}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17024">
+        <vdl:new type="string" dbgname="swift#mapper#17024" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17024</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17024}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17026">
+        <vdl:new type="string" dbgname="swift#mapper#17026" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17026</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17026}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17027">
+        <vdl:new type="string" dbgname="swift#mapper#17027" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17027</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17027}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17028">
+        <vdl:new type="string" dbgname="swift#mapper#17028" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17028</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17028}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="reference">
+    <vdl:new type="file[]" dbgname="reference">
+      <vdl:mapping descriptor="ext">
+        <vdl:parameter name="exec"><vdl:new type="string" value="json_mapper.sh" /></vdl:parameter>
+        <vdl:parameter name="filetype"><vdl:new type="string" value="reference" /></vdl:parameter>
+        <vdl:parameter name="i"><variable>swift#mapper#17026</variable></vdl:parameter>
+        <vdl:parameter name="level"><variable>swift#mapper#17027</variable></vdl:parameter>
+        <vdl:parameter name="relative"><variable>swift#mapper#17028</variable></vdl:parameter>
+        <vdl:parameter name="input" value="true" />
+      </vdl:mapping>
+    </vdl:new>
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>reference</string>
+  <vdl:getdatasetprovenanceid var="{reference}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17029">
+        <vdl:new type="string" dbgname="swift#mapper#17029" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17029</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17029}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17031">
+        <vdl:new type="string" dbgname="swift#mapper#17031" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17031</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17031}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17033">
+        <vdl:new type="string" dbgname="swift#mapper#17033" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17033</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17033}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="partlogs">
+    <vdl:new type="file[]" dbgname="partlogs" waitfor="88003">
+      <vdl:mapping descriptor="simple_mapper">
+        <vdl:parameter name="location"><vdl:new type="string" value="." /></vdl:parameter>
+        <vdl:parameter name="prefix"><vdl:new type="string" value="partlog" /></vdl:parameter>
+        <vdl:parameter name="suffix"><vdl:new type="string" value=".log" /></vdl:parameter>
+      </vdl:mapping>
+    </vdl:new>
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>partlogs</string>
+  <vdl:getdatasetprovenanceid var="{partlogs}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17035">
+        <vdl:new type="string" dbgname="swift#mapper#17035" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17035</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17035}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17036">
+        <vdl:new type="string" dbgname="swift#mapper#17036" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17036</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17036}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="bams">
+    <vdl:new type="file[]" dbgname="bams" waitfor="88003">
+      <vdl:mapping descriptor="simple_mapper">
+        <vdl:parameter name="location"><vdl:new type="string" value="output" /></vdl:parameter>
+        <vdl:parameter name="prefix"><vdl:new type="string" value="output" /></vdl:parameter>
+      </vdl:mapping>
+    </vdl:new>
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>bams</string>
+  <vdl:getdatasetprovenanceid var="{bams}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="level">
+        <vdl:new type="int" dbgname="level" waitfor="88000" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>level</string>
+  <vdl:getdatasetprovenanceid var="{level}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17037">
+        <vdl:new type="string" dbgname="swift#mapper#17037" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17037</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17037}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17038">
+        <vdl:new type="string" dbgname="swift#mapper#17038" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17038</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17038}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="swift#mapper#17040">
+        <vdl:new type="string" dbgname="swift#mapper#17040" waitfor="" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>swift#mapper#17040</string>
+  <vdl:getdatasetprovenanceid var="{swift#mapper#17040}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="logs">
+    <vdl:new type="file[]" dbgname="logs" waitfor="88006">
+      <vdl:mapping descriptor="simple_mapper">
+        <vdl:parameter name="location"><vdl:new type="string" value="output" /></vdl:parameter>
+        <vdl:parameter name="prefix"><vdl:new type="string" value="log" /></vdl:parameter>
+        <vdl:parameter name="suffix"><vdl:new type="string" value=".log" /></vdl:parameter>
+      </vdl:mapping>
+    </vdl:new>
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>logs</string>
+  <vdl:getdatasetprovenanceid var="{logs}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <set name="end">
+        <vdl:new type="int" dbgname="end" waitfor="88004" />
+  </set>
+  <parameterlog>
+  <string>intermediate</string>
+  <string>end</string>
+  <vdl:getdatasetprovenanceid var="{end}" />
+  <string>{#thread}</string>
+  </parameterlog>
+  <restartLog>
+  	<vdl:mains>
+		<vdl:startprogressticker />
+		<vdl:mainp>
+		    <parallel>
+		        <sequential>
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17007</variable>
+		               </argument>
+		               <argument name="value">
+		                 <variable>swift#string#17008</variable>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential>
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17009</variable>
+		               </argument>
+		               <argument name="value">
+		                 <variable>swift#string#17010</variable>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential>
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17011</variable>
+		               </argument>
+		               <argument name="value">
+		                 <swiftscript:arg>
+		                  <variable>swift#string#17012</variable> 
+		                 </swiftscript:arg>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17013 to swiftscript:arg applied to swift#string#17014 (presumably a named command-line argument lookup; confirm). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17013</variable>
+		               </argument>
+		               <argument name="value">
+		                 <swiftscript:arg>
+		                  <variable>swift#string#17014</variable> 
+		                 </swiftscript:arg>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17015 to swiftscript:arg applied to swift#string#17016 (presumably a named command-line argument lookup; confirm). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17015</variable>
+		               </argument>
+		               <argument name="value">
+		                 <swiftscript:arg>
+		                  <variable>swift#string#17016</variable> 
+		                 </swiftscript:arg>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17017 to the value of swift#string#17008 (the same string constant is reused by other assignments in this parallel group). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17017</variable>
+		               </argument>
+		               <argument name="value">
+		                 <variable>swift#string#17008</variable>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17018 to the value of swift#string#17019. -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17018</variable>
+		               </argument>
+		               <argument name="value">
+		                 <variable>swift#string#17019</variable>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17020 to swiftscript:arg applied to swift#string#17012 (presumably a named command-line argument lookup; confirm). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17020</variable>
+		               </argument>
+		               <argument name="value">
+		                 <swiftscript:arg>
+		                  <variable>swift#string#17012</variable> 
+		                 </swiftscript:arg>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17021 to swiftscript:arg applied to swift#string#17014 (presumably a named command-line argument lookup; confirm). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17021</variable>
+		               </argument>
+		               <argument name="value">
+		                 <swiftscript:arg>
+		                  <variable>swift#string#17014</variable> 
+		                 </swiftscript:arg>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17022 to swiftscript:arg applied to swift#string#17016 (presumably a named command-line argument lookup; confirm). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17022</variable>
+		               </argument>
+		               <argument name="value">
+		                 <swiftscript:arg>
+		                  <variable>swift#string#17016</variable> 
+		                 </swiftscript:arg>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17023 to the value of swift#string#17008 (the same string constant is reused by other assignments in this parallel group). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17023</variable>
+		               </argument>
+		               <argument name="value">
+		                 <variable>swift#string#17008</variable>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17024 to the value of swift#string#17025. -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17024</variable>
+		               </argument>
+		               <argument name="value">
+		                 <variable>swift#string#17025</variable>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17026 to swiftscript:arg applied to swift#string#17012 (presumably a named command-line argument lookup; confirm). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17026</variable>
+		               </argument>
+		               <argument name="value">
+		                 <swiftscript:arg>
+		                  <variable>swift#string#17012</variable> 
+		                 </swiftscript:arg>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17027 to swiftscript:arg applied to swift#string#17014 (presumably a named command-line argument lookup; confirm). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17027</variable>
+		               </argument>
+		               <argument name="value">
+		                 <swiftscript:arg>
+		                  <variable>swift#string#17014</variable> 
+		                 </swiftscript:arg>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17028 to swiftscript:arg applied to swift#string#17016 (presumably a named command-line argument lookup; confirm). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17028</variable>
+		               </argument>
+		               <argument name="value">
+		                 <swiftscript:arg>
+		                  <variable>swift#string#17016</variable> 
+		                 </swiftscript:arg>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17029 to the value of swift#string#17030. -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17029</variable>
+		               </argument>
+		               <argument name="value">
+		                 <variable>swift#string#17030</variable>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17031 to the value of swift#string#17032. -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17031</variable>
+		               </argument>
+		               <argument name="value">
+		                 <variable>swift#string#17032</variable>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17033 to the value of swift#string#17034. -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17033</variable>
+		               </argument>
+		               <argument name="value">
+		                 <variable>swift#string#17034</variable>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17035 to the value of swift#string#17005 (the same constant is also assigned to swift#mapper#17036). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17035</variable>
+		               </argument>
+		               <argument name="value">
+		                 <variable>swift#string#17005</variable>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets swift#mapper#17036 to the value of swift#string#17005 (the same constant is also assigned to swift#mapper#17035). -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>swift#mapper#17036</variable>
+		               </argument>
+		               <argument name="value">
+		                 <variable>swift#string#17005</variable>
+		               </argument>
+		             </vdl:setfieldvalue>
+		        </sequential>
+		        <sequential> <!-- Sets "level" to swiftscript:toint of swiftscript:arg applied to swift#string#17014, then issues partialCloseDataset on {level} with closeID 88000. -->
+		             <vdl:setfieldvalue>
+		               <argument name="var">
+		                 <variable>level</variable>
+		               </argument>
+		               <argument name="value">
+		                 <swiftscript:toint>
+		                  <swiftscript:arg>
+		                  <variable>swift#string#17014</variable> 
+		                 </swiftscript:arg> 
+		                 </swiftscript:toint>
+		               </argument>
+		             </vdl:setfieldvalue>
+		            <partialCloseDataset var="{level}" closeID="88000" />
+		        </sequential>
+		        <sequential>
+		          <vdl:tparallelFor name="$"> <!-- Loop over the array iterator of "reads". Each item {$} is unpacked into "$$" and "part"; "i" is a new int built from {$$} (presumably the array index; confirm). The body calls qcpart with element i of bams, partlogs, reads and mappings, plus element swift#int#0 of reference. Debug log messages mark iteration start and end (source line 19). -->
+		            <getarrayiterator><variable>reads</variable></getarrayiterator>
+		            <set names="$$, part">
+		              <each items="{$}"/>
+		            </set>
+		              <set name="i">
+		                <vdl:new type="int" value="{$$}"/>
+		              </set>	<log level="debug" message="FOREACH_IT_START line=19 thread={#thread}"/>
+		          <log level="debug"><string>SCOPE thread={#thread}</string></log>
+
+		                <sequentialWithID>
+		                    <sequential>
+		                      <qcpart> <!-- The five arguments below are gathered inside a parallel element. -->
+		                        <parallel>
+		                          <vdl:getfieldsubscript>
+		                            <argument name="var"><variable>bams</variable></argument>
+		                            <argument name="subscript"><variable>i</variable></argument>
+		                          </vdl:getfieldsubscript>
+		                          <vdl:getfieldsubscript>
+		                            <argument name="var"><variable>partlogs</variable></argument>
+		                            <argument name="subscript"><variable>i</variable></argument>
+		                          </vdl:getfieldsubscript>
+		                          <vdl:getfieldsubscript>
+		                            <argument name="var"><variable>reads</variable></argument>
+		                            <argument name="subscript"><variable>i</variable></argument>
+		                          </vdl:getfieldsubscript>
+		                          <vdl:getfieldsubscript>
+		                            <argument name="var"><variable>mappings</variable></argument>
+		                            <argument name="subscript"><variable>i</variable></argument>
+		                          </vdl:getfieldsubscript>
+		                          <vdl:getfieldsubscript>
+		                            <argument name="var"><variable>reference</variable></argument>
+		                            <argument name="subscript"><variable>swift#int#0</variable></argument>
+		                          </vdl:getfieldsubscript>
+		                        </parallel>
+		                      </qcpart>
+		                    </sequential>
+		                </sequentialWithID>
+		              <log level="debug" message="FOREACH_IT_END line=19 thread={#thread}"/>
+		          </vdl:tparallelFor>
+		            <partialCloseDataset var="{bams}" closeID="88003" />