galaxy-central (ngs) / tools / ngs_rna / tophat_color_wrapper.xml

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
<tool id="tophat_color" name="Tophat for SOLiD" version="1.0.0">
    <!-- One-line summary of what the tool does. -->
    <description>Find splice junctions using RNA-seq data</description>
    <!-- External dependencies: a single package requirement, tophat. -->
    <requirements>
        <requirement type="package">tophat</requirement>
    </requirements>
    <command interpreter="python">
        tophat_wrapper.py
            ## Number of threads to run with; change this to match the threads you have available.
            --num-threads="4"

            ## This tool handles SOLiD reads, so color-space mode is always switched on.
            --color-space

            ## Provide outputs.
            --junctions-output=$junctions
            --hits-output=$accepted_hits

            ## Handle reference file: either a FASTA dataset from the history or a built-in index.
            ## For a built-in index, pass the last field of the tophat_indexes_color data table
            ## row whose first field equals the selected index value.
            #if $refGenomeSource.genomeSource == "history":
                --own-file=$refGenomeSource.ownFile
            #else:
                --indexes-path="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.index ), $__app__.tool_data_tables[ 'tophat_indexes_color' ].get_fields() )[0][-1] }"
            #end if

            ## Are reads single-end or paired?
            --single-paired=$singlePaired.sPaired

            ## First input file always required.
            --input1=$input1

            ## Set params based on whether reads are single-end or paired.
            ## The two branches are intentionally parallel: sParams for single-end, pParams for paired.
            #if $singlePaired.sPaired == "single":
                --settings=$singlePaired.sParams.sSettingsType
                ## The remaining options are only emitted when the full parameter list was chosen.
                #if $singlePaired.sParams.sSettingsType == "full":
                    -a $singlePaired.sParams.anchor_length
                    -m $singlePaired.sParams.splice_mismatches
                    -i $singlePaired.sParams.min_intron_length
                    -I $singlePaired.sParams.max_intron_length
                    -F $singlePaired.sParams.junction_filter
                    -g $singlePaired.sParams.max_multihits
                    --min-segment-intron $singlePaired.sParams.min_segment_intron
                    --max-segment-intron $singlePaired.sParams.max_segment_intron
                    --seg-mismatches=$singlePaired.sParams.seg_mismatches
                    --seg-length=$singlePaired.sParams.seg_length
                    --library-type=$singlePaired.sParams.library_type

                    ## Indel search.
                    #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes":
                        --allow-indels
                        --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length
                        --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length
                    #end if

                    ## Supplying junctions parameters.
                    #if $singlePaired.sParams.own_junctions.use_junctions == "Yes":
                        #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes":
                            -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model
                        #end if
                        #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes":
                            -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs
                        #end if
                        ## TODO: the explicit str() cast is needed here, but the reason is unknown.
                        #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes":
                            --no-novel-juncs
                        #end if
                    #end if

                    ## Closure search: pass its limits when enabled, otherwise turn it off explicitly.
                    #if $singlePaired.sParams.closure_search.use_search == "Yes":
                        --closure-search
                        --min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon
                        --min-closure-intron $singlePaired.sParams.closure_search.min_closure_intron
                        --max-closure-intron $singlePaired.sParams.closure_search.max_closure_intron
                    #else:
                        --no-closure-search
                    #end if
                    ## Coverage search: pass its limits when enabled, otherwise turn it off explicitly.
                    #if $singlePaired.sParams.coverage_search.use_search == "Yes":
                        --coverage-search
                        --min-coverage-intron $singlePaired.sParams.coverage_search.min_coverage_intron
                        --max-coverage-intron $singlePaired.sParams.coverage_search.max_coverage_intron
                    #else:
                        --no-coverage-search
                    #end if
                    ## TODO: the explicit str() cast is needed here, but the reason is unknown.
                    #if str($singlePaired.sParams.microexon_search) == "Yes":
                        --microexon-search
                    #end if
                #end if
            #else:
                ## Paired-end: the second read file and the mean inner distance are always passed.
                --input2=$singlePaired.input2
                -r $singlePaired.mate_inner_distance
                --settings=$singlePaired.pParams.pSettingsType
                ## The remaining options are only emitted when the full parameter list was chosen.
                #if $singlePaired.pParams.pSettingsType == "full":
                    --mate-std-dev=$singlePaired.pParams.mate_std_dev
                    -a $singlePaired.pParams.anchor_length
                    -m $singlePaired.pParams.splice_mismatches
                    -i $singlePaired.pParams.min_intron_length
                    -I $singlePaired.pParams.max_intron_length
                    -F $singlePaired.pParams.junction_filter
                    -g $singlePaired.pParams.max_multihits
                    --min-segment-intron $singlePaired.pParams.min_segment_intron
                    --max-segment-intron $singlePaired.pParams.max_segment_intron
                    --seg-mismatches=$singlePaired.pParams.seg_mismatches
                    --seg-length=$singlePaired.pParams.seg_length
                    --library-type=$singlePaired.pParams.library_type

                    ## Indel search.
                    #if $singlePaired.pParams.indel_search.allow_indel_search == "Yes":
                        --allow-indels
                        --max-insertion-length $singlePaired.pParams.indel_search.max_insertion_length
                        --max-deletion-length $singlePaired.pParams.indel_search.max_deletion_length
                    #end if

                    ## Supplying junctions parameters.
                    #if $singlePaired.pParams.own_junctions.use_junctions == "Yes":
                        #if $singlePaired.pParams.own_junctions.gene_model_ann.use_annotations == "Yes":
                            -G $singlePaired.pParams.own_junctions.gene_model_ann.gene_annotation_model
                        #end if
                        #if $singlePaired.pParams.own_junctions.raw_juncs.use_juncs == "Yes":
                            -j $singlePaired.pParams.own_junctions.raw_juncs.raw_juncs
                        #end if
                        ## TODO: the explicit str() cast is needed here, but the reason is unknown.
                        #if str($singlePaired.pParams.own_junctions.no_novel_juncs) == "Yes":
                            --no-novel-juncs
                        #end if
                    #end if

                    ## Closure search: pass its limits when enabled, otherwise turn it off explicitly.
                    #if $singlePaired.pParams.closure_search.use_search == "Yes":
                        --closure-search
                        --min-closure-exon $singlePaired.pParams.closure_search.min_closure_exon
                        --min-closure-intron $singlePaired.pParams.closure_search.min_closure_intron
                        --max-closure-intron $singlePaired.pParams.closure_search.max_closure_intron
                    #else:
                        --no-closure-search
                    #end if
                    ## Coverage search: pass its limits when enabled, otherwise turn it off explicitly.
                    #if $singlePaired.pParams.coverage_search.use_search == "Yes":
                        --coverage-search
                        --min-coverage-intron $singlePaired.pParams.coverage_search.min_coverage_intron
                        --max-coverage-intron $singlePaired.pParams.coverage_search.max_coverage_intron
                    #else:
                        --no-coverage-search
                    #end if
                    ## TODO: the explicit str() cast is needed here, but the reason is unknown.
                    #if str($singlePaired.pParams.microexon_search) == "Yes":
                        --microexon-search
                    #end if
                #end if
            #end if
    </command>
    <inputs>
        <!-- First (always required) read file; restricted to the fastqcssanger format. -->
        <param format="fastqcssanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Color-space: Must have Sanger-scaled quality values with ASCII offset 33" />
        <!--
            Reference genome selection. The genomeSource select switches between:
              indexed: pick an entry from the tophat_indexes_color data table
                       (options sorted by column 2; validation fails if the table yields no options)
              history: pick a FASTA dataset (ownFile) from the history
        -->
        <conditional name="refGenomeSource">
          <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
            <option value="indexed">Use a built-in index</option>
            <option value="history">Use one from the history</option>
          </param>
          <when value="indexed">
            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
              <options from_data_table="tophat_indexes_color">
                <filter type="sort_by" column="2"/>
                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
              </options>
            </param>
          </when>
          <when value="history">
            <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
          </when>  <!-- history -->
        </conditional>  <!-- refGenomeSource -->
        <conditional name="singlePaired">
            <!-- Test parameter of the singlePaired conditional: its value ("single" or "paired") selects which <when> branch applies. -->
            <param name="sPaired" type="select" label="Is this library mate-paired?">
              <option value="single">Single-end</option>
              <option value="paired">Paired-end</option>
            </param>
            <!-- Single-end reads: use TopHat defaults (preSet) or expose the full parameter list (full). -->
            <when value="single">
              <conditional name="sParams">
                <param name="sSettingsType" type="select" label="TopHat settings to use" help="You can use the default settings or set custom values for any of Tophat's parameters.">
                  <option value="preSet">Use Defaults</option>
                  <option value="full">Full parameter list</option>
                </param>
                <when value="preSet" />
                <!-- Full/advanced params. -->
                <when value="full">
                  <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
                      <option value="fr-unstranded">FR Unstranded</option>
                      <option value="fr-firststrand">FR First Strand</option>
                      <option value="fr-secondstrand">FR Second Strand</option>
                  </param>
                  <!-- The label states the anchor length must be at least 3, so enforce that in the form. -->
                  <param name="anchor_length" type="integer" min="3" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." />
                  <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" />
                  <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." />
                  <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
                  <!-- Indel search: the length limits are only shown when the search is enabled. -->
                  <conditional name="indel_search">
                      <param name="allow_indel_search" type="select" label="Allow indel search">
                          <option value="No">No</option>
                          <option value="Yes">Yes</option>
                      </param>
                      <when value="No"/>
                      <when value="Yes">
                         <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." />
                         <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." />
                      </when>
                  </conditional>
                  <!-- The help text documents a 0.0 to 1.0 range; restrict the input to it. -->
                  <param name="junction_filter" type="float" min="0.0" max="1.0" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" />
                  <param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" />
                  <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
                  <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
                  <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
                  <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />

                  <!-- Options for supplying own junctions. -->
                  <conditional name="own_junctions">
                      <param name="use_junctions" type="select" label="Use Own Junctions">
                        <option value="No">No</option>
                        <option value="Yes">Yes</option>
                      </param>
                      <when value="Yes">
                          <!-- Optional GTF gene model annotation. -->
                          <conditional name="gene_model_ann">
                             <param name="use_annotations" type="select" label="Use Gene Annotation Model">
                                <option value="No">No</option>
                                <option value="Yes">Yes</option>
                             </param>
                             <when value="No" />
                             <when value="Yes">
                               <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
                             </when>
                          </conditional>
                          <!-- Optional raw junctions file (interval format). -->
                          <conditional name="raw_juncs">
                             <param name="use_juncs" type="select" label="Use Raw Junctions">
                                <option value="No">No</option>
                                <option value="Yes">Yes</option>
                             </param>
                             <when value="No" />
                             <when value="Yes">
                               <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/>
                             </when>
                          </conditional>
                          <param name="no_novel_juncs" type="select" label="Only look for supplied junctions">
                            <option value="No">No</option>
                            <option value="Yes">Yes</option>
                          </param>
                      </when>
                      <when value="No" />
                  </conditional> <!-- /own_junctions -->

                  <!-- Closure search. -->
                  <conditional name="closure_search">
                    <param name="use_search" type="select" label="Use Closure Search">
                      <option value="No">No</option>
                      <option value="Yes">Yes</option>
                    </param>
                    <when value="Yes">
                        <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
                        <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
                        <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
                    </when>
                    <when value="No" />
                  </conditional>
                  <!-- Coverage search (enabled by default). -->
                  <conditional name="coverage_search">
                    <param name="use_search" type="select" label="Use Coverage Search">
                        <option selected="true" value="Yes">Yes</option>
                        <option value="No">No</option>
                    </param>
                    <when value="Yes">
                        <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" />
                        <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" />
                    </when>
                    <when value="No" />
                  </conditional>
                  <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.">
                    <option value="No">No</option>
                    <option value="Yes">Yes</option>
                  </param>
                </when>  <!-- full -->
              </conditional>  <!-- sParams -->
            </when>  <!--  single -->
            <!-- Paired-end reads: second read file and mate distance are always required; the rest is optional. -->
            <when value="paired">
              <param format="fastqcssanger" name="input2" type="data" label="RNA-Seq FASTQ file" help="Color-space: Must have Sanger-scaled quality values with ASCII offset 33" />
              <param name="mate_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs" />
              <conditional name="pParams">
                <param name="pSettingsType" type="select" label="TopHat settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
                  <option value="preSet">Commonly used</option>
                  <option value="full">Full parameter list</option>
                </param>
                <when value="preSet" />
                <!-- Full/advanced params. -->
                <when value="full">
                  <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
                      <option value="fr-unstranded">FR Unstranded</option>
                      <option value="fr-firststrand">FR First Strand</option>
                      <option value="fr-secondstrand">FR Second Strand</option>
                  </param>
                  <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/>
                  <!-- The label states the anchor length must be at least 3, so enforce that in the form. -->
                  <param name="anchor_length" type="integer" min="3" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." />
                  <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" />
                  <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." />
                  <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
                  <!-- Indel search: the length limits are only shown when the search is enabled. -->
                  <conditional name="indel_search">
                      <param name="allow_indel_search" type="select" label="Allow indel search">
                          <option value="No">No</option>
                          <option value="Yes">Yes</option>
                      </param>
                      <when value="No"/>
                      <when value="Yes">
                         <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." />
                         <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." />
                      </when>
                  </conditional>
                  <!-- The help text documents a 0.0 to 1.0 range; restrict the input to it. -->
                  <param name="junction_filter" type="float" min="0.0" max="1.0" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" />
                  <param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" />
                  <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
                  <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
                  <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
                  <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
                  <!-- Options for supplying own junctions. -->
                  <conditional name="own_junctions">
                      <param name="use_junctions" type="select" label="Use Own Junctions">
                        <option value="No">No</option>
                        <option value="Yes">Yes</option>
                      </param>
                      <when value="Yes">
                          <!-- Optional GTF gene model annotation. -->
                          <conditional name="gene_model_ann">
                             <param name="use_annotations" type="select" label="Use Gene Annotation Model">
                                <option value="No">No</option>
                                <option value="Yes">Yes</option>
                             </param>
                             <when value="No" />
                             <when value="Yes">
                               <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
                             </when>
                          </conditional>
                          <!-- Optional raw junctions file (interval format). -->
                          <conditional name="raw_juncs">
                             <param name="use_juncs" type="select" label="Use Raw Junctions">
                                <option value="No">No</option>
                                <option value="Yes">Yes</option>
                             </param>
                             <when value="No" />
                             <when value="Yes">
                               <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/>
                             </when>
                          </conditional>
                          <param name="no_novel_juncs" type="select" label="Only look for supplied junctions">
                            <option value="No">No</option>
                            <option value="Yes">Yes</option>
                          </param>
                      </when>
                      <when value="No" />
                  </conditional> <!-- /own_junctions -->

                  <!-- Closure search. -->
                  <conditional name="closure_search">
                    <param name="use_search" type="select" label="Use Closure Search">
                      <option value="No">No</option>
                      <option value="Yes">Yes</option>
                    </param>
                    <when value="Yes">
                        <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
                        <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
                        <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
                    </when>
                    <when value="No" />
                  </conditional>
                  <!-- Coverage search (enabled by default). -->
                  <conditional name="coverage_search">
                    <param name="use_search" type="select" label="Use Coverage Search">
                        <option selected="true" value="Yes">Yes</option>
                        <option value="No">No</option>
                    </param>
                    <when value="Yes">
                        <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" />
                        <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" />
                    </when>
                    <when value="No" />
                  </conditional>
                  <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.">
                    <option value="No">No</option>
                    <option value="Yes">Yes</option>
                  </param>
                </when>  <!-- full -->
              </conditional>  <!-- pParams -->
            </when>  <!-- paired -->
        </conditional>
    </inputs>

    <outputs>
        <!--
            Insertions output (BED), read from tophat_out/insertions.bed via from_work_dir.
            The filter is true only when indel search is set to 'Yes' in whichever of
            sParams (single-end) or pParams (paired-end) is present and contains indel_search.
            The actions set the dbkey metadata:
              indexed: column 1 of the tophat_indexes_color row whose column 0 matches the
                       selected index (rows whose column 0 starts with '#' are dropped)
              history: the dbkey attribute of the ownFile parameter
        -->
        <data format="bed" name="insertions" label="${tool.name} on ${on_string}: insertions" from_work_dir="tophat_out/insertions.bed">
            <filter>
                (
                    ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and 
                      ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or 
                    ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and 
                      ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) )
                ) 
            </filter>
            <actions>
              <conditional name="refGenomeSource.genomeSource">
                <when value="indexed">
                  <action type="metadata" name="dbkey">
                    <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
                    </option>
                  </action>
                </when>
                <when value="history">
                  <action type="metadata" name="dbkey">
                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
                  </action>
                </when>
              </conditional>
            </actions>
        </data>
        <!--
            Deletions output (BED), read from tophat_out/deletions.bed via from_work_dir.
            The filter is true only when indel search is set to 'Yes' in whichever of
            sParams (single-end) or pParams (paired-end) is present and contains indel_search.
            The actions set the dbkey metadata:
              indexed: column 1 of the tophat_indexes_color row whose column 0 matches the
                       selected index (rows whose column 0 starts with '#' are dropped)
              history: the dbkey attribute of the ownFile parameter
        -->
        <data format="bed" name="deletions" label="${tool.name} on ${on_string}: deletions" from_work_dir="tophat_out/deletions.bed">
            <filter>
                (
                    ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and 
                      ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or 
                    ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and 
                      ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) )
                )
            </filter>
            <actions>
              <conditional name="refGenomeSource.genomeSource">
                <when value="indexed">
                  <action type="metadata" name="dbkey">
                    <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
                    </option>
                  </action>
                </when>
                <when value="history">
                  <action type="metadata" name="dbkey">
                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
                  </action>
                </when>
              </conditional>
            </actions>
        </data>
        <!-- Always-present output: BED file of splice junctions (no filter element).
             NOTE(review): unlike insertions/deletions there is no from_work_dir here; presumably the
             command section (outside this view) writes to this dataset path directly. Confirm. -->
        <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions">
            <!-- Set the dbkey metadata from the reference in use:
                 indexed: column 1 of the tophat_indexes_color data table, taken from the row whose
                          column 0 equals refGenomeSource.index (rows starting with '#' are dropped);
                 history: copied from the dbkey of the refGenomeSource.ownFile dataset. -->
            <actions>
              <conditional name="refGenomeSource.genomeSource">
                <when value="indexed">
                  <action type="metadata" name="dbkey">
                    <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
                    </option>
                  </action>
                </when>
                <when value="history">
                  <action type="metadata" name="dbkey">
                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
                  </action>
                </when>
              </conditional>
            </actions>
        </data>
        <!-- Always-present output: read alignments in BAM format (no filter element).
             NOTE(review): no from_work_dir here; presumably the command section (outside this view)
             writes to this dataset path directly. Confirm. -->
        <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits">
            <!-- Set the dbkey metadata from the reference in use:
                 indexed: column 1 of the tophat_indexes_color data table, taken from the row whose
                          column 0 equals refGenomeSource.index (rows starting with '#' are dropped);
                 history: copied from the dbkey of the refGenomeSource.ownFile dataset. -->
            <actions>
              <conditional name="refGenomeSource.genomeSource">
                <when value="indexed">
                  <action type="metadata" name="dbkey">
                    <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
                      <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                      <filter type="param_value" ref="refGenomeSource.index" column="0"/>
                    </option>
                  </action>
                </when>
                <when value="history">
                  <action type="metadata" name="dbkey">
                    <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
                  </action>
                </when>
              </conditional>
            </actions>
        </data>
    </outputs>
    <tests>
        <!-- Test color-space single-end reads with user-supplied reference fasta and preset parameters -->
        <test>
            <!-- TopHat commands:
            cp test-data/tophat_in5.fasta tophat_in5.fa
            bowtie-build -C -f tophat_in5.fa tophat_in5
            tophat -p 1 -C tophat_in5 test-data/tophat_in4.fastqcssanger
            Rename the files in tmp_dir appropriately
            -->
            <!-- Inputs: one color-space read file, reference FASTA supplied from the history, preset settings -->
            <param name="input1" ftype="fastqcssanger" value="tophat_in4.fastqcssanger" />
            <param name="genomeSource" value="history" />
            <param name="ownFile" ftype="fasta" value="tophat_in5.fasta"/>
            <param name="sPaired" value="single" />
            <param name="sSettingsType" value="preSet" />
            <!-- Expected outputs; the BAM is checked with compare="sim_size" instead of the default comparison -->
            <output name="junctions" file="tophat_out5j.bed" />
            <output name="accepted_hits" file="tophat_out5h.bam" compare="sim_size" />
        </test>
        <!-- Test color-space single-end reads with pre-built index and full parameters -->
        <test>
            <!-- Tophat commands:
            tophat -p 1 -C -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +allow-indels +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intron 5000 +microexon-search partialMm9chrX_random_cs test-data/tophat_in4.fastqcssanger
            Replace the + with double-dash
            Rename the files in tmp_dir appropriately
            -->
            <!-- Inputs: one color-space read file aligned against a pre-built index, full parameter set -->
            <param name="input1" ftype="fastqcssanger" value="tophat_in4.fastqcssanger"/>
            <param name="genomeSource" value="indexed"/>
            <param name="index" value="partialMm9chrX_random_cs" />
            <param name="sPaired" value="single"/>
            <param name="sSettingsType" value="full"/>
            <param name="library_type" value="FR Unstranded"/>
            <param name="anchor_length" value="8"/>
            <param name="splice_mismatches" value="0"/>
            <param name="min_intron_length" value="70"/>
            <param name="max_intron_length" value="500000"/>
            <param name="junction_filter" value="0.15"/>
            <param name="max_multihits" value="40"/>
            <param name="min_segment_intron" value="50" />
            <param name="max_segment_intron" value="500000" />
            <param name="seg_mismatches" value="2"/>
            <param name="seg_length" value="25"/>
            <!-- Indel search is enabled, so the insertions and deletions outputs are expected below -->
            <param name="allow_indel_search" value="Yes"/>
            <param name="max_insertion_length" value="3"/>
            <param name="max_deletion_length" value="3"/>
            <param name="use_junctions" value="Yes" />
            <param name="use_annotations" value="No" />
            <param name="use_juncs" value="No" />
            <param name="no_novel_juncs" value="No" />
            <!-- NOTE(review): use_search is supplied twice in this test. Presumably the first belongs to
                 the closure search conditional and the second to the coverage search conditional (judging
                 by the params that follow each one); confirm the test framework resolves both. -->
            <param name="use_search" value="Yes" />
            <param name="min_closure_exon" value="50" />
            <param name="min_closure_intron" value="50" />
            <param name="max_closure_intron" value="5000" />
            <param name="use_search" value="Yes" />
            <param name="min_coverage_intron" value="50" />
            <param name="max_coverage_intron" value="20000" />
            <param name="microexon_search" value="Yes" />
            <!-- Expected outputs; the BAM is checked with compare="sim_size" instead of the default comparison -->
            <output name="insertions" file="tophat_out6i.bed" />
            <output name="deletions" file="tophat_out6d.bed" />
            <output name="junctions" file="tophat_out6j.bed" />
            <output name="accepted_hits" file="tophat_out6h.bam" compare="sim_size" />
        </test>
        <!-- Test color-space paired-end reads with pre-built index and preset parameters -->
        <test>
            <!-- TopHat commands:
            tophat -C -o tmp_dir -r 50 -p 1 partialMm9chrX_random_cs test-data/tophat_in6.fastqcssanger test-data/tophat_in7.fastqcssanger
            Rename the files in tmp_dir appropriately
            -->
            <!-- Inputs: two color-space mate files aligned against a pre-built index, preset settings -->
            <param name="input1" ftype="fastqcssanger" value="tophat_in6.fastqcssanger" />
            <param name="genomeSource" value="indexed" />
            <param name="index" value="partialMm9chrX_random_cs" />
            <param name="sPaired" value="paired" />
            <param name="input2" ftype="fastqcssanger" value="tophat_in7.fastqcssanger"/>
            <!-- Matches the -r 50 in the documented command above -->
            <param name="mate_inner_distance" value="50"/>
            <param name="pSettingsType" value="preSet" />
            <!-- Expected outputs; the BAM is checked with compare="sim_size" instead of the default comparison -->
            <output name="junctions" file="tophat_out7j.bed" />
            <output name="accepted_hits" file="tophat_out7h.bam" compare="sim_size" />
        </test>
        <!-- Test color-space paired-end reads with user-supplied reference fasta and full parameters -->
        <test>
            <!-- TopHat commands:
            cp test-data/tophat_in5.fasta tophat_in5.fa
            bowtie-build -C -f tophat_in5.fa tophat_in5
            tophat -C -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +library-type fr-unstranded +allow-indels +coverage-search +min-coverage-intron 50 +max-coverage-intron 15000 +mate-std-dev 20 +segment-mismatches 2 +segment-length 20 +min-segment-intron 50 +max-segment-intron 500000 tophat_in5 test-data/tophat_in6.fastqcssanger test-data/tophat_in7.fastqcssanger
            Replace the + with double-dash
            Rename the files in tmp_dir appropriately
            -->
            <!-- NOTE(review): the documented command above disagrees with the params below:
                 max-coverage-intron is 15000 in the command but 20000 here, and min/max-segment-intron
                 are 50/500000 in the command but 70/400000 here. Confirm which values were used to
                 generate the expected output files. -->
            <!-- Inputs: two color-space mate files, reference FASTA supplied from the history, full parameter set -->
            <param name="input1" ftype="fastqcssanger" value="tophat_in6.fastqcssanger"/>
            <param name="genomeSource" value="history"/>
            <param name="ownFile" ftype="fasta" value="tophat_in5.fasta"/>
            <param name="sPaired" value="paired"/>
            <param name="input2" ftype="fastqcssanger" value="tophat_in7.fastqcssanger"/>
            <param name="mate_inner_distance" value="20"/>
            <param name="pSettingsType" value="full"/>
            <param name="library_type" value="FR Unstranded"/>
            <param name="mate_std_dev" value="20"/>
            <param name="anchor_length" value="8"/>
            <param name="splice_mismatches" value="0"/>
            <param name="min_intron_length" value="70"/>
            <param name="max_intron_length" value="500000"/>
            <param name="junction_filter" value="0.15"/>
            <param name="max_multihits" value="40"/>
            <param name="min_segment_intron" value="70" />
            <param name="max_segment_intron" value="400000" />
            <param name="seg_mismatches" value="2"/>
            <param name="seg_length" value="20"/>
            <!-- Indel search is enabled, so the insertions and deletions outputs are expected below -->
            <param name="allow_indel_search" value="Yes"/>
            <param name="max_insertion_length" value="3"/>
            <param name="max_deletion_length" value="3"/>
            <param name="use_junctions" value="No" />
            <!-- NOTE(review): use_search is supplied twice with different values. Presumably the first
                 (No) is the closure search switch and the second (Yes) is the coverage search switch,
                 which would match the documented command; confirm the test framework resolves both. -->
            <param name="use_search" value="No" />
            <param name="use_search" value="Yes" />
            <param name="min_coverage_intron" value="50" />
            <param name="max_coverage_intron" value="20000" />
            <param name="microexon_search" value="No" />
            <!-- Expected outputs; the BAM is checked with compare="sim_size" instead of the default comparison -->
            <output name="insertions" file="tophat_out8i.bed" />
            <output name="deletions" file="tophat_out8d.bed" />
            <output name="junctions" file="tophat_out8j.bed" />
            <output name="accepted_hits" file="tophat_out8h.bam" compare="sim_size" />
        </test>
    </tests>

    <help>
**Tophat Overview**

TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons. Please cite: Trapnell, C., Pachter, L. and Salzberg, S.L. TopHat: discovering splice junctions with RNA-Seq. Bioinformatics 25, 1105-1111 (2009).        

.. _Tophat: http://tophat.cbcb.umd.edu/
        
------

**Know what you are doing**

.. class:: warningmark

There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.

.. __: http://tophat.cbcb.umd.edu/manual.html

------

**Input formats**

Tophat accepts files in color-space Sanger FASTQ format (fastqcssanger). Use the FASTQ Groomer to prepare your files.

------

**Outputs**

Tophat produces two main output files:

- junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction.
- accepted_hits -- A list of read alignments in BAM_ format.

.. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
.. _BAM: http://samtools.sourceforge.net/

Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format.

-------

**Tophat settings**

All of the options have a default value. You can change any of them. Some of the options in Tophat have been implemented here.

------

**Tophat parameter list**

This is a list of implemented Tophat options::

  -r                                This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments 
                                    selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter 
                                    is required for paired end runs.
  --mate-std-dev INT                The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
  -a/--min-anchor-length INT        The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction. Note that individual spliced     
                                    alignments may span a junction with fewer than this many bases on one side. However, every junction involved in spliced alignments is supported by at least one 
                                    read with this many bases on each side. This must be at least 3 and the default is 8.
  -m/--splice-mismatches INT        The maximum number of mismatches that may appear in the "anchor" region of a spliced alignment. The default is 0.
  -i/--min-intron-length INT        The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart. The default is 70.
  -I/--max-intron-length INT        The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000.
  -F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons, A and B, is supported by S reads.
                                    Let the average depth of coverage of exon A be D, and assume that it is higher than that of exon B. If S / D is less than the minimum isoform
                                    fraction, the junction is not reported. A value of zero disables the filter. The default is 0.15.
  -g/--max-multihits INT            Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many 
                                    alignments. The default is 40.
  -G/--GTF [GTF 2.2 file]           Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.
  -j/--raw-juncs [juncs file]       Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive.
  --no-novel-juncs                  Only look for junctions indicated in the supplied GTF file. (ignored without -G)
  --no-closure-search               Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.
  --closure-search                  Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp)
  --no-coverage-search              Disables the coverage based search for junctions.
  --coverage-search                 Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.
  --microexon-search                With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.
  --butterfly-search                TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA, that fall within the introns of your transcripts.
  --segment-mismatches              Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2.
  --segment-length                  Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25.
  --min-closure-exon                During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50.
  --min-closure-intron              The minimum intron length that may be found during closure search. The default is 50.
  --max-closure-intron              The maximum intron length that may be found during closure search. The default is 5000.
  --min-coverage-intron             The minimum intron length that may be found during coverage search. The default is 50.
  --max-coverage-intron             The maximum intron length that may be found during coverage search. The default is 20000.
  --min-segment-intron              The minimum intron length that may be found during split-segment search. The default is 50.
  --max-segment-intron              The maximum intron length that may be found during split-segment search. The default is 500000.
    </help>
</tool>
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.