Source

galaxy-central (ngs) / tools / sr_mapping / bwa_wrapper.xml

Full commit
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
<tool id="bwa_wrapper" name="Map with BWA for Illumina" version="1.2.2">
  <!-- Left empty: no description text is supplied in addition to the tool name. -->
  <description></description>
  <!-- NOTE(review): presumably opts this tool into Galaxy's "basic" job-splitting mode; confirm against the Galaxy release in use. -->
  <parallelism method="basic"></parallelism>
  <command interpreter="python">
    bwa_wrapper.py
      ## Cheetah template that assembles the bwa_wrapper.py command line from the
      ## parameters declared in the inputs section of this tool file.
      --threads="4"

      ## input1 is declared inside the "paired" conditional, so it is addressed
      ## through that conditional, the same way the input file options below do.
      #if $paired.input1.ext == "fastqillumina":
            --illumina1.3
      #end if

      ## reference source
      --fileSource=$genomeSource.refGenomeSource
      #if $genomeSource.refGenomeSource == "history":
        ##build index on the fly
        --ref="${genomeSource.ownFile}"
        --dbkey=$dbkey
      #else:
        ##use precomputed indexes
        ## pick the bwa_indexes row whose first field equals the selected value
        ## and pass the last field of that row as the reference
        --ref="${ filter( lambda x: str( x[0] ) == str( $genomeSource.indices ), $__app__.tool_data_tables[ 'bwa_indexes' ].get_fields() )[0][-1] }"
        --do_not_build_index
      #end if

      ## input file(s)
      --input1=$paired.input1
      #if $paired.sPaired == "paired":
        --input2=$paired.input2
      #end if

      ## output file
      --output=$output

      ## run parameters
      --genAlignType=$paired.sPaired
      --params=$params.source_select
      ## the individual settings exist only in the "full" case of the params
      ## conditional, so they are emitted only when pre_set is not selected
      #if $params.source_select != "pre_set":
        --maxEditDist=$params.maxEditDist
        --fracMissingAligns=$params.fracMissingAligns
        --maxGapOpens=$params.maxGapOpens
        --maxGapExtens=$params.maxGapExtens
        --disallowLongDel=$params.disallowLongDel
        --disallowIndel=$params.disallowIndel
        --seed=$params.seed
        --maxEditDistSeed=$params.maxEditDistSeed
        --mismatchPenalty=$params.mismatchPenalty
        --gapOpenPenalty=$params.gapOpenPenalty
        --gapExtensPenalty=$params.gapExtensPenalty
        --suboptAlign=$params.suboptAlign
        --noIterSearch=$params.noIterSearch
        --outputTopN=$params.outputTopN
        --outputTopNDisc=$params.outputTopNDisc
        --maxInsertSize=$params.maxInsertSize
        --maxOccurPairing=$params.maxOccurPairing
        ## read group fields are passed only when the user chose to specify one
        #if $params.readGroup.specReadGroup == "yes":
          --rgid="$params.readGroup.rgid"
          --rgcn="$params.readGroup.rgcn"
          --rgds="$params.readGroup.rgds"
          --rgdt="$params.readGroup.rgdt"
          --rgfo="$params.readGroup.rgfo"
          --rgks="$params.readGroup.rgks"
          --rglb="$params.readGroup.rglb"
          --rgpg="$params.readGroup.rgpg"
          --rgpi="$params.readGroup.rgpi"
          --rgpl="$params.readGroup.rgpl"
          --rgpu="$params.readGroup.rgpu"
          --rgsm="$params.readGroup.rgsm"
        #end if
      #end if

      ## suppress output SAM header
      --suppressHeader=$suppressHeader
  </command>
  <!-- External dependency declared for this tool: the bwa package. -->
  <requirements>
    <requirement type="package">bwa</requirement>
  </requirements>
  <inputs>
    <!-- Reference genome: either a built-in index chosen from the bwa_indexes data table, or a FASTA dataset from the history. -->
    <conditional name="genomeSource">
      <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
        <option value="indexed">Use a built-in index</option>
        <option value="history">Use one from the history</option>
      </param>
      <when value="indexed">
        <param name="indices" type="select" label="Select a reference genome">
          <options from_data_table="bwa_indexes">
            <filter type="sort_by" column="2" />
            <validator type="no_options" message="No indexes are available" />
          </options>
        </param>
      </when>
      <when value="history">
        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
      </when>
    </conditional>
    <!-- Library layout: the single case takes one FASTQ (input1); the paired case takes input1 and input2. -->
    <conditional name="paired">
      <param name="sPaired" type="select" label="Is this library mate-paired?">
        <option value="single">Single-end</option>
        <option value="paired">Paired-end</option>
      </param>
      <when value="single">
        <param name="input1" type="data" format="fastqsanger,fastqillumina" label="FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
      </when>
      <when value="paired">
        <param name="input1" type="data" format="fastqsanger,fastqillumina" label="Forward FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
        <param name="input2" type="data" format="fastqsanger,fastqillumina" label="Reverse FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
      </when>
    </conditional>
    <!-- BWA settings: the pre_set case exposes no extra parameters; the full case exposes the parameters below, each label naming the bwa option it corresponds to. -->
    <conditional name="params">
      <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
        <option value="pre_set">Commonly Used</option>
        <option value="full">Full Parameter List</option>
      </param>
      <when value="pre_set" />
      <when value="full">
        <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance (aln -n)" help="Enter this value OR a fraction of missing alignments, not both" />
        <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate (aln -n)" help="Enter this value OR maximum edit distance, not both" />
        <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens (aln -o)" />
        <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions (aln -e)" help="-1 for k-difference mode (disallowing long gaps)" />
        <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] bp towards the 3'-end (aln -d)" />
        <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end (aln -i)" />
        <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed (aln -l)" help="Enter -1 for infinity" />
        <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed (aln -k)" />
        <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty (aln -M)" help="BWA will not search for suboptimal hits with a score lower than [value]" />
        <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty (aln -O)" />
        <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty (aln -E)" />
        <param name="suboptAlign" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Proceed with suboptimal alignments even if the top hit is a repeat (aln -R)" help="For paired-end reads only. By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp)" />
        <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Disable iterative search (aln -N)" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default" />
        <param name="outputTopN" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly (samse/sampe -n)" help="If a read has more than INT hits, the XA tag will not be written" />
        <param name="outputTopNDisc" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) (sampe -N)" help="For paired-end reads only. If a read has more than INT hits, the XA tag will not be written" />
        <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly (sampe -a)" help="For paired-end reads only. Only used when there are not enough good alignments to infer the distribution of insert sizes" />
        <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing (sampe -o)" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. Reducing this parameter helps faster pairing" />
        <!-- Read group fields: declared only in the "yes" case; "no" is the selected default. -->
        <conditional name="readGroup">
          <param name="specReadGroup" type="select" label="Specify the read group for this file? (samse/sampe -r)">
            <option value="yes">Yes</option>
            <option value="no" selected="True">No</option>
          </param>
          <when value="yes">
            <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG 
tags of alignment records. Must be unique among all read groups in header section." help="Required if RG specified. Read group 
IDs may be modified when merging SAM files in order to handle collisions." />
            <param name="rgcn" type="text" size="25" label="Sequencing center that produced the read (CN)" help="Optional" />
            <param name="rgds" type="text" size="25" label="Description (DS)" help="Optional" />
            <param name="rgdt" type="text" size="25" label="Date that run was produced (DT)" help="Optional. ISO8601 format date or date/time, like YYYY-MM-DD" />
            <param name="rgfo" type="text" size="25" label="Flow order (FO). The array of nucleotide bases that correspond to the nucleotides used for each 
flow of each read." help="Optional. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by 
various other characters. Format : /\*|[ACMGRSVTWYHKDBN]+/" />
            <param name="rgks" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" help="Optional" />
            <param name="rglb" type="text" size="25" label="Library name (LB)" help="Required if RG specified" />
            <param name="rgpg" type="text" size="25" label="Programs used for processing the read group (PG)" help="Optional" />
            <param name="rgpi" type="text" size="25" label="Predicted median insert size (PI)" help="Optional" />
            <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA, 
SOLID, HELICOS, IONTORRENT and PACBIO" />
            <param name="rgpu" type="text" size="25" label="Platform unit (PU)" help="Optional. Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" />
            <param name="rgsm" type="text" size="25" label="Sample (SM)" help="Required if RG specified. Use pool name where a pool is being sequenced" />
          </when>
          <when value="no" />
        </conditional>
      </when>
    </conditional>
    <!-- Top-level switch, unchecked by default, passed to the wrapper as the string "true" or "false". -->
    <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="BWA produces SAM with several lines of header information" />
  </inputs>
  <outputs>
    <!-- Single SAM output. The actions below set its dbkey metadata according to where the reference came from. -->
    <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
      <actions>
        <conditional name="genomeSource.refGenomeSource">
          <when value="indexed">
            <!-- Built-in index: take column 1 of the bwa_indexes row whose column 0 equals the selected index; rows whose column 0 starts with "#" are dropped first. -->
            <action type="metadata" name="dbkey">
              <option type="from_data_table" name="bwa_indexes" column="1">
                <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                <filter type="param_value" ref="genomeSource.indices" column="0"/>
              </option>
            </action>
          </when>
          <when value="history">
            <!-- History reference: copy the dbkey attribute of the selected FASTA dataset. -->
            <action type="metadata" name="dbkey">
              <option type="from_param" name="genomeSource.ownFile" param_attribute="dbkey" />
            </action>
          </when>
        </conditional>
      </actions>
    </data>
  </outputs>
  <tests>
    <test>
      <!--
      Single-end run against the built-in phiX index with the Commonly Used
      (pre_set) settings and the SAM header suppressed.
      BWA commands:
      bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out1.sai
      bwa samse phiX.fasta bwa_wrapper_out1.sai test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out1.u.sam
      phiX.fasta is the prefix for the reference files (phiX.fasta.amb, phiX.fasta.ann, phiX.fasta.bwt, ...)
      remove the comment lines (beginning with '@') from the resulting sam file
      plain old sort doesn't handle underscores like python:
      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out1.u.sam bwa_wrapper_out1.sam
      -->
      <param name="refGenomeSource" value="indexed" />
      <param name="indices" value="phiX" />
      <param name="sPaired" value="single" />
      <param name="input1" value="bwa_wrapper_in1.fastqsanger" ftype="fastqsanger" />
      <param name="source_select" value="pre_set" />
      <param name="suppressHeader" value="true" />
      <output name="output" file="bwa_wrapper_out1.sam" ftype="sam" sort="True" />
    </test>
    <test>
      <!--
      Single-end run against a reference FASTA from the history, using the
      Full Parameter List with no read group and the SAM header suppressed.
      BWA commands:
      cp test-data/phiX.fasta phiX.fasta
      bwa index -a is phiX.fasta
      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out2.sai
      bwa samse -n 3 phiX.fasta bwa_wrapper_out2.sai test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out2.u.sam
      phiX.fasta is the prefix for the reference files (phiX.fasta.amb, phiX.fasta.ann, phiX.fasta.bwt, ...)
      remove the comment lines (beginning with '@') from the resulting sam file
      plain old sort doesn't handle underscores like python:
      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out2.u.sam bwa_wrapper_out2.sam
      -->
      <param name="refGenomeSource" value="history" />
      <param name="ownFile" value="phiX.fasta" />
      <param name="sPaired" value="single" />
      <param name="input1" value="bwa_wrapper_in1.fastqsanger" ftype="fastqsanger" />
      <param name="source_select" value="full" />
      <param name="maxEditDist" value="0" />  
      <param name="fracMissingAligns" value="0.04" />
      <param name="maxGapOpens" value="1" />
      <param name="maxGapExtens" value="-1" />
      <param name="disallowLongDel" value="16" />
      <param name="disallowIndel" value="5" />
      <param name="seed" value="-1" />
      <param name="maxEditDistSeed" value="2" />
      <param name="mismatchPenalty" value="3" />
      <param name="gapOpenPenalty" value="11" />
      <param name="gapExtensPenalty" value="4" />
      <param name="suboptAlign" value="true" />
      <param name="noIterSearch" value="true" />
      <param name="outputTopN" value="3" />
      <param name="outputTopNDisc" value="10" />
      <param name="maxInsertSize" value="500" />
      <param name="maxOccurPairing" value="100000" />
      <param name="specReadGroup" value="no" />
      <param name="suppressHeader" value="true" />
      <output name="output" file="bwa_wrapper_out2.sam" ftype="sam" sort="True" />
    </test>
    <test>
      <!--
      Paired-end run against the built-in phiX index, using the Full Parameter
      List with a read group specified. The SAM header is kept here
      (suppressHeader is false) and the comparison tolerates lines_diff="2".
      BWA commands:
      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in2.fastqsanger > bwa_wrapper_out3a.sai
      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out3b.sai
      bwa sampe -a 500 -o 100000 -n 3 -N 10 -r "@RG\tID:abcdefg\tDS:descrip\tDT:2010-11-01\tLB:lib-mom-A\tPI:400\tPL:ILLUMINA\tSM:mom" phiX.fasta bwa_wrapper_out3a.sai bwa_wrapper_out3b.sai test-data/bwa_wrapper_in2.fastqsanger test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out3.u.sam
      phiX.fasta is the prefix for the reference
      plain old sort doesn't handle underscores like python:
      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out3.u.sam bwa_wrapper_out3.sam
      -->
      <param name="refGenomeSource" value="indexed" />
      <param name="indices" value="phiX" />
      <param name="sPaired" value="paired" />
      <param name="input1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
      <param name="input2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" />
      <param name="source_select" value="full" />
      <param name="maxEditDist" value="0" />
      <param name="fracMissingAligns" value="0.04" />
      <param name="maxGapOpens" value="1" />
      <param name="maxGapExtens" value="-1" />
      <param name="disallowLongDel" value="16" />
      <param name="disallowIndel" value="5" />
      <param name="seed" value="-1" />
      <param name="maxEditDistSeed" value="2" />
      <param name="mismatchPenalty" value="3" />
      <param name="gapOpenPenalty" value="11" />
      <param name="gapExtensPenalty" value="4" />
      <param name="suboptAlign" value="true" />
      <param name="noIterSearch" value="true" />
      <param name="outputTopN" value="3" />
      <param name="outputTopNDisc" value="10" />
      <param name="maxInsertSize" value="500" />
      <param name="maxOccurPairing" value="100000" />
      <param name="specReadGroup" value="yes" />
      <param name="rgid" value="abcdefg" />
      <param name="rgcn" value="" />
      <param name="rgds" value="descrip" />
      <param name="rgdt" value="2010-11-01" />
      <param name="rgfo" value="" />
      <param name="rgks" value="" />
      <param name="rglb" value="lib-mom-A" />
      <param name="rgpg" value="" />
      <param name="rgpi" value="400" />
      <param name="rgpl" value="ILLUMINA" />
      <param name="rgpu" value="" />
      <param name="rgsm" value="mom" />
      <param name="suppressHeader" value="false" />
      <output name="output" file="bwa_wrapper_out3.sam" ftype="sam" sort="True" lines_diff="2" />
    </test>
    <test>
      <!--
      Paired-end run against a reference FASTA from the history, using the
      Commonly Used (pre_set) settings with the SAM header suppressed.
      BWA commands:
      cp test-data/phiX.fasta phiX.fasta
      bwa index -a is phiX.fasta
      bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in2.fastqsanger > bwa_wrapper_out8a.sai
      bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out8b.sai
      bwa sampe -a 500 -o 100000 phiX.fasta bwa_wrapper_out8a.sai bwa_wrapper_out8b.sai test-data/bwa_wrapper_in2.fastqsanger test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out8.u.sam
      phiX.fasta is the prefix for the reference
      remove the comment lines (beginning with '@') from the resulting sam file
      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out8.u.sam bwa_wrapper_out8.sam
      -->
      <param name="refGenomeSource" value="history" />
      <!-- reference FASTA supplied as a history dataset; for this source the command template passes it as the reference without the do_not_build_index flag -->
      <param name="ownFile" value="phiX.fasta" />
      <param name="sPaired" value="paired" />
      <param name="input1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
      <param name="input2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" />
      <!-- must be one of the declared source_select option values: "pre_set" or "full" -->
      <param name="source_select" value="pre_set" />
      <param name="suppressHeader" value="true" />
      <output name="output" file="bwa_wrapper_out8.sam" ftype="sam" sort="True" />
    </test>
  </tests> 
  <help>

**What it does**

BWA is a fast, lightweight tool that aligns relatively short sequences (queries) to a large sequence database, such as the human reference genome. It is developed by Heng Li at the Sanger Institute. Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25, 1754-60.

------

**Know what you are doing**

.. class:: warningmark

There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.

 .. __: http://bio-bwa.sourceforge.net/

------

**Input formats**

BWA accepts files in either Sanger FASTQ format (galaxy type *fastqsanger*) or Illumina FASTQ format (galaxy type *fastqillumina*). Use the FASTQ Groomer to prepare your files.

------

**A Note on Built-in Reference Genomes**

Some genomes have multiple variants. If only one "type" of genome is listed, it is the Full version, which contains everything that came in the original genome data download (possibly with mitochondrial and plasmid DNA added if it wasn't already included). The Full version is available for every genome. Some genomes also come in the Canonical variant, which contains only the "canonical" (well-defined) chromosomes or segments, such as chr1-chr22, chrX, chrY, and chrM for human. Other variations include gender. These will come in the canonical form only, so the general Canonical variant is actually Canonical Female and the other is Canonical Male (identical to female excluding chrX).

------

**Outputs**

The output is in SAM format, and has the following columns::

    Column  Description
  --------  --------------------------------------------------------
  1  QNAME  Query (pair) NAME
  2  FLAG   bitwise FLAG
  3  RNAME  Reference sequence NAME
  4  POS    1-based leftmost POSition/coordinate of clipped sequence
  5  MAPQ   MAPping Quality (Phred-scaled)
  6  CIGAR  extended CIGAR string
  7  MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
  8  MPOS   1-based Mate POSition
  9  ISIZE  Inferred insert SIZE
  10 SEQ    query SEQuence on the same strand as the reference
  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
  
The flags are as follows::

    Flag  Description
  ------  -------------------------------------
  0x0001  the read is paired in sequencing
  0x0002  the read is mapped in a proper pair
  0x0004  the query sequence itself is unmapped
  0x0008  the mate is unmapped
  0x0010  strand of the query (1 for reverse)
  0x0020  strand of the mate
  0x0040  the read is the first read in a pair
  0x0080  the read is the second read in a pair
  0x0100  the alignment is not primary

It looks like this (scroll sideways to see the entire example)::

  QNAME	FLAG	RNAME	POS	MAPQ	CIGAR	MRNM	MPOS	ISIZE	SEQ	QUAL	OPT
  HWI-EAS91_1_30788AAXX:1:1:1761:343	4	*	0	0	*	*	0	0	AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG	hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
  HWI-EAS91_1_30788AAXX:1:1:1578:331	4	*	0	0	*	*	0	0	GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG	hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh

-------

**BWA settings**

All of the options have a default value. You can change any of them. All of the options in BWA have been implemented here.

------

**BWA parameter list**

This is an exhaustive list of BWA options:

For **aln**::

  -n NUM  Maximum edit distance if the value is INT, or the fraction of missing
          alignments given 2% uniform base error rate if FLOAT. In the latter
          case, the maximum edit distance is automatically chosen for different 
          read lengths. [0.04]
  -o INT  Maximum number of gap opens [1]
  -e INT  Maximum number of gap extensions, -1 for k-difference mode
          (disallowing long gaps) [-1]
  -d INT  Disallow a long deletion within INT bp towards the 3'-end [16]
  -i INT  Disallow an indel within INT bp towards the ends [5]
  -l INT  Take the first INT subsequence as seed. If INT is larger than the
          query sequence, seeding will be disabled. For long reads, this option 
          is typically ranged from 25 to 35 for '-k 2'. [inf]
  -k INT  Maximum edit distance in the seed [2]
  -t INT  Number of threads (multi-threading mode) [1]
  -M INT  Mismatch penalty. BWA will not search for suboptimal hits with a score
          lower than (bestScore-misMsc). [3]
  -O INT  Gap open penalty [11]
  -E INT  Gap extension penalty [4]
  -c      Reverse query but not complement it, which is required for alignment
          in the color space.
  -R      Proceed with suboptimal alignments even if the top hit is a repeat. By
          default, BWA only searches for suboptimal alignments if the top hit is
          unique. Using this option has no effect on accuracy for single-end
          reads. It is mainly designed for improving the alignment accuracy of
          paired-end reads. However, the pairing procedure will be slowed down,
          especially for very short reads (~32bp).
  -N      Disable iterative search. All hits with no more than maxDiff
          differences will be found. This mode is much slower than the default.

For **samse**::

  -n INT  Maximum number of alignments to output in the XA tag for reads paired
          properly. If a read has more than INT hits, the XA tag will not be
          written. [3]
  -r STR  Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]

For **sampe**::

  -a INT  Maximum insert size for a read pair to be considered as being mapped
          properly. Since version 0.4.5, this option is only used when there
          are not enough good alignments to infer the distribution of insert
          sizes. [500]
  -n INT  Maximum number of alignments to output in the XA tag for reads paired
          properly. If a read has more than INT hits, the XA tag will not be
          written. [3]
  -N INT  Maximum number of alignments to output in the XA tag for disconcordant
          read pairs (excluding singletons). If a read has more than INT hits,
          the XA tag will not be written. [10]
  -o INT  Maximum occurrences of a read for pairing. A read with more
          occurrences will be treated as a single-end read. Reducing this
          parameter helps faster pairing. [100000]
  -r STR  Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]

For specifying the read group in **samse** or **sampe**, use the following::

  @RG   Read group. Unordered multiple @RG lines are allowed. 
  ID    Read group identifier. Each @RG line must have a unique ID. The value of
        ID is used in the RG tags of alignment records. Must be unique among all
        read groups in header section. Read group IDs may be modified when
        merging SAM files in order to handle collisions. 
  CN    Name of sequencing center producing the read. 
  DS    Description. 
  DT    Date the run was produced (ISO8601 date or date/time). 
  FO    Flow order. The array of nucleotide bases that correspond to the
        nucleotides used for each flow of each read. Multi-base flows are encoded
        in IUPAC format, and non-nucleotide flows by various other characters.
        Format : /\*|[ACMGRSVTWYHKDBN]+/ 
  KS    The array of nucleotide bases that correspond to the key sequence of each read. 
  LB    Library. 
  PG    Programs used for processing the read group. 
  PI    Predicted median insert size. 
  PL    Platform/technology used to produce the reads. Valid values : CAPILLARY,
        LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO. 
  PU    Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for
        SOLiD). Unique identifier. 
  SM    Sample. Use pool name where a pool is being sequenced. 

  </help>
</tool>