Commits

boliu  committed fec1903 Draft

update some tools

  • Participants
  • Parent commits 083f9f4

Comments (0)

Files changed (13)

File lib/galaxy/config.py

         tempfile.tempdir = self.new_file_path
         self.openid_consumer_cache_path = resolve_path( kwargs.get( "openid_consumer_cache_path", "database/openid_consumer_cache" ), self.root )
         self.cookie_path = kwargs.get( "cookie_path", "/" )
+
+        # web API
+        self.enable_api = string_as_bool( kwargs.get( 'enable_api', False ) )
+
         self.genome_data_path = kwargs.get( "genome_data_path", "tool-data/genome" )
         self.rsync_url = kwargs.get( "rsync_url", "rsync://scofield.bx.psu.edu/indexes" )
         # Galaxy OpenID settings
         self.enable_whoosh_library_search = string_as_bool( kwargs.get( 'enable_whoosh_library_search', False ) )
         self.whoosh_index_dir = resolve_path( kwargs.get( "whoosh_index_dir", "database/whoosh_indexes" ), self.root )
         self.ftp_upload_dir = kwargs.get( 'ftp_upload_dir', None )
+
+        self.galaxy_user = kwargs.get("galaxy_user", "galaxy")
+        self.globus_online = kwargs.get("globus_online", False)
+        self.globus_endpoint = kwargs.get("globus_endpoint", None)
+        self.home_root = kwargs.get("home_root", None)
+        self.globus_userkey = kwargs.get("globus_userkey", None)
+        self.globus_usercert = kwargs.get("globus_usercert", None)
+        self.globus_rootcert = kwargs.get("globus_rootcert", None)
+        self.globus_scratch = kwargs.get("globus_scratch", None)
+
         self.ftp_upload_site = kwargs.get( 'ftp_upload_site', None )
         self.allow_library_path_paste = kwargs.get( 'allow_library_path_paste', False )
         self.disable_library_comptypes = kwargs.get( 'disable_library_comptypes', '' ).lower().split( ',' )

File tool-data/shared/igv/igv_build_sites.txt

+#site_id	site_name	site_url	dbkey	ivg_build_name
+web_link_main	web current	http://www.broadinstitute.org/igv/projects/current/igv.php	hg19,hg_g1k_v37,hg18,1kg_ref,hg17,hg16,mm9,mm8,mm7,panTro2,rheMac2,rn4,canFam2,bosTau6,bosTau4,bosTau3,susScrofa,galGal3,cavPor3,monDom5,xenTro2,taeGut1,zebrafish,danRer6,danRer7,gasAcu1,Aplysia,Plasmodium_3D7_v2.1,Plasmodium_3D7_v5.5,Plasmodium_6.1,PlasmoDB_7.0,pvivax,GSM552910,sacCer1,sacCer2,sk1,Y55,sacCer62,spombe_709,spombe_1.55,candida,mg8,spur_2.1,spur_2.5,spur_3.0,WS201,ce6,ce4,dm3,dm2,dmel_5.9,dmel_r5.22,dmel_r5.33,tcas_2.0,tcas_3.0,ncrassa_v3,nc10,Glamblia_2.0,me49,tb927,tbgambi,lmjr,anidulans_4.1,NC_009012,U00096.2,NC_000913.2,NC_002655.2,CSavignyi_v2.1,tair8,tair9,tair10,O_Sativa_r6,osativa_6.1,B73,ZmB73_5a,ppatens_1.2,D.discoideum	hg19,b37,hg18,1kg_ref,hg17,hg16,mm9,mm8,mm7,panTro2,rheMac2,rn4,canFam2,bosTau6,bosTau4,bosTau3,susScrofa,galGal3,cavPor3,monDom5,xenTro2,taeGut1,zebrafish,danRer6,danRer7,gasAcu1,Aplysia,Plasmodium_3D7_v2.1,Plasmodium_3D7_v5.5,Plasmodium_6.1,PlasmoDB_7.0,pvivax,GSM552910,sacCer1,sacCer2,sk1,Y55,sacCer62,spombe_709,spombe_1.55,candida,mg8,spur_2.1,spur_2.5,spur_3.0,WS201,ce6,ce4,dm3,dm2,dmel_5.9,dmel_r5.22,dmel_r5.33,tcas_2.0,tcas_3.0,ncrassa_v3,nc10,Glamblia_2.0,me49,tb927,tbgambi,lmjr,anidulans_4.1,NC_009012,U00096.2,NC_000913.2,NC_002655.2,CSavignyi_v2.1,tair8,tair9,tair10,O_Sativa_r6,osativa_6.1,B73,ZmB73_5a,ppatens_1.2,D.discoideum
+#web_jnlp_1.5	web 1.5	http://www.broadinstitute.org/igvdata/jws/prod	hg19,hg_g1k_v37,hg18,1kg_ref,hg17,hg16,mm9,mm8,mm7,panTro2,rheMac2,rn4,canFam2,bosTau6,bosTau4,bosTau3,susScrofa,galGal3,cavPor3,monDom5,xenTro2,taeGut1,zebrafish,danRer6,danRer7,gasAcu1,Aplysia,Plasmodium_3D7_v2.1,Plasmodium_3D7_v5.5,Plasmodium_6.1,PlasmoDB_7.0,pvivax,GSM552910,sacCer1,sacCer2,sk1,Y55,sacCer62,spombe_709,spombe_1.55,candida,mg8,spur_2.1,spur_2.5,spur_3.0,WS201,ce6,ce4,dm3,dm2,dmel_5.9,dmel_r5.22,dmel_r5.33,tcas_2.0,tcas_3.0,ncrassa_v3,nc10,Glamblia_2.0,me49,tb927,tbgambi,lmjr,anidulans_4.1,NC_009012,U00096.2,NC_000913.2,NC_002655.2,CSavignyi_v2.1,tair8,tair9,tair10,O_Sativa_r6,osativa_6.1,B73,ZmB73_5a,ppatens_1.2,D.discoideum	hg19,b37,hg18,1kg_ref,hg17,hg16,mm9,mm8,mm7,panTro2,rheMac2,rn4,canFam2,bosTau6,bosTau4,bosTau3,susScrofa,galGal3,cavPor3,monDom5,xenTro2,taeGut1,zebrafish,danRer6,danRer7,gasAcu1,Aplysia,Plasmodium_3D7_v2.1,Plasmodium_3D7_v5.5,Plasmodium_6.1,PlasmoDB_7.0,pvivax,GSM552910,sacCer1,sacCer2,sk1,Y55,sacCer62,spombe_709,spombe_1.55,candida,mg8,spur_2.1,spur_2.5,spur_3.0,WS201,ce6,ce4,dm3,dm2,dmel_5.9,dmel_r5.22,dmel_r5.33,tcas_2.0,tcas_3.0,ncrassa_v3,nc10,Glamblia_2.0,me49,tb927,tbgambi,lmjr,anidulans_4.1,NC_009012,U00096.2,NC_000913.2,NC_002655.2,CSavignyi_v2.1,tair8,tair9,tair10,O_Sativa_r6,osativa_6.1,B73,ZmB73_5a,ppatens_1.2,D.discoideum
+local_default	local	http://localhost:60151/load	hg19,hg_g1k_v37,hg18,1kg_ref,hg17,hg16,mm9,mm8,mm7,panTro2,rheMac2,rn4,canFam2,bosTau6,bosTau4,bosTau3,susScrofa,galGal3,cavPor3,monDom5,xenTro2,taeGut1,zebrafish,danRer6,danRer7,gasAcu1,Aplysia,Plasmodium_3D7_v2.1,Plasmodium_3D7_v5.5,Plasmodium_6.1,PlasmoDB_7.0,pvivax,GSM552910,sacCer1,sacCer2,sk1,Y55,sacCer62,spombe_709,spombe_1.55,candida,mg8,spur_2.1,spur_2.5,spur_3.0,WS201,ce6,ce4,dm3,dm2,dmel_5.9,dmel_r5.22,dmel_r5.33,tcas_2.0,tcas_3.0,ncrassa_v3,nc10,Glamblia_2.0,me49,tb927,tbgambi,lmjr,anidulans_4.1,NC_009012,U00096.2,NC_000913.2,NC_002655.2,CSavignyi_v2.1,tair8,tair9,tair10,O_Sativa_r6,osativa_6.1,B73,ZmB73_5a,ppatens_1.2,D.discoideum	hg19,b37,hg18,1kg_ref,hg17,hg16,mm9,mm8,mm7,panTro2,rheMac2,rn4,canFam2,bosTau6,bosTau4,bosTau3,susScrofa,galGal3,cavPor3,monDom5,xenTro2,taeGut1,zebrafish,danRer6,danRer7,gasAcu1,Aplysia,Plasmodium_3D7_v2.1,Plasmodium_3D7_v5.5,Plasmodium_6.1,PlasmoDB_7.0,pvivax,GSM552910,sacCer1,sacCer2,sk1,Y55,sacCer62,spombe_709,spombe_1.55,candida,mg8,spur_2.1,spur_2.5,spur_3.0,WS201,ce6,ce4,dm3,dm2,dmel_5.9,dmel_r5.22,dmel_r5.33,tcas_2.0,tcas_3.0,ncrassa_v3,nc10,Glamblia_2.0,me49,tb927,tbgambi,lmjr,anidulans_4.1,NC_009012,U00096.2,NC_000913.2,NC_002655.2,CSavignyi_v2.1,tair8,tair9,tair10,O_Sativa_r6,osativa_6.1,B73,ZmB73_5a,ppatens_1.2,D.discoideum

File tool-data/shared/rviewer/rviewer_build_sites.txt

+#site_id	site_name	site_url	dbkey	rviewer_genome_version
+#lbl_test	test	http://127.0.0.1:8888	hg18,hg19	hg18,hg19
+lbl_main	main	http://rviewer.lbl.gov/rviewer	hg18,hg19	hg18,hg19

File tools/demultiplexer/demultiplexer.xml

   <description>
   </description>
 
-<command >perl demultiplexer.pl
+<command >perl ${GALAXY_DATA_INDEX_DIR}/../tools/demultiplexer/demultiplexer.pl
   -f $fileformat
   -t $tagfile
 
 		<data format="tabular" name="output1" label="${tool.name} on ${on_string}"/>
 	</outputs>
 	<help>
-Novoalign_ is an alignment tool for aligning short sequences against an indexed set of reference sequences. Typically used for aligning Illumina single end and paired end reads. 
- .. _Novoalign: http://www.novocraft.com/main/index.php
+Example command line
+$ demultiplexer.pl --tag-read 1 --tag-end 3 --extra-tag-bases 1 --file-format scarf --tag-file indexes.txt --sequence-file s_7_1_withindex_sequence.txt --sequence-file s_7_3_withindex_sequence.txt
 
 	</help>
 </tool>

File tools/ncbi_blast_plus/blastxml_to_tabular.py

+#!/usr/bin/env python
+"""Convert a BLAST XML file to 12 column tabular output
+
+Takes three command line options, input BLAST XML filename, output tabular
+BLAST filename, output format (std for standard 12 columns, or ext for the
+extended 24 columns offered in the BLAST+ wrappers).
+
+The 12 columns output are 'qseqid sseqid pident length mismatch gapopen qstart
+qend sstart send evalue bitscore' or 'std' at the BLAST+ command line, which
+mean:
+   
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The additional columns offered in the Galaxy BLAST+ wrappers are:
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+    23 qlen          Query sequence length
+    24 slen          Subject sequence length
+====== ============= ===========================================
+
+Most of these fields are given explicitly in the XML file; some others, like
+the percentage identity and the number of gap openings, must be calculated.
+
+Be aware that the sequence in the extended tabular output or XML direct from
+BLAST+ may or may not use XXXX masking on regions of low complexity. This
+can throw off the calculation of percentage identity and gap openings.
+[In fact, both BLAST 2.2.24+ and 2.2.25+ have a subtle bug in this regard,
+with these numbers changing depending on whether or not the low complexity
+filter is used.]
+
+This script attempts to produce identical output to what BLAST+ would have done.
+However, check this with "diff -b ..." since BLAST+ sometimes includes an extra
+space character (probably a bug).
+"""
+import sys
+import re
+
+if sys.version_info[:2] >= ( 2, 5 ):
+    import xml.etree.cElementTree as ElementTree
+else:
+    from galaxy import eggs
+    import pkg_resources; pkg_resources.require( "elementtree" )
+    from elementtree import ElementTree
+
+def stop_err( msg ):
+    sys.stderr.write("%s\n" % msg)
+    sys.exit(1)
+
+#Parse Command Line
+try:
+    in_file, out_file, out_fmt = sys.argv[1:]
+except:
+    stop_err("Expect 3 arguments: input BLAST XML file, output tabular file, out format (std or ext)")
+
+if out_fmt == "std":
+    extended = False
+elif out_fmt == "x22":
+    stop_err("Format argument x22 has been replaced with ext (extended 24 columns)")
+elif out_fmt == "ext":
+    extended = True
+else:
+    stop_err("Format argument should be std (12 column) or ext (extended 24 columns)")
+
+
+# get an iterable
+try: 
+    context = ElementTree.iterparse(in_file, events=("start", "end"))
+except:
+    stop_err("Invalid data format.")
+# turn it into an iterator
+context = iter(context)
+# get the root element
+try:
+    event, root = context.next()
+except:
+    stop_err( "Invalid data format." )
+
+
+re_default_query_id = re.compile("^Query_\d+$")
+assert re_default_query_id.match("Query_101")
+assert not re_default_query_id.match("Query_101a")
+assert not re_default_query_id.match("MyQuery_101")
+re_default_subject_id = re.compile("^Subject_\d+$")
+assert re_default_subject_id.match("Subject_1")
+assert not re_default_subject_id.match("Subject_")
+assert not re_default_subject_id.match("Subject_12a")
+assert not re_default_subject_id.match("TheSubject_1")
+
+
+outfile = open(out_file, 'w')
+blast_program = None
+for event, elem in context:
+    if event == "end" and elem.tag == "BlastOutput_program":
+        blast_program = elem.text
+    # for every <Iteration> tag
+    if event == "end" and elem.tag == "Iteration":
+        #Expecting either this, from BLAST 2.2.25+ using FASTA vs FASTA
+        # <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>
+        # <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>
+        # <Iteration_query-len>406</Iteration_query-len>
+        # <Iteration_hits></Iteration_hits>
+        #
+        #Or, from BLAST 2.2.24+ run online
+        # <Iteration_query-ID>Query_1</Iteration_query-ID>
+        # <Iteration_query-def>Sample</Iteration_query-def>
+        # <Iteration_query-len>516</Iteration_query-len>
+        # <Iteration_hits>...
+        qseqid = elem.findtext("Iteration_query-ID")
+        if re_default_query_id.match(qseqid):
+            #Place holder ID, take the first word of the query definition
+            qseqid = elem.findtext("Iteration_query-def").split(None,1)[0]
+        qlen = int(elem.findtext("Iteration_query-len"))
+                                        
+        # for every <Hit> within <Iteration>
+        for hit in elem.findall("Iteration_hits/Hit"):
+            #Expecting either this,
+            # <Hit_id>gi|3024260|sp|P56514.1|OPSD_BUFBU</Hit_id>
+            # <Hit_def>RecName: Full=Rhodopsin</Hit_def>
+            # <Hit_accession>P56514</Hit_accession>
+            #or,
+            # <Hit_id>Subject_1</Hit_id>
+            # <Hit_def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Hit_def>
+            # <Hit_accession>Subject_1</Hit_accession>
+            #
+            #apparently depending on the parse_deflines switch
+            sseqid = hit.findtext("Hit_id").split(None,1)[0]
+            hit_def = sseqid + " " + hit.findtext("Hit_def")
+            if re_default_subject_id.match(sseqid) \
+            and sseqid == hit.findtext("Hit_accession"):
+                #Place holder ID, take the first word of the subject definition
+                hit_def = hit.findtext("Hit_def")
+                sseqid = hit_def.split(None,1)[0]
+            # for every <Hsp> within <Hit>
+            for hsp in hit.findall("Hit_hsps/Hsp"):
+                nident = hsp.findtext("Hsp_identity")
+                length = hsp.findtext("Hsp_align-len")
+                pident = "%0.2f" % (100*float(nident)/float(length))
+
+                q_seq = hsp.findtext("Hsp_qseq")
+                h_seq = hsp.findtext("Hsp_hseq")
+                m_seq = hsp.findtext("Hsp_midline")
+                assert len(q_seq) == len(h_seq) == len(m_seq) == int(length)
+                gapopen = str(len(q_seq.replace('-', ' ').split())-1  + \
+                              len(h_seq.replace('-', ' ').split())-1)
+
+                mismatch = m_seq.count(' ') + m_seq.count('+') \
+                         - q_seq.count('-') - h_seq.count('-')
+                #TODO - Remove this alternative mismatch calculation and test
+                #once satisfied there are no problems
+                expected_mismatch = len(q_seq) \
+                                  - sum(1 for q,h in zip(q_seq, h_seq) \
+                                        if q == h or q == "-" or h == "-")
+                xx = sum(1 for q,h in zip(q_seq, h_seq) if q=="X" and h=="X")
+                if not (expected_mismatch - q_seq.count("X") <= int(mismatch) <= expected_mismatch + xx):
+                    stop_err("%s vs %s mismatches, expected %i <= %i <= %i" \
+                             % (qseqid, sseqid, expected_mismatch - q_seq.count("X"),
+                                int(mismatch), expected_mismatch))
+
+                #TODO - Remove this alternative identity calculation and test
+                #once satisfied there are no problems
+                expected_identity = sum(1 for q,h in zip(q_seq, h_seq) if q == h)
+                if not (expected_identity - xx <= int(nident) <= expected_identity + q_seq.count("X")):
+                    stop_err("%s vs %s identities, expected %i <= %i <= %i" \
+                             % (qseqid, sseqid, expected_identity, int(nident),
+                                expected_identity + q_seq.count("X")))
+                
+
+                evalue = hsp.findtext("Hsp_evalue")
+                if evalue == "0":
+                    evalue = "0.0"
+                else:
+                    evalue = "%0.0e" % float(evalue)
+                
+                bitscore = float(hsp.findtext("Hsp_bit-score"))
+                if bitscore < 100:
+                    #Seems to show one decimal place for lower scores
+                    bitscore = "%0.1f" % bitscore
+                else:
+                    #Note BLAST does not round to nearest int, it truncates
+                    bitscore = "%i" % bitscore
+
+                values = [qseqid,
+                          sseqid,
+                          pident,
+                          length, #hsp.findtext("Hsp_align-len")
+                          str(mismatch),
+                          gapopen,
+                          hsp.findtext("Hsp_query-from"), #qstart,
+                          hsp.findtext("Hsp_query-to"), #qend,
+                          hsp.findtext("Hsp_hit-from"), #sstart,
+                          hsp.findtext("Hsp_hit-to"), #send,
+                          evalue, #hsp.findtext("Hsp_evalue") in scientific notation
+                          bitscore, #hsp.findtext("Hsp_bit-score") rounded
+                          ]
+
+                if extended:
+                    sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(">"))
+                    #print hit_def, "-->", sallseqid
+                    positive = hsp.findtext("Hsp_positive")
+                    ppos = "%0.2f" % (100*float(positive)/float(length))
+                    qframe = hsp.findtext("Hsp_query-frame")
+                    sframe = hsp.findtext("Hsp_hit-frame")
+                    if blast_program == "blastp":
+                        #Probably a bug in BLASTP that they use 0 or 1 depending on format
+                        if qframe == "0": qframe = "1"
+                        if sframe == "0": sframe = "1"
+                    slen = int(hit.findtext("Hit_len"))
+                    values.extend([sallseqid,
+                                   hsp.findtext("Hsp_score"), #score,
+                                   nident,
+                                   positive,
+                                   hsp.findtext("Hsp_gaps"), #gaps,
+                                   ppos,
+                                   qframe,
+                                   sframe,
+                                   #NOTE - for blastp, XML shows original seq, tabular uses XXX masking
+                                   q_seq,
+                                   h_seq,
+                                   str(qlen),
+                                   str(slen),
+                                   ])
+                #print "\t".join(values) 
+                outfile.write("\t".join(values) + "\n")
+        # prevents ElementTree from growing large datastructure
+        root.clear()
+        elem.clear()
+outfile.close()

File tools/ncbi_blast_plus/blastxml_to_tabular.xml

+<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.8">
+    <description>Convert BLAST XML output to tabular</description>
+    <command interpreter="python">
+      blastxml_to_tabular.py $blastxml_file $tabular_file $out_format
+    </command>
+    <inputs>
+        <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/> 
+        <param name="out_format" type="select" label="Output format">
+            <option value="std" selected="True">Tabular (standard 12 columns)</option>
+            <option value="ext">Tabular (extended 24 columns)</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="tabular_file" format="tabular" label="BLAST results as tabular" />
+    </outputs>
+    <requirements>
+    </requirements>
+    <tests>
+        <test>
+            <param name="blastxml_file" value="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
+            <param name="out_format" value="std" />
+            <!-- Note this has some white space differences from the actual blastp output blast_four_human_vs_rhodopsin.tabular -->
+            <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_converted.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="blastxml_file" value="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
+            <param name="out_format" value="ext" />
+            <!-- Note this has some white space differences from the actual blastp output blast_four_human_vs_rhodopsin_22c.tabular -->
+            <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_converted_ext.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="blastxml_file" value="blastp_sample.xml" ftype="blastxml" />
+            <param name="out_format" value="std" />
+            <!-- Note this has some white space differences from the actual blastp output -->
+            <output name="tabular_file" file="blastp_sample_converted.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="blastxml_file" value="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" />
+            <param name="out_format" value="std" />
+            <!-- Note this has some white space differences from the actual blastx output -->
+            <output name="tabular_file" file="blastx_rhodopsin_vs_four_human_converted.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="blastxml_file" value="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" />
+            <param name="out_format" value="ext" />
+            <!-- Note this has some white space and XXXX masking differences from the actual blastx output -->
+            <output name="tabular_file" file="blastx_rhodopsin_vs_four_human_converted_ext.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="blastxml_file" value="blastx_sample.xml" ftype="blastxml" />
+            <param name="out_format" value="std" />
+            <!-- Note this has some white space differences from the actual blastx output -->
+            <output name="tabular_file" file="blastx_sample_converted.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="blastxml_file" value="blastp_human_vs_pdb_seg_no.xml" ftype="blastxml" />
+            <param name="out_format" value="std" />
+            <!-- Note this has some white space differences from the actual blastp output -->
+            <output name="tabular_file" file="blastp_human_vs_pdb_seg_no_converted_std.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="blastxml_file" value="blastp_human_vs_pdb_seg_no.xml" ftype="blastxml" />
+            <param name="out_format" value="ext" />
+            <!-- Note this has some white space differences from the actual blastp output -->
+            <output name="tabular_file" file="blastp_human_vs_pdb_seg_no_converted_ext.tabular" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+    
+**What it does**
+
+NCBI BLAST+ (and the older NCBI 'legacy' BLAST) can output in a range of
+formats including tabular and a more detailed XML format. A complex workflow
+may need both the XML and the tabular output - but running BLAST twice is
+slow and wasteful.
+
+This tool takes the BLAST XML output and by default converts it into the
+standard 12 column tabular equivalent:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 24 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+    23 qlen          Query sequence length
+    24 slen          Subject sequence length
+====== ============= ===========================================
+
+Beware that the XML file (and thus the conversion) and the tabular output
+direct from BLAST+ may differ in the presence of XXXX masking on regions
+of low complexity (columns 21 and 22), and thus also calculated figures like
+the percentage identity (column 3).
+
+    </help>
+</tool>

File tools/ncbi_blast_plus/hide_stderr.py

+#!/usr/bin/env python
+"""A simple script to redirect stderr to stdout when the return code is zero.
+
+See https://bitbucket.org/galaxy/galaxy-central/issue/325/
+
+Currently Galaxy ignores the return code from command line tools (even if it
+is non-zero which by convention indicates an error) and treats any output on
+stderr as an error (even though by convention stderr is used for errors or
+warnings).
+
+This script runs the given command line, capturing all stdout and stderr in
+memory, and gets the return code. For a zero return code, any stderr (which
+should be warnings only) is added to the stdout. That way Galaxy believes
+everything is fine. For a non-zero return code, we output stdout as is, and
+any stderr, plus the return code to ensure there is some output on stderr.
+That way Galaxy treats this as an error.
+
+Once issue 325 is fixed, this script will not be needed.
+"""
+import sys
+import subprocess
+
+#Avoid using shell=True when we call subprocess, to ensure that if the Python
+#script is killed, so too is the BLAST process.
+try:
+    words = []
+    for w in sys.argv[1:]:
+       if " " in w:
+           words.append('"%s"' % w)
+       else:
+           words.append(w)
+    cmd = " ".join(words)
+    child = subprocess.Popen(sys.argv[1:],
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+except Exception, err:
+    sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
+    sys.exit(1)
+#Use .communicate() as we can get deadlocks with .wait()
+stdout, stderr = child.communicate()
+return_code = child.returncode
+
+if return_code:
+    sys.stdout.write(stdout)
+    sys.stderr.write(stderr)
+    sys.stderr.write("Return error code %i from command:\n" % return_code)
+    sys.stderr.write("%s\n" % cmd)
+else:
+    sys.stdout.write(stdout)
+    sys.stdout.write(stderr)

File tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml

+<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.11">
+    <description>Search nucleotide database with nucleotide query sequence(s)</description>
+    <!-- If job splitting is enabled, break up the query file into parts as configured below (split_mode / split_size) -->
+    <parallelism method="multi" split_inputs="query" split_mode="number_of_parts" split_size="4" shared_inputs="subject" merge_outputs="output1"></parallelism>
+    <version_command>blastn -version</version_command>
+    <command interpreter="python">hide_stderr.py
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastn
+-query "$query"
+#if $db_opts.db_opts_selector == "db":
+  -db "${db_opts.database.fields.path}"
+#else:
+  -subject "$db_opts.subject"
+#end if
+-task $blast_type
+-evalue $evalue_cutoff
+-out $output1
+##Set the extended list here so if/when we add things, saved workflows are not affected
+#if str($out_format)=="ext":
+    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
+#else:
+    -outfmt $out_format
+#end if
+-num_threads 8
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+$adv_opts.strand
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+$adv_opts.ungapped
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Subject database/sequences">
+              <option value="db" selected="True">BLAST Database</option>
+              <option value="file">FASTA file</option>
+            </param>
+            <when value="db">
+                <param name="database" type="select" label="Nucleotide BLAST database">
+                    <options from_file="blastdb.loc">
+                      <column name="value" index="0"/>
+                      <column name="name" index="1"/>
+                      <column name="path" index="2"/>
+                    </options>
+                </param>
+                <param name="subject" type="hidden" value="" /> 
+            </when>
+            <when value="file">
+                <param name="database" type="hidden" value="" /> 
+                <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> 
+            </when>
+        </conditional>
+        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
+            <option value="megablast">megablast</option>
+            <option value="blastn">blastn</option>
+            <option value="blastn-short">blastn-short</option>
+            <option value="dc-megablast">dc-megablast</option>
+            <!-- Using BLAST 2.2.24+ this gives an error:
+            BLAST engine error: Program type 'vecscreen' not supported
+            <option value="vecscreen">vecscreen</option>
+            -->
+        </param>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6" selected="True">Tabular (standard 12 columns)</option>
+            <option value="ext">Tabular (extended 24 columns)</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <option value="0 -html">Pairwise HTML</option>
+            <option value="2">Query-anchored text</option>
+            <option value="2 -html">Query-anchored HTML</option>
+            <option value="4">Flat query-anchored text</option>
+            <option value="4 -html">Flat query-anchored HTML</option>
+            <!--
+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' -->
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" />
+                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
+                    <option value="-strand both">Both</option>
+                    <option value="-strand plus">Plus (forward)</option>
+                    <option value="-strand minus">Minus (reverse complement)</option>
+                </param>
+                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!-- I'd like word_size to be optional, with minimum 4 for blastn -->
+                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4.">
+                    <validator type="in_range" min="0" />
+                </param>
+                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
+                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Single result dataset. It is tabular by default (out_format "6" and "ext" have no change_format entry); the change_format rules below switch the datatype to txt, html or blastxml to match the selected out_format value. -->
+        <data name="output1" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="0 -html" format="html"/>
+                <when input="out_format" value="2" format="txt"/>
+                <when input="out_format" value="2 -html" format="html"/>
+                <when input="out_format" value="4" format="txt"/>
+                <when input="out_format" value="4 -html" format="html"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">blastn</requirement>
+    </requirements>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *nucleotide database* using a *nucleotide query*,
+using the NCBI BLAST+ blastn command line tool.
+Algorithms include blastn, megablast, and discontiguous megablast.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 24 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+    23 qlen          Query sequence length
+    24 slen          Subject sequence length
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query-anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Zhang et al. A Greedy Algorithm for Aligning DNA Sequences. 2000. J. Comput. Biol. 7(1-2):203-214.
+
+    </help>
+</tool>

File tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml

+<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.11">
+    <description>Search protein database with protein query sequence(s)</description>
+    <!-- If job splitting is enabled, break up the query file into 4 parts (split_mode="number_of_parts", split_size="4") -->
+    <parallelism method="multi" split_inputs="query" split_mode="number_of_parts" split_size="4" shared_inputs="subject" merge_outputs="output1"></parallelism>
+    <version_command>blastp -version</version_command>
+    <command interpreter="python">hide_stderr.py
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+## Overall shape: hide_stderr.py is given the full blastp command line built below.
+blastp
+-query "$query"
+## Search either a listed BLAST database (the "path" column of the selected
+## blastdb_p.loc entry) or a FASTA dataset supplied as the subject.
+#if $db_opts.db_opts_selector == "db":
+  -db "${db_opts.database.fields.path}"
+#else:
+  -subject "$db_opts.subject"
+#end if
+-task $blast_type
+-evalue $evalue_cutoff
+-out $output1
+##Set the extended list here so if/when we add things, saved workflows are not affected
+#if str($out_format)=="ext":
+    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
+#else:
+## Not quoted: option values such as "0 -html" hold the format number plus an
+## extra -html switch, which presumably has to reach blastp as a separate argument.
+    -outfmt $out_format
+#end if
+## The thread count is fixed at 8 here; it is not read from any tool parameter.
+-num_threads 8
+#if $adv_opts.adv_opts_selector=="advanced":
+## filter_query is a boolean whose truevalue/falsevalue are "-seg yes" / "-seg no".
+$adv_opts.filter_query
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+## For max_hits and word_size a value of zero (or empty) omits the switch entirely.
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+##Ungapped disabled for now - see comments below
+##$adv_opts.ungapped
+## parse_deflines is a boolean expanding to "-parse_deflines" or to nothing.
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Subject database/sequences">
+              <option value="db" selected="True">BLAST Database</option>
+              <option value="file">FASTA file</option>
+            </param>
+            <when value="db">
+                <param name="database" type="select" label="Protein BLAST database">
+                    <options from_file="blastdb_p.loc">
+                      <column name="value" index="0"/>
+                      <column name="name" index="1"/>
+                      <column name="path" index="2"/>
+                    </options>
+                </param>
+                <param name="subject" type="hidden" value="" /> 
+            </when>
+            <when value="file">
+                <param name="database" type="hidden" value="" /> 
+                <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> 
+            </when>
+        </conditional>
+        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
+            <option value="blastp">blastp</option>
+            <option value="blastp-short">blastp-short</option>
+        </param>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6" selected="True">Tabular (standard 12 columns)</option>
+            <option value="ext">Tabular (extended 24 columns)</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <option value="0 -html">Pairwise HTML</option>
+            <option value="2">Query-anchored text</option>
+            <option value="2 -html">Query-anchored HTML</option>
+            <option value="4">Flat query-anchored text</option>
+            <option value="4 -html">Flat query-anchored HTML</option>
+            <!--
+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
+                <param name="matrix" type="select" label="Scoring matrix">
+                    <option value="BLOSUM90">BLOSUM90</option>
+                    <option value="BLOSUM80">BLOSUM80</option>
+                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
+                    <option value="BLOSUM50">BLOSUM50</option> 
+                    <option value="BLOSUM45">BLOSUM45</option>
+                    <option value="PAM250">PAM250</option>
+                    <option value="PAM70">PAM70</option>
+                    <option value="PAM30">PAM30</option>
+                </param>
+                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!-- I'd like word_size to be optional, with minimum 2 for blastp -->
+                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!--
+                Can't use '-ungapped' on its own, error back is:
+                Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
+                Tried using '-ungapped -comp_based_stats F' and blastp crashed with 'Attempt to access NULL pointer.'
+                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
+                -->
+                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Single result dataset. It is tabular by default (out_format "6" and "ext" have no change_format entry); the change_format rules below switch the datatype to txt, html or blastxml to match the selected out_format value. -->
+        <data name="output1" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="0 -html" format="html"/>
+                <when input="out_format" value="2" format="txt"/>
+                <when input="out_format" value="2 -html" format="html"/>
+                <when input="out_format" value="4" format="txt"/>
+                <when input="out_format" value="4 -html" format="html"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">blastp</requirement>
+    </requirements>
+    <tests>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="blast_type" value="blastp" />
+            <param name="out_format" value="5" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="False" />
+            <param name="matrix" value="BLOSUM62" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="True" />
+            <output name="output1" file="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
+        </test>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="blast_type" value="blastp" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="False" />
+            <param name="matrix" value="BLOSUM62" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="True" />
+            <output name="output1" file="blastp_four_human_vs_rhodopsin.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="blast_type" value="blastp" />
+            <param name="out_format" value="ext" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="False" />
+            <param name="matrix" value="BLOSUM62" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="True" />
+            <output name="output1" file="blastp_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="blast_type" value="blastp" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastp_rhodopsin_vs_four_human.tabular" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *protein database* using a *protein query*,
+using the NCBI BLAST+ blastp command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 24 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+    23 qlen          Query sequence length
+    24 slen          Subject sequence length
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query-anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
+
+    </help>
+</tool>

File tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml

+<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.11">
+    <description>Search protein database with translated nucleotide query sequence(s)</description>
+    <!-- If job splitting is enabled, break up the query file into 4 parts (split_mode="number_of_parts", split_size="4") -->
+    <parallelism method="multi" split_inputs="query" split_mode="number_of_parts" split_size="4" shared_inputs="subject" merge_outputs="output1"></parallelism>
+    <version_command>blastx -version</version_command>
+    <command interpreter="python">hide_stderr.py
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+## Overall shape: hide_stderr.py is given the full blastx command line built below.
+blastx
+-query "$query"
+## Search either a listed BLAST database (the "path" column of the selected
+## blastdb_p.loc entry) or a FASTA dataset supplied as the subject.
+#if $db_opts.db_opts_selector == "db":
+  -db "${db_opts.database.fields.path}"
+#else:
+  -subject "$db_opts.subject"
+#end if
+-evalue $evalue_cutoff
+-out $output1
+##Set the extended list here so if/when we add things, saved workflows are not affected
+#if str($out_format)=="ext":
+    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
+#else:
+## Not quoted: option values such as "0 -html" hold the format number plus an
+## extra -html switch, which presumably has to reach blastx as a separate argument.
+    -outfmt $out_format
+#end if
+## The thread count is fixed at 8 here; it is not read from any tool parameter.
+-num_threads 8
+#if $adv_opts.adv_opts_selector=="advanced":
+## filter_query is a boolean whose truevalue/falsevalue are "-seg yes" / "-seg no";
+## strand is a select whose option values already include the "-strand" switch.
+$adv_opts.filter_query
+$adv_opts.strand
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+## For max_hits and word_size a value of zero (or empty) omits the switch entirely.
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+## ungapped and parse_deflines are booleans expanding to their switch or to nothing.
+$adv_opts.ungapped
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Subject database/sequences">
+              <option value="db" selected="True">BLAST Database</option>
+              <option value="file">FASTA file</option>
+            </param>
+            <when value="db">
+                <param name="database" type="select" label="Protein BLAST database">
+                    <options from_file="blastdb_p.loc">
+                      <column name="value" index="0"/>
+                      <column name="name" index="1"/>
+                      <column name="path" index="2"/>
+                    </options>
+                </param>
+                <param name="subject" type="hidden" value="" /> 
+            </when>
+            <when value="file">
+                <param name="database" type="hidden" value="" /> 
+                <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> 
+            </when>
+        </conditional>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6" selected="True">Tabular (standard 12 columns)</option>
+            <option value="ext">Tabular (extended 24 columns)</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <option value="0 -html">Pairwise HTML</option>
+            <option value="2">Query-anchored text</option>
+            <option value="2 -html">Query-anchored HTML</option>
+            <option value="4">Flat query-anchored text</option>
+            <option value="4 -html">Flat query-anchored HTML</option>
+            <!--
+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
+                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
+                    <option value="-strand both">Both</option>
+                    <option value="-strand plus">Plus (forward)</option>
+                    <option value="-strand minus">Minus (reverse complement)</option>
+                </param>
+                <param name="matrix" type="select" label="Scoring matrix">
+                    <option value="BLOSUM90">BLOSUM90</option>
+                    <option value="BLOSUM80">BLOSUM80</option>
+                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
+                    <option value="BLOSUM50">BLOSUM50</option> 
+                    <option value="BLOSUM45">BLOSUM45</option>
+                    <option value="PAM250">PAM250</option>
+                    <option value="PAM70">PAM70</option>
+                    <option value="PAM30">PAM30</option>
+                </param>
+                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!-- I'd like word_size to be optional, with minimum 2 for blastx -->
+                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
+                    <validator type="in_range" min="0" />
+                </param>
+                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
+                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Single result dataset. It is tabular by default (out_format "6" and "ext" have no change_format entry); the change_format rules below switch the datatype to txt, html or blastxml to match the selected out_format value. -->
+        <data name="output1" format="tabular" label="blastx on ${db_opts.db_opts_selector}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="0 -html" format="html"/>
+                <when input="out_format" value="2" format="txt"/>
+                <when input="out_format" value="2 -html" format="html"/>
+                <when input="out_format" value="4" format="txt"/>
+                <when input="out_format" value="4 -html" format="html"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">blastx</requirement>
+    </requirements>
+    <tests>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="5" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastx_rhodopsin_vs_four_human.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="ext" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastx_rhodopsin_vs_four_human_ext.tabular" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *protein database* using a *translated nucleotide query*,
+using the NCBI BLAST+ blastx command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 24 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+    23 qlen          Query sequence length
+    24 slen          Subject sequence length 
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+    </help>
+</tool>

File tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml

+<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.11">
+    <description>Search translated nucleotide database with protein query sequence(s)</description>
+    <!-- If job splitting is enabled, break up the query file into 4 parts and merge the per-part results into output1 -->
+    <parallelism method="multi" split_inputs="query" split_mode="number_of_parts" split_size="4" shared_inputs="subject" merge_outputs="output1"></parallelism>
+    <version_command>tblastn -version</version_command>
+    <command interpreter="python">hide_stderr.py
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting with hash hash are comments. Galaxy will turn newlines into spaces
+## The tblastn command line below is passed as arguments to the hide_stderr.py
+## script, which is run with the python interpreter.
+tblastn
+-query "$query"
+## Search either a BLAST database (path column of the selected blastdb.loc entry)
+## or a FASTA dataset from the history used directly as the subject.
+#if $db_opts.db_opts_selector == "db":
+  -db "${db_opts.database.fields.path}"
+#else:
+  -subject "$db_opts.subject"
+#end if
+-evalue $evalue_cutoff
+-out $output1
+##Set the extended list here so if/when we add things, saved workflows are not affected
+## "ext" expands to tabular format 6 with the 12 standard columns plus 12 extra ones;
+## any other value is passed to -outfmt unchanged (the "N -html" values also add -html).
+#if str($out_format)=="ext":
+    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
+#else:
+    -outfmt $out_format
+#end if
+## NOTE(review): the thread count is fixed at 8 here; confirm it matches the cores given to the job.
+-num_threads 8
+#if $adv_opts.adv_opts_selector=="advanced":
+## filter_query expands to "-seg yes" or "-seg no" (the boolean's truevalue/falsevalue).
+$adv_opts.filter_query
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+## A value of zero means the option is omitted, leaving the BLAST default in effect.
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+## Same convention for word_size: only passed when greater than zero.
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+##Ungapped disabled for now - see comments below
+##$adv_opts.ungapped
+## parse_deflines expands to "-parse_deflines" when checked, otherwise to nothing.
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Subject database/sequences">
+              <option value="db" selected="True">BLAST Database</option>
+              <option value="file">FASTA file</option>
+            </param>
+            <when value="db">
+                <param name="database" type="select" label="Nucleotide BLAST database">
+                    <options from_file="blastdb.loc">
+                      <column name="value" index="0"/>
+                      <column name="name" index="1"/>
+                      <column name="path" index="2"/>
+                    </options>
+                </param>
+                <param name="subject" type="hidden" value="" /> 
+            </when>
+            <when value="file">
+                <param name="database" type="hidden" value="" /> 
+                <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> 
+            </when>
+        </conditional>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6" selected="True">Tabular (standard 12 columns)</option>
+            <option value="ext">Tabular (extended 24 columns)</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <option value="0 -html">Pairwise HTML</option>
+            <option value="2">Query-anchored text</option>
+            <option value="2 -html">Query-anchored HTML</option>
+            <option value="4">Flat query-anchored text</option>
+            <option value="4 -html">Flat query-anchored HTML</option>
+            <!--
+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
+                <param name="matrix" type="select" label="Scoring matrix">
+                    <option value="BLOSUM90">BLOSUM90</option>
+                    <option value="BLOSUM80">BLOSUM80</option>
+                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
+                    <option value="BLOSUM50">BLOSUM50</option> 
+                    <option value="BLOSUM45">BLOSUM45</option>
+                    <option value="PAM250">PAM250</option>
+                    <option value="PAM70">PAM70</option>
+                    <option value="PAM30">PAM30</option>
+                </param>
+                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!-- I'd like word_size to be optional, with minimum 2 for tblastn -->
+                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!--
+                Can't use '-ungapped' on its own, error back is:
+                Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
+                Tried using '-ungapped -comp_based_stats F' and tblastn crashed with 'Attempt to access NULL pointer.'
+                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
+                -->
+                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="tblastn on ${db_opts.db_opts_selector}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="0 -html" format="html"/>
+                <when input="out_format" value="2" format="txt"/>
+                <when input="out_format" value="2 -html" format="html"/>
+                <when input="out_format" value="4" format="txt"/>
+                <when input="out_format" value="4 -html" format="html"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">tblastn</requirement>
+    </requirements>
+    <tests>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="5" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="false" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin.xml" ftype="blastxml" />
+        </test>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="ext" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="false" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="false" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <!-- Same as above, but parse deflines - on BLAST 2.2.25+ makes no difference -->
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="true" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="0 -html" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="false" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin.html" ftype="html" />
+        </test>
+    </tests>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *translated nucleotide database* using a *protein query*,
+using the NCBI BLAST+ tblastn command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 24 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+    23 qlen          Query sequence length
+    24 slen          Subject sequence length
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+    </help>
+</tool>

File tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml

+<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.11">
+    <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>
+    <!-- If job splitting is enabled, break up the query file into 4 parts and merge the per-part results into output1 -->
+    <parallelism method="multi" split_inputs="query" split_mode="number_of_parts" split_size="4" shared_inputs="subject" merge_outputs="output1"></parallelism>
+    <version_command>tblastx -version</version_command>
+    <command interpreter="python">hide_stderr.py
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting with hash hash are comments. Galaxy will turn newlines into spaces
+## The tblastx command line below is passed as arguments to the hide_stderr.py
+## script, which is run with the python interpreter.
+tblastx
+-query "$query"
+## Search either a BLAST database (path column of the selected blastdb.loc entry)
+## or a FASTA dataset from the history used directly as the subject.
+#if $db_opts.db_opts_selector == "db":
+  -db "${db_opts.database.fields.path}"
+#else:
+  -subject "$db_opts.subject"
+#end if
+-evalue $evalue_cutoff
+-out $output1
+##Set the extended list here so if/when we add things, saved workflows are not affected
+## "ext" expands to tabular format 6 with the 12 standard columns plus 12 extra ones;
+## any other value is passed to -outfmt unchanged (the "N -html" values also add -html).
+#if str($out_format)=="ext":
+    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
+#else:
+    -outfmt $out_format
+#end if
+## NOTE(review): the thread count is fixed at 8 here; confirm it matches the cores given to the job.
+-num_threads 8
+#if $adv_opts.adv_opts_selector=="advanced":
+## filter_query expands to "-seg yes" or "-seg no" (the boolean's truevalue/falsevalue).
+$adv_opts.filter_query
+## strand expands to one of "-strand both", "-strand plus" or "-strand minus".
+$adv_opts.strand
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+## A value of zero means the option is omitted, leaving the BLAST default in effect.
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+## Same convention for word_size: only passed when greater than zero.
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+## parse_deflines expands to "-parse_deflines" when checked, otherwise to nothing.
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Subject database/sequences">
+              <option value="db" selected="True">BLAST Database</option>
+              <option value="file">FASTA file</option>
+            </param>
+            <when value="db">
+                <param name="database" type="select" label="Nucleotide BLAST database">
+                    <options from_file="blastdb.loc">
+                      <column name="value" index="0"/>
+                      <column name="name" index="1"/>
+                      <column name="path" index="2"/>
+                    </options>
+                </param>
+                <param name="subject" type="hidden" value="" /> 
+            </when>
+            <when value="file">
+                <param name="database" type="hidden" value="" /> 
+                <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> 
+            </when>
+        </conditional>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6" selected="True">Tabular (standard 12 columns)</option>
+            <option value="ext">Tabular (extended 24 columns)</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <option value="0 -html">Pairwise HTML</option>
+            <option value="2">Query-anchored text</option>
+            <option value="2 -html">Query-anchored HTML</option>
+            <option value="4">Flat query-anchored text</option>
+            <option value="4 -html">Flat query-anchored HTML</option>
+            <!--
+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
+                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
+                    <option value="-strand both">Both</option>
+                    <option value="-strand plus">Plus (forward)</option>
+                    <option value="-strand minus">Minus (reverse complement)</option>
+                </param>
+                <param name="matrix" type="select" label="Scoring matrix">
+                    <option value="BLOSUM90">BLOSUM90</option>
+                    <option value="BLOSUM80">BLOSUM80</option>
+                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
+                    <option value="BLOSUM50">BLOSUM50</option> 
+                    <option value="BLOSUM45">BLOSUM45</option>
+                    <option value="PAM250">PAM250</option>
+                    <option value="PAM70">PAM70</option>
+                    <option value="PAM30">PAM30</option>
+                </param>
+                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!-- I'd like word_size to be optional, with minimum 2 for tblastx -->
+                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
+                    <validator type="in_range" min="0" />
+                </param>
+                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="tblastx on ${db_opts.db_opts_selector}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="0 -html" format="html"/>
+                <when input="out_format" value="2" format="txt"/>
+                <when input="out_format" value="2 -html" format="html"/>
+                <when input="out_format" value="4" format="txt"/>
+                <when input="out_format" value="4 -html" format="html"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">tblastx</requirement>
+    </requirements>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *translated nucleotide database* using a *translated nucleotide query*,
+using the NCBI BLAST+ tblastx command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 24 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+    23 qlen          Query sequence length
+    24 slen          Subject sequence length
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+    </help>
+</tool>

File tools/variant_detection/freebayes.xml

+<?xml version="1.0"?>
+<tool id="freebayes" name="FreeBayes" version="0.0.2">
+  <requirements>
+    <requirement type="package" version="0.9.4">freebayes</requirement>
+    <requirement type="package">samtools</requirement>
+  </requirements>
+  <description> - Bayesian genetic variant detector</description>
+  <command>
+    ##set up input files
+    #set $reference_fasta_filename = "localref.fa"
+    #if str( $reference_source.reference_source_selector ) == "history":
+        ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
+        samtools faidx "${reference_fasta_filename}" 2&gt;&amp;1 || echo "Error running samtools faidx for FreeBayes" &gt;&amp;2 &amp;&amp;
+    #else:
+        #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+    #end if
+    #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
+        ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &amp;&amp;
+        ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &amp;&amp;
+    #end for
+    ##finished setting up inputs
+    
+    ##start FreeBayes commandline
+    freebayes
+    #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
+        --bam "localbam_${bam_count}.bam"
+    #end for
+    --fasta-reference "${reference_fasta_filename}" 
+    
+    ##outputs
+    --vcf "${output_vcf}"
+    
+    ##advanced options
+    #if str( $options_type.options_type_selector ) == "advanced":
+        ##additional outputs
+        #if $options_type.output_trace_option:
+            --trace "${output_trace}"
+        #end if
+        #if $options_type.output_failed_alleles_option:
+            --failed-alleles "${output_failed_alleles_bed}"
+        #end if
+        
+        ##additional inputs
+        #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
+            --targets "${options_type.target_limit_type.input_target_bed}"
+        #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region":
+            --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}"
+        #end if
+        #if $options_type.input_sample_file:
+            --samples "${options_type.input_sample_file}"
+        #end if
+        #if $options_type.input_populations_file:
+            --populations "${options_type.input_populations_file}"
+        #end if
+        #if $options_type.input_cnv_map_bed:
+            --cnv-map "${options_type.input_cnv_map_bed}"
+        #end if
+        #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf":
+            --variant-input "${options_type.input_variant_type.input_variant_vcf}"
+            ${options_type.input_variant_type.only_use_input_alleles}
+        #end if
+        
+        ##reporting
+        #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set":
+            --pvar "${options_type.section_reporting_type.pvar}"
+            ${options_type.section_reporting_type.show_reference_repeats}
+        #end if
+        
+        ##population model
+        #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set":
+            --theta "${options_type.section_population_model_type.theta}"
+            --ploidy "${options_type.section_population_model_type.ploidy}"
+            ${options_type.section_population_model_type.pooled}
+        #end if
+        
+        ##reference allele
+        #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele":
+            --use-reference-allele
+            ${options_type.use_reference_allele_type.diploid_reference}
+            --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}"
+        #end if
+        
+        ##allele scope
+        #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set":
+            ${options_type.section_allele_scope_type.no_snps}
+            ${options_type.section_allele_scope_type.no_indels}
+            ${options_type.section_allele_scope_type.no_mnps}
+            ${options_type.section_allele_scope_type.no_complex}
+            --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}"
+            #if $options_type.section_allele_scope_type.max_complex_gap:
+                --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}"
+            #end if