Mercurial > repos > peterjc > ncbi_blast_plus

--- a/tools/ncbi_blast_plus/README.rst	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/README.rst	Mon Nov 25 10:58:46 2013 -0500
@@ -127,6 +127,10 @@
           defined in updated blast_datatypes on Galaxy ToolShed.
         - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26
         - Now depends on package_blast_plus_2_2_27 in ToolShed
+v0.0.22 - More use of macros to simplify the wrappers
+        - Set number of threads via $GALAXY_SLOTS environment variable
+        - More descriptive default output names
+        - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18)
 ======= ======================================================================
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py	Mon Nov 25 10:58:46 2013 -0500
@@ -63,7 +63,7 @@
 import re

 if "-v" in sys.argv or "--version" in sys.argv:
-    print "v0.0.12"
+    print "v0.0.22"
     sys.exit(0)

 if sys.version_info[:2] >= ( 2, 5 ):
@@ -228,7 +228,10 @@
                           ]

                 if extended:
-                    sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(">"))
+                    try:
+                        sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(" >"))
+                    except IndexError as e:
+                        stop_err("Problem splitting multuple hits?\n%r\n--> %s" % (hit_def, e))
                     #print hit_def, "-->", sallseqid
                     positive = hsp.findtext("Hsp_positive")
                     ppos = "%0.2f" % (100*float(positive)/float(length))
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,4 +1,4 @@
-<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.11">
+<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.22">
     <description>Convert BLAST XML output to tabular</description>
     <version_command interpreter="python">blastxml_to_tabular.py --version</version_command>
     <command interpreter="python">
@@ -17,7 +17,7 @@
         </param>
     </inputs>
     <outputs>
-        <data name="tabular_file" format="tabular" label="BLAST results as tabular" />
+        <data name="tabular_file" format="tabular" label="$blastxml_file.display_name (as tabular)" />
     </outputs>
     <requirements>
     </requirements>
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,13 +1,10 @@
-<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.21">
+<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.22">
     <description>Show BLAST database information from blastdbcmd</description>
-    <requirements>
-        <requirement type="binary">blastdbcmd</requirement>
-        <requirement type="package" version="2.2.27">blast+</requirement>
-    </requirements>
-    <version_command>blastdbcmd -version</version_command>
     <macros>
+        <token name="@BINARY@">blastdbcmd</token>
         <import>ncbi_macros.xml</import>
     </macros>
+    <expand macro="requirements" />
     <command>
 blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out "$info"
     </command>
@@ -33,17 +30,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:

-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,13 +1,10 @@
-<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.21">
+<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.22">
     <description>Extract sequence(s) from BLAST database</description>
-    <requirements>
-        <requirement type="binary">blastdbcmd</requirement>
-        <requirement type="package" version="2.2.27">blast+</requirement>
-    </requirements>
-    <version_command>blastdbcmd -version</version_command>
     <macros>
+        <token name="@BINARY@">blastdbcmd</token>
         <import>ncbi_macros.xml</import>
     </macros>
+    <expand macro="requirements" />
     <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
@@ -105,17 +102,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:

-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,53 +1,29 @@
-<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.21">
+<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.22">
     <description>Search nucleotide database with nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
-    <requirements>
-        <requirement type="binary">blastn</requirement>
-        <requirement type="package" version="2.2.27">blast+</requirement>
-    </requirements>
-    <version_command>blastn -version</version_command>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
     <macros>
+        <token name="@BINARY@">blastn</token>
         <import>ncbi_macros.xml</import>
     </macros>
+    <expand macro="requirements" />
     <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 blastn
 -query "$query"
-#if $db_opts.db_opts_selector == "db":
-  -db "${db_opts.database.fields.path}"
-#elif $db_opts.db_opts_selector == "histdb":
-  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
-#else:
-  -subject "$db_opts.subject"
-#end if
+@BLAST_DB_SUBJECT@
 -task $blast_type
 -evalue $evalue_cutoff
--out "$output1"
-##Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
--num_threads 8
+@BLAST_OUTPUT@
+@THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
-$adv_opts.filter_query
 $adv_opts.strand
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
+@ADVANCED_OPTIONS@
 #if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ):
 -perc_identity $adv_opts.identity_cutoff
 #end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
 $adv_opts.ungapped
-$adv_opts.parse_deflines
 ## End of advanced options:
 #end if
     </command>
@@ -69,38 +45,25 @@
             <option value="vecscreen">vecscreen</option>
             -->
         </param>
-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <expand macro="input_evalue" />
         <expand macro="input_out_format" />
-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
+        <expand macro="advanced_options">
+            <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' -->
+            <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" />
+            <expand macro="input_strand" />
+            <expand macro="input_max_hits" />
+            <param name="identity_cutoff" type="float" min="0" max="100" value="0" label="Percent identity cutoff (-perc_identity)" help="Use zero for no cutoff" />
+
+            <!-- I'd like word_size to be optional, with minimum 4 for blastn -->
+            <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4.">
+                <validator type="in_range" min="0" />
             </param>
-            <when value="basic" />
-            <when value="advanced">
-                <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" />
-                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
-                    <option value="-strand both">Both</option>
-                    <option value="-strand plus">Plus (forward)</option>
-                    <option value="-strand minus">Minus (reverse complement)</option>
-                </param>
-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <param name="identity_cutoff" type="float" min="0" max="100" value="0" label="Percent identity cutoff (-perc_identity)" help="Use zero for no cutoff" />
-                <!-- I'd like word_size to be optional, with minimum 4 for blastn -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
+            <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
+            <expand macro="input_parse_deflines" />
+        </expand>
     </inputs>
     <outputs>
-        <data name="output1" format="tabular" label="${blast_type.value_label} on ${on_string}">
+        <data name="output1" format="tabular" label="${blast_type.value_label} $query.name vs @ON_DB_SUBJECT@">
             <expand macro="output_change_format" />
         </data>
     </outputs>
@@ -118,12 +81,7 @@
     </tests>
     <help>

-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.
-
------
+@SEARCH_TIME_WARNING@

 **What it does**

@@ -131,71 +89,11 @@
 using the NCBI BLAST+ blastn command line tool.
 Algorithms include blastn, megablast, and discontiguous megablast.

-.. class:: warningmark
-
-You can also search against a FASTA file of subject nucleotide
-sequences. This is *not* advised because it is slower (only one
-CPU is used), but more importantly gives e-values for pairwise
-searches (very small e-values which will look overly signficiant).
-In most cases you should instead turn the other FASTA file into a
-database first using *makeblastdb* and search against that.
+@FASTA_WARNING@

 -----

-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output. Galaxy now uses this extended 24 column output by default.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+@OUTPUT_FORMAT@

 -------

@@ -204,17 +102,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:

-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,51 +1,27 @@
-<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.21">
+<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.22">
     <description>Search protein database with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
-    <requirements>
-        <requirement type="binary">blastp</requirement>
-        <requirement type="package" version="2.2.27">blast+</requirement>
-    </requirements>
-    <version_command>blastp -version</version_command>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
     <macros>
+        <token name="@BINARY@">blastp</token>
         <import>ncbi_macros.xml</import>
     </macros>
+    <expand macro="requirements" />
     <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 blastp
 -query "$query"
-#if $db_opts.db_opts_selector == "db":
-  -db "${db_opts.database.fields.path}"
-#elif $db_opts.db_opts_selector == "histdb":
-  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
-#else:
-  -subject "$db_opts.subject"
-#end if
+@BLAST_DB_SUBJECT@
 -task $blast_type
 -evalue $evalue_cutoff
--out "$output1"
-##Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
--num_threads 8
+@BLAST_OUTPUT@
+@THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
-$adv_opts.filter_query
 -matrix $adv_opts.matrix
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
+@ADVANCED_OPTIONS@
 ##Ungapped disabled for now - see comments below
 ##$adv_opts.ungapped
-$adv_opts.parse_deflines
 ## End of advanced options:
 #end if
     </command>
@@ -61,40 +37,25 @@
             <option value="blastp">blastp</option>
             <option value="blastp-short">blastp-short</option>
         </param>
-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <expand macro="input_evalue" />
         <expand macro="input_out_format" />
-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
-
-                <expand macro="input_scoring_matrix" />
-
-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!-- I'd like word_size to be optional, with minimum 2 for blastp -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!--
-                Can't use '-ungapped' on its own, error back is:
-                Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
-                Tried using '-ungapped -comp_based_stats F' and blastp crashed with 'Attempt to access NULL pointer.'
-                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
-                -->
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
+        <expand macro="advanced_options">
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_false" />
+            <expand macro="input_scoring_matrix" />
+            <expand macro="input_max_hits" />
+            <expand macro="input_word_size" />
+            <!--
+            Can't use '-ungapped' on its own, error back is:
+            Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
+            Tried using '-ungapped -comp_based_stats F' and blastp crashed with 'Attempt to access NULL pointer.'
+            <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
+            -->
+            <expand macro="input_parse_deflines" />
+        </expand>
     </inputs>
     <outputs>
-        <data name="output1" format="tabular" label="${blast_type.value_label} on ${on_string}">
+        <data name="output1" format="tabular" label="${blast_type.value_label} $query.name vs @ON_DB_SUBJECT@">
             <expand macro="output_change_format" />
         </data>
     </outputs>
@@ -161,83 +122,18 @@
     </tests>
     <help>

-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.
-
------
+@SEARCH_TIME_WARNING@

 **What it does**

 Search a *protein database* using a *protein query*,
 using the NCBI BLAST+ blastp command line tool.

-.. class:: warningmark
-
-You can also search against a FASTA file of subject protein
-sequences. This is *not* advised because it is slower (only one
-CPU is used), but more importantly gives e-values for pairwise
-searches (very small e-values which will look overly signficiant).
-In most cases you should instead turn the other FASTA file into a
-database first using *makeblastdb* and search against that.
+@FASTA_WARNING@

 -----

-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output. Galaxy now uses this extended 24 column output by default.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+@OUTPUT_FORMAT@

 -------

@@ -246,17 +142,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:

-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,51 +1,27 @@
-<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.21">
+<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.22">
     <description>Search protein database with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
-    <requirements>
-        <requirement type="binary">blastx</requirement>
-        <requirement type="package" version="2.2.27">blast+</requirement>
-    </requirements>
-    <version_command>blastx -version</version_command>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
     <macros>
+        <token name="@BINARY@">blastx</token>
         <import>ncbi_macros.xml</import>
     </macros>
+    <expand macro="requirements" />
     <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 blastx
 -query "$query"
-#if $db_opts.db_opts_selector == "db":
-  -db "${db_opts.database.fields.path}"
-#elif $db_opts.db_opts_selector == "histdb":
-  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
-#else:
-  -subject "$db_opts.subject"
-#end if
+@BLAST_DB_SUBJECT@
 -query_gencode $query_gencode
 -evalue $evalue_cutoff
--out "$output1"
-##Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
--num_threads 8
+@BLAST_OUTPUT@
+@THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
-$adv_opts.filter_query
 $adv_opts.strand
 -matrix $adv_opts.matrix
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
+@ADVANCED_OPTIONS@
 $adv_opts.ungapped
-$adv_opts.parse_deflines
 ## End of advanced options:
 #end if
     </command>
@@ -57,42 +33,22 @@

         <expand macro="input_conditional_protein_db" />
         <expand macro="input_query_gencode" />
-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <expand macro="input_evalue" />

         <expand macro="input_out_format" />
-
-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
-                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
-                    <option value="-strand both">Both</option>
-                    <option value="-strand plus">Plus (forward)</option>
-                    <option value="-strand minus">Minus (reverse complement)</option>
-                </param>
-
-                <expand macro="input_scoring_matrix" />
-
-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!-- I'd like word_size to be optional, with minimum 2 for blastx -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
+        <expand macro="advanced_options">
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_true" />
+            <expand macro="input_strand" />
+            <expand macro="input_scoring_matrix" />
+            <expand macro="input_max_hits" />
+            <expand macro="input_word_size" />
+            <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
+            <expand macro="input_parse_deflines" />
+        </expand>
     </inputs>
     <outputs>
-        <data name="output1" format="tabular" label="blastx on ${on_string}">
+        <data name="output1" format="tabular" label="blastx $query.name vs @ON_DB_SUBJECT@">
             <expand macro="output_change_format" />
         </data>
     </outputs>
@@ -130,83 +86,18 @@
     </tests>
     <help>

-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.
-
------
+@SEARCH_TIME_WARNING@

 **What it does**

 Search a *protein database* using a *translated nucleotide query*,
 using the NCBI BLAST+ blastx command line tool.

-.. class:: warningmark
-
-You can also search against a FASTA file of subject protein
-sequences. This is *not* advised because it is slower (only one
-CPU is used), but more importantly gives e-values for pairwise
-searches (very small e-values which will look overly signficiant).
-In most cases you should instead turn the other FASTA file into a
-database first using *makeblastdb* and search against that.
+@FASTA_WARNING@

 -----

-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output. Galaxy now uses this extended 24 column output by default.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+@OUTPUT_FORMAT@

 -------

@@ -215,17 +106,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:

-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
--- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,4 +1,4 @@
-<tool id="ncbi_dustmasker_wrapper" name="NCBI dustmasker" version="0.0.21">
+<tool id="ncbi_dustmasker_wrapper" name="NCBI dustmasker" version="0.0.22">
     <!-- dustmasker wrapper from Edward Kirton and Nicola Soranzo -->
     <description>masks low complexity regions</description>
     <requirements>
@@ -99,17 +99,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers (a more specific paper covering this wrapper is planned):

-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
--- a/tools/ncbi_blast_plus/ncbi_macros.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,6 +1,5 @@
 <macros>
-    <macro name="output_change_format">
-
+    <xml name="output_change_format">
         <change_format>
             <when input="out_format" value="0" format="txt"/>
             <when input="out_format" value="0 -html" format="html"/>
@@ -10,9 +9,8 @@
             <when input="out_format" value="4 -html" format="html"/>
             <when input="out_format" value="5" format="blastxml"/>
         </change_format>
-
-    </macro>
-    <macro name="input_out_format">
+    </xml>
+    <xml name="input_out_format">
         <param name="out_format" type="select" label="Output format">
             <option value="6">Tabular (standard 12 columns)</option>
             <option value="ext" selected="True">Tabular (extended 24 columns)</option>
@@ -27,8 +25,8 @@
             <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
             -->
         </param>
-    </macro>
-    <macro name="input_scoring_matrix">
+    </xml>
+    <xml name="input_scoring_matrix">
         <param name="matrix" type="select" label="Scoring matrix">
             <option value="BLOSUM90">BLOSUM90</option>
             <option value="BLOSUM80">BLOSUM80</option>
@@ -39,8 +37,8 @@
             <option value="PAM70">PAM70</option>
             <option value="PAM30">PAM30</option>
         </param>
-    </macro>
-    <macro name="stdio">
+    </xml>
+    <xml name="stdio">
         <stdio>
             <!-- Anything other than zero is an error -->
             <exit_code range="1:" />
@@ -49,8 +47,8 @@
             <regex match="Error:" />
             <regex match="Exception:" />
         </stdio>
-    </macro>
-    <macro name="input_query_gencode">
+    </xml>
+    <xml name="input_query_gencode">
         <param name="query_gencode" type="select" label="Query genetic code">
             <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details -->
             <option value="1" select="True">1. Standard</option>
@@ -72,9 +70,8 @@
             <option value="23">23. Thraustochytrium Mitochondrial Code</option>
             <option value="24">24. Pterobranchia mitochondrial code</option>
         </param>
-    </macro>
-
-    <macro name="input_db_gencode">
+    </xml>
+    <xml name="input_db_gencode">
         <param name="db_gencode" type="select" label="Database/subject genetic code">
             <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details -->
             <option value="1" select="True">1. Standard</option>
@@ -96,9 +93,8 @@
             <option value="23">23. Thraustochytrium Mitochondrial Code</option>
             <option value="24">24. Pterobranchia mitochondrial code</option>
         </param>
-    </macro>
-
-    <macro name="input_conditional_nucleotide_db">
+    </xml>
+    <xml name="input_conditional_nucleotide_db">
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
               <option value="db" selected="True">Locally installed BLAST database</option>
@@ -127,9 +123,8 @@
                 <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/>
             </when>
         </conditional>
-    </macro>
-
-    <macro name="input_conditional_protein_db">
+    </xml>
+    <xml name="input_conditional_protein_db">
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
               <option value="db" selected="True">Locally installed BLAST database</option>
@@ -158,9 +153,8 @@
                 <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/>
             </when>
         </conditional>
-    </macro>
-
-    <macro name="input_conditional_pssm">
+    </xml>
+    <xml name="input_conditional_pssm">
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)">
               <option value="db" selected="True">Locally installed BLAST database</option>
@@ -187,9 +181,8 @@
             </when>
             -->
         </conditional>
-    </macro>
-
-    <macro name="input_conditional_choose_db_type">
+    </xml>
+    <xml name="input_conditional_choose_db_type">
         <conditional name="db_opts">
             <param name="db_type" type="select" label="Type of BLAST database">
               <option value="nucl" selected="True">Nucleotide</option>
@@ -214,7 +207,175 @@
                 </param>
             </when>
         </conditional>
-    </macro>
+    </xml>
+    <xml name="input_parse_deflines">
+        <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+    </xml>
+    <xml name="input_filter_query_default_false">
+        <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
+    </xml>
+    <xml name="input_filter_query_default_true">
+        <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
+    </xml>
+    <xml name="input_max_hits">
+        <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+            <validator type="in_range" min="0" />
+        </param>
+    </xml>
+    <xml name="input_evalue">
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+    </xml>
+    <xml name="input_word_size">
+        <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
+            <validator type="in_range" min="0" />
+        </param>
+    </xml>
+    <xml name="input_strand">
+        <param name="strand" type="select" label="Query strand(s) to search against database/subject">
+            <option value="-strand both">Both</option>
+            <option value="-strand plus">Plus (forward)</option>
+            <option value="-strand minus">Minus (reverse complement)</option>
+        </param>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="binary">@BINARY@</requirement>
+            <requirement type="package" version="2.2.27">blast+</requirement>
+        </requirements>
+        <version_command>@BINARY@ -version</version_command>
+    </xml>
+    <xml name="advanced_options">
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <yield />
+            </when>
+        </conditional>
+    </xml>
+    <token name="@THREADS@">-num_threads "\${GALAXY_SLOTS:-8}"</token>
+    <token name="@BLAST_DB_SUBJECT@">
+#if $db_opts.db_opts_selector == "db":
+  -db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
+#else:
+  -subject "$db_opts.subject"
+#end if
+    </token>
+    <token name="@BLAST_OUTPUT@">-out "$output1"
+##Set the extended list here so if/when we add things, saved workflows are not affected
+#if str($out_format)=="ext":
+    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
+#else:
+    -outfmt $out_format
+#end if
+    </token>
+    <token name="@ADVANCED_OPTIONS@">$adv_opts.filter_query
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+$adv_opts.parse_deflines
+    </token>
+    <!-- @ON_DB_SUBJECT@ is for use with @BLAST_DB_SUBJECT@ -->
+    <token name="@ON_DB_SUBJECT@">#if str($db_opts.db_opts_selector)=='db'
+${db_opts.database}
+#elif str($db_opts.db_opts_selector)=='histdb'
+${db_opts.histdb.name}
+#else
+${db_opts.subject.name}
+#end if</token>
+    <token name="@REFERENCES@">
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167

+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics. 15;10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421

+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+    </token>
+    <token name="@OUTPUT_FORMAT@">**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 24 column tabular
+BLAST output. Galaxy now uses this extended 24 column output by default.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+    23 qlen          Query sequence length
+    24 slen          Subject sequence length
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+    </token>
+    <token name="@FASTA_WARNING@">.. class:: warningmark
+
+You can also search against a FASTA file of subject (target)
+sequences. This is *not* advised because it is slower (only one
+CPU is used), but more importantly gives e-values for pairwise
+searches (very small e-values which will look overly significant).
+In most cases you should instead turn the other FASTA file into a
+database first using *makeblastdb* and search against that.
+    </token>
+    <token name="@SEARCH_TIME_WARNING@">.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.
+
+-----
+    </token>
 </macros>
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,13 +1,10 @@
-<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.21">
+<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.22">
     <description>Make BLAST database</description>
-    <requirements>
-        <requirement type="binary">makeblastdb</requirement>
-        <requirement type="package" version="2.2.27">blast+</requirement>
-    </requirements>
-    <version_command>makeblastdb -version</version_command>
     <macros>
+        <token name="@BINARY@">makeblastdb</token>
         <import>ncbi_macros.xml</import>
     </macros>
+    <expand macro="requirements" />
     <command>
 makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}"
 $parse_seqids
@@ -46,34 +43,36 @@
 ## #else if $tax.select == 'map':
 ## -taxid_map $tax.map
 ## #end if
+## --------------------------------------------------------------------
+## Capture the stdout log information to the primary file (plain text):
+&gt;&gt; "$outfile"
     </command>
-
     <expand macro="stdio" />
-
     <inputs>
         <param name="dbtype" type="select" display="radio" label="Molecule type of input">
             <option value="prot">protein</option>
             <option value="nucl">nucleotide</option>
         </param>
         <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)
+             NOTE Double check the new database would be self contained first
         <repeat name="in" title="BLAST or FASTA Database" min="1">
             <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="BLAST or FASTA database" />
         </repeat>
         -->
+        <!-- TODO Switch this to using <param ... multiple="true" /> instead of <repeat> block? -->
         <repeat name="in" title="FASTA file" min="1">
             <param name="file" type="data" format="fasta" />
         </repeat>
         <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />
         <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" />
-        <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values." help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
-
+        <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
         <!-- SEQUENCE MASKING OPTIONS -->
         <repeat name="mask_data" title="Masking data file">
-            <param name="file" type="data" format="maskinfo-asn1,maskinfo-asn1-binary" label="ASN.1 file containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
+            <param name="mask_data_file" type="data" format="maskinfo-asn1,maskinfo-asn1-binary" label="ASN.1 file containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
         </repeat>
         <!-- TODO
         <repeat name="gi_mask" title="Create GI indexed masking data">
-            <param name="file" type="data" format="asnb" label="Masking data output file" />
+            <param name="gi_mask_file" type="data" format="asnb" label="Masking data output file" />
         </repeat>
         -->

@@ -106,6 +105,25 @@
         </data>
     </outputs>
     <tests>
+        <!-- Note the (two line) PIN file is not reproducible run to run.
+        -->
+        <test>
+            <param name="dbtype" value="prot" />
+            <param name="file" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="title" value="Just 4 human proteins" />
+            <param name="parse_seqids" value="" />
+            <param name="hash_index" value="true" />
+            <output name="outfile" file="four_human_proteins.fasta.log" ftype="blastdbp" lines_diff="6">
+                <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />
+                <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />
+                <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />
+                <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" />
+                <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" />
+                <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" />
+                <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" />
+                <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" />
+            </output>
+        </test>
     </tests>
     <help>
 **What it does**
@@ -129,17 +147,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:

-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
--- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,15 +1,12 @@
-<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.21">
+<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.22">
     <description>Search protein domain database (PSSMs) with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>
-    <requirements>
-        <requirement type="binary">rpsblast</requirement>
-        <requirement type="package" version="2.2.27">blast+</requirement>
-    </requirements>
-    <version_command>rpsblast -version</version_command>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
     <macros>
+        <token name="@BINARY@">rpsblast</token>
         <import>ncbi_macros.xml</import>
     </macros>
+    <expand macro="requirements" />
     <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
@@ -21,25 +18,10 @@
   -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
 #end if
 -evalue $evalue_cutoff
--out "$output1"
-##Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
--num_threads 8
+@BLAST_OUTPUT@
+@THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
-$adv_opts.filter_query
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
-$adv_opts.parse_deflines
+@ADVANCED_OPTIONS@
 ## End of advanced options:
 #end if
     </command>
@@ -51,30 +33,17 @@

         <expand macro="input_conditional_pssm" />

-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <expand macro="input_evalue" />

         <expand macro="input_out_format" />

-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!-- I'd like word_size to be optional, with minimum 2 for rpsblast -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
+        <expand macro="advanced_options">
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_false" />
+            <expand macro="input_max_hits" />
+            <expand macro="input_word_size" />
+            <expand macro="input_parse_deflines" />
+        </expand>
     </inputs>
     <outputs>
         <data name="output1" format="tabular" label="rpsblast on ${on_string}">
@@ -85,12 +54,7 @@
     </outputs>
     <help>

-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.
-
------
+@SEARCH_TIME_WARNING@

 **What it does**

@@ -129,60 +93,7 @@

 -----

-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output. Galaxy now uses this extended 24 column output by default.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+@OUTPUT_FORMAT@

 -------

@@ -191,17 +102,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:

-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
--- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,15 +1,12 @@
-<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.0.21">
+<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.0.22">
     <description>Search protein domain database (PSSMs) with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>
-    <requirements>
-        <requirement type="binary">rpstblastn</requirement>
-        <requirement type="package" version="2.2.27">blast+</requirement>
-    </requirements>
-    <version_command>rpstblastn -version</version_command>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
     <macros>
+        <token name="@BINARY@">rpstblastn</token>
         <import>ncbi_macros.xml</import>
     </macros>
+    <expand macro="requirements" />
     <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
@@ -21,26 +18,11 @@
   -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
 #end if
 -evalue $evalue_cutoff
--out "$output1"
-## Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
+@BLAST_OUTPUT@
 ## rpstblastn does not support multiple threads up to release 2.2.27+. Added in BLAST 2.2.28+.
 ##-num_threads 8
 #if $adv_opts.adv_opts_selector=="advanced":
-$adv_opts.filter_query
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
-$adv_opts.parse_deflines
+@ADVANCED_OPTIONS@
 ## End of advanced options:
 #end if
     </command>
@@ -50,30 +32,18 @@

         <expand macro="input_conditional_pssm" />

-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <expand macro="input_evalue" />

         <expand macro="input_out_format" />

-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!-- I'd like word_size to be optional, with minimum 2 for rpsblast -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
+        <expand macro="advanced_options">
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_false" />
+            <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+            <expand macro="input_max_hits" />
+            <expand macro="input_word_size" />
+            <expand macro="input_parse_deflines" />
+        </expand>
     </inputs>
     <outputs>
         <data name="output1" format="tabular" label="rpstblastn on ${on_string}">
@@ -82,12 +52,7 @@
     </outputs>
     <help>

-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.
-
------
+@SEARCH_TIME_WARNING@

 **What it does**

@@ -126,60 +91,7 @@

 -----

-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output. Galaxy now uses this extended 24 column output by default.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+@OUTPUT_FORMAT@

 -------

@@ -188,17 +100,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:

-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,51 +1,27 @@
-<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.21">
+<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.22">
     <description>Search translated nucleotide database with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
-    <requirements>
-        <requirement type="binary">tblastn</requirement>
-        <requirement type="package" version="2.2.27">blast+</requirement>
-    </requirements>
-    <version_command>tblastn -version</version_command>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
     <macros>
+        <token name="@BINARY@">tblastn</token>
         <import>ncbi_macros.xml</import>
     </macros>
+    <expand macro="requirements" />
     <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 tblastn
 -query "$query"
-#if $db_opts.db_opts_selector == "db":
-  -db "${db_opts.database.fields.path}"
-#elif $db_opts.db_opts_selector == "histdb":
-  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
-#else:
-  -subject "$db_opts.subject"
-#end if
+@BLAST_DB_SUBJECT@
 -evalue $evalue_cutoff
--out "$output1"
-##Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
--num_threads 8
+@BLAST_OUTPUT@
+@THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
 -db_gencode $adv_opts.db_gencode
-$adv_opts.filter_query
 -matrix $adv_opts.matrix
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
+@ADVANCED_OPTIONS@
 ##Ungapped disabled for now - see comments below
 ##$adv_opts.ungapped
-$adv_opts.parse_deflines
 ## End of advanced options:
 #end if
     </command>
@@ -57,43 +33,26 @@

         <expand macro="input_conditional_nucleotide_db" />
         <expand macro="input_out_format" />
-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
-
-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-
-                <expand macro="input_db_gencode" />
-
-                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
-
-                <expand macro="input_scoring_matrix" />
+        <expand macro="input_evalue" />
+        <expand macro="advanced_options">
+            <expand macro="input_db_gencode" />

-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!-- I'd like word_size to be optional, with minimum 2 for blastp -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!--
-                Can't use '-ungapped' on its own, error back is:
-                Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
-                Tried using '-ungapped -comp_based_stats F' and tblastn crashed with 'Attempt to access NULL pointer.'
-                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
-                -->
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_true" />
+            <expand macro="input_scoring_matrix" />
+            <expand macro="input_max_hits" />
+            <expand macro="input_word_size" />
+            <!--
+            Can't use '-ungapped' on its own, error back is:
+            Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
+            Tried using '-ungapped -comp_based_stats F' and tblastn crashed with 'Attempt to access NULL pointer.'
+            <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
+            -->
+            <expand macro="input_parse_deflines" />
+        </expand>
     </inputs>
     <outputs>
-        <data name="output1" format="tabular" label="tblastn on ${on_string}">
+        <data name="output1" format="tabular" label="tblastn $query.name vs @ON_DB_SUBJECT@">
             <expand macro="output_change_format" />
         </data>
     </outputs>
@@ -177,83 +136,18 @@
     </tests>
     <help>

-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.
-
------
+@SEARCH_TIME_WARNING@

 **What it does**

 Search a *translated nucleotide database* using a *protein query*,
 using the NCBI BLAST+ tblastn command line tool.

-.. class:: warningmark
-
-You can also search against a FASTA file of subject nucleotide
-sequences. This is *not* advised because it is slower (only one
-CPU is used), but more importantly gives e-values for pairwise
-searches (very small e-values which will look overly signficiant).
-In most cases you should instead turn the other FASTA file into a
-database first using *makeblastdb* and search against that.
+@FASTA_WARNING@

 -----

-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output. Galaxy now uses this extended 24 column output by default.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+@OUTPUT_FORMAT@

 -------

@@ -262,17 +156,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:

-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,51 +1,29 @@
-<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.21">
+<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.22">
     <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
-    <requirements>
-        <requirement type="binary">tblastx</requirement>
-        <requirement type="package" version="2.2.27">blast+</requirement>
-    </requirements>
-    <version_command>tblastx -version</version_command>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
     <macros>
+        <token name="@BINARY@">tblastx</token>
         <import>ncbi_macros.xml</import>
     </macros>
+    <expand macro="requirements" />
     <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 tblastx
 -query "$query"
-#if $db_opts.db_opts_selector == "db":
-  -db "${db_opts.database.fields.path}"
-#elif $db_opts.db_opts_selector == "histdb":
-  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
-#else:
-  -subject "$db_opts.subject"
-#end if
+@BLAST_DB_SUBJECT@
 -query_gencode $query_gencode
 -evalue $evalue_cutoff
--out "$output1"
-##Set the extended list here so if/when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
-#else:
-    -outfmt $out_format
-#end if
--num_threads 8
+@BLAST_OUTPUT@
+@THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
 -db_gencode $adv_opts.db_gencode
-$adv_opts.filter_query
 $adv_opts.strand
 -matrix $adv_opts.matrix
 ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
 ## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
--max_target_seqs $adv_opts.max_hits
-#end if
-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
--word_size $adv_opts.word_size
-#end if
-$adv_opts.parse_deflines
+@ADVANCED_OPTIONS@
 ## End of advanced options:
 #end if
     </command>
@@ -57,42 +35,24 @@

         <expand macro="input_conditional_nucleotide_db" />
         <expand macro="input_query_gencode" />
-        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <expand macro="input_evalue" />

         <expand macro="input_out_format" />
-
-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-
-                <expand macro="input_db_gencode" />
+        <expand macro="advanced_options">
+            <expand macro="input_db_gencode" />

-                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
-                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
-                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
-                    <option value="-strand both">Both</option>
-                    <option value="-strand plus">Plus (forward)</option>
-                    <option value="-strand minus">Minus (reverse complement)</option>
-                </param>
-                <expand macro="input_scoring_matrix" />
-                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
-                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
-                    <validator type="in_range" min="0" />
-                </param>
-                <!-- I'd like word_size to be optional, with minimum 2 for tblastx -->
-                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
-                    <validator type="in_range" min="0" />
-                </param>
-                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
-            </when>
-        </conditional>
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_true" />
+            <expand macro="input_strand" />
+            <expand macro="input_scoring_matrix" />
+            <expand macro="input_max_hits" />
+            <!-- I'd like word_size to be optional, with minimum 2 for tblastx -->
+            <expand macro="input_word_size" />
+            <expand macro="input_parse_deflines" />
+        </expand>
     </inputs>
     <outputs>
-        <data name="output1" format="tabular" label="tblastx on ${on_string}">
+        <data name="output1" format="tabular" label="tblastx $query.name vs @ON_DB_SUBJECT@">
             <expand macro="output_change_format" />
         </data>
     </outputs>
@@ -110,83 +70,18 @@
     </tests>
     <help>

-.. class:: warningmark
-
-**Note**. Database searches may take a substantial amount of time.
-For large input datasets it is advisable to allow overnight processing.
-
------
+@SEARCH_TIME_WARNING@

 **What it does**

 Search a *translated nucleotide database* using a *translated nucleotide query*,
 using the NCBI BLAST+ tblastx command line tool.

-.. class:: warningmark
-
-You can also search against a FASTA file of subject nucleotide
-sequences. This is *not* advised because it is slower (only one
-CPU is used), but more importantly gives e-values for pairwise
-searches (very small e-values which will look overly signficiant).
-In most cases you should instead turn the other FASTA file into a
-database first using *makeblastdb* and search against that.
+@FASTA_WARNING@

 -----

-**Output format**
-
-Because Galaxy focuses on processing tabular data, the default output of this
-tool is tabular. The standard BLAST+ tabular output contains 12 columns:
-
-====== ========= ============================================
-Column NCBI name Description
------- --------- --------------------------------------------
-     1 qseqid    Query Seq-id (ID of your sequence)
-     2 sseqid    Subject Seq-id (ID of the database hit)
-     3 pident    Percentage of identical matches
-     4 length    Alignment length
-     5 mismatch  Number of mismatches
-     6 gapopen   Number of gap openings
-     7 qstart    Start of alignment in query
-     8 qend      End of alignment in query
-     9 sstart    Start of alignment in subject (database hit)
-    10 send      End of alignment in subject (database hit)
-    11 evalue    Expectation value (E-value)
-    12 bitscore  Bit score
-====== ========= ============================================
-
-The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
-included by selecting the extended tabular output. The extra columns are
-included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 24 column tabular
-BLAST output. Galaxy now uses this extended 24 column output by default.
-
-====== ============= ===========================================
-Column NCBI name     Description
------- ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
-    14 score         Raw score
-    15 nident        Number of identical matches
-    16 positive      Number of positive-scoring matches
-    17 gaps          Total number of gaps
-    18 ppos          Percentage of positive-scoring matches
-    19 qframe        Query frame
-    20 sframe        Subject frame
-    21 qseq          Aligned part of query sequence
-    22 sseq          Aligned part of subject sequence
-    23 qlen          Query sequence length
-    24 slen          Subject sequence length
-====== ============= ===========================================
-
-The third option is BLAST XML output, which is designed to be parsed by
-another program, and is understood by some Galaxy tools.
-
-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+@OUTPUT_FORMAT@

 -------

@@ -195,17 +90,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:

-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
--- a/tools/ncbi_blast_plus/repository_dependencies.xml	Thu Oct 10 11:53:57 2013 -0400
+++ b/tools/ncbi_blast_plus/repository_dependencies.xml	Mon Nov 25 10:58:46 2013 -0500
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format).">
-    <repository changeset_revision="7ceb2ae30ff4" name="blast_datatypes" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="e36c60d13c94" name="blast_datatypes" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" />
 </repositories>