Mercurial > repos > peterjc > ncbi_blast_plus

--- a/tools/ncbi_blast_plus/hide_stderr.py	Wed Apr 17 09:44:04 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-"""A simple script to redirect stderr to stdout when the return code is zero.
-
-See https://bitbucket.org/galaxy/galaxy-central/issue/325/
-
-Currently Galaxy ignores the return code from command line tools (even if it
-is non-zero which by convention indicates an error) and treats any output on
-stderr as an error (even though by convention stderr is used for errors or
-warnings).
-
-This script runs the given command line, capturing all stdout and stderr in
-memory, and gets the return code. For a zero return code, any stderr (which
-should be warnings only) is added to the stdout. That way Galaxy believes
-everything is fine. For a non-zero return code, we output stdout as is, and
-any stderr, plus the return code to ensure there is some output on stderr.
-That way Galaxy treats this as an error.
-
-Once issue 325 is fixed, this script will not be needed.
-"""
-import sys
-import subprocess
-
-#Avoid using shell=True when we call subprocess to ensure if the Python
-#script is killed, so too is the BLAST process.
-try:
-    words = []
-    for w in sys.argv[1:]:
-       if " " in w:
-           words.append('"%s"' % w)
-       else:
-           words.append(w)
-    cmd = " ".join(words)
-    child = subprocess.Popen(sys.argv[1:],
-                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-except Exception, err:
-    sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
-    sys.exit(1)
-#Use .communicate as can get deadlocks with .wait(),
-stdout, stderr = child.communicate()
-return_code = child.returncode
-
-if return_code:
-    sys.stdout.write(stdout)
-    sys.stderr.write(stderr)
-    sys.stderr.write("Return error code %i from command:\n" % return_code)
-    sys.stderr.write("%s\n" % cmd)
-else:
-    sys.stdout.write(stdout)
-    sys.stdout.write(stderr)
--- a/tools/ncbi_blast_plus/ncbi_blast_plus.txt	Wed Apr 17 09:44:04 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blast_plus.txt	Wed Apr 17 09:44:25 2013 -0400
@@ -8,9 +8,10 @@
 Currently tested with NCBI BLAST 2.2.26+ (i.e. version 2.2.26 of BLAST+),
 and do not work with the NCBI 'legacy' BLAST suite (e.g. blastall).

-Note that these wrappers were originally distributed as part of the main
-Galaxy repository, but as of August 2012 moved to the Galaxy Tool Shed.
-My thanks to Dannon Baker from the Galaxy development team for this assistance
+Note that these wrappers (and the associated datatypes) were originally
+distributed as part of the main Galaxy repository, but as of August 2012
+moved to the Galaxy Tool Shed as 'ncbi_blast_plus' (and 'blast_datatypes').
+My thanks to Dannon Baker from the Galaxy development team for his assistance
 with this.


@@ -25,7 +26,7 @@
 files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
 databases like NR).

-You will also need to install the 'blast_datatypes' from the Tool Shed. This
+You will also need to install 'blast_datatypes' from the Tool Shed. This
 defines the BLAST XML file format ('blastxml').


@@ -39,6 +40,10 @@
           very large sets of queries where BLAST+ can become memory hungry)
         - Include warning that BLAST+ with subject FASTA gives pairwise
           e-values
+v0.0.13 - Use the new error handling options in Galaxy (the previously
+          bundled hide_stderr.py script is no longer needed).
+v0.0.14 - Support for makeblastdb and local BLAST databases in the history
+          (using work from Edward Kirton).


 Developers
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml	Wed Apr 17 09:44:25 2013 -0400
@@ -0,0 +1,63 @@
+<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.2">
+    <description>Show BLAST database information from blastdbcmd</description>
+    <command>
+blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out $info
+    </command>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+	<!-- Suspect blastdbcmd sometimes fails to set error level -->
+	<regex match="Error:" />
+	<regex match="EXception:" />
+    </stdio>
+    <inputs>
+        <conditional name="db_opts">
+            <param name="db_type" type="select" label="Type of BLAST database">
+              <option value="nucl" selected="True">Nucleotide</option>
+              <option value="prot">Protein</option>
+            </param>
+            <when value="nucl">
+                <param name="database" type="select" label="Nucleotide BLAST database">
+                    <options from_file="blastdb.loc">
+                      <column name="value" index="0"/>
+                      <column name="name" index="1"/>
+                      <column name="path" index="2"/>
+                    </options>
+                </param>
+            </when>
+            <when value="prot">
+                <param name="database" type="select" label="Protein BLAST database">
+                    <options from_file="blastdb_p.loc">
+                      <column name="value" index="0"/>
+                      <column name="name" index="1"/>
+                      <column name="path" index="2"/>
+                    </options>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="info" format="txt" label="${db_opts.database.fields.name} info" />
+    </outputs>
+    <requirements>
+        <requirement type="binary">blastdbcmd</requirement>
+    </requirements>
+    <help>
+
+**What it does**
+
+Calls the NCBI BLAST+ blastdbcmd command line tool with the -info
+switch to give summary information about a BLAST database, such as
+the size (number of sequences and total length) and date.
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml	Wed Apr 17 09:44:25 2013 -0400
@@ -0,0 +1,135 @@
+<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.3">
+    <description>Extract sequence(s) from BLAST database</description>
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}"
+
+##TODO: What about -ctrl_a and -target_only as advanced options?
+
+#if $id_opts.id_type=="file":
+-entry_batch "$id_opts.entries"
+#else:
+##Perform some simple search/replaces to remove whitespace
+##and make it comma separated, and escape any pipe characters
+-entry "$id_opts.entries.replace('\r',',').replace('\n',',').replace(' ','').replace(',,',',').replace(',,',',').strip(',').replace('|','\|')"
+#end if
+
+##When building a BLAST database, to ensure unique IDs makeblastdb will
+##do things like turning a FASTA entry with ID of ERP44 into lcl|ERP44
+##(if using -parse_seqids) or simply assign it an ID using the record
+##number like gnl|BL_ORD_ID|123 (to cope with duplicate IDs in the FASTA
+##file). In -parse_seqids mode, a duplicate FASTA ID gives an error.
+##
+##The BLAST plain text and XML output will contain these BLAST IDs, but
+##the tabular output does not (at least, not in BLAST 2.2.25+).
+##Therefore in general, Galaxy users won't care about the (internal)
+##BLAST identifiers.
+##
+##The blastdbcmd FASTA output will also contain these IDs, but in the
+##context of the BLAST tabular output they are not helpful. Therefore
+##to recover the original ID as used in the FASTA file for makeblastdb
+##we need a little post processing.
+##
+##We remove the NCBI's lcl|... or gnl|BL_ORD_ID|123 prefixes
+##using sed, however the exact syntax differs for Mac OS X's sed
+
+#if str($outfmt)=="blastid":
+-out "$seq"
+#else if sys.platform == "darwin":
+| sed -E 's/^>(lcl\||gnl\|BL_ORD_ID\|[0-9]* )/>/1' > "$seq"
+#else:
+| sed 's/>\(lcl|\|gnl|BL_ORD_ID|[0-9]* \)/>/1' > "$seq"
+#end if
+    </command>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+	<!-- Suspect blastdbcmd sometimes fails to set error level -->
+	<regex match="Error:" />
+	<regex match="EXception:" />
+    </stdio>
+    <inputs>
+        <conditional name="db_opts">
+            <param name="db_type" type="select" label="Type of BLAST database">
+              <option value="nucl" selected="True">Nucleotide</option>
+              <option value="prot">Protein</option>
+            </param>
+            <when value="nucl">
+                <param name="database" type="select" label="Nucleotide BLAST database">
+                    <options from_file="blastdb.loc">
+                      <column name="value" index="0"/>
+                      <column name="name" index="1"/>
+                      <column name="path" index="2"/>
+                    </options>
+                </param>
+            </when>
+            <when value="prot">
+                <param name="database" type="select" label="Protein BLAST database">
+                    <options from_file="blastdb_p.loc">
+                      <column name="value" index="0"/>
+                      <column name="name" index="1"/>
+                      <column name="path" index="2"/>
+                    </options>
+                </param>
+            </when>
+        </conditional>
+        <conditional name="id_opts">
+            <param name="id_type" type="select" label="Type of identifier list">
+              <option value="file">From file</option>
+              <option value="prompt">User entered</option>
+            </param>
+            <when value="file">
+                <param name="entries" type="data" format="txt,tabular" label="Sequence identifier(s)" help="Plain text file with one ID per line (i.e. single column tabular file)"/>
+            </when>
+            <when value="prompt">
+                <param name="entries" type="text" label="Sequence identifier(s)" help="Comma or new line separated list." optional="False" area="True" size="10x30"/>
+            </when>
+        </conditional>
+        <param name="outfmt" type="select" label="Output format">
+          <option value="original">FASTA with original identifiers</option>
+          <option value="blastid">FASTA with BLAST assigned identifiers</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="seq" format="fasta" label="Sequences from ${db_opts.database.fields.name}" />
+    </outputs>
+    <requirements>
+        <requirement type="binary">blastdbcmd</requirement>
+    </requirements>
+    <help>
+
+**What it does**
+
+Extracts FASTA formatted sequences from a BLAST database
+using the NCBI BLAST+ blastdbcmd command line tool.
+
+.. class:: warningmark
+
+**BLAST assigned identifiers**
+
+When a BLAST database is constructed from a FASTA file, the
+original identifiers can be replaced with BLAST assigned
+identifiers, partly to ensure uniqueness. e.g. Sometimes
+a prefix of 'lcl|' is added (lcl is short for local),
+or an arbitrary name starting 'gnl|BL_ORD_ID|' is created.
+
+If you are using the tabular output from BLAST, it will contain
+the original identifiers - not the BLAST assigned identifiers
+suitable for use with the blastdbcmd tool.
+
+If you are using the XML or plain text output, this will also
+contain the BLAST assigned identifiers. However, this means
+getting a list of BLAST assigned identifiers isn't straightforward.
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
+
+    </help>
+</tool>
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml	Wed Apr 17 09:44:04 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml	Wed Apr 17 09:44:25 2013 -0400
@@ -1,15 +1,17 @@
-<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.12">
+<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.14">
     <description>Search nucleotide database with nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
     <version_command>blastn -version</version_command>
-    <command interpreter="python">hide_stderr.py
+    <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 blastn
 -query "$query"
 #if $db_opts.db_opts_selector == "db":
   -db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
 #else:
   -subject "$db_opts.subject"
 #end if
@@ -39,12 +41,21 @@
 ## End of advanced options:
 #end if
     </command>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+        <!-- In case the return code has not been set properly, check stderr too -->
+        <regex match="Error:" />
+        <regex match="EXception:" />
+    </stdio>
     <inputs>
         <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/>
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
               <option value="db" selected="True">BLAST Database</option>
-              <option value="file">FASTA file (pairwise e-values)</option>
+              <option value="histdb">BLAST database from your history</option>
+              <option value="file">FASTA file from your history (pairwise e-values)</option>
             </param>
             <when value="db">
                 <param name="database" type="select" label="Nucleotide BLAST database">
@@ -54,10 +65,17 @@
                       <column name="path" index="2"/>
                     </options>
                 </param>
+                <param name="histdb" type="hidden" value="" />
                 <param name="subject" type="hidden" value="" />
             </when>
+            <when value="histdb">
+                <param name="database" type="hidden" value="" />
+                <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" />
+                <param name="subject" type="hidden" value="" />
+            </when>
             <when value="file">
-                <param name="database" type="hidden" value="" />
+                <param name="database" type="hidden" value="" />
+                <param name="histdb" type="hidden" value="" />
                 <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/>
             </when>
         </conditional>
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml	Wed Apr 17 09:44:04 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml	Wed Apr 17 09:44:25 2013 -0400
@@ -1,15 +1,17 @@
-<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.12">
+<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.14">
     <description>Search protein database with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
     <version_command>blastp -version</version_command>
-    <command interpreter="python">hide_stderr.py
+    <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 blastp
 -query "$query"
 #if $db_opts.db_opts_selector == "db":
   -db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
 #else:
   -subject "$db_opts.subject"
 #end if
@@ -40,12 +42,21 @@
 ## End of advanced options:
 #end if
     </command>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+        <!-- In case the return code has not been set properly, check stderr too -->
+        <regex match="Error:" />
+        <regex match="EXception:" />
+    </stdio>
     <inputs>
         <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/>
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
               <option value="db" selected="True">BLAST Database</option>
-              <option value="file">FASTA file (pairwise e-values)</option>
+              <option value="histdb">BLAST database from your history</option>
+              <option value="file">FASTA file from your history (pairwise e-values)</option>
             </param>
             <when value="db">
                 <param name="database" type="select" label="Protein BLAST database">
@@ -55,10 +66,17 @@
                       <column name="path" index="2"/>
                     </options>
                 </param>
+                <param name="histdb" type="hidden" value="" />
                 <param name="subject" type="hidden" value="" />
             </when>
+            <when value="histdb">
+                <param name="database" type="hidden" value="" />
+                <param name="histdb" type="data" format="blastdbp" label="Protein BLAST database" />
+                <param name="subject" type="hidden" value="" />
+            </when>
             <when value="file">
                 <param name="database" type="hidden" value="" />
+                <param name="histdb" type="hidden" value="" />
                 <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/>
             </when>
         </conditional>
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml	Wed Apr 17 09:44:04 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml	Wed Apr 17 09:44:25 2013 -0400
@@ -1,15 +1,17 @@
-<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.12">
+<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.14">
     <description>Search protein database with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
     <version_command>blastx -version</version_command>
-    <command interpreter="python">hide_stderr.py
+    <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 blastx
 -query "$query"
 #if $db_opts.db_opts_selector == "db":
   -db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
 #else:
   -subject "$db_opts.subject"
 #end if
@@ -40,12 +42,21 @@
 ## End of advanced options:
 #end if
     </command>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+        <!-- In case the return code has not been set properly, check stderr too -->
+        <regex match="Error:" />
+        <regex match="EXception:" />
+    </stdio>
     <inputs>
         <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/>
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
               <option value="db" selected="True">BLAST Database</option>
-              <option value="file">FASTA file (pairwise e-values)</option>
+              <option value="histdb">BLAST database from your history</option>
+              <option value="file">FASTA file from your history (pairwise e-values)</option>
             </param>
             <when value="db">
                 <param name="database" type="select" label="Protein BLAST database">
@@ -55,10 +66,17 @@
                       <column name="path" index="2"/>
                     </options>
                 </param>
-                <param name="subject" type="hidden" value="" />
+                <param name="histdb" type="hidden" value="" />
+                <param name="subject" type="hidden" value="" />
+            </when>
+            <when value="histdb">
+                <param name="database" type="hidden" value="" />
+                <param name="histdb" type="data" format="blastdbp" label="Protein BLAST database" />
+                <param name="subject" type="hidden" value="" />
             </when>
             <when value="file">
-                <param name="database" type="hidden" value="" />
+                <param name="database" type="hidden" value="" />
+                <param name="histdb" type="hidden" value="" />
                 <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/>
             </when>
         </conditional>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml	Wed Apr 17 09:44:25 2013 -0400
@@ -0,0 +1,121 @@
+<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.1">
+<description>Make BLAST database</description>
+<version_command>makeblastdb -version</version_command>
+<command>
+makeblastdb -out ${os.path.join($outfile.extra_files_path,'blastdb')}
+$parse_seqids
+$hash_index
+## Single call to -in with multiple filenames space separated with outer quotes
+## (presumably any filenames with spaces would be a problem). Note this gives
+## some extra spaces, e.g. -in " file1 file2 file3  " but BLAST seems happy:
+-in "
+#for $i in $in
+${i.file} #end for
+"
+#if $title:
+-title "$title"
+#else:
+##Would default to being based on the cryptic Galaxy filenames, which is unhelpful
+-title "BLAST Database"
+#end if
+-dbtype $dbtype
+## #set $sep = '-mask_data '
+## #for $i in $mask_data
+## $sep${i.file}
+## #set $sep = ', '
+## #end for
+## #set $sep = '-gi_mask -gi_mask_name '
+## #for $i in $gi_mask
+## $sep${i.file}
+## #set $sep = ', '
+## #end for
+## #if $tax.select == 'id':
+## -taxid $tax.id
+## #else if $tax.select == 'map':
+## -taxid_map $tax.map
+## #end if
+</command>
+<stdio>
+    <!-- Anything other than zero is an error -->
+    <exit_code range="1:" />
+    <exit_code range=":-1" />
+    <!-- In case the return code has not been set properly, check stderr too -->
+    <regex match="Error:" />
+    <regex match="EXception:" />
+</stdio>
+<inputs>
+    <param name="dbtype" type="select" display="radio" label="Molecule type of input">
+        <option value="prot">protein</option>
+        <option value="nucl">nucleotide</option>
+    </param>
+    <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)
+    <repeat name="in" title="Blast or Fasta Database" min="1">
+        <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="Blast or Fasta database" />
+    </repeat>
+    -->
+    <repeat name="in" title="FASTA file" min="1">
+        <param name="file" type="data" format="fasta" />
+    </repeat>
+    <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />
+    <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" />
+    <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values." help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
+
+    <!-- SEQUENCE MASKING OPTIONS -->
+    <!-- TODO
+    <repeat name="mask_data" title="Provide one or more files containing masking data">
+        <param name="file" type="data" format="asnb" label="File containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
+    </repeat>
+    <repeat name="gi_mask" title="Create GI indexed masking data">
+        <param name="file" type="data" format="asnb" label="Masking data output file" />
+    </repeat>
+    -->
+
+    <!-- TAXONOMY OPTIONS -->
+    <!-- TODO
+    <conditional name="tax">
+        <param name="select" type="select" label="Taxonomy options">
+            <option value="">Do not assign sequences to Taxonomy IDs</option>
+            <option value="id">Assign all sequences to one Taxonomy ID</option>
+            <option value="map">Supply text file mapping sequence IDs to taxonomy IDs</option>
+        </param>
+        <when value="">
+        </when>
+        <when value="id">
+            <param name="id" type="integer" value="" label="NCBI taxonomy ID" help="Integer &gt;=0" />
+        </when>
+        <when value="map">
+            <param name="file" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" />
+        </when>
+    </conditional>
+    -->
+</inputs>
+<outputs>
+    <!-- If we only accepted one FASTA file, we could use its human name here... -->
+    <data name="outfile" format="data" label="BLAST database from ${on_string}">
+        <change_format>
+                <when input="dbtype" value="nucl" format="blastdbn"/>
+                <when input="dbtype" value="prot" format="blastdbp"/>
+        </change_format>
+    </data>
+</outputs>
+<requirements>
+    <requirement type="binary">makeblastdb</requirement>
+</requirements>
+<help>
+**What it does**
+
+Make BLAST database from one or more FASTA files and/or BLAST databases.
+
+This is a wrapper for the NCBI BLAST+ tool 'makeblastdb', which is the
+replacement for the 'formatdb' tool in the NCBI 'legacy' BLAST suite.
+
+<!--
+Applying masks to an existing BLAST database will not change the original database; a new database will be created.
+For this reason, it's best to apply all masks at once to minimize the number of unnecessary intermediate databases.
+-->
+
+**Documentation**
+
+http://www.ncbi.nlm.nih.gov/books/NBK1763/
+</help>
+</tool>
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml	Wed Apr 17 09:44:04 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml	Wed Apr 17 09:44:25 2013 -0400
@@ -1,15 +1,17 @@
-<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.12">
+<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.14">
     <description>Search translated nucleotide database with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
     <version_command>tblastn -version</version_command>
-    <command interpreter="python">hide_stderr.py
+    <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 tblastn
 -query "$query"
 #if $db_opts.db_opts_selector == "db":
   -db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
 #else:
   -subject "$db_opts.subject"
 #end if
@@ -40,12 +42,21 @@
 ## End of advanced options:
 #end if
     </command>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+        <!-- In case the return code has not been set properly, check stderr too -->
+        <regex match="Error:" />
+        <regex match="EXception:" />
+    </stdio>
     <inputs>
         <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/>
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
               <option value="db" selected="True">BLAST Database</option>
-              <option value="file">FASTA file (pairwise e-values)</option>
+              <option value="histdb">BLAST database from your history</option>
+              <option value="file">FASTA file from your history (pairwise e-values)</option>
             </param>
             <when value="db">
                 <param name="database" type="select" label="Nucleotide BLAST database">
@@ -55,10 +66,17 @@
                       <column name="path" index="2"/>
                     </options>
                 </param>
-                <param name="subject" type="hidden" value="" />
+                <param name="histdb" type="hidden" value="" />
+                <param name="subject" type="hidden" value="" />
+            </when>
+            <when value="histdb">
+                <param name="database" type="hidden" value="" />
+                <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" />
+                <param name="subject" type="hidden" value="" />
             </when>
             <when value="file">
-                <param name="database" type="hidden" value="" />
+                <param name="database" type="hidden" value="" />
+                <param name="histdb" type="hidden" value="" />
                 <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/>
             </when>
         </conditional>
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml	Wed Apr 17 09:44:04 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml	Wed Apr 17 09:44:25 2013 -0400
@@ -1,15 +1,17 @@
-<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.12">
+<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.14">
     <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
     <version_command>tblastx -version</version_command>
-    <command interpreter="python">hide_stderr.py
+    <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 tblastx
 -query "$query"
 #if $db_opts.db_opts_selector == "db":
   -db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
 #else:
   -subject "$db_opts.subject"
 #end if
@@ -40,12 +42,21 @@
 ## End of advanced options:
 #end if
     </command>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+        <!-- In case the return code has not been set properly, check stderr too -->
+        <regex match="Error:" />
+        <regex match="EXception:" />
+    </stdio>
     <inputs>
         <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/>
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
               <option value="db" selected="True">BLAST Database</option>
-              <option value="file">FASTA file (pairwise e-values)</option>
+              <option value="histdb">BLAST database from your history</option>
+              <option value="file">FASTA file from your history (pairwise e-values)</option>
             </param>
             <when value="db">
                 <param name="database" type="select" label="Nucleotide BLAST database">
@@ -55,10 +66,17 @@
                       <column name="path" index="2"/>
                     </options>
                 </param>
-                <param name="subject" type="hidden" value="" />
+                <param name="histdb" type="hidden" value="" />
+                <param name="subject" type="hidden" value="" />
+            </when>
+            <when value="histdb">
+                <param name="database" type="hidden" value="" />
+                <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" />
+                <param name="subject" type="hidden" value="" />
             </when>
             <when value="file">
-                <param name="database" type="hidden" value="" />
+                <param name="database" type="hidden" value="" />
+                <param name="histdb" type="hidden" value="" />
                 <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/>
             </when>
         </conditional>