Mercurial > repos > peterjc > ncbi_blast_plus
changeset 1:c84837116457 draft
Uploaded v0.0.14
author | peterjc |
---|---|
date | Wed, 17 Apr 2013 09:44:25 -0400 |
parents | b04f55ab182c |
children | b70b142bbc39 |
files | tools/ncbi_blast_plus/hide_stderr.py tools/ncbi_blast_plus/ncbi_blast_plus.txt tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml tools/ncbi_blast_plus/ncbi_makeblastdb.xml tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml |
diffstat | 10 files changed, 445 insertions(+), 80 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/hide_stderr.py Wed Apr 17 09:44:04 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ -#!/usr/bin/env python -"""A simple script to redirect stderr to stdout when the return code is zero. - -See https://bitbucket.org/galaxy/galaxy-central/issue/325/ - -Currently Galaxy ignores the return code from command line tools (even if it -is non-zero which by convention indicates an error) and treats any output on -stderr as an error (even though by convention stderr is used for errors or -warnings). - -This script runs the given command line, capturing all stdout and stderr in -memory, and gets the return code. For a zero return code, any stderr (which -should be warnings only) is added to the stdout. That way Galaxy believes -everything is fine. For a non-zero return code, we output stdout as is, and -any stderr, plus the return code to ensure there is some output on stderr. -That way Galaxy treats this as an error. - -Once issue 325 is fixed, this script will not be needed. -""" -import sys -import subprocess - -#Avoid using shell=True when we call subprocess to ensure if the Python -#script is killed, so too is the BLAST process. -try: - words = [] - for w in sys.argv[1:]: - if " " in w: - words.append('"%s"' % w) - else: - words.append(w) - cmd = " ".join(words) - child = subprocess.Popen(sys.argv[1:], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) -except Exception, err: - sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) - sys.exit(1) -#Use .communicate as can get deadlocks with .wait(), -stdout, stderr = child.communicate() -return_code = child.returncode - -if return_code: - sys.stdout.write(stdout) - sys.stderr.write(stderr) - sys.stderr.write("Return error code %i from command:\n" % return_code) - sys.stderr.write("%s\n" % cmd) -else: - sys.stdout.write(stdout) - sys.stdout.write(stderr)
--- a/tools/ncbi_blast_plus/ncbi_blast_plus.txt Wed Apr 17 09:44:04 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blast_plus.txt Wed Apr 17 09:44:25 2013 -0400 @@ -8,9 +8,10 @@ Currently tested with NCBI BLAST 2.2.26+ (i.e. version 2.2.26 of BLAST+), and do not work with the NCBI 'legacy' BLAST suite (e.g. blastall). -Note that these wrappers were originally distributed as part of the main -Galaxy repository, but as of August 2012 moved to the Galaxy Tool Shed. -My thanks to Dannon Baker from the Galaxy development team for this assistance +Note that these wrappers (and the associated datatypes) were originally +distributed as part of the main Galaxy repository, but as of August 2012 +moved to the Galaxy Tool Shed as 'ncbi_blast_plus' (and 'blast_datatypes'). +My thanks to Dannon Baker from the Galaxy development team for his assistance with this. @@ -25,7 +26,7 @@ files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein databases like NR). -You will also need to install the 'blast_datatypes' from the Tool Shed. This +You will also need to install 'blast_datatypes' from the Tool Shed. This defines the BLAST XML file format ('blastxml'). @@ -39,6 +40,10 @@ very large sets of queries where BLAST+ can become memory hungry) - Include warning that BLAST+ with subject FASTA gives pairwise e-values +v0.0.13 - Use the new error handling options in Galaxy (the previously + bundled hide_stderr.py script is no longer needed). +v0.0.14 - Support for makeblastdb and local BLAST databases in the history + (using work from Edward Kirton). Developers
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Wed Apr 17 09:44:25 2013 -0400 @@ -0,0 +1,63 @@ +<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.2"> + <description>Show BLAST database information from blastdbcmd</description> + <command> +blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out $info + </command> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <!-- Suspect blastdbcmd sometimes fails to set error level --> + <regex match="Error:" /> + <regex match="EXception:" /> + </stdio> + <inputs> + <conditional name="db_opts"> + <param name="db_type" type="select" label="Type of BLAST database"> + <option value="nucl" selected="True">Nucleotide</option> + <option value="prot">Protein</option> + </param> + <when value="nucl"> + <param name="database" type="select" label="Nucleotide BLAST database"> + <options from_file="blastdb.loc"> + <column name="value" index="0"/> + <column name="name" index="1"/> + <column name="path" index="2"/> + </options> + </param> + </when> + <when value="prot"> + <param name="database" type="select" label="Protein BLAST database"> + <options from_file="blastdb_p.loc"> + <column name="value" index="0"/> + <column name="name" index="1"/> + <column name="path" index="2"/> + </options> + </param> + </when> + </conditional> + </inputs> + <outputs> + <data name="info" format="txt" label="${db_opts.database.fields.name} info" /> + </outputs> + <requirements> + <requirement type="binary">blastdbcmd</requirement> + </requirements> + <help> + +**What it does** + +Calls the NCBI BLAST+ blastdbcmd command line tool with the -info +switch to give summary information about a BLAST database, such as +the size (number of sequences and total length) and date. + +------- + +**References** + +Altschul et al. 
Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402. + +Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005. + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Wed Apr 17 09:44:25 2013 -0400 @@ -0,0 +1,135 @@ +<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.3"> + <description>Extract sequence(s) from BLAST database</description> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" + +##TODO: What about -ctrl_a and -target_only as advanced options? + +#if $id_opts.id_type=="file": +-entry_batch "$id_opts.entries" +#else: +##Perform some simple search/replaces to remove whitespace +##and make it comma separated, and escape any pipe characters +-entry "$id_opts.entries.replace('\r',',').replace('\n',',').replace(' ','').replace(',,',',').replace(',,',',').strip(',').replace('|','\|')" +#end if + +##When building a BLAST database, to ensure unique IDs makeblastdb will +##do things like turning a FASTA entry with ID of ERP44 into lcl|ERP44 +##(if using -parse_seqids) or simply assign it an ID using the record +##number like gnl|BL_ORD_ID|123 (to cope with duplicate IDs in the FASTA +##file). In -parse_seqids mode, a duplicate FASTA ID gives an error. +## +##The BLAST plain text and XML output will contain these BLAST IDs, but +##the tabular output does not (at least, not in BLAST 2.2.25+). +##Therefore in general, Galaxy users won't care about the (internal) +##BLAST identifiers. +## +##The blastdbcmd FASTA output will also contain these IDs, but in the +##context of the BLAST tabular output they are not helpful. Therefore +##to recover the original ID as used in the FASTA file for makeblastdb +##we need a little post processing. +## +##We remove the NCBI's lcl|... 
or gnl|BL_ORD_ID|123 prefixes +##using sed, however the exact syntax differs for Mac OS X's sed + +#if str($outfmt)=="blastid": +-out "$seq" +#else if sys.platform == "darwin": +| sed -E 's/^>(lcl\||gnl\|BL_ORD_ID\|[0-9]* )/>/1' > "$seq" +#else: +| sed 's/>\(lcl|\|gnl|BL_ORD_ID|[0-9]* \)/>/1' > "$seq" +#end if + </command> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <!-- Suspect blastdbcmd sometimes fails to set error level --> + <regex match="Error:" /> + <regex match="EXception:" /> + </stdio> + <inputs> + <conditional name="db_opts"> + <param name="db_type" type="select" label="Type of BLAST database"> + <option value="nucl" selected="True">Nucleotide</option> + <option value="prot">Protein</option> + </param> + <when value="nucl"> + <param name="database" type="select" label="Nucleotide BLAST database"> + <options from_file="blastdb.loc"> + <column name="value" index="0"/> + <column name="name" index="1"/> + <column name="path" index="2"/> + </options> + </param> + </when> + <when value="prot"> + <param name="database" type="select" label="Protein BLAST database"> + <options from_file="blastdb_p.loc"> + <column name="value" index="0"/> + <column name="name" index="1"/> + <column name="path" index="2"/> + </options> + </param> + </when> + </conditional> + <conditional name="id_opts"> + <param name="id_type" type="select" label="Type of identifier list"> + <option value="file">From file</option> + <option value="prompt">User entered</option> + </param> + <when value="file"> + <param name="entries" type="data" format="txt,tabular" label="Sequence identifier(s)" help="Plain text file with one ID per line (i.e. single column tabular file)"/> + </when> + <when value="prompt"> + <param name="entries" type="text" label="Sequence identifier(s)" help="Comma or new line separated list." 
optional="False" area="True" size="10x30"/> + </when> + </conditional> + <param name="outfmt" type="select" label="Output format"> + <option value="original">FASTA with original identifiers</option> + <option value="blastid">FASTA with BLAST assigned identifiers</option> + </param> + </inputs> + <outputs> + <data name="seq" format="fasta" label="Sequences from ${db_opts.database.fields.name}" /> + </outputs> + <requirements> + <requirement type="binary">blastdbcmd</requirement> + </requirements> + <help> + +**What it does** + +Extracts FASTA formatted sequences from a BLAST database +using the NCBI BLAST+ blastdbcmd command line tool. + +.. class:: warningmark + +**BLAST assigned identifiers** + +When a BLAST database is constructed from a FASTA file, the +original identifiers can be replaced with BLAST assigned +identifiers, partly to ensure uniqueness. e.g. Sometimes +a prefix of 'lcl|' is added (lcl is short for local), +or an arbitrary name starting 'gnl|BL_ORD_ID|' is created. + +If you are using the tabular output from BLAST, it will contain +the original identifiers - not the BLAST assigned identifiers +suitable for use with the blastdbcmd tool. + +If you are using the XML or plain text output, this will also +contain the BLAST assigned identifiers. However, this means +getting a list of BLAST assigned identifiers isn't straightforward. + +------- + +**References** + +Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402. + +Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005. + + </help> +</tool>
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Wed Apr 17 09:44:04 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Wed Apr 17 09:44:25 2013 -0400 @@ -1,15 +1,17 @@ -<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.12"> +<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.14"> <description>Search nucleotide database with nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> - <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism> + <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> <version_command>blastn -version</version_command> - <command interpreter="python">hide_stderr.py + <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces blastn -query "$query" #if $db_opts.db_opts_selector == "db": -db "${db_opts.database.fields.path}" +#elif $db_opts.db_opts_selector == "histdb": + -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" #else: -subject "$db_opts.subject" #end if @@ -39,12 +41,21 @@ ## End of advanced options: #end if </command> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <!-- In case the return code has not been set propery check stderr too --> + <regex match="Error:" /> + <regex match="EXception:" /> + </stdio> <inputs> <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> <conditional name="db_opts"> <param name="db_opts_selector" type="select" label="Subject database/sequences"> <option value="db" selected="True">BLAST Database</option> - <option value="file">FASTA file (pairwise e-values)</option> + <option value="histdb">BLAST database from your history</option> + <option value="file">FASTA file from your history (pairwise e-values)</option> </param> <when value="db"> <param name="database" type="select" label="Nucleotide BLAST database"> @@ -54,10 +65,17 @@ <column name="path" index="2"/> </options> </param> + <param name="histdb" type="hidden" value="" /> <param name="subject" type="hidden" value="" /> </when> + <when value="histdb"> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" /> + <param name="subject" type="hidden" value="" /> + </when> <when value="file"> - <param name="database" type="hidden" value="" /> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="hidden" value="" /> <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> </when> </conditional>
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Wed Apr 17 09:44:04 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Wed Apr 17 09:44:25 2013 -0400 @@ -1,15 +1,17 @@ -<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.12"> +<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.14"> <description>Search protein database with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> - <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism> + <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> <version_command>blastp -version</version_command> - <command interpreter="python">hide_stderr.py + <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces blastp -query "$query" #if $db_opts.db_opts_selector == "db": -db "${db_opts.database.fields.path}" +#elif $db_opts.db_opts_selector == "histdb": + -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" #else: -subject "$db_opts.subject" #end if @@ -40,12 +42,21 @@ ## End of advanced options: #end if </command> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <!-- In case the return code has not been set propery check stderr too --> + <regex match="Error:" /> + <regex match="EXception:" /> + </stdio> <inputs> <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> <conditional name="db_opts"> <param name="db_opts_selector" type="select" label="Subject database/sequences"> <option value="db" selected="True">BLAST Database</option> - <option value="file">FASTA file (pairwise e-values)</option> + <option value="histdb">BLAST database from your history</option> + <option value="file">FASTA file from your history (pairwise e-values)</option> </param> <when value="db"> <param name="database" type="select" label="Protein BLAST database"> @@ -55,10 +66,17 @@ <column name="path" index="2"/> </options> </param> + <param name="histdb" type="hidden" value="" /> <param name="subject" type="hidden" value="" /> </when> + <when value="histdb"> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="data" format="blastdbp" label="Protein BLAST database" /> + <param name="subject" type="hidden" value="" /> + </when> <when value="file"> <param name="database" type="hidden" value="" /> + <param name="histdb" type="hidden" value="" /> <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> </when> </conditional>
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Wed Apr 17 09:44:04 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Wed Apr 17 09:44:25 2013 -0400 @@ -1,15 +1,17 @@ -<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.12"> +<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.14"> <description>Search protein database with translated nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> - <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism> + <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> <version_command>blastx -version</version_command> - <command interpreter="python">hide_stderr.py + <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces blastx -query "$query" #if $db_opts.db_opts_selector == "db": -db "${db_opts.database.fields.path}" +#elif $db_opts.db_opts_selector == "histdb": + -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" #else: -subject "$db_opts.subject" #end if @@ -40,12 +42,21 @@ ## End of advanced options: #end if </command> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <!-- In case the return code has not been set propery check stderr too --> + <regex match="Error:" /> + <regex match="EXception:" /> + </stdio> <inputs> <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> <conditional name="db_opts"> <param name="db_opts_selector" type="select" label="Subject database/sequences"> <option value="db" selected="True">BLAST Database</option> - <option value="file">FASTA file (pairwise e-values)</option> + <option value="histdb">BLAST database from your history</option> + <option value="file">FASTA file from your history (pairwise e-values)</option> </param> <when value="db"> <param name="database" type="select" label="Protein BLAST database"> @@ -55,10 +66,17 @@ <column name="path" index="2"/> </options> </param> - <param name="subject" type="hidden" value="" /> + <param name="histdb" type="hidden" value="" /> + <param name="subject" type="hidden" value="" /> + </when> + <when value="histdb"> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="data" format="blastdbp" label="Protein BLAST database" /> + <param name="subject" type="hidden" value="" /> </when> <when value="file"> - <param name="database" type="hidden" value="" /> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="hidden" value="" /> <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> </when> </conditional>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Wed Apr 17 09:44:25 2013 -0400 @@ -0,0 +1,121 @@ +<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.1"> +<description>Make BLAST database</description> +<version_command>makeblastdb -version</version_command> +<command> +makeblastdb -out ${os.path.join($outfile.extra_files_path,'blastdb')} +$parse_seqids +$hash_index +## Single call to -in with multiple filenames space separated with outer quotes +## (presumably any filenames with spaces would be a problem). Note this gives +## some extra spaces, e.g. -in " file1 file2 file3 " but BLAST seems happy: +-in " +#for $i in $in +${i.file} #end for +" +#if $title: +-title "$title" +#else: +##Would default to being based on the cryptic Galaxy filenames, which is unhelpful +-title "BLAST Database" +#end if +-dbtype $dbtype +## #set $sep = '-mask_data ' +## #for $i in $mask_data +## $sep${i.file} +## #set $set = ', ' +## #end for +## #set $sep = '-gi_mask -gi_mask_name ' +## #for $i in $gi_mask +## $sep${i.file} +## #set $set = ', ' +## #end for +## #if $tax.select == 'id': +## -taxid $tax.id +## #else if $tax.select == 'map': +## -taxid_map $tax.map +## #end if +</command> +<stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <!-- In case the return code has not been set propery check stderr too --> + <regex match="Error:" /> + <regex match="EXception:" /> +</stdio> +<inputs> + <param name="dbtype" type="select" display="radio" label="Molecule type of input"> + <option value="prot">protein</option> + <option value="nucl">nucleotide</option> + </param> + <!-- TODO Allow merging of existing BLAST databases (conditional on the database type) + <repeat name="in" title="Blast or Fasta Database" min="1"> + <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="Blast or Fasta database" /> + </repeat> + --> + <repeat name="in" title="FASTA 
file" min="1"> + <param name="file" type="data" format="fasta" /> + </repeat> + <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" /> + <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" /> + <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values." help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> + + <!-- SEQUENCE MASKING OPTIONS --> + <!-- TODO + <repeat name="mask_data" title="Provide one or more files containing masking data"> + <param name="file" type="data" format="asnb" label="File containing masking data" help="As produced by NCBI masking applications (e.g. 
dustmasker, segmasker, windowmasker)" /> + </repeat> + <repeat name="gi_mask" title="Create GI indexed masking data"> + <param name="file" type="data" format="asnb" label="Masking data output file" /> + </repeat> + --> + + <!-- TAXONOMY OPTIONS --> + <!-- TODO + <conditional name="tax"> + <param name="select" type="select" label="Taxonomy options"> + <option value="">Do not assign sequences to Taxonomy IDs</option> + <option value="id">Assign all sequences to one Taxonomy ID</option> + <option value="map">Supply text file mapping sequence IDs to taxonomy IDs</option> + </param> + <when value=""> + </when> + <when value="id"> + <param name="id" type="integer" value="" label="NCBI taxonomy ID" help="Integer >=0" /> + </when> + <when value="map"> + <param name="file" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" /> + </when> + </conditional> + --> +</inputs> +<outputs> + <!-- If we only accepted one FASTA file, we could use its human name here... --> + <data name="outfile" format="data" label="BLAST database from ${on_string}"> + <change_format> + <when input="dbtype" value="nucl" format="blastdbn"/> + <when input="dbtype" value="prot" format="blastdbp"/> + </change_format> + </data> +</outputs> +<requirements> + <requirement type="binary">makeblastdb</requirement> +</requirements> +<help> +**What it does** + +Make BLAST database from one or more FASTA files and/or BLAST databases. + +This is a wrapper for the NCBI BLAST+ tool 'makeblastdb', which is the +replacement for the 'formatdb' tool in the NCBI 'legacy' BLAST suite. + +<!-- +Applying masks to an existing BLAST database will not change the original database; a new database will be created. +For this reason, it's best to apply all masks at once to minimize the number of unnecessary intermediate databases. +--> + +**Documentation** + +http://www.ncbi.nlm.nih.gov/books/NBK1763/ +</help> +</tool>
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Wed Apr 17 09:44:04 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Wed Apr 17 09:44:25 2013 -0400 @@ -1,15 +1,17 @@ -<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.12"> +<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.14"> <description>Search translated nucleotide database with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> - <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism> + <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> <version_command>tblastn -version</version_command> - <command interpreter="python">hide_stderr.py + <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces tblastn -query "$query" #if $db_opts.db_opts_selector == "db": -db "${db_opts.database.fields.path}" +#elif $db_opts.db_opts_selector == "histdb": + -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" #else: -subject "$db_opts.subject" #end if @@ -40,12 +42,21 @@ ## End of advanced options: #end if </command> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <!-- In case the return code has not been set propery check stderr too --> + <regex match="Error:" /> + <regex match="EXception:" /> + </stdio> <inputs> <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> <conditional name="db_opts"> <param name="db_opts_selector" type="select" label="Subject database/sequences"> <option value="db" selected="True">BLAST Database</option> - <option value="file">FASTA file (pairwise e-values)</option> + <option value="histdb">BLAST database from your history</option> + <option value="file">FASTA file from your history (pairwise e-values)</option> </param> <when value="db"> <param name="database" type="select" label="Nucleotide BLAST database"> @@ -55,10 +66,17 @@ <column name="path" index="2"/> </options> </param> - <param name="subject" type="hidden" value="" /> + <param name="histdb" type="hidden" value="" /> + <param name="subject" type="hidden" value="" /> + </when> + <when value="histdb"> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" /> + <param name="subject" type="hidden" value="" /> </when> <when value="file"> - <param name="database" type="hidden" value="" /> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="hidden" value="" /> <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> </when> </conditional>
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Wed Apr 17 09:44:04 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Wed Apr 17 09:44:25 2013 -0400 @@ -1,15 +1,17 @@ -<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.12"> +<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.14"> <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> - <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism> + <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> <version_command>tblastx -version</version_command> - <command interpreter="python">hide_stderr.py + <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces tblastx -query "$query" #if $db_opts.db_opts_selector == "db": -db "${db_opts.database.fields.path}" +#elif $db_opts.db_opts_selector == "histdb": + -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" #else: -subject "$db_opts.subject" #end if @@ -40,12 +42,21 @@ ## End of advanced options: #end if </command> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <!-- In case the return code has not been set propery check stderr too --> + <regex match="Error:" /> + <regex match="EXception:" /> + </stdio> <inputs> <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> <conditional name="db_opts"> <param name="db_opts_selector" type="select" label="Subject database/sequences"> <option value="db" selected="True">BLAST Database</option> - <option value="file">FASTA file (pairwise e-values)</option> + <option value="histdb">BLAST database from your history</option> + <option value="file">FASTA file from your history (pairwise e-values)</option> </param> <when value="db"> <param name="database" type="select" label="Nucleotide BLAST database"> @@ -55,10 +66,17 @@ <column name="path" index="2"/> </options> </param> - <param name="subject" type="hidden" value="" /> + <param name="histdb" type="hidden" value="" /> + <param name="subject" type="hidden" value="" /> + </when> + <when value="histdb"> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" /> + <param name="subject" type="hidden" value="" /> </when> <when value="file"> - <param name="database" type="hidden" value="" /> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="hidden" value="" /> <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> </when> </conditional>