Previous changeset 31:f2478dc77ccb (2013-10-10) Next changeset 33:5402f9b0d508 (2013-11-28) |
Commit message:
Uploaded v0.0.22a, more macros, $GALAXY_SLOTS, more descriptive output names, test makeblastdb |
modified:
tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/blastxml_to_tabular.py tools/ncbi_blast_plus/blastxml_to_tabular.xml tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml tools/ncbi_blast_plus/ncbi_macros.xml tools/ncbi_blast_plus/ncbi_makeblastdb.xml tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml tools/ncbi_blast_plus/repository_dependencies.xml |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/README.rst --- a/tools/ncbi_blast_plus/README.rst Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/README.rst Mon Nov 25 10:58:46 2013 -0500 |
b |
@@ -127,6 +127,10 @@ defined in updated blast_datatypes on Galaxy ToolShed. - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26 - Now depends on package_blast_plus_2_2_27 in ToolShed +v0.0.22 - More use macros to simplify the wrappers + - Set number of threads via $GALAXY_SLOTS environment variable + - More descriptive default output names + - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18) ======= ====================================================================== |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/blastxml_to_tabular.py --- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Mon Nov 25 10:58:46 2013 -0500 |
[ |
@@ -63,7 +63,7 @@ import re if "-v" in sys.argv or "--version" in sys.argv: - print "v0.0.12" + print "v0.0.22" sys.exit(0) if sys.version_info[:2] >= ( 2, 5 ): @@ -228,7 +228,10 @@ ] if extended: - sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(">")) + try: + sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(" >")) + except IndexError as e: + stop_err("Problem splitting multuple hits?\n%r\n--> %s" % (hit_def, e)) #print hit_def, "-->", sallseqid positive = hsp.findtext("Hsp_positive") ppos = "%0.2f" % (100*float(positive)/float(length)) |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/blastxml_to_tabular.xml --- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
@@ -1,4 +1,4 @@ -<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.11"> +<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.22"> <description>Convert BLAST XML output to tabular</description> <version_command interpreter="python">blastxml_to_tabular.py --version</version_command> <command interpreter="python"> @@ -17,7 +17,7 @@ </param> </inputs> <outputs> - <data name="tabular_file" format="tabular" label="BLAST results as tabular" /> + <data name="tabular_file" format="tabular" label="$blastxml_file.display_name (as tabular)" /> </outputs> <requirements> </requirements> |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml --- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
@@ -1,13 +1,10 @@ -<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.21"> +<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.22"> <description>Show BLAST database information from blastdbcmd</description> - <requirements> - <requirement type="binary">blastdbcmd</requirement> - <requirement type="package" version="2.2.27">blast+</requirement> - </requirements> - <version_command>blastdbcmd -version</version_command> <macros> + <token name="@BINARY@">blastdbcmd</token> <import>ncbi_macros.xml</import> </macros> + <expand macro="requirements" /> <command> blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out "$info" </command> @@ -33,17 +30,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. -http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ </help> </tool> |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
@@ -1,13 +1,10 @@ -<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.21"> +<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.22"> <description>Extract sequence(s) from BLAST database</description> - <requirements> - <requirement type="binary">blastdbcmd</requirement> - <requirement type="package" version="2.2.27">blast+</requirement> - </requirements> - <version_command>blastdbcmd -version</version_command> <macros> + <token name="@BINARY@">blastdbcmd</token> <import>ncbi_macros.xml</import> </macros> + <expand macro="requirements" /> <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces @@ -105,17 +102,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. -http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ </help> </tool> |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
b'@@ -1,53 +1,29 @@\n-<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.21">\n+<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.22">\n <description>Search nucleotide database with nucleotide query sequence(s)</description>\n <!-- If job splitting is enabled, break up the query file into parts -->\n- <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>\n- <requirements>\n- <requirement type="binary">blastn</requirement>\n- <requirement type="package" version="2.2.27">blast+</requirement>\n- </requirements>\n- <version_command>blastn -version</version_command>\n+ <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>\n <macros>\n+ <token name="@BINARY@">blastn</token>\n <import>ncbi_macros.xml</import>\n </macros>\n+ <expand macro="requirements" />\n <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n blastn\n -query "$query"\n-#if $db_opts.db_opts_selector == "db":\n- -db "${db_opts.database.fields.path}"\n-#elif $db_opts.db_opts_selector == "histdb":\n- -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n-#else:\n- -subject "$db_opts.subject"\n-#end if\n+@BLAST_DB_SUBJECT@\n -task $blast_type\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n- -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n- -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n-$adv_opts.filter_query\n $adv_opts.strand\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n+@ADVANCED_OPTIONS@\n #if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ):\n -perc_identity $adv_opts.identity_cutoff\n #end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n $adv_opts.ungapped\n-$adv_opts.parse_deflines\n ## End of advanced options:\n #end if\n </command>\n@@ -69,38 +45,25 @@\n <option value="vecscreen">vecscreen</option>\n -->\n </param>\n- <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />\n+ <expand macro="input_evalue" />\n <expand macro="input_out_format" />\n- <conditional name="adv_opts">\n- <param name="adv_opts_selector" type="select" label="Advanced Options">\n- <option value="basic" selected="True">Hide Advanced Options</option>\n- <option value="advanced">Show Advanced Options</option>\n+ <expand 
macro="advanced_options">\n+ <!-- Could use a select (yes, no, other) where other allows setting \'level window linker\' -->\n+ <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" />\n+ <expand macro="input_strand" />\n+ <expand macro="input_max_hits" />\n+ <param name="identity_cutoff" type="float" min="0" max="100" value="0" label="Percent identity cutoff (-perc_identity)" help="Use zero for no cutoff" />\n+ \n+ <!-- I\'d like word_size to be optional, with minimum 4 for blastn -->\n+ <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minim'..b"not* advised because it is slower (only one\n-CPU is used), but more importantly gives e-values for pairwise\n-searches (very small e-values which will look overly signficiant).\n-In most cases you should instead turn the other FASTA file into a\n-database first using *makeblastdb* and search against that.\n+@FASTA_WARNING@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. 
The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n- 1 qseqid Query Seq-id (ID of your sequence)\n- 2 sseqid Subject Seq-id (ID of the database hit)\n- 3 pident Percentage of identical matches\n- 4 length Alignment length\n- 5 mismatch Number of mismatches\n- 6 gapopen Number of gap openings\n- 7 qstart Start of alignment in query\n- 8 qend End of alignment in query\n- 9 sstart Start of alignment in subject (database hit)\n- 10 send End of alignment in subject (database hit)\n- 11 evalue Expectation value (E-value)\n- 12 bitscore Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. 
Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name Description\n------- ------------- -------------------------------------------\n- 13 sallseqid All subject Seq-id(s), separated by a ';'\n- 14 score Raw score\n- 15 nident Number of identical matches\n- 16 positive Number of positive-scoring matches\n- 17 gaps Total number of gaps\n- 18 ppos Percentage of positive-scoring matches\n- 19 qframe Query frame\n- 20 sframe Subject frame\n- 21 qseq Aligned part of query sequence\n- 22 sseq Aligned part of subject sequence\n- 23 qlen Query sequence length\n- 24 slen Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -204,17 +102,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 
15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n </help>\n </tool>\n" |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
b'@@ -1,51 +1,27 @@\n-<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.21">\n+<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.22">\n <description>Search protein database with protein query sequence(s)</description>\n <!-- If job splitting is enabled, break up the query file into parts -->\n- <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>\n- <requirements>\n- <requirement type="binary">blastp</requirement>\n- <requirement type="package" version="2.2.27">blast+</requirement>\n- </requirements>\n- <version_command>blastp -version</version_command>\n+ <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />\n <macros>\n+ <token name="@BINARY@">blastp</token>\n <import>ncbi_macros.xml</import>\n </macros>\n+ <expand macro="requirements" />\n <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n blastp\n -query "$query"\n-#if $db_opts.db_opts_selector == "db":\n- -db "${db_opts.database.fields.path}"\n-#elif $db_opts.db_opts_selector == "histdb":\n- -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n-#else:\n- -subject "$db_opts.subject"\n-#end if\n+@BLAST_DB_SUBJECT@\n -task $blast_type\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n- -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n- -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n-$adv_opts.filter_query\n -matrix $adv_opts.matrix\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n+@ADVANCED_OPTIONS@\n ##Ungapped disabled for now - see comments below\n ##$adv_opts.ungapped\n-$adv_opts.parse_deflines\n ## End of advanced options:\n #end if\n </command>\n@@ -61,40 +37,25 @@\n <option value="blastp">blastp</option>\n <option value="blastp-short">blastp-short</option>\n </param>\n- <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />\n+ <expand macro="input_evalue" />\n <expand macro="input_out_format" />\n- <conditional name="adv_opts">\n- <param name="adv_opts_selector" type="select" label="Advanced Options">\n- <option value="basic" selected="True">Hide Advanced Options</option>\n- <option value="advanced">Show Advanced Options</option>\n- </param>\n- <when value="basic" />\n- <when value="advanced">\n- <!-- 
Could use a select (yes, no, other) where other allows setting \'window locut hicut\' -->\n- <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />\n-\n- <expand macro="input_scoring_matrix" />\n-\n- <!-- Why doesn\'t optional override a validator? I want to accept an empty string OR a non-negative integer -->\n- <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">\n- <validator type="in_range" min="0" />\n- </param>\n- <!-- I\'d like word_size to be optional, with minimum 2 for blastp -->\n- '..b"not* advised because it is slower (only one\n-CPU is used), but more importantly gives e-values for pairwise\n-searches (very small e-values which will look overly signficiant).\n-In most cases you should instead turn the other FASTA file into a\n-database first using *makeblastdb* and search against that.\n+@FASTA_WARNING@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. 
The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n- 1 qseqid Query Seq-id (ID of your sequence)\n- 2 sseqid Subject Seq-id (ID of the database hit)\n- 3 pident Percentage of identical matches\n- 4 length Alignment length\n- 5 mismatch Number of mismatches\n- 6 gapopen Number of gap openings\n- 7 qstart Start of alignment in query\n- 8 qend End of alignment in query\n- 9 sstart Start of alignment in subject (database hit)\n- 10 send End of alignment in subject (database hit)\n- 11 evalue Expectation value (E-value)\n- 12 bitscore Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. 
Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name Description\n------- ------------- -------------------------------------------\n- 13 sallseqid All subject Seq-id(s), separated by a ';'\n- 14 score Raw score\n- 15 nident Number of identical matches\n- 16 positive Number of positive-scoring matches\n- 17 gaps Total number of gaps\n- 18 ppos Percentage of positive-scoring matches\n- 19 qframe Query frame\n- 20 sframe Subject frame\n- 21 qseq Aligned part of query sequence\n- 22 sseq Aligned part of subject sequence\n- 23 qlen Query sequence length\n- 24 slen Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -246,17 +142,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 
15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n </help>\n </tool>\n" |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
b'@@ -1,51 +1,27 @@\n-<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.21">\n+<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.22">\n <description>Search protein database with translated nucleotide query sequence(s)</description>\n <!-- If job splitting is enabled, break up the query file into parts -->\n- <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>\n- <requirements>\n- <requirement type="binary">blastx</requirement>\n- <requirement type="package" version="2.2.27">blast+</requirement>\n- </requirements>\n- <version_command>blastx -version</version_command>\n+ <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>\n <macros>\n+ <token name="@BINARY@">blastx</token>\n <import>ncbi_macros.xml</import>\n </macros>\n+ <expand macro="requirements" />\n <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n blastx\n -query "$query"\n-#if $db_opts.db_opts_selector == "db":\n- -db "${db_opts.database.fields.path}"\n-#elif $db_opts.db_opts_selector == "histdb":\n- -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n-#else:\n- -subject "$db_opts.subject"\n-#end if\n+@BLAST_DB_SUBJECT@\n -query_gencode $query_gencode\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n- -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n- -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n-$adv_opts.filter_query\n $adv_opts.strand\n -matrix $adv_opts.matrix\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n+@ADVANCED_OPTIONS@\n $adv_opts.ungapped\n-$adv_opts.parse_deflines\n ## End of advanced options:\n #end if\n </command>\n@@ -57,42 +33,22 @@\n \n <expand macro="input_conditional_protein_db" />\n <expand macro="input_query_gencode" />\n- <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />\n+ <expand macro="input_evalue" />\n \n <expand macro="input_out_format" />\n-\n- <conditional name="adv_opts">\n- <param name="adv_opts_selector" type="select" label="Advanced Options">\n- <option value="basic" selected="True">Hide Advanced Options</option>\n- <option value="advanced">Show Advanced Options</option>\n- </param>\n- <when value="basic" />\n- <when value="advanced">\n- <!-- Could use a select (yes, 
no, other) where other allows setting \'window locut hicut\' -->\n- <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />\n- <param name="strand" type="select" label="Query strand(s) to search against database/subject">\n- <option value="-strand both">Both</option>\n- <option value="-strand plus">Plus (forward)</option>\n- <option value="-strand minus">Minus (reverse complement)</option>\n- </param>\n-\n- <expand macro="input_scoring_matrix" />\n-\n- <!-- Why doesn\'t optional override a validator? I want to accept an empty strin'..b"ot* advised because it is slower (only one\n-CPU is used), but more importantly gives e-values for pairwise\n-searches (very small e-values which will look overly signficiant).\n-In most cases you should instead turn the other FASTA file into a\n-database first using *makeblastdb* and search against that.\n+@FASTA_WARNING@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n- 1 qseqid Query Seq-id (ID of your sequence)\n- 2 sseqid Subject Seq-id (ID of the database hit)\n- 3 pident Percentage of identical matches\n- 4 length Alignment length\n- 5 mismatch Number of mismatches\n- 6 gapopen Number of gap openings\n- 7 qstart Start of alignment in query\n- 8 qend End of alignment in query\n- 9 sstart Start of alignment in subject (database hit)\n- 10 send End of alignment in subject (database hit)\n- 11 evalue Expectation value (E-value)\n- 12 bitscore Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. 
Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name Description\n------- ------------- -------------------------------------------\n- 13 sallseqid All subject Seq-id(s), separated by a ';'\n- 14 score Raw score\n- 15 nident Number of identical matches\n- 16 positive Number of positive-scoring matches\n- 17 gaps Total number of gaps\n- 18 ppos Percentage of positive-scoring matches\n- 19 qframe Query frame\n- 20 sframe Subject frame\n- 21 qseq Aligned part of query sequence\n- 22 sseq Aligned part of subject sequence\n- 23 qlen Query sequence length\n- 24 slen Subject sequence length \n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -215,17 +106,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. 
Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n </help>\n </tool>\n" |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
@@ -1,4 +1,4 @@ -<tool id="ncbi_dustmasker_wrapper" name="NCBI dustmasker" version="0.0.21"> +<tool id="ncbi_dustmasker_wrapper" name="NCBI dustmasker" version="0.0.22"> <!-- dustmasker wrapper from Edward Kirton and Nicola Soranzo --> <description>masks low complexity regions</description> <requirements> @@ -99,17 +99,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers (a more specific paper covering this wrapper is planned): -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. -http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ </help> </tool> |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_macros.xml --- a/tools/ncbi_blast_plus/ncbi_macros.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_macros.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
b'@@ -1,6 +1,5 @@\n <macros>\n- <macro name="output_change_format">\n-\n+ <xml name="output_change_format">\n <change_format>\n <when input="out_format" value="0" format="txt"/>\n <when input="out_format" value="0 -html" format="html"/>\n@@ -10,9 +9,8 @@\n <when input="out_format" value="4 -html" format="html"/>\n <when input="out_format" value="5" format="blastxml"/>\n </change_format>\n-\n- </macro>\n- <macro name="input_out_format">\n+ </xml>\n+ <xml name="input_out_format">\n <param name="out_format" type="select" label="Output format">\n <option value="6">Tabular (standard 12 columns)</option>\n <option value="ext" selected="True">Tabular (extended 24 columns)</option>\n@@ -27,8 +25,8 @@\n <option value="-outfmt 11">BLAST archive format (ASN.1)</option>\n -->\n </param>\n- </macro>\n- <macro name="input_scoring_matrix">\n+ </xml>\n+ <xml name="input_scoring_matrix">\n <param name="matrix" type="select" label="Scoring matrix">\n <option value="BLOSUM90">BLOSUM90</option>\n <option value="BLOSUM80">BLOSUM80</option>\n@@ -39,8 +37,8 @@\n <option value="PAM70">PAM70</option>\n <option value="PAM30">PAM30</option>\n </param>\n- </macro>\n- <macro name="stdio">\n+ </xml>\n+ <xml name="stdio">\n <stdio>\n <!-- Anything other than zero is an error -->\n <exit_code range="1:" />\n@@ -49,8 +47,8 @@\n <regex match="Error:" />\n <regex match="Exception:" />\n </stdio>\n- </macro>\n- <macro name="input_query_gencode">\n+ </xml>\n+ <xml name="input_query_gencode">\n <param name="query_gencode" type="select" label="Query genetic code">\n <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details -->\n <option value="1" select="True">1. Standard</option>\n@@ -72,9 +70,8 @@\n <option value="23">23. Thraustochytrium Mitochondrial Code</option>\n <option value="24">24. 
Pterobranchia mitochondrial code</option>\n </param>\n- </macro>\n-\n- <macro name="input_db_gencode">\n+ </xml>\n+ <xml name="input_db_gencode">\n <param name="db_gencode" type="select" label="Database/subject genetic code">\n <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details -->\n <option value="1" select="True">1. Standard</option>\n@@ -96,9 +93,8 @@\n <option value="23">23. Thraustochytrium Mitochondrial Code</option>\n <option value="24">24. Pterobranchia mitochondrial code</option>\n </param>\n- </macro>\n-\n- <macro name="input_conditional_nucleotide_db">\n+ </xml>\n+ <xml name="input_conditional_nucleotide_db">\n <conditional name="db_opts">\n <param name="db_opts_selector" type="select" label="Subject database/sequences">\n <option value="db" selected="True">Locally installed BLAST database</option>\n@@ -127,9 +123,8 @@\n <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> \n </when>\n </conditional>\n- </macro>\n-\n- <macro name="input_conditional_protein_db">\n+ </xml>\n+ <xml name="input_conditional_protein_db">\n <conditional name="db_opts">\n <param name="db_opts_selector" type="select" label="Subject database/sequences">\n <option value="db" selected="True">Locally installed BLAST database</option>\n@@ -158,9 +153,8 @@\n <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> \n </when>\n </conditional>\n- </macro>\n-\n- <macro name="input_conditional_pssm">\n+ </xml>\n+ <xml name="input_conditional_pssm">\n <conditional name="db_opts">\n <param name="db_opts_selector" type="select" label="Prot'..b' \n+Christiam Camacho et al. (2009).\n+BLAST+: architecture and applications.\n+BMC Bioinformatics. 
15;10:421.\n+http://dx.doi.org/10.1186/1471-2105-10-421\n \n+This wrapper is available to install into other Galaxy Instances via the Galaxy\n+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+ </token>\n+ <token name="@OUTPUT_FORMAT@">**Output format**\n+\n+Because Galaxy focuses on processing tabular data, the default output of this\n+tool is tabular. The standard BLAST+ tabular output contains 12 columns:\n+\n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+ 1 qseqid Query Seq-id (ID of your sequence)\n+ 2 sseqid Subject Seq-id (ID of the database hit)\n+ 3 pident Percentage of identical matches\n+ 4 length Alignment length\n+ 5 mismatch Number of mismatches\n+ 6 gapopen Number of gap openings\n+ 7 qstart Start of alignment in query\n+ 8 qend End of alignment in query\n+ 9 sstart Start of alignment in subject (database hit)\n+ 10 send End of alignment in subject (database hit)\n+ 11 evalue Expectation value (E-value)\n+ 12 bitscore Bit score\n+====== ========= ============================================\n+\n+The BLAST+ tools can optionally output additional columns of information,\n+but this takes longer to calculate. Most (but not all) of these columns are\n+included by selecting the extended tabular output. The extra columns are\n+included *after* the standard 12 columns. This is so that you can write\n+workflow filtering steps that accept either the 12 or 24 column tabular\n+BLAST output. 
Galaxy now uses this extended 24 column output by default.\n+\n+====== ============= ===========================================\n+Column NCBI name Description\n+------ ------------- -------------------------------------------\n+ 13 sallseqid All subject Seq-id(s), separated by a \';\'\n+ 14 score Raw score\n+ 15 nident Number of identical matches\n+ 16 positive Number of positive-scoring matches\n+ 17 gaps Total number of gaps\n+ 18 ppos Percentage of positive-scoring matches\n+ 19 qframe Query frame\n+ 20 sframe Subject frame\n+ 21 qseq Aligned part of query sequence\n+ 22 sseq Aligned part of subject sequence\n+ 23 qlen Query sequence length\n+ 24 slen Subject sequence length\n+====== ============= ===========================================\n+\n+The third option is BLAST XML output, which is designed to be parsed by\n+another program, and is understood by some Galaxy tools.\n+\n+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+ </token>\n+ <token name="@FASTA_WARNING@">.. class:: warningmark\n+\n+You can also search against a FASTA file of subject (target)\n+sequences. This is *not* advised because it is slower (only one\n+CPU is used), but more importantly gives e-values for pairwise\n+searches (very small e-values which will look overly signficiant).\n+In most cases you should instead turn the other FASTA file into a\n+database first using *makeblastdb* and search against that.\n+ </token>\n+ <token name="@SEARCH_TIME_WARNING@">.. 
class:: warningmark\n+\n+**Note**. Database searches may take a substantial amount of time.\n+For large input datasets it is advisable to allow overnight processing. \n+\n+-----\n+ </token>\n </macros>\n' |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_makeblastdb.xml --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
@@ -1,13 +1,10 @@ -<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.21"> +<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.22"> <description>Make BLAST database</description> - <requirements> - <requirement type="binary">makeblastdb</requirement> - <requirement type="package" version="2.2.27">blast+</requirement> - </requirements> - <version_command>makeblastdb -version</version_command> <macros> + <token name="@BINARY@">makeblastdb</token> <import>ncbi_macros.xml</import> </macros> + <expand macro="requirements" /> <command> makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}" $parse_seqids @@ -46,34 +43,36 @@ ## #else if $tax.select == 'map': ## -taxid_map $tax.map ## #end if +## -------------------------------------------------------------------- +## Capture the stdout log information to the primary file (plain text): +>> "$outfile" </command> - <expand macro="stdio" /> - <inputs> <param name="dbtype" type="select" display="radio" label="Molecule type of input"> <option value="prot">protein</option> <option value="nucl">nucleotide</option> </param> <!-- TODO Allow merging of existing BLAST databases (conditional on the database type) + NOTE Double check the new database would be self contained first <repeat name="in" title="BLAST or FASTA Database" min="1"> <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="BLAST or FASTA database" /> </repeat> --> + <!-- TODO Switch this to using <param ... multiple="true" /> instead of <repeat> block? 
--> <repeat name="in" title="FASTA file" min="1"> <param name="file" type="data" format="fasta" /> </repeat> <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" /> <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" /> - <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values." help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> - + <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> <!-- SEQUENCE MASKING OPTIONS --> <repeat name="mask_data" title="Masking data file"> - <param name="file" type="data" format="maskinfo-asn1,maskinfo-asn1-binary" label="ASN.1 file containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" /> + <param name="mask_data_file" type="data" format="maskinfo-asn1,maskinfo-asn1-binary" label="ASN.1 file containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" /> </repeat> <!-- TODO <repeat name="gi_mask" title="Create GI indexed masking data"> - <param name="file" type="data" format="asnb" label="Masking data output file" /> + <param name="gi_mask_file" type="data" format="asnb" label="Masking data output file" /> </repeat> --> @@ -106,6 +105,25 @@ </data> </outputs> <tests> + <!-- Note the (two line) PIN file is not reproducible run to run. 
+ --> + <test> + <param name="dbtype" value="prot" /> + <param name="file" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="title" value="Just 4 human proteins" /> + <param name="parse_seqids" value="" /> + <param name="hash_index" value="true" /> + <output name="out_file" file="four_human_proteins.fasta.log" ftype="blastdbp" lines_diff="6"> + <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" /> + <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" /> + <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" /> + <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" /> + <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" /> + <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" /> + <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" /> + <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" /> + </output> + </test> </tests> <help> **What it does** @@ -129,17 +147,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. -http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ </help> </tool> |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
b'@@ -1,15 +1,12 @@\n-<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.21">\n+<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.22">\n <description>Search protein domain database (PSSMs) with protein query sequence(s)</description>\n <!-- If job splitting is enabled, break up the query file into parts -->\n- <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>\n- <requirements>\n- <requirement type="binary">rpsblast</requirement>\n- <requirement type="package" version="2.2.27">blast+</requirement>\n- </requirements>\n- <version_command>rpsblast -version</version_command>\n+ <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />\n <macros>\n+ <token name="@BINARY@">deltablast</token>\n <import>ncbi_macros.xml</import>\n </macros>\n+ <expand macro="requirements" />\n <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n@@ -21,25 +18,10 @@\n -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n #end if\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n- -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n- -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n-$adv_opts.filter_query\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n-$adv_opts.parse_deflines\n+@ADVANCED_OPTIONS@\n ## End of advanced options:\n #end if\n </command>\n@@ -51,30 +33,17 @@\n \n <expand macro="input_conditional_pssm" />\n \n- <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />\n+ <expand macro="input_evalue" />\n \n <expand macro="input_out_format" />\n \n- <conditional name="adv_opts">\n- <param name="adv_opts_selector" type="select" label="Advanced Options">\n- <option value="basic" selected="True">Hide Advanced Options</option>\n- <option value="advanced">Show Advanced Options</option>\n- </param>\n- <when value="basic" />\n- <when value="advanced">\n- <!-- Could use a select (yes, no, other) where other allows setting \'window locut hicut\' -->\n- <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />\n- <!-- Why doesn\'t optional override a validator? 
I want to accept an empty string OR a non-negative integer -->\n- <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">\n- <validator type="in_range" min="0" />\n- </param>\n- <!-- I\'d like word_size to be optional, with minimum 2 for rpsblast -->\n- <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">\n- <validator type="in_range" min="0" />\n- </param>\n- <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help'..b'sblast on ${on_string}">\n@@ -85,12 +54,7 @@\n </outputs>\n <help>\n \n-.. class:: warningmark\n-\n-**Note**. Database searches may take a substantial amount of time.\n-For large input datasets it is advisable to allow overnight processing. \n-\n------\n+@SEARCH_TIME_WARNING@\n \n **What it does**\n \n@@ -129,60 +93,7 @@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. 
The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n- 1 qseqid Query Seq-id (ID of your sequence)\n- 2 sseqid Subject Seq-id (ID of the database hit)\n- 3 pident Percentage of identical matches\n- 4 length Alignment length\n- 5 mismatch Number of mismatches\n- 6 gapopen Number of gap openings\n- 7 qstart Start of alignment in query\n- 8 qend End of alignment in query\n- 9 sstart Start of alignment in subject (database hit)\n- 10 send End of alignment in subject (database hit)\n- 11 evalue Expectation value (E-value)\n- 12 bitscore Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. 
Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name Description\n------- ------------- -------------------------------------------\n- 13 sallseqid All subject Seq-id(s), separated by a \';\'\n- 14 score Raw score\n- 15 nident Number of identical matches\n- 16 positive Number of positive-scoring matches\n- 17 gaps Total number of gaps\n- 18 ppos Percentage of positive-scoring matches\n- 19 qframe Query frame\n- 20 sframe Subject frame\n- 21 qseq Aligned part of query sequence\n- 22 sseq Aligned part of subject sequence\n- 23 qlen Query sequence length\n- 24 slen Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -191,17 +102,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 
15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n </help>\n </tool>\n' |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
b'@@ -1,15 +1,12 @@\n-<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.0.21">\n+<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.0.22">\n <description>Search protein domain database (PSSMs) with translated nucleotide query sequence(s)</description>\n <!-- If job splitting is enabled, break up the query file into parts -->\n- <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>\n- <requirements>\n- <requirement type="binary">rpstblastn</requirement>\n- <requirement type="package" version="2.2.27">blast+</requirement>\n- </requirements>\n- <version_command>rpstblastn -version</version_command>\n+ <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>\n <macros>\n+ <token name="@BINARY@">rpstblastn</token>\n <import>ncbi_macros.xml</import>\n </macros>\n+ <expand macro="requirements" />\n <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces\n@@ -21,26 +18,11 @@\n -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n #end if\n -evalue $evalue_cutoff\n--out "$output1"\n-## Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n- -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n- -outfmt $out_format\n-#end if\n+@BLAST_OUTPUT@\n ## rpstblastn does not support multiple threads up to release 2.2.27+. 
Added in BLAST 2.2.28+.\n ##-num_threads 8\n #if $adv_opts.adv_opts_selector=="advanced":\n-$adv_opts.filter_query\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n-$adv_opts.parse_deflines\n+@ADVANCED_OPTIONS@\n ## End of advanced options:\n #end if\n </command>\n@@ -50,30 +32,18 @@\n \n <expand macro="input_conditional_pssm" />\n \n- <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />\n+ <expand macro="input_evalue" />\n \n <expand macro="input_out_format" />\n \n- <conditional name="adv_opts">\n- <param name="adv_opts_selector" type="select" label="Advanced Options">\n- <option value="basic" selected="True">Hide Advanced Options</option>\n- <option value="advanced">Show Advanced Options</option>\n- </param>\n- <when value="basic" />\n- <when value="advanced">\n- <!-- Could use a select (yes, no, other) where other allows setting \'window locut hicut\' -->\n- <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />\n- <!-- Why doesn\'t optional override a validator? 
I want to accept an empty string OR a non-negative integer -->\n- <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">\n- <validator type="in_range" min="0" />\n- </param>\n- <!-- I\'d like word_size to be optional, with minimum 2 for rpsblast -->\n- <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">\n- <validator type="in_range" min="0" />\n- </param>\n- <param name="parse_deflines" type="bool'..b'blastn on ${on_string}">\n@@ -82,12 +52,7 @@\n </outputs>\n <help>\n \n-.. class:: warningmark\n-\n-**Note**. Database searches may take a substantial amount of time.\n-For large input datasets it is advisable to allow overnight processing. \n-\n------\n+@SEARCH_TIME_WARNING@\n \n **What it does**\n \n@@ -126,60 +91,7 @@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n- 1 qseqid Query Seq-id (ID of your sequence)\n- 2 sseqid Subject Seq-id (ID of the database hit)\n- 3 pident Percentage of identical matches\n- 4 length Alignment length\n- 5 mismatch Number of mismatches\n- 6 gapopen Number of gap openings\n- 7 qstart Start of alignment in query\n- 8 qend End of alignment in query\n- 9 sstart Start of alignment in subject (database hit)\n- 10 send End of alignment in subject (database hit)\n- 11 evalue Expectation value (E-value)\n- 12 bitscore Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. 
Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name Description\n------- ------------- -------------------------------------------\n- 13 sallseqid All subject Seq-id(s), separated by a \';\'\n- 14 score Raw score\n- 15 nident Number of identical matches\n- 16 positive Number of positive-scoring matches\n- 17 gaps Total number of gaps\n- 18 ppos Percentage of positive-scoring matches\n- 19 qframe Query frame\n- 20 sframe Subject frame\n- 21 qseq Aligned part of query sequence\n- 22 sseq Aligned part of subject sequence\n- 23 qlen Query sequence length\n- 24 slen Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -188,17 +100,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. 
Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n </help>\n </tool>\n' |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
b'@@ -1,51 +1,27 @@\n-<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.21">\n+<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.22">\n <description>Search translated nucleotide database with protein query sequence(s)</description>\n <!-- If job splitting is enabled, break up the query file into parts -->\n- <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>\n- <requirements>\n- <requirement type="binary">tblastn</requirement>\n- <requirement type="package" version="2.2.27">blast+</requirement>\n- </requirements>\n- <version_command>tblastn -version</version_command>\n+ <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>\n <macros>\n+ <token name="@BINARY@">tblastn</token>\n <import>ncbi_macros.xml</import>\n </macros>\n+ <expand macro="requirements" />\n <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n tblastn\n -query "$query"\n-#if $db_opts.db_opts_selector == "db":\n- -db "${db_opts.database.fields.path}"\n-#elif $db_opts.db_opts_selector == "histdb":\n- -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n-#else:\n- -subject "$db_opts.subject"\n-#end if\n+@BLAST_DB_SUBJECT@\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n- -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n- -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n -db_gencode $adv_opts.db_gencode\n-$adv_opts.filter_query\n -matrix $adv_opts.matrix\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n+@ADVANCED_OPTIONS@\n ##Ungapped disabled for now - see comments below\n ##$adv_opts.ungapped\n-$adv_opts.parse_deflines\n ## End of advanced options:\n #end if\n </command>\n@@ -57,43 +33,26 @@\n \n <expand macro="input_conditional_nucleotide_db" />\n <expand macro="input_out_format" />\n- <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />\n-\n- <conditional name="adv_opts">\n- <param name="adv_opts_selector" type="select" label="Advanced Options">\n- <option value="basic" selected="True">Hide Advanced Options</option>\n- <option value="advanced">Show Advanced Options</option>\n- </param>\n- <when value="basic" />\n- <when value="advanced">\n-\n- <expand macro="input_db_gencode" />\n-\n- <!-- Could use a select 
(yes, no, other) where other allows setting \'window locut hicut\' -->\n- <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />\n-\n- <expand macro="input_scoring_matrix" />\n+ <expand macro="input_evalue" />\n+ <expand macro="advanced_options">\n+ <expand macro="input_db_gencode" />\n \n- <!-- Why doesn\'t optional override a validator? I want to accept an empty string OR a non-negative integer -->\n- <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">\n- <validator type="in_range" min="0'..b"not* advised because it is slower (only one\n-CPU is used), but more importantly gives e-values for pairwise\n-searches (very small e-values which will look overly signficiant).\n-In most cases you should instead turn the other FASTA file into a\n-database first using *makeblastdb* and search against that.\n+@FASTA_WARNING@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. 
The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n- 1 qseqid Query Seq-id (ID of your sequence)\n- 2 sseqid Subject Seq-id (ID of the database hit)\n- 3 pident Percentage of identical matches\n- 4 length Alignment length\n- 5 mismatch Number of mismatches\n- 6 gapopen Number of gap openings\n- 7 qstart Start of alignment in query\n- 8 qend End of alignment in query\n- 9 sstart Start of alignment in subject (database hit)\n- 10 send End of alignment in subject (database hit)\n- 11 evalue Expectation value (E-value)\n- 12 bitscore Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. 
Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name Description\n------- ------------- -------------------------------------------\n- 13 sallseqid All subject Seq-id(s), separated by a ';'\n- 14 score Raw score\n- 15 nident Number of identical matches\n- 16 positive Number of positive-scoring matches\n- 17 gaps Total number of gaps\n- 18 ppos Percentage of positive-scoring matches\n- 19 qframe Query frame\n- 20 sframe Subject frame\n- 21 qseq Aligned part of query sequence\n- 22 sseq Aligned part of subject sequence\n- 23 qlen Query sequence length\n- 24 slen Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -262,17 +156,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 
15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n </help>\n </tool>\n" |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
b'@@ -1,51 +1,29 @@\n-<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.21">\n+<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.22">\n <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>\n <!-- If job splitting is enabled, break up the query file into parts -->\n- <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>\n- <requirements>\n- <requirement type="binary">tblastx</requirement>\n- <requirement type="package" version="2.2.27">blast+</requirement>\n- </requirements>\n- <version_command>tblastx -version</version_command>\n+ <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>\n <macros>\n+ <token name="@BINARY@">tblastx</token>\n <import>ncbi_macros.xml</import>\n </macros>\n+ <expand macro="requirements" />\n <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n tblastx\n -query "$query"\n-#if $db_opts.db_opts_selector == "db":\n- -db "${db_opts.database.fields.path}"\n-#elif $db_opts.db_opts_selector == "histdb":\n- -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n-#else:\n- -subject "$db_opts.subject"\n-#end if\n+@BLAST_DB_SUBJECT@\n -query_gencode $query_gencode\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n- -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n- -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n -db_gencode $adv_opts.db_gencode\n-$adv_opts.filter_query\n $adv_opts.strand\n -matrix $adv_opts.matrix\n ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n ## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n-$adv_opts.parse_deflines\n+@ADVANCED_OPTIONS@\n ## End of advanced options:\n #end if\n </command>\n@@ -57,42 +35,24 @@\n \n <expand macro="input_conditional_nucleotide_db" />\n <expand macro="input_query_gencode" />\n- <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />\n+ <expand macro="input_evalue" />\n \n <expand macro="input_out_format" />\n-\n- <conditional name="adv_opts">\n- <param name="adv_opts_selector" type="select" label="Advanced Options">\n- <option value="basic" selected="True">Hide Advanced Options</option>\n- <option value="advanced">Show Advanced Options</option>\n- </param>\n- <when value="basic" />\n- <when value="advanced">\n-\n- <expand 
macro="input_db_gencode" />\n+ <expand macro="advanced_options">\n+ <expand macro="input_db_gencode" />\n \n- <!-- Could use a select (yes, no, other) where other allows setting \'window locut hicut\' -->\n- <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />\n- <param name="strand" type="select" label="Query strand(s) to search against database/subject">\n- <option value="-strand both">Both</option>\n- <option value="-strand plus">Plus (forward)</option>\n- <option value="-strand minus">Minus (reverse complement)</opt'..b"*not* advised because it is slower (only one\n-CPU is used), but more importantly gives e-values for pairwise\n-searches (very small e-values which will look overly signficiant).\n-In most cases you should instead turn the other FASTA file into a\n-database first using *makeblastdb* and search against that.\n+@FASTA_WARNING@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n- 1 qseqid Query Seq-id (ID of your sequence)\n- 2 sseqid Subject Seq-id (ID of the database hit)\n- 3 pident Percentage of identical matches\n- 4 length Alignment length\n- 5 mismatch Number of mismatches\n- 6 gapopen Number of gap openings\n- 7 qstart Start of alignment in query\n- 8 qend End of alignment in query\n- 9 sstart Start of alignment in subject (database hit)\n- 10 send End of alignment in subject (database hit)\n- 11 evalue Expectation value (E-value)\n- 12 bitscore Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. 
Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name Description\n------- ------------- -------------------------------------------\n- 13 sallseqid All subject Seq-id(s), separated by a ';'\n- 14 score Raw score\n- 15 nident Number of identical matches\n- 16 positive Number of positive-scoring matches\n- 17 gaps Total number of gaps\n- 18 ppos Percentage of positive-scoring matches\n- 19 qframe Query frame\n- 20 sframe Subject frame\n- 21 qseq Aligned part of query sequence\n- 22 sseq Aligned part of subject sequence\n- 23 qlen Query sequence length\n- 24 slen Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -195,17 +90,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. 
Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n </help>\n </tool>\n" |
b |
diff -r f2478dc77ccb -r b2795652d2b4 tools/ncbi_blast_plus/repository_dependencies.xml --- a/tools/ncbi_blast_plus/repository_dependencies.xml Thu Oct 10 11:53:57 2013 -0400 +++ b/tools/ncbi_blast_plus/repository_dependencies.xml Mon Nov 25 10:58:46 2013 -0500 |
b |
@@ -1,4 +1,4 @@ <?xml version="1.0"?> <repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format)."> - <repository changeset_revision="7ceb2ae30ff4" name="blast_datatypes" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="e36c60d13c94" name="blast_datatypes" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> </repositories> |