Previous changeset 3:cf4903f5c81f (2013-04-17) Next changeset 5:d42346e675c4 (2013-04-17) |
Commit message:
Uploaded v0.0.19 pre-release attempt 1 |
modified:
tools/ncbi_blast_plus/ncbi_blast_plus.txt tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml tools/ncbi_blast_plus/ncbi_makeblastdb.xml tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml |
added:
tool-data/blastdb.loc.sample tool-data/blastdb_d.loc.sample tool-data/blastdb_p.loc.sample tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml |
removed:
tools/ncbi_blast_plus/blastdb.loc.sample tools/ncbi_blast_plus/blastdb_p.loc.sample |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tool-data/blastdb.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/blastdb.loc.sample Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -0,0 +1,39 @@ +#This is a sample file distributed with Galaxy that is used to define a +#list of nucleotide BLAST databases, using three columns tab separated +#(longer whitespace are TAB characters): +# +#<unique_id> <database_caption> <base_name_path> +# +#The captions typically contain spaces and might end with the build date. +#It is important that the actual database name does not have a space in +#it, and that there are only two tabs on each line. +# +#So, for example, if your database is nt and the path to your base name +#is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry +#would look like this: +# +#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk +# +#and your /depot/data2/galaxy/blastdb/nt directory would contain all of +#your "base names" (e.g.): +# +#-rw-r--r-- 1 wychung galaxy 23437408 2008-04-09 11:26 nt.chunk.00.nhr +#-rw-r--r-- 1 wychung galaxy 3689920 2008-04-09 11:26 nt.chunk.00.nin +#-rw-r--r-- 1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq +#...etc... +# +#Your blastdb.loc file should include an entry per line for each "base name" +#you have stored. For example: +# +#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk +#wgs_30_Nov_2009 wgs 30 Nov 2009 /depot/data2/galaxy/blastdb/wgs/wgs.chunk +#test_20_Sep_2008 test 20 Sep 2008 /depot/data2/galaxy/blastdb/test/test +#...etc... +# +#See also blastdb_p.loc which is for any protein BLAST database, and +#blastdb_d.loc which is for any protein domains databases (like CDD). +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. +# |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tool-data/blastdb_d.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/blastdb_d.loc.sample Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -0,0 +1,32 @@ +#This is a sample file distributed with Galaxy that is used to define a +#list of protein domain databases, using three columns tab separated +#(longer whitespace are TAB characters): +# +#<unique_id> <database_caption> <base_name_path> +# +#The captions typically contain spaces and might end with the build date. +#It is important that the actual database name does not have a space in it, +#and that there are only two tabs on each line. +# +#So, for example, if your database is CDD and the path to your base name +#is /data/blastdb/Cdd, then the blastdb_d.loc entry would look like this: +# +#Cdd{tab}NCBI Conserved Domains Database (CDD){tab}/data/blastdb/Cdd +# +#and your /data/blastdb directory would contain all of the files associated +#with the database, /data/blastdb/Cdd.*. +# +#Your blastdb_d.loc file should include an entry per line for each "base name" +#you have stored. For example: +# +#Cdd NCBI CDD /data/blastdb/domains/Cdd +#Kog KOG (eukaryotes) /data/blastdb/domains/Kog +#Cog COG (prokaryotes) /data/blastdb/domains/Cog +#Pfam Pfam-A /data/blastdb/domains/Pfam +#Smart SMART /data/blastdb/domains/Smart +#Tigr TIGR /data/blastdb/domains/Tigr +#Prk Protein Clusters database /data/blastdb/domains/Prk +#...etc... +# +#See also blastdb.loc which is for any nucleotide BLAST database, and +#blastdb_p.loc which is for any protein BLAST databases. |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tool-data/blastdb_p.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/blastdb_p.loc.sample Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -0,0 +1,27 @@ +#This is a sample file distributed with Galaxy that is used to define a +#list of protein BLAST databases, using three columns tab separated +#(longer whitespace are TAB characters): +# +#<unique_id> <database_caption> <base_name_path> +# +#The captions typically contain spaces and might end with the build date. +#It is important that the actual database name does not have a space in +#it, and that there are only two tabs on each line. +# +#So, for example, if your database is NR and the path to your base name +#is /data/blastdb/nr, then the blastdb_p.loc entry would look like this: +# +#nr{tab}NCBI NR (non redundant){tab}/data/blastdb/nr +# +#and your /data/blastdb directory would contain all of the files associated +#with the database, /data/blastdb/nr.*. +# +#Your blastdb_p.loc file should include an entry per line for each "base name" +#you have stored. For example: +# +#nr_05Jun2010 NCBI NR (non redundant) 05 Jun 2010 /data/blastdb/05Jun2010/nr +#nr_15Aug2010 NCBI NR (non redundant) 15 Aug 2010 /data/blastdb/15Aug2010/nr +#...etc... +# +#See also blastdb.loc which is for any nucleotide BLAST database, and +#blastdb_d.loc which is for any protein domains databases (like CDD). |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/blastdb.loc.sample --- a/tools/ncbi_blast_plus/blastdb.loc.sample Wed Apr 17 09:45:28 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,38 +0,0 @@ -#This is a sample file distributed with Galaxy that is used to define a -#list of nucleotide BLAST databases, using three columns tab separated -#(longer whitespace are TAB characters): -# -#<unique_id> <database_caption> <base_name_path> -# -#The captions typically contain spaces and might end with the build date. -#It is important that the actual database name does not have a space in it, -#and that the first tab that appears in the line is right before the path. -# -#So, for example, if your database is nt and the path to your base name -#is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry -#would look like this: -# -#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk -# -#and your /depot/data2/galaxy/blastdb/nt directory would contain all of -#your "base names" (e.g.): -# -#-rw-r--r-- 1 wychung galaxy 23437408 2008-04-09 11:26 nt.chunk.00.nhr -#-rw-r--r-- 1 wychung galaxy 3689920 2008-04-09 11:26 nt.chunk.00.nin -#-rw-r--r-- 1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq -#...etc... -# -#Your blastdb.loc file should include an entry per line for each "base name" -#you have stored. For example: -# -#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk -#wgs_30_Nov_2009 wgs 30 Nov 2009 /depot/data2/galaxy/blastdb/wgs/wgs.chunk -#test_20_Sep_2008 test 20 Sep 2008 /depot/data2/galaxy/blastdb/test/test -#...etc... -# -#See also blastdb_p.loc which is for any protein BLAST database. -# -#Note that for backwards compatibility with workflows, the unique ID of -#an entry must be the path that was in the original loc file, because that -#is the value stored in the workflow for that parameter. -# |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/blastdb_p.loc.sample --- a/tools/ncbi_blast_plus/blastdb_p.loc.sample Wed Apr 17 09:45:28 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,27 +0,0 @@ -#This is a sample file distributed with Galaxy that is used to define a -#list of protein BLAST databases, using three columns tab separated -#(longer whitespace are TAB characters): -# -#<unique_id> <database_caption> <base_name_path> -# -#The captions typically contain spaces and might end with the build date. -#It is important that the actual database name does not have a space in it, -#and that the first tab that appears in the line is right before the path. -# -#So, for example, if your database is NR and the path to your base name -#is /data/blastdb/nr, then the blastdb_p.loc entry would look like this: -# -#nr NCBI NR (non redundant) /data/blastdb/nr -# -#and your /data/blastdb directory would contain all of the files associated -#with the database, /data/blastdb/nr.*. -# -#Your blastdb_p.loc file should include an entry per line for each "base name" -#you have stored. For example: -# -#nr_05Jun2010 NCBI NR (non redundant) 05 Jun 2010 /data/blastdb/05Jun2010/nr -#nr_15Aug2010 NCBI NR (non redundant) 15 Aug 2010 /data/blastdb/15Aug2010/nr -#...etc... -# -#See also blastdb.loc which is for any nucleotide BLAST database. -# |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blast_plus.txt --- a/tools/ncbi_blast_plus/ncbi_blast_plus.txt Wed Apr 17 09:45:28 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blast_plus.txt Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -8,7 +8,7 @@ Currently tested with NCBI BLAST 2.2.26+ (i.e. version 2.2.26 of BLAST+), and do not work with the NCBI 'legacy' BLAST suite (e.g. blastall). -Note that these wrappers (and the associated datetypes) were originally +Note that these wrappers (and the associated datatypes) were originally distributed as part of the main Galaxy repository, but as of August 2012 moved to the Galaxy Tool Shed as 'ncbi_blast_plus' (and 'blast_datatypes'). My thanks to Dannon Baker from the Galaxy development team for his assistance @@ -25,16 +25,16 @@ You must tell Galaxy about any system level BLAST databases using configuration files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein -databases like NR), located in the tool-data folder. Sample fils are included -which explain the tab based format to use. +databases like NR), located in the tool-data/ folder. Sample files are included +which explain the tab-based format to use. Manual Installation =================== For those not using Galaxy's automated installation from the Tool Shed, put -the XML and Python files under tools/ncbi_blast_plus and add the XML files -to your tool_conf.xml as normal (and do the same in tool_conf.xml.sample +the XML and Python files in the tools/ncbi_blast_plus/ folder and add the XML +files to your tool_conf.xml as normal (and do the same in tool_conf.xml.sample in order to run the unit tests). 
For example, use: <section name="NCBI BLAST+" id="ncbi_blast_plus_tools"> @@ -46,6 +46,8 @@ <tool file="ncbi_blast_plus/ncbi_makeblastdb.xml" /> <tool file="ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml" /> <tool file="ncbi_blast_plus/ncbi_blastdbcmd_info.xml" /> + <tool file="ncbi_blast_plus/ncbi_rpsblast_wrapper.xml" /> + <tool file="ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml" /> <tool file="ncbi_blast_plus/blastxml_to_tabular.xml" /> </section> @@ -55,13 +57,13 @@ You must tell Galaxy about any system level BLAST databases using configuration files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein -databases like NR), located in the tool-data folder. Sample fils are included -which explain the tab based format to use. +databases like NR), located in the tool-data/ folder. Sample files are included +which explain the tab-based format to use. You must install the NCBI BLAST+ standalone tools somewhere on the system path. Currently the unit tests are written using "BLAST 2.2.26+". -Run the functional tests (adusting the section identifier to match your +Run the functional tests (adjusting the section identifier to match your tool_conf.xml.sample file): ./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools @@ -89,20 +91,27 @@ v0.0.17 - The BLAST+ search tools now default to extended tabular output (all too often our users were having to re-run searches just to get one of the missing columns like query or subject length) +v0.0.18 - Defensive quoting of filenames in case of spaces (where possible, + BLAST+ handling of some multi-file arguments is problematic). +v0.0.19 - Added wrappers for rpsblast and rpstblastn, and new blastdb_d.loc + for the domain databases they use (e.g. CDD). + - Correct case of exception regular expression (for error handling + fall-back in case the return code is not set properly). 
Developers ========== -This script and related tools are being developed on the following hg branch: -http://bitbucket.org/peterjc/galaxy-central/src/tools +This script and related tools are being developed on the 'tools' branch of the +following Mercurial repository: +https://bitbucket.org/peterjc/galaxy-central/ -For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball I use +For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use the following command from the Galaxy root folder: $ ./tools/ncbi_blast_plus/make_ncbi_blast_plus.sh -This similifies ensuring a consistent set of files is bundled each time, +This simplifies ensuring a consistent set of files is bundled each time, including all the relevant test files. |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml --- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Wed Apr 17 09:45:28 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -1,7 +1,7 @@ -<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.2"> +<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.4"> <description>Show BLAST database information from blastdbcmd</description> <command> -blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out $info +blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out "$info" </command> <stdio> <!-- Anything other than zero is an error --> @@ -9,7 +9,7 @@ <exit_code range=":-1" /> <!-- Suspect blastdbcmd sometimes fails to set error level --> <regex match="Error:" /> - <regex match="EXception:" /> + <regex match="Exception:" /> </stdio> <inputs> <conditional name="db_opts"> |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Wed Apr 17 09:45:28 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.3"> +<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.4"> <description>Extract sequence(s) from BLAST database</description> <command> ## The command is a Cheetah template which allows some Python based syntax. @@ -48,7 +48,7 @@ <exit_code range=":-1" /> <!-- Suspect blastdbcmd sometimes fails to set error level --> <regex match="Error:" /> - <regex match="EXception:" /> + <regex match="Exception:" /> </stdio> <inputs> <conditional name="db_opts"> |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Wed Apr 17 09:45:28 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.17"> +<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.19"> <description>Search nucleotide database with nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> @@ -17,7 +17,7 @@ #end if -task $blast_type -evalue $evalue_cutoff --out $output1 +-out "$output1" ##Set the extended list here so if/when we add things, saved workflows are not affected #if str($out_format)=="ext": -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" @@ -47,13 +47,13 @@ <exit_code range=":-1" /> <!-- In case the return code has not been set propery check stderr too --> <regex match="Error:" /> - <regex match="EXception:" /> + <regex match="Exception:" /> </stdio> <inputs> <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> <conditional name="db_opts"> <param name="db_opts_selector" type="select" label="Subject database/sequences"> - <option value="db" selected="True">BLAST Database</option> + <option value="db" selected="True">Locally installed BLAST database</option> <option value="histdb">BLAST database from your history</option> <option value="file">FASTA file from your history (see warning note below)</option> </param> |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Wed Apr 17 09:45:28 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.17"> +<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.19"> <description>Search protein database with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> @@ -17,7 +17,7 @@ #end if -task $blast_type -evalue $evalue_cutoff --out $output1 +-out "$output1" ##Set the extended list here so if/when we add things, saved workflows are not affected #if str($out_format)=="ext": -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" @@ -48,13 +48,13 @@ <exit_code range=":-1" /> <!-- In case the return code has not been set propery check stderr too --> <regex match="Error:" /> - <regex match="EXception:" /> + <regex match="Exception:" /> </stdio> <inputs> <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> <conditional name="db_opts"> <param name="db_opts_selector" type="select" label="Subject database/sequences"> - <option value="db" selected="True">BLAST Database</option> + <option value="db" selected="True">Locally installed BLAST database</option> <option value="histdb">BLAST database from your history</option> <option value="file">FASTA file from your history (see warning note below)</option> </param> |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Wed Apr 17 09:45:28 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.17"> +<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.19"> <description>Search protein database with translated nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> @@ -17,7 +17,7 @@ #end if -query_gencode $query_gencode -evalue $evalue_cutoff --out $output1 +-out "$output1" ##Set the extended list here so if/when we add things, saved workflows are not affected #if str($out_format)=="ext": -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" @@ -48,13 +48,13 @@ <exit_code range=":-1" /> <!-- In case the return code has not been set propery check stderr too --> <regex match="Error:" /> - <regex match="EXception:" /> + <regex match="Exception:" /> </stdio> <inputs> <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> <conditional name="db_opts"> <param name="db_opts_selector" type="select" label="Subject database/sequences"> - <option value="db" selected="True">BLAST Database</option> + <option value="db" selected="True">Locally installed BLAST database</option> <option value="histdb">BLAST database from your history</option> <option value="file">FASTA file from your history (see warning note below)</option> </param> |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_makeblastdb.xml --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Wed Apr 17 09:45:28 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -1,8 +1,8 @@ -<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.1"> +<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.3"> <description>Make BLAST database</description> <version_command>makeblastdb -version</version_command> <command> -makeblastdb -out ${os.path.join($outfile.extra_files_path,'blastdb')} +makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}" $parse_seqids $hash_index ## Single call to -in with multiple filenames space separated with outer quotes @@ -41,7 +41,7 @@ <exit_code range=":-1" /> <!-- In case the return code has not been set propery check stderr too --> <regex match="Error:" /> - <regex match="EXception:" /> + <regex match="Exception:" /> </stdio> <inputs> <param name="dbtype" type="select" display="radio" label="Molecule type of input"> |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Wed Apr 17 09:47:27 2013 -0400 |
b |
b'@@ -0,0 +1,235 @@\n+<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.3">\n+ <description>Search protein domain database (PSSMs) with protein query sequence(s)</description>\n+ <!-- If job splitting is enabled, break up the query file into parts -->\n+ <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>\n+ <version_command>rpsblast -version</version_command>\n+ <command>\n+## The command is a Cheetah template which allows some Python based syntax.\n+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces\n+rpsblast\n+-query "$query"\n+#if $db_opts.db_opts_selector == "db":\n+ -db "${db_opts.database.fields.path}"\n+#elif $db_opts.db_opts_selector == "histdb":\n+ -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n+#end if\n+-evalue $evalue_cutoff\n+-out "$output1"\n+##Set the extended list here so if/when we add things, saved workflows are not affected\n+#if str($out_format)=="ext":\n+ -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n+#else:\n+ -outfmt $out_format\n+#end if\n+-num_threads 8\n+#if $adv_opts.adv_opts_selector=="advanced":\n+$adv_opts.filter_query\n+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n+## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n+-max_target_seqs $adv_opts.max_hits\n+#end if\n+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n+-word_size $adv_opts.word_size\n+#end if\n+$adv_opts.parse_deflines\n+## End of advanced options:\n+#end if\n+ </command>\n+ <stdio>\n+ <!-- Anything other than zero is an error -->\n+ <exit_code range="1:" />\n+ <exit_code range=":-1" />\n+ <!-- In case the return code has not been set propery check stderr too -->\n+ <regex match="Error:" />\n+ 
<regex match="Exception:" />\n+ </stdio>\n+ <inputs>\n+ <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> \n+ <conditional name="db_opts">\n+ <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)">\n+ <option value="db" selected="True">Locally installed BLAST database</option>\n+\t <!-- TODO - define new datatype\n+ <option value="histdb">BLAST protein domain database from your history</option>\n+\t -->\n+ </param>\n+ <when value="db">\n+ <param name="database" type="select" label="Protein domain database">\n+ <options from_file="blastdb_d.loc">\n+ <column name="value" index="0"/>\n+ <column name="name" index="1"/>\n+ <column name="path" index="2"/>\n+ </options>\n+ </param>\n+ <param name="histdb" type="hidden" value="" />\n+ <param name="subject" type="hidden" value="" /> \n+ </when>\n+\t <!-- TODO - define new datatype\n+ <when value="histdb">\n+ <param name="database" type="hidden" value="" />\n+ <param name="histdb" type="data" format="blastdbd" label="Protein domain database" />\n+ <param name="subject" type="hidden" value="" />\n+ </when>\n+\t -->\n+ </conditional>\n+ <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />\n+ <param name="out_format" type="select" label="Output format">\n+ <option value="6">Tabular (standard 12 columns)</option>\n+ <option value="ext" selected="True">Tabular (extended 24 columns)</option>\n+ <option value="5">BLAST XML</option>\n+ <option value="0">Pairwise text</option>\n+ <option value="0 -html">Pairwise HTML</option>\n+ '..b"rce, the eukaryotic \n+counterpart to COGs, see http://www.ncbi.nlm.nih.gov/COG/new/\n+\n+*Cog* - PSSMs from automatically aligned sequences and sequence\n+fragments classified in the COGs resource, which focuses primarily\n+on prokaryotes, see http://www.ncbi.nlm.nih.gov/COG/new/\n+\n+*Pfam* - PSSMs from Pfam-A seed alignment database, see\n+http://pfam.sanger.ac.uk/\n+\n+*Smart* - PSSMs 
from SMART domain alignment database, see\n+http://smart.embl-heidelberg.de/\n+\n+*Tigr* - PSSMs from TIGRFAM database of protein families, see\n+http://www.jcvi.org/cms/research/projects/tigrfams/overview/\n+\n+*Prk* - PSSMs from automatically aligned stable clusters in the\n+Protein Clusters database, see\n+http://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&db=proteinclusters\n+\n+The exact list of domain databases offered will depend on how your\n+local Galaxy has been configured.\n+\n+-----\n+\n+**Output format**\n+\n+Because Galaxy focuses on processing tabular data, the default output of this\n+tool is tabular. The standard BLAST+ tabular output contains 12 columns:\n+\n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+ 1 qseqid Query Seq-id (ID of your sequence)\n+ 2 sseqid Subject Seq-id (ID of the database hit)\n+ 3 pident Percentage of identical matches\n+ 4 length Alignment length\n+ 5 mismatch Number of mismatches\n+ 6 gapopen Number of gap openings\n+ 7 qstart Start of alignment in query\n+ 8 qend End of alignment in query\n+ 9 sstart Start of alignment in subject (database hit)\n+ 10 send End of alignment in subject (database hit)\n+ 11 evalue Expectation value (E-value)\n+ 12 bitscore Bit score\n+====== ========= ============================================\n+\n+The BLAST+ tools can optionally output additional columns of information,\n+but this takes longer to calculate. Most (but not all) of these columns are\n+included by selecting the extended tabular output. The extra columns are\n+included *after* the standard 12 columns. This is so that you can write\n+workflow filtering steps that accept either the 12 or 24 column tabular\n+BLAST output. 
Galaxy now uses this extended 24 column output by default.\n+\n+====== ============= ===========================================\n+Column NCBI name Description\n+------ ------------- -------------------------------------------\n+ 13 sallseqid All subject Seq-id(s), separated by a ';'\n+ 14 score Raw score\n+ 15 nident Number of identical matches\n+ 16 positive Number of positive-scoring matches\n+ 17 gaps Total number of gaps\n+ 18 ppos Percentage of positive-scoring matches\n+ 19 qframe Query frame\n+ 20 sframe Subject frame\n+ 21 qseq Aligned part of query sequence\n+ 22 sseq Aligned part of subject sequence\n+ 23 qlen Query sequence length\n+ 24 slen Subject sequence length\n+====== ============= ===========================================\n+\n+The third option is BLAST XML output, which is designed to be parsed by\n+another program, and is understood by some Galaxy tools.\n+\n+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+\n+-------\n+\n+**References**\n+\n+Marchler-Bauer A, Bryant SH. CD-Search: protein domain annotations on the fly. Nucleic Acids Res. 2004 Jul 1;32(Web Server issue):W327-31.\n+\n+ </help>\n+</tool>\n" |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Wed Apr 17 09:45:28 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.17"> +<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.19"> <description>Search translated nucleotide database with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> @@ -16,7 +16,7 @@ -subject "$db_opts.subject" #end if -evalue $evalue_cutoff --out $output1 +-out "$output1" ##Set the extended list here so if/when we add things, saved workflows are not affected #if str($out_format)=="ext": -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" @@ -48,13 +48,13 @@ <exit_code range=":-1" /> <!-- In case the return code has not been set propery check stderr too --> <regex match="Error:" /> - <regex match="EXception:" /> + <regex match="Exception:" /> </stdio> <inputs> <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> <conditional name="db_opts"> <param name="db_opts_selector" type="select" label="Subject database/sequences"> - <option value="db" selected="True">BLAST Database</option> + <option value="db" selected="True">Locally installed BLAST database</option> <option value="histdb">BLAST database from your history</option> <option value="file">FASTA file from your history (see warning note below)</option> </param> |
b |
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Wed Apr 17 09:45:28 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Wed Apr 17 09:47:27 2013 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.17"> +<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.19"> <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> @@ -17,7 +17,7 @@ #end if -query_gencode $query_gencode -evalue $evalue_cutoff --out $output1 +-out "$output1" ##Set the extended list here so if/when we add things, saved workflows are not affected #if str($out_format)=="ext": -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" @@ -48,13 +48,13 @@ <exit_code range=":-1" /> <!-- In case the return code has not been set propery check stderr too --> <regex match="Error:" /> - <regex match="EXception:" /> + <regex match="Exception:" /> </stdio> <inputs> <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> <conditional name="db_opts"> <param name="db_opts_selector" type="select" label="Subject database/sequences"> - <option value="db" selected="True">BLAST Database</option> + <option value="db" selected="True">Locally installed BLAST database</option> <option value="histdb">BLAST database from your history</option> <option value="file">FASTA file from your history (see warning note below)</option> </param> |