# HG changeset patch
# User peterjc
# Date 1366206447 14400
# Node ID 4afb12181d1a221fde41e9c1b66408b803c6022f
# Parent  cf4903f5c81f975b7a3b90accf9dac411d907d55
Uploaded v0.0.19 pre-release attempt 1

diff -r cf4903f5c81f -r 4afb12181d1a tool-data/blastdb.loc.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/blastdb.loc.sample	Wed Apr 17 09:47:27 2013 -0400
@@ -0,0 +1,39 @@
+#This is a sample file distributed with Galaxy that is used to define a
+#list of nucleotide BLAST databases, using three columns tab separated
+#(longer whitespace are TAB characters):
+#
+#<unique_id>	<database_caption>	<base_name_path>
+#
+#The captions typically contain spaces and might end with the build date.
+#It is important that the actual database name does not have a space in
+#it, and that there are only two tabs on each line.
+#
+#So, for example, if your database is nt and the path to your base name 
+#is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry 
+#would look like this:
+#
+#nt_02_Dec_2009      nt 02 Dec 2009      /depot/data2/galaxy/blastdb/nt/nt.chunk
+#
+#and your /depot/data2/galaxy/blastdb/nt directory would contain all of 
+#your "base names" (e.g.):
+#
+#-rw-r--r--  1 wychung galaxy  23437408 2008-04-09 11:26 nt.chunk.00.nhr
+#-rw-r--r--  1 wychung galaxy   3689920 2008-04-09 11:26 nt.chunk.00.nin
+#-rw-r--r--  1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq
+#...etc...
+#
+#Your blastdb.loc file should include an entry per line for each "base name" 
+#you have stored.  For example:
+#
+#nt_02_Dec_2009		nt 02 Dec 2009		/depot/data2/galaxy/blastdb/nt/nt.chunk
+#wgs_30_Nov_2009	wgs 30 Nov 2009	/depot/data2/galaxy/blastdb/wgs/wgs.chunk
+#test_20_Sep_2008	test 20 Sep 2008	/depot/data2/galaxy/blastdb/test/test
+#...etc...
+#
+#See also blastdb_p.loc which is for any protein BLAST database, and
+#blastdb_d.loc which is for any protein domain database (like CDD).
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter.
+#
diff -r cf4903f5c81f -r 4afb12181d1a tool-data/blastdb_d.loc.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/blastdb_d.loc.sample	Wed Apr 17 09:47:27 2013 -0400
@@ -0,0 +1,32 @@
+#This is a sample file distributed with Galaxy that is used to define a
+#list of protein domain databases, using three columns tab separated
+#(longer whitespace are TAB characters):
+#
+#<unique_id>	<database_caption>	<base_name_path>
+#
+#The captions typically contain spaces and might end with the build date.
+#It is important that the actual database name does not have a space in it,
+#and that there are only two tabs on each line.
+#
+#So, for example, if your database is CDD and the path to your base name
+#is /data/blastdb/Cdd, then the blastdb_d.loc entry would look like this:
+#
+#Cdd{tab}NCBI Conserved Domains Database (CDD){tab}/data/blastdb/Cdd
+#
+#and your /data/blastdb directory would contain all of the files associated
+#with the database, /data/blastdb/Cdd.*.
+#
+#Your blastdb_d.loc file should include an entry per line for each "base name"
+#you have stored. For example:
+#
+#Cdd	NCBI CDD	/data/blastdb/domains/Cdd
+#Kog	KOG (eukaryotes)	/data/blastdb/domains/Kog
+#Cog	COG (prokaryotes)	/data/blastdb/domains/Cog
+#Pfam	Pfam-A	/data/blastdb/domains/Pfam
+#Smart	SMART	/data/blastdb/domains/Smart
+#Tigr	TIGR	/data/blastdb/domains/Tigr
+#Prk	Protein Clusters database	/data/blastdb/domains/Prk
+#...etc...
+#
+#See also blastdb.loc which is for any nucleotide BLAST database, and
+#blastdb_p.loc which is for any protein BLAST database.
diff -r cf4903f5c81f -r 4afb12181d1a tool-data/blastdb_p.loc.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/blastdb_p.loc.sample	Wed Apr 17 09:47:27 2013 -0400
@@ -0,0 +1,27 @@
+#This is a sample file distributed with Galaxy that is used to define a
+#list of protein BLAST databases, using three columns tab separated
+#(longer whitespace are TAB characters):
+#
+#<unique_id>	<database_caption>	<base_name_path>
+#
+#The captions typically contain spaces and might end with the build date.
+#It is important that the actual database name does not have a space in
+#it, and that there are only two tabs on each line.
+#
+#So, for example, if your database is NR and the path to your base name
+#is /data/blastdb/nr, then the blastdb_p.loc entry would look like this:
+#
+#nr{tab}NCBI NR (non redundant){tab}/data/blastdb/nr
+#
+#and your /data/blastdb directory would contain all of the files associated
+#with the database, /data/blastdb/nr.*.
+#
+#Your blastdb_p.loc file should include an entry per line for each "base name"
+#you have stored. For example:
+#
+#nr_05Jun2010	NCBI NR (non redundant) 05 Jun 2010	/data/blastdb/05Jun2010/nr
+#nr_15Aug2010	NCBI NR (non redundant) 15 Aug 2010	/data/blastdb/15Aug2010/nr
+#...etc...
+#
+#See also blastdb.loc which is for any nucleotide BLAST database, and
+#blastdb_d.loc which is for any protein domain database (like CDD).
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/blastdb.loc.sample
--- a/tools/ncbi_blast_plus/blastdb.loc.sample	Wed Apr 17 09:45:28 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-#This is a sample file distributed with Galaxy that is used to define a
-#list of nucleotide BLAST databases, using three columns tab separated
-#(longer whitespace are TAB characters):
-#
-#<unique_id>	<database_caption>	<base_name_path>
-#
-#The captions typically contain spaces and might end with the build date.
-#It is important that the actual database name does not have a space in it,
-#and that the first tab that appears in the line is right before the path.
-#
-#So, for example, if your database is nt and the path to your base name 
-#is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry 
-#would look like this:
-#
-#nt_02_Dec_2009      nt 02 Dec 2009      /depot/data2/galaxy/blastdb/nt/nt.chunk
-#
-#and your /depot/data2/galaxy/blastdb/nt directory would contain all of 
-#your "base names" (e.g.):
-#
-#-rw-r--r--  1 wychung galaxy  23437408 2008-04-09 11:26 nt.chunk.00.nhr
-#-rw-r--r--  1 wychung galaxy   3689920 2008-04-09 11:26 nt.chunk.00.nin
-#-rw-r--r--  1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq
-#...etc...
-#
-#Your blastdb.loc file should include an entry per line for each "base name" 
-#you have stored.  For example:
-#
-#nt_02_Dec_2009		nt 02 Dec 2009		/depot/data2/galaxy/blastdb/nt/nt.chunk
-#wgs_30_Nov_2009	wgs 30 Nov 2009	/depot/data2/galaxy/blastdb/wgs/wgs.chunk
-#test_20_Sep_2008	test 20 Sep 2008	/depot/data2/galaxy/blastdb/test/test
-#...etc...
-#
-#See also blastdb_p.loc which is for any protein BLAST database.
-#
-#Note that for backwards compatibility with workflows, the unique ID of
-#an entry must be the path that was in the original loc file, because that
-#is the value stored in the workflow for that parameter.
-#
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/blastdb_p.loc.sample
--- a/tools/ncbi_blast_plus/blastdb_p.loc.sample	Wed Apr 17 09:45:28 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-#This is a sample file distributed with Galaxy that is used to define a
-#list of protein BLAST databases, using three columns tab separated
-#(longer whitespace are TAB characters):
-#
-#<unique_id>	<database_caption>	<base_name_path>
-#
-#The captions typically contain spaces and might end with the build date.
-#It is important that the actual database name does not have a space in it,
-#and that the first tab that appears in the line is right before the path.
-#
-#So, for example, if your database is NR and the path to your base name
-#is /data/blastdb/nr, then the blastdb_p.loc entry would look like this:
-#
-#nr		NCBI NR (non redundant)		/data/blastdb/nr
-#
-#and your /data/blastdb directory would contain all of the files associated
-#with the database, /data/blastdb/nr.*.
-#
-#Your blastdb_p.loc file should include an entry per line for each "base name"
-#you have stored. For example:
-#
-#nr_05Jun2010	NCBI NR (non redundant) 05 Jun 2010		/data/blastdb/05Jun2010/nr
-#nr_15Aug2010	NCBI NR (non redundant) 15 Aug 2010		/data/blastdb/15Aug2010/nr
-#...etc...
-#
-#See also blastdb.loc which is for any nucleotide BLAST database.
-#
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blast_plus.txt
--- a/tools/ncbi_blast_plus/ncbi_blast_plus.txt	Wed Apr 17 09:45:28 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blast_plus.txt	Wed Apr 17 09:47:27 2013 -0400
@@ -8,7 +8,7 @@
 Currently tested with NCBI BLAST 2.2.26+ (i.e. version 2.2.26 of BLAST+),
 and do not work with the NCBI 'legacy' BLAST suite (e.g. blastall).
 
-Note that these wrappers (and the associated datetypes) were originally
+Note that these wrappers (and the associated datatypes) were originally
 distributed as part of the main Galaxy repository, but as of August 2012
 moved to the Galaxy Tool Shed as 'ncbi_blast_plus' (and 'blast_datatypes').
 My thanks to Dannon Baker from the Galaxy development team for his assistance
@@ -25,16 +25,16 @@
 
 You must tell Galaxy about any system level BLAST databases using configuration
 files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
-databases like NR), located in the tool-data folder. Sample fils are included
-which explain the tab based format to use.
+databases like NR), located in the tool-data/ folder. Sample files are included
+which explain the tab-based format to use.
 
 
 Manual Installation
 ===================
 
 For those not using Galaxy's automated installation from the Tool Shed, put
-the XML and Python files under tools/ncbi_blast_plus and add the XML files
-to your tool_conf.xml as normal (and do the same in tool_conf.xml.sample
+the XML and Python files in the tools/ncbi_blast_plus/ folder and add the XML
+files to your tool_conf.xml as normal (and do the same in tool_conf.xml.sample
 in order to run the unit tests). For example, use:
 
   <section name="NCBI BLAST+" id="ncbi_blast_plus_tools">
@@ -46,6 +46,8 @@
     <tool file="ncbi_blast_plus/ncbi_makeblastdb.xml" />
     <tool file="ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml" />
     <tool file="ncbi_blast_plus/ncbi_blastdbcmd_info.xml" />
+    <tool file="ncbi_blast_plus/ncbi_rpsblast_wrapper.xml" />
+    <tool file="ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml" />
     <tool file="ncbi_blast_plus/blastxml_to_tabular.xml" />
   </section>
 
@@ -55,13 +57,13 @@
 
 You must tell Galaxy about any system level BLAST databases using configuration
 files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
-databases like NR), located in the tool-data folder. Sample fils are included
-which explain the tab based format to use.
+databases like NR), located in the tool-data/ folder. Sample files are included
+which explain the tab-based format to use.
 
 You must install the NCBI BLAST+ standalone tools somewhere on the system
 path. Currently the unit tests are written using "BLAST 2.2.26+".
 
-Run the functional tests (adusting the section identifier to match your
+Run the functional tests (adjusting the section identifier to match your
 tool_conf.xml.sample file):
 
 ./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools
@@ -89,20 +91,27 @@
 v0.0.17 - The BLAST+ search tools now default to extended tabular output
           (all too often our users where having to re-run searches just to
           get one of the missing columns like query or subject length)
+v0.0.18 - Defensive quoting of filenames in case of spaces (where possible,
+          BLAST+ handling of some multi-file arguments is problematic).
+v0.0.19 - Added wrappers for rpsblast and rpstblastn, and new blastdb_d.loc
+          for the domain databases they use (e.g. CDD).
+        - Correct case of exception regular expression (for error handling
+          fall-back in case the return code is not set properly).
 
 
 Developers
 ==========
 
-This script and related tools are being developed on the following hg branch:
-http://bitbucket.org/peterjc/galaxy-central/src/tools
+This script and related tools are being developed on the 'tools' branch of the
+following Mercurial repository:
+https://bitbucket.org/peterjc/galaxy-central/
 
-For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball I use
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use
 the following command from the Galaxy root folder:
 
 $ ./tools/ncbi_blast_plus/make_ncbi_blast_plus.sh
 
-This similifies ensuring a consistent set of files is bundled each time,
+This simplifies ensuring a consistent set of files is bundled each time,
 including all the relevant test files.
 
 
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml	Wed Apr 17 09:45:28 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml	Wed Apr 17 09:47:27 2013 -0400
@@ -1,7 +1,7 @@
-<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.2">
+<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.4">
     <description>Show BLAST database information from blastdbcmd</description>
     <command>
-blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out $info
+blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out "$info"
     </command>
     <stdio>
         <!-- Anything other than zero is an error -->
@@ -9,7 +9,7 @@
         <exit_code range=":-1" />
 	<!-- Suspect blastdbcmd sometimes fails to set error level -->
 	<regex match="Error:" />
-	<regex match="EXception:" />
+	<regex match="Exception:" />
     </stdio>
     <inputs>
         <conditional name="db_opts">
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml	Wed Apr 17 09:45:28 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml	Wed Apr 17 09:47:27 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.3">
+<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.4">
     <description>Extract sequence(s) from BLAST database</description>
     <command>
 ## The command is a Cheetah template which allows some Python based syntax.
@@ -48,7 +48,7 @@
         <exit_code range=":-1" />
 	<!-- Suspect blastdbcmd sometimes fails to set error level -->
 	<regex match="Error:" />
-	<regex match="EXception:" />
+	<regex match="Exception:" />
     </stdio>
     <inputs>
         <conditional name="db_opts">
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml	Wed Apr 17 09:45:28 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml	Wed Apr 17 09:47:27 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.17">
+<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.19">
     <description>Search nucleotide database with nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
@@ -17,7 +17,7 @@
 #end if
 -task $blast_type
 -evalue $evalue_cutoff
--out $output1
+-out "$output1"
 ##Set the extended list here so if/when we add things, saved workflows are not affected
 #if str($out_format)=="ext":
     -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
@@ -47,13 +47,13 @@
         <exit_code range=":-1" />
         <!-- In case the return code has not been set propery check stderr too -->
         <regex match="Error:" />
-        <regex match="EXception:" />
+        <regex match="Exception:" />
     </stdio>
     <inputs>
         <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
-              <option value="db" selected="True">BLAST Database</option>
+              <option value="db" selected="True">Locally installed BLAST database</option>
               <option value="histdb">BLAST database from your history</option>
               <option value="file">FASTA file from your history (see warning note below)</option>
             </param>
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml	Wed Apr 17 09:45:28 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml	Wed Apr 17 09:47:27 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.17">
+<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.19">
     <description>Search protein database with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
@@ -17,7 +17,7 @@
 #end if
 -task $blast_type
 -evalue $evalue_cutoff
--out $output1
+-out "$output1"
 ##Set the extended list here so if/when we add things, saved workflows are not affected
 #if str($out_format)=="ext":
     -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
@@ -48,13 +48,13 @@
         <exit_code range=":-1" />
         <!-- In case the return code has not been set propery check stderr too -->
         <regex match="Error:" />
-        <regex match="EXception:" />
+        <regex match="Exception:" />
     </stdio>
     <inputs>
         <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
-              <option value="db" selected="True">BLAST Database</option>
+              <option value="db" selected="True">Locally installed BLAST database</option>
               <option value="histdb">BLAST database from your history</option>
               <option value="file">FASTA file from your history (see warning note below)</option>
             </param>
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml	Wed Apr 17 09:45:28 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml	Wed Apr 17 09:47:27 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.17">
+<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.19">
     <description>Search protein database with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
@@ -17,7 +17,7 @@
 #end if
 -query_gencode $query_gencode
 -evalue $evalue_cutoff
--out $output1
+-out "$output1"
 ##Set the extended list here so if/when we add things, saved workflows are not affected
 #if str($out_format)=="ext":
     -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
@@ -48,13 +48,13 @@
         <exit_code range=":-1" />
         <!-- In case the return code has not been set propery check stderr too -->
         <regex match="Error:" />
-        <regex match="EXception:" />
+        <regex match="Exception:" />
     </stdio>
     <inputs>
         <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
-              <option value="db" selected="True">BLAST Database</option>
+              <option value="db" selected="True">Locally installed BLAST database</option>
               <option value="histdb">BLAST database from your history</option>
               <option value="file">FASTA file from your history (see warning note below)</option>
             </param>
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml	Wed Apr 17 09:45:28 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml	Wed Apr 17 09:47:27 2013 -0400
@@ -1,8 +1,8 @@
-<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.1">
+<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.3">
 <description>Make BLAST database</description>
 <version_command>makeblastdb -version</version_command>
 <command>
-makeblastdb -out ${os.path.join($outfile.extra_files_path,'blastdb')}
+makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}"
 $parse_seqids
 $hash_index
 ## Single call to -in with multiple filenames space separated with outer quotes
@@ -41,7 +41,7 @@
     <exit_code range=":-1" />
     <!-- In case the return code has not been set propery check stderr too -->
     <regex match="Error:" />
-    <regex match="EXception:" />
+    <regex match="Exception:" />
 </stdio>
 <inputs>
     <param name="dbtype" type="select" display="radio" label="Molecule type of input">
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml	Wed Apr 17 09:47:27 2013 -0400
@@ -0,0 +1,235 @@
+<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.3">
+    <description>Search protein domain database (PSSMs) with protein query sequence(s)</description>
+    <!-- If job splitting is enabled, break up the query file into parts -->
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>
+    <version_command>rpsblast -version</version_command>
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+rpsblast
+-query "$query"
+#if $db_opts.db_opts_selector == "db":
+  -db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
+#end if
+-evalue $evalue_cutoff
+-out "$output1"
+##Set the extended list here so if/when we add things, saved workflows are not affected
+#if str($out_format)=="ext":
+    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
+#else:
+    -outfmt $out_format
+#end if
+-num_threads 8
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+    </command>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+        <!-- In case the return code has not been set properly, check stderr too -->
+        <regex match="Error:" />
+        <regex match="Exception:" />
+    </stdio>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)">
+              <option value="db" selected="True">Locally installed BLAST database</option>
+	      <!-- TODO - define new datatype
+              <option value="histdb">BLAST protein domain database from your history</option>
+	      -->
+            </param>
+            <when value="db">
+                <param name="database" type="select" label="Protein domain database">
+                    <options from_file="blastdb_d.loc">
+                      <column name="value" index="0"/>
+                      <column name="name" index="1"/>
+                      <column name="path" index="2"/>
+                    </options>
+                </param>
+                <param name="histdb" type="hidden" value="" />
+                <param name="subject" type="hidden" value="" /> 
+            </when>
+	    <!-- TODO - define new datatype
+            <when value="histdb">
+                <param name="database" type="hidden" value="" />
+                <param name="histdb" type="data" format="blastdbd" label="Protein domain database" />
+                <param name="subject" type="hidden" value="" />
+            </when>
+	    -->
+        </conditional>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6">Tabular (standard 12 columns)</option>
+            <option value="ext" selected="True">Tabular (extended 24 columns)</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <option value="0 -html">Pairwise HTML</option>
+            <option value="2">Query-anchored text</option>
+            <option value="2 -html">Query-anchored HTML</option>
+            <option value="4">Flat query-anchored text</option>
+            <option value="4 -html">Flat query-anchored HTML</option>
+            <!--
+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
+                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!-- I'd like word_size to be optional, with minimum 2 for rpsblast -->
+                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
+                    <validator type="in_range" min="0" />
+                </param>
+                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="RPS-BLAST on ${database.fields.name}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="0 -html" format="html"/>
+                <when input="out_format" value="2" format="txt"/>
+                <when input="out_format" value="2 -html" format="html"/>
+                <when input="out_format" value="4" format="txt"/>
+                <when input="out_format" value="4 -html" format="html"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">rpsblast</requirement>
+    </requirements>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *protein domain database* using a *protein query*,
+using the NCBI BLAST+ rpsblast command line tool.
+
+The protein domain databases use position-specific scoring matrices
+(PSSMs) and are available for a number of domain collections including:
+
+*CDD* - NCBI curated meta-collection of domains, see
+http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains
+
+*Kog* - PSSMs from automatically aligned sequences and sequence
+fragments classified in the KOGs resource, the eukaryotic 
+counterpart to COGs, see http://www.ncbi.nlm.nih.gov/COG/new/
+
+*Cog* - PSSMs from automatically aligned sequences and sequence
+fragments classified in the COGs resource, which focuses primarily
+on prokaryotes, see http://www.ncbi.nlm.nih.gov/COG/new/
+
+*Pfam* - PSSMs from Pfam-A seed alignment database, see
+http://pfam.sanger.ac.uk/
+
+*Smart* - PSSMs from SMART domain alignment database, see
+http://smart.embl-heidelberg.de/
+
+*Tigr* - PSSMs from TIGRFAM database of protein families, see
+http://www.jcvi.org/cms/research/projects/tigrfams/overview/
+
+*Prk* - PSSMs from automatically aligned stable clusters in the
+Protein Clusters database, see
+http://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&amp;db=proteinclusters
+
+The exact list of domain databases offered will depend on how your
+local Galaxy has been configured.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 24 column tabular
+BLAST output. Galaxy now uses this extended 24 column output by default.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+    23 qlen          Query sequence length
+    24 slen          Subject sequence length
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Marchler-Bauer A, Bryant SH. CD-Search: protein domain annotations on the fly. Nucleic Acids Res. 2004 Jul 1;32(Web Server issue):W327-31.
+
+    </help>
+</tool>
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml	Wed Apr 17 09:45:28 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml	Wed Apr 17 09:47:27 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.17">
+<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.19">
     <description>Search translated nucleotide database with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
@@ -16,7 +16,7 @@
   -subject "$db_opts.subject"
 #end if
 -evalue $evalue_cutoff
--out $output1
+-out "$output1"
 ##Set the extended list here so if/when we add things, saved workflows are not affected
 #if str($out_format)=="ext":
     -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
@@ -48,13 +48,13 @@
         <exit_code range=":-1" />
         <!-- In case the return code has not been set propery check stderr too -->
         <regex match="Error:" />
-        <regex match="EXception:" />
+        <regex match="Exception:" />
     </stdio>
     <inputs>
         <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
-              <option value="db" selected="True">BLAST Database</option>
+              <option value="db" selected="True">Locally installed BLAST database</option>
               <option value="histdb">BLAST database from your history</option>
               <option value="file">FASTA file from your history (see warning note below)</option>
             </param>
diff -r cf4903f5c81f -r 4afb12181d1a tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml	Wed Apr 17 09:45:28 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml	Wed Apr 17 09:47:27 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.17">
+<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.19">
     <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>
@@ -17,7 +17,7 @@
 #end if
 -query_gencode $query_gencode
 -evalue $evalue_cutoff
--out $output1
+-out "$output1"
 ##Set the extended list here so if/when we add things, saved workflows are not affected
 #if str($out_format)=="ext":
     -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"
@@ -48,13 +48,13 @@
         <exit_code range=":-1" />
         <!-- In case the return code has not been set propery check stderr too -->
         <regex match="Error:" />
-        <regex match="EXception:" />
+        <regex match="Exception:" />
     </stdio>
     <inputs>
         <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
         <conditional name="db_opts">
             <param name="db_opts_selector" type="select" label="Subject database/sequences">
-              <option value="db" selected="True">BLAST Database</option>
+              <option value="db" selected="True">Locally installed BLAST database</option>
               <option value="histdb">BLAST database from your history</option>
               <option value="file">FASTA file from your history (see warning note below)</option>
             </param>