changeset 1:5e9d5e536b79 draft

Uploaded v0.1.02 preview 2, clarify sample blastdb loc files, etc
author peterjc
date Tue, 03 Mar 2015 05:32:18 -0500
parents 432ea9614cc9
children 6119ddccd8a3
files tool-data/blastdb.loc.sample tool-data/blastdb_d.loc.sample tool-data/blastdb_p.loc.sample tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/repository_dependencies.xml
diffstat 5 files changed, 125 insertions(+), 81 deletions(-) [+]
line wrap: on
line diff
--- a/tool-data/blastdb.loc.sample	Fri Jan 30 08:27:28 2015 -0500
+++ b/tool-data/blastdb.loc.sample	Tue Mar 03 05:32:18 2015 -0500
@@ -1,39 +1,44 @@
-#This is a sample file distributed with Galaxy that is used to define a
-#list of nucleotide BLAST databases, using three columns tab separated
-#(longer whitespace are TAB characters):
+# This is a sample file distributed with Galaxy that is used to define a
+# list of nucleotide BLAST databases, using three columns tab separated:
 #
-#<unique_id>	<database_caption>	<base_name_path>
+# <unique_id>{tab}<database_caption>{tab}<base_name_path>
+#
+# The captions typically contain spaces and might end with the build date.
+# It is important that the actual database name does not have a space in
+# it, and that there are only two tabs on each line.
 #
-#The captions typically contain spaces and might end with the build date.
-#It is important that the actual database name does not have a space in
-#it, and that there are only two tabs on each line.
+# You can download the NCBI provided nucleotide databases like NT from here:
+# ftp://ftp.ncbi.nlm.nih.gov/blast/db/
 #
-#So, for example, if your database is nt and the path to your base name 
-#is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry 
-#would look like this:
-#
-#nt_02_Dec_2009      nt 02 Dec 2009      /depot/data2/galaxy/blastdb/nt/nt.chunk
-#
-#and your /depot/data2/galaxy/blastdb/nt directory would contain all of 
-#your "base names" (e.g.):
+# For simplicity, many Galaxy servers are configured to offer just a live
+# version of each NCBI BLAST database (updated with the NCBI provided
+# Perl scripts or similar). In this case, we recommend using the case
+# sensitive base-name of the NCBI BLAST databases as the unique id.
+# Consistent naming is important for sharing workflows between Galaxy
+# servers.
 #
-#-rw-r--r--  1 wychung galaxy  23437408 2008-04-09 11:26 nt.chunk.00.nhr
-#-rw-r--r--  1 wychung galaxy   3689920 2008-04-09 11:26 nt.chunk.00.nin
-#-rw-r--r--  1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq
-#...etc...
+# For example, consider the NCBI partially non-redundant nucleotide 
+# nt BLAST database, where you have downloaded and decompressed the
+# files under /data/blastdb/ meaning at the command line BLAST+ would
+# look at the files /data/blastdb/nt.n* when run with:
 #
-#Your blastdb.loc file should include an entry per line for each "base name" 
-#you have stored.  For example:
+# $ blastn -db /data/blastdb/nt -query ...
+#
+# In this case use nt (lower case to match the NCBI file naming) as the
+# unique id in the first column of blastdb.loc, giving an entry like
+# this:
+#
+# nt{tab}NCBI partially non-redundant (nt){tab}/data/blastdb/nt
 #
-#nt_02_Dec_2009		nt 02 Dec 2009		/depot/data2/galaxy/blastdb/nt/nt.chunk
-#wgs_30_Nov_2009	wgs 30 Nov 2009	/depot/data2/galaxy/blastdb/wgs/wgs.chunk
-#test_20_Sep_2008	test 20 Sep 2008	/depot/data2/galaxy/blastdb/test/test
-#...etc...
+# Alternatively, rather than a "live" mirror of the NCBI databases which
+# are updated automatically, for full reproducibility the Galaxy Team
+# recommend saving date-stamped copies of the databases. In this case
+# your blastdb.loc file should include an entry per line for each
+# version you have stored. For example:
 #
-#You can download the NCBI provided protein databases like NT from here:
-#ftp://ftp.ncbi.nlm.nih.gov/blast/db/
+# nt_05Jun2010{tab}NCBI nt (partially non-redundant) 05 Jun 2010{tab}/data/blastdb/05Jun2010/nt
+# nt_15Aug2010{tab}NCBI nt (partially non-redundant) 15 Aug 2010{tab}/data/blastdb/15Aug2010/nt
+# ...etc...
 #
-#See also blastdb_p.loc which is for any protein BLAST database, and
-#blastdb_d.loc which is for any protein domains databases (like CDD).
-
-
+# See also blastdb_p.loc which is for any protein BLAST database, and
+# blastdb_d.loc which is for any protein domains databases (like CDD).
--- a/tool-data/blastdb_d.loc.sample	Fri Jan 30 08:27:28 2015 -0500
+++ b/tool-data/blastdb_d.loc.sample	Tue Mar 03 05:32:18 2015 -0500
@@ -1,35 +1,57 @@
-#This is a sample file distributed with Galaxy that is used to define a
-#list of protein domain databases, using three columns tab separated
-#(longer whitespace are TAB characters):
+# This is a sample file distributed with Galaxy that is used to define a
+# list of protein domain databases, using three columns tab separated
+# (where {tab} denotes a single TAB character):
+#
+# <unique_id>{tab}<database_caption>{tab}<base_name_path>
 #
-#<unique_id>	<database_caption>	<base_name_path>
+# The captions typically contain spaces and might end with the build date.
+# It is important that the actual database name does not have a space in
+# it, and that there are only two tabs on each line.
+#
+# You can download the NCBI provided databases as tar-balls from here:
+# ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/
 #
-#The captions typically contain spaces and might end with the build date.
-#It is important that the actual database name does not have a space in it,
-#and that there are only two tabs on each line.
+# For simplicity, many Galaxy servers are configured to offer just a live
+# version of each NCBI BLAST database (updated with the NCBI provided
+# Perl scripts or similar). In this case, we recommend using the case
+# sensitive base-name of the NCBI BLAST databases as the unique id.
+# Consistent naming is important for sharing workflows between Galaxy
+# servers.
 #
-#You can download the NCBI provided databases as tar-balls from here:
-#ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/
+# For example, consider the NCBI Conserved Domains Database (CDD), where
+# you have downloaded and decompressed the files under the directory
+# /data/blastdb/domains/ meaning at the command line BLAST+ would be
+# run as follows and would look at the files /data/blastdb/domains/Cdd.*:
 #
-#So, for example, if your database is CDD and the path to your base name
-#is /data/blastdb/Cdd, then the blastdb_d.loc entry would look like this:
-#
-#Cdd{tab}NCBI Conserved Domains Database (CDD){tab}/data/blastdb/Cdd
+# $ rpsblast -db /data/blastdb/domains/Cdd -query ...
 #
-#and your /data/blastdb directory would contain all of the files associated
-#with the database, /data/blastdb/Cdd.*.
+# In this case use Cdd (title case to match the NCBI file naming) as the
+# unique id in the first column of blastdb_d.loc, giving an entry like
+# this:
 #
-#Your blastdb_d.loc file should include an entry per line for each "base name"
-#you have stored. For example:
+# Cdd{tab}NCBI Conserved Domains Database (CDD){tab}/data/blastdb/domains/Cdd
+#
+# Your blastdb_d.loc file should include an entry per line for each "base name"
+# you have stored. For example:
 #
-#Cdd	NCBI CDD	/data/blastdb/domains/Cdd
-#Kog	KOG (eukaryotes)	/data/blastdb/domains/Kog
-#Cog	COG (prokaryotes)	/data/blastdb/domains/Cog
-#Pfam	Pfam-A	/data/blastdb/domains/Pfam
-#Smart	SMART	/data/blastdb/domains/Smart
-#Tigr	TIGR	/data/blastdb/domains/Tigr
-#Prk	Protein Clusters database	/data/blastdb/domains/Prk
-#...etc...
+# Cdd{tab}NCBI CDD{tab}/data/blastdb/domains/Cdd
+# Kog{tab}KOG (eukaryotes){tab}/data/blastdb/domains/Kog
+# Cog{tab}COG (prokaryotes){tab}/data/blastdb/domains/Cog
+# Pfam{tab}Pfam-A{tab}/data/blastdb/domains/Pfam
+# Smart{tab}SMART{tab}/data/blastdb/domains/Smart
+# Tigr{tab}TIGR{tab}/data/blastdb/domains/Tigr
+# Prk{tab}Protein Clusters database{tab}/data/blastdb/domains/Prk
+# ...etc...
 #
-#See also blastdb.loc which is for any nucleotide BLAST database, and
-#blastdb_p.loc which is for any protein BLAST databases.
+# Alternatively, rather than a "live" mirror of the NCBI databases which
+# are updated automatically, for full reproducibility the Galaxy Team
+# recommend saving date-stamped copies of the databases. In this case
+# your blastdb_d.loc file should include an entry per line for each
+# version you have stored. For example:
+#
+# Cdd_05Jun2010{tab}NCBI CDD 05 Jun 2010{tab}/data/blastdb/domains/05Jun2010/Cdd
+# Cdd_15Aug2010{tab}NCBI CDD 15 Aug 2010{tab}/data/blastdb/domains/15Aug2010/Cdd
+# ...etc...
+#
+# See also blastdb.loc which is for any nucleotide BLAST database, and
+# blastdb_p.loc which is for any protein BLAST databases.
--- a/tool-data/blastdb_p.loc.sample	Fri Jan 30 08:27:28 2015 -0500
+++ b/tool-data/blastdb_p.loc.sample	Tue Mar 03 05:32:18 2015 -0500
@@ -1,30 +1,44 @@
-#This is a sample file distributed with Galaxy that is used to define a
-#list of protein BLAST databases, using three columns tab separated
-#(longer whitespace are TAB characters):
+# This is a sample file distributed with Galaxy that is used to define a
+# list of protein BLAST databases, using three columns tab separated:
 #
-#<unique_id>	<database_caption>	<base_name_path>
+# <unique_id>{tab}<database_caption>{tab}<base_name_path>
+#
+# The captions typically contain spaces and might end with the build date.
+# It is important that the actual database name does not have a space in
+# it, and that there are only two tabs on each line.
 #
-#The captions typically contain spaces and might end with the build date.
-#It is important that the actual database name does not have a space in
-#it, and that there are only two tabs on each line.
+# You can download the NCBI provided protein databases like NR from here:
+# ftp://ftp.ncbi.nlm.nih.gov/blast/db/
 #
-#So, for example, if your database is NR and the path to your base name
-#is /data/blastdb/nr, then the blastdb_p.loc entry would look like this:
-#
-#nr{tab}NCBI NR (non redundant){tab}/data/blastdb/nr
+# For simplicity, many Galaxy servers are configured to offer just a live
+# version of each NCBI BLAST database (updated with the NCBI provided
+# Perl scripts or similar). In this case, we recommend using the case
+# sensitive base-name of the NCBI BLAST databases as the unique id.
+# Consistent naming is important for sharing workflows between Galaxy
+# servers.
 #
-#and your /data/blastdb directory would contain all of the files associated
-#with the database, /data/blastdb/nr.*.
+# For example, consider the NCBI "non-redundant" protein BLAST database
+# where you have downloaded and decompressed the files under /data/blastdb/
+# meaning at the command line BLAST+ would look at the files
+# /data/blastdb/nr.p* when run with something like:
 #
-#Your blastdb_p.loc file should include an entry per line for each "base name"
-#you have stored. For example:
+# $ blastp -db /data/blastdb/nr -query ...
+#
+# In this case use nr (lower case to match the NCBI file naming) as the
+# unique id in the first column of blastdb_p.loc, giving an entry like
+# this:
+#
+# nr{tab}NCBI non-redundant (nr){tab}/data/blastdb/nr
 #
-#nr_05Jun2010	NCBI NR (non redundant) 05 Jun 2010	/data/blastdb/05Jun2010/nr
-#nr_15Aug2010	NCBI NR (non redundant) 15 Aug 2010	/data/blastdb/15Aug2010/nr
-#...etc...
+# Alternatively, rather than a "live" mirror of the NCBI databases which
+# are updated automatically, for full reproducibility the Galaxy Team
+# recommend saving date-stamped copies of the databases. In this case
+# your blastdb_p.loc file should include an entry per line for each
+# version you have stored. For example:
 #
-#You can download the NCBI provided protein databases like NR from here:
-#ftp://ftp.ncbi.nlm.nih.gov/blast/db/
+# nr_05Jun2010{tab}NCBI NR (non redundant) 05 Jun 2010{tab}/data/blastdb/05Jun2010/nr
+# nr_15Aug2010{tab}NCBI NR (non redundant) 15 Aug 2010{tab}/data/blastdb/15Aug2010/nr
+# ...etc...
 #
-#See also blastdb.loc which is for any nucleotide BLAST database, and
-#blastdb_d.loc which is for any protein domains databases (like CDD).
+# See also blastdb.loc which is for any nucleotide BLAST database, and
+# blastdb_d.loc which is for any protein domains databases (like CDD).
--- a/tools/ncbi_blast_plus/README.rst	Fri Jan 30 08:27:28 2015 -0500
+++ b/tools/ncbi_blast_plus/README.rst	Tue Mar 03 05:32:18 2015 -0500
@@ -19,6 +19,9 @@
 These wrappers are available from the Galaxy Tool Shed at:
 http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
 
+In-development test releases are available from the Test Tool Shed at:
+http://testtoolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus/
+
 
 Citation
 ========
--- a/tools/ncbi_blast_plus/repository_dependencies.xml	Fri Jan 30 08:27:28 2015 -0500
+++ b/tools/ncbi_blast_plus/repository_dependencies.xml	Tue Mar 03 05:32:18 2015 -0500
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format).">
-    <repository changeset_revision="236046d1d441" name="blast_datatypes" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="da92fef90117" name="blast_datatypes" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
 </repositories>