changeset 41:af4da561893b draft

Uploaded v0.1.0 preview 4, adding taxonomy columns etc
author peterjc
date Wed, 26 Feb 2014 13:26:14 -0500
parents f83e5d79b6ab
children fa7be13bcba9
files tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml tools/ncbi_blast_plus/ncbi_macros.xml
diffstat 4 files changed, 47 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/README.rst	Wed Feb 26 10:35:01 2014 -0500
+++ b/tools/ncbi_blast_plus/README.rst	Wed Feb 26 13:26:14 2014 -0500
@@ -82,6 +82,14 @@
 * ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR)
 * ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD)
 
+If using the optional taxonomy columns, you will also need to download the
+NCBI taxonomy files (``taxdb.btd`` and ``taxdb.bti`` from ``taxdb.tar.gz`` on
+the BLAST database FTP site). Currently, explicit version tracking of the
+taxonomy is not supported; to use the taxonomy columns you must set the
+``$BLASTDB`` environment variable to include the path where you unzipped the
+taxonomy files. If this is not done, the taxonomy columns like species name
+will appear as ``N/A`` in the tabular output.
+
 The BLAST+ binaries support multi-threaded operation, which is handled via the
 $GALAXY_SLOTS environment variable. This should be set automatically by Galaxy
 via your job runner settings, which allows you to (for example) allocate four
@@ -151,7 +159,8 @@
         - Now depends on package_blast_plus_2_2_28 in ToolShed.
         - Extended tabular output includes 'salltitles' as column 25.
 v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed.
-        - Tablar output now includes option to pick specific columns.
+        - Tabular output now includes option to pick specific columns,
+          including previously unavailable taxonomy columns.
         - BLAST XML to tabular tool supports multiple input files.
         - More detailed descriptions for BLASTN and BLASTP task option.
         - Wrappers for segmasker, dustmasker and convert2blastmask.
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml	Wed Feb 26 10:35:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml	Wed Feb 26 13:26:14 2014 -0500
@@ -65,7 +65,7 @@
         </expand>
     </inputs>
     <outputs>
-        <data name="output1" format="tabular" label="${blast_type.value_label} $query.name vs @ON_DB_SUBJECT@">
+        <data name="output1" format="tabular" label="${blast_type.value} $query.name vs @ON_DB_SUBJECT@">
             <expand macro="output_change_format" />
         </data>
     </outputs>
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml	Wed Feb 26 10:35:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml	Wed Feb 26 13:26:14 2014 -0500
@@ -55,7 +55,7 @@
         </expand>
     </inputs>
     <outputs>
-        <data name="output1" format="tabular" label="${blast_type.value_label} $query.name vs @ON_DB_SUBJECT@">
+        <data name="output1" format="tabular" label="${blast_type.value} $query.name vs @ON_DB_SUBJECT@">
             <expand macro="output_change_format" />
         </data>
     </outputs>
--- a/tools/ncbi_blast_plus/ncbi_macros.xml	Wed Feb 26 10:35:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml	Wed Feb 26 13:26:14 2014 -0500
@@ -59,7 +59,34 @@
                     <option value="slen">slen = Subject sequence length</option>
                     <option value="salltitles">salltitles = All subject title(s), separated by a '&lt;&gt;'</option>
                 </param>
-                <!-- TODO, the other columns, like taxonomy -->
+                <param name="ids_cols" type="select" multiple="true" display="checkboxes" label="Other identifier columns">
+                    <option value="qgi">qgi = Query GI</option>
+                    <option value="qacc">qacc = Query accession</option>
+                    <option value="qaccver">qaccver = Query accession.version</option>
+                    <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option>
+                    <option value="sgi">sgi = Subject GI</option>
+                    <option value="sallgi">sallgi = All subject GIs</option>
+                    <option value="sacc">sacc = Subject accession</option>
+                    <option value="saccver">saccver = Subject accession.version</option>
+                    <option value="sallacc">sallacc = All subject accessions</option>
+                    <option value="stitle">stitle = Subject Title</option>
+                </param>
+                <param name="misc_cols" type="select" multiple="true" display="checkboxes" label="Miscellaneous columns">
+                    <option value="sstrand">sstrand = Subject Strand</option>
+                    <!-- NOTE(review): is 'frames' really worth including, given that 'qframe' and 'sframe' are already offered? -->
+                    <option value="frames">frames = Query and subject frames separated by a '/'</option>
+                    <option value="btop">btop = Blast traceback operations (BTOP)</option>
+                    <option value="qcovs">qcovs = Query Coverage Per Subject</option>
+                    <option value="qcovhsp">qcovhsp = Query Coverage Per HSP</option>
+                </param>
+                <param name="tax_cols" type="select" multiple="true" display="checkboxes" label="Taxonomy columns">
+                    <option value="staxids">staxids = unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)</option>
+                    <!-- TODO: how should the taxonomy data file dependency be handled? If the taxonomy files are missing, these columns give N/A -->
+                    <option value="sscinames">sscinames = unique Subject Scientific Name(s), separated by a ';'</option>
+                    <option value="scomnames">scomnames = unique Subject Common Name(s), separated by a ';'</option>
+                    <option value="sblastnames">sblastnames = unique Subject Blast Name(s), separated by a ';' (in alphabetical order)</option>
+                    <option value="sskingdoms">sskingdoms = unique Subject Super Kingdom(s), separated by a ';' (in alphabetical order)</option>
+                </param>
             </when>
             <when value="5"/>
             <when value="0"/>
@@ -317,7 +344,7 @@
 #elif str($output.out_format)=="cols"
 ##Pick your own columns. Galaxy gives us it comma separated, BLAST+ wants space separated:
 ##TODO - Can we catch the user picking no columns and raise an error here?
-#set cols = (str($output.std_cols)+","+str($output.ext_cols)).replace("None", "").replace(",,", ",").replace(",", " ").strip()
+#set cols = " ".join((str($output.std_cols)+","+str($output.ext_cols)+","+str($output.ids_cols)+","+str($output.misc_cols)+","+str($output.tax_cols)).replace("None", "").replace(",", " ").split())
     -outfmt "6 $cols"
 #else:
     -outfmt $output.out_format
@@ -379,7 +406,7 @@
 ====== ========= ============================================
 
 The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
+but this takes longer to calculate. Many commonly used extra columns are
 included by selecting the extended tabular output. The extra columns are
 included *after* the standard 12 columns. This is so that you can write
 workflow filtering steps that accept either the 12 or 25 column tabular
@@ -403,7 +430,11 @@
     25 salltitles    All subject title(s), separated by a '&lt;&gt;'
 ====== ============= ===========================================
 
-The third option is BLAST XML output, which is designed to be parsed by
+The third option is to customise the tabular output by selecting which
+columns you want, from the standard set of 12, the default set of 25,
+or any of the additional columns BLAST+ offers (including species name).
+
+The fourth option is BLAST XML output, which is designed to be parsed by
 another program, and is understood by some Galaxy tools.
 
 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).