diff kraken2tax.xml @ 0:5f655b0279cc draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/blob/master/tool_collections/taxonomy/kraken2tax/ commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
author devteam
date Mon, 09 Nov 2015 12:27:14 -0500
parents
children 65068f909cc7
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kraken2tax.xml	Mon Nov 09 12:27:14 2015 -0500
@@ -0,0 +1,103 @@
+<tool id="Kraken2Tax" name="Convert Kraken" version="1.1">
+  <description>data to Galaxy taxonomy representation</description>
+    <requirements>
+        <requirement type="package" version="4.1.0">gnu_awk</requirement>
+        <requirement type="package" version="8d245994d7">gb_taxonomy</requirement>
+    </requirements>
+  <command>
+<![CDATA[
+  awk '{ print \$${read_name}, \$${tax_id} }' OFS="\t" "${input}" | taxonomy-reader "${ncbi_taxonomy.fields.path}/names.dmp" "${ncbi_taxonomy.fields.path}/nodes.dmp" 1 > "${out_file}"
+]]>
+  </command>
+  <inputs>
+    <param format="tabular" name="input" type="data" label="Choose dataset to convert"/>
+    <param label="Select a taxonomy database" name="ncbi_taxonomy" type="select">
+        <options from_data_table="ncbi_taxonomy">
+            <validator message="No built-in databases are available" type="no_options" />
+        </options>
+    </param>
+    <param name="read_name" label="Read name" type="data_column" data_ref="input" value="2" help="Select column containing read names"/>
+    <param name="tax_id" label="Taxonomy ID field" type="data_column" data_ref="input" numerical="True" value="3" help="Select column containing taxonomy ID"/>
+  </inputs>
+  <outputs>
+    <data format="taxonomy" name="out_file" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="input" ftype="tabular" value="kraken2tax.txt"/>
+      <param name="read_name" value="2"/>
+      <param name="tax_id" value="3"/>
+      <output name="out_file" file="kraken2tax-test1.txt"/>
+    </test>
+  </tests>
+  <help>
+
+.. class:: infomark
+
+Use *Filter and Sort->Filter* to restrict output of this tool to desired taxonomic ranks. You can also use *Text Manipulation->Cut* to remove unwanted columns from the output.
+
+------
+
+**What it does**
+
+This tool is designed to translate results of the Kraken metagenomic classifier (see citations below) to the full representation of NCBI taxonomy. It does so by using Taxonomic ID field provided by Kraken. The output of this tool can be directly visualized by the Krona tool. It is based on `gb_taxonomy_tools` developed by https://github.com/spond.
+
+-------
+
+**Example**
+
+Suppose you have Kraken output that looks like this (here the second field is the name of a sequencing read and the third is the taxonomic ID)::
+
+  C   Read_1   9606  465   Q:1
+
+and you want to obtain the full taxonomic representation for this read. Setting **Read name** and **Taxonomy ID field** parameters to **2** and **3**, respectively, will produce the following output (you may need to scroll sideways to see the entire line)::
+
+  1      2    3    4         5       6 7 8        9        10            11       12 13               14       15         16          17        18  19  20 21  22  23           24
+  Read_1 9606 root Eukaryota Metazoa n n Chordata Craniata Gnathostomata Mammalia n  Euarchontoglires Primates Haplorrhini Hominoidea Hominidae n   n   n  Homo n  Homo sapiens n
+
+In other words the tool printed *Read name*, *Taxonomy ID field*, and appended 22 columns containing taxonomic ranks from Superkingdom to Subspecies. Below is a formal definition of the output columns::
+
+    Column Definition
+   ------- -------------------------------------------------
+         1 Name (specified by 'Read name' dropdown)
+         2 taxID   (specified by 'Taxonomy ID field' dropdown)
+         3 root
+         4 superkingdom
+         5 kingdom
+         6 subkingdom
+         7 superphylum
+         8 phylum
+         9 subphylum
+        10 superclass
+        11 class
+        12 subclass
+        13 superorder
+        14 order
+        15 suborder
+        16 superfamily
+        17 family
+        18 subfamily
+        19 tribe
+        20 subtribe
+        21 genus
+        22 subgenus
+        23 species
+        24 subspecies
+
+------
+
+.. class:: warningmark
+
+**Why do I have these "n" things?** 
+
+Be aware that the NCBI taxonomy (ftp://ftp.ncbi.nih.gov/pub/taxonomy/) this tool relies upon is incomplete.  This means that for many species one or more ranks are absent and represented as "**n**". In the above example *subkingdom*, *superphylum* etc. are missing.
+
+
+  </help>
+  <citations>
+    <citation type="doi">10.1186/gb-2014-15-3-r46</citation>
+    <citation type="doi"> 10.1101/gr.094508.109</citation>
+  </citations>
+</tool>
+
+