annotate kraken.xml @ 1:656215d2a793 draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/blob/master/tool_collections/kraken/kraken/ commit cb1743eafd4ca98be0148d557770ef8635cc8d4c-dirty
author devteam
date Tue, 19 May 2015 16:41:06 -0400
parents 7d6b55e6417e
children 898ded2d4fff
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7d6b55e6417e Uploaded
devteam
parents:
diff changeset
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy wrapper for Kraken.

    Runs kraken on one FASTA/FASTQ dataset, optionally writes the classified
    and unclassified reads to separate datasets, then runs kraken-translate
    on the classification table.

    The @SET_DATABASE_PATH@ and @INPUT_DATABASE@ tokens and the
    input_database / requirements / stdio / version_command / citations
    macros are not defined in this file; they are expected to come from the
    imported macros.xml.

    Version 1.0.1: pass the input format to kraken explicitly, declare the
    read outputs as sequence data, and drop the histogram output that no
    parameter or command ever produced.
-->
<tool id="kraken" name="Kraken" version="1.0.1">
    <description>assign taxonomic labels to short DNA reads</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <command>
<![CDATA[
        @SET_DATABASE_PATH@ &&
        kraken --threads \${GALAXY_SLOTS:-1} @INPUT_DATABASE@
            ## Kraken 1 assumes FASTA unless told otherwise, so the format
            ## flag must follow the datatype of the selected dataset
            ## (fastqsanger is a subtype of fastq).
            #if $input_sequences.is_of_type('fastq'):
                --fastq-input
            #else:
                --fasta-input
            #end if
            "$input_sequences"
            #if $split_reads:
                --classified-out "${classified_out}" --unclassified-out "${unclassified_out}"
            #end if
            --output "${output}" &&
        ## Quoted like every other substitution in this command.
        kraken-translate --db "${kraken_database.fields.name}" "${output}" > "${translated}"
]]>
    </command>
    <inputs>
        <param name="input_sequences" type="data" format="fasta,fastq,fastqsanger" label="Input sequences" />
        <param name="split_reads" type="boolean" checked="false" label="Output classified and unclassified reads" />
        <expand macro="input_database" />
    </inputs>
    <outputs>
        <!-- The two read outputs hold sequence records, not tables; they are
             only created when split_reads is enabled. NOTE(review): kraken is
             expected to write these as FASTA by default; confirm against the
             pinned kraken version. -->
        <data name="classified_out" format="fasta" label="${tool.name} on ${on_string}: Classified reads">
            <filter>(split_reads)</filter>
        </data>
        <data name="unclassified_out" format="fasta" label="${tool.name} on ${on_string}: Unclassified reads">
            <filter>(split_reads)</filter>
        </data>
        <data name="output" format="tabular" label="${tool.name} on ${on_string}: Classification" />
        <data name="translated" format="tabular" label="${tool.name} on ${on_string}: Translated classification" />
    </outputs>
    <help>
<![CDATA[
**What it does**

Kraken is a taxonomic sequence classifier that assigns taxonomic labels to short DNA reads. It does this by examining the k-mers within a read and querying a database with those k-mers. This database contains a mapping of every k-mer in Kraken's genomic library to the lowest common ancestor (LCA) in a taxonomic tree of all genomes that contain that k-mer. The set of LCA taxa that correspond to the k-mers in a read are then analyzed to create a single taxonomic label for the read; this label can be any of the nodes in the taxonomic tree. Kraken is designed to be rapid, sensitive, and highly precise. Our tests on various real and simulated data have shown Kraken to have sensitivity slightly lower than Megablast with precision being slightly higher. On a set of simulated 100 bp reads, Kraken processed over 1.3 million reads per minute on a single core in normal operation, and over 4.1 million reads per minute in quick operation.

**Usage**

Kraken classifies a set of sequences (reads) with the commands below::

    kraken --db $DBNAME sequences.fa > sequences.kraken

or, for FASTQ input::

    kraken --db $DBNAME --fastq-input sequences.fq > sequences.kraken

- ``$DBNAME`` is the name of the Kraken database to be used.
- ``sequences.fa`` or ``sequences.fq`` is the FASTA or FASTQ input file containing the desired sequences for classification.
- ``sequences.kraken`` is the generated output.

**Options**

The kraken program allows several different sequencing modifiers (parameters):

**Multithreading:** Use the ``--threads NUM`` switch to use multiple threads.

**Sequence filtering:** Classified or unclassified sequences can be sent to a file for later processing, using the ``--classified-out`` and ``--unclassified-out`` switches, respectively.

**Output Format**

Each sequence classified by Kraken results in a single line of output. Output lines contain five tab-delimited fields; from left to right, they are:

1. "C"/"U": one letter code indicating that the sequence was either classified or unclassified.
2. The sequence ID, obtained from the FASTA/FASTQ header.
3. The taxonomy ID Kraken used to label the sequence; this is 0 if the sequence is unclassified.
4. The length of the sequence in bp.
5. A space-delimited list indicating the LCA mapping of each k-mer in the sequence. For example, "562:13 561:4 A:31 0:1 562:3" would indicate that:

   a) the first 13 k-mers mapped to taxonomy ID #562
   b) the next 4 k-mers mapped to taxonomy ID #561
   c) the next 31 k-mers contained an ambiguous nucleotide
   d) the next k-mer was not in the database
   e) the last 3 k-mers mapped to taxonomy ID #562
]]>
    </help>
    <expand macro="citations" />
</tool>