annotate fasta_to_tabular.xml @ 1:5cabbe4cfaf4 draft

planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author devteam
date Tue, 13 Oct 2015 12:20:15 -0400
parents ae709fd50581
children ff4751ce764d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
1 <tool id="fasta2tab" name="FASTA-to-Tabular" version="1.1.0">
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
2 <description>converter</description>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
3 <command interpreter="python">fasta_to_tabular.py $input $output $keep_first $descr_columns</command>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
4 <inputs>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
5 <param name="input" type="data" format="fasta" label="Convert these sequences"/>
1
5cabbe4cfaf4 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
6 <param name="descr_columns" type="integer" value="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and description (rest) as two columns, or 1 to give a single column">
0
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
7 <validator type="in_range" min="1" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
8 </param>
1
5cabbe4cfaf4 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
9 <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="Applies only to the first column taken from the title string ('0' = keep the whole thing), useful when your sequence identifiers are all the same length.">
0
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
10 <validator type="in_range" min="0" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
11 </param>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
12 </inputs>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
13 <outputs>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
14 <data name="output" format="tabular"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
15 </outputs>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
16 <tests>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
17 <test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
18 <param name="input" value="454.fasta" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
19 <param name="descr_columns" value="1"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
20 <param name="keep_first" value="0"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
21 <output name="output" file="fasta_to_tabular_out1.tabular" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
22 </test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
23
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
24 <test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
25 <param name="input" value="4.fasta" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
26 <param name="descr_columns" value="1"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
27 <param name="keep_first" value="0"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
28 <output name="output" file="fasta_to_tabular_out2.tabular" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
29 </test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
30
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
31 <test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
32 <param name="input" value="454.fasta" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
33 <param name="descr_columns" value="1"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
34 <param name="keep_first" value="14"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
35 <output name="output" file="fasta_to_tabular_out3.tabular" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
36 </test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
37
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
38 <test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
39 <param name="input" value="454.fasta" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
40 <param name="descr_columns" value="2"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
41 <param name="keep_first" value="0"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
42 <output name="output" file="fasta_to_tabular_out4.tabular" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
43 </test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
44
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
45 <test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
46 <param name="input" value="454.fasta" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
47 <param name="descr_columns" value="5"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
48 <param name="keep_first" value="0"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
49 <output name="output" file="fasta_to_tabular_out5.tabular" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
50 </test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
51
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
52 <test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
53 <param name="input" value="454.fasta" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
54 <param name="descr_columns" value="5"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
55 <param name="keep_first" value="10"/>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
56 <output name="output" file="fasta_to_tabular_out6.tabular" />
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
57 </test>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
58
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
59 </tests>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
60 <help>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
61
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
62 **What it does**
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
63
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
64 This tool converts FASTA formatted sequences to TAB-delimited format.
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
65
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
66 Many tools consider the first word of the FASTA "&gt;" title line to be an identifier, and any remaining text to be a free form description.
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
67 It is therefore useful to split this text into two columns in Galaxy (identifier and any description) by setting **How many columns to divide title string into?** to **2**.
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
68 In some cases the description can be usefully broken up into more columns -- see the examples below.
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
69
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
70 The option *How many title characters to keep?* allows you to keep only a specified number of characters from the beginning of each FASTA title.
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
71 With the introduction of the **How many columns to divide title string into?** option this setting is of limited use, but does still allow you to truncate the identifier.
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
72
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
73 -----
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
74
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
75 **Example**
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
76
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
77 Suppose you have the following FASTA formatted sequences from a Roche (454) FLX sequencing run::
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
78
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
79 &gt;EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
80 TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
81 TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
82 &gt;EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
83 AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAA
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
84
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
85 Running this tool with the default settings will produce this (2 column output):
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
86
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
87 ========================================================================== =======================================
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
88 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
89 EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_  AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
90 ========================================================================== =======================================
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
91
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
92 Having the full title line (the FASTA "&gt;" line text) as a column is not always ideal.
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
93
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
94 The **How many title characters to keep?** option is useful if your identifiers are all the same length.
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
95 In this example the identifier is 14 characters, so setting **How many title characters to keep?** to **14** (and leaving **How many columns to divide title string into?** as the default, **1**) will produce this (2 column output):
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
96
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
97 ============== =======================================
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
98 EYKX4VC02EQLO5 TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
99 EYKX4VC02D4GS2 AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
100 ============== =======================================
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
101
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
102 If however your FASTA file has identifiers of variable length, it is better to split the text into at least two columns.
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
103 Running this tool with **How many columns to divide title string into?** set to **2** will produce this (3 column output):
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
104
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
105 ============== =========================================================== =======================================
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
106 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
107 EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_  AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
108 ============== =========================================================== =======================================
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
109
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
110 Running this tool with **How many columns to divide title string into?** set to **5** will produce this (6 column output):
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
111
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
112 ============== ========== ============ ======== ========================== =======================================
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
113 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
114 EYKX4VC02D4GS2 length=60  xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
115 ============== ========== ============ ======== ========================== =======================================
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
116
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
117 Running this tool with **How many columns to divide title string into?** set to **5** and **How many title characters to keep?** set to **10** will produce this (6 column output).
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
118 Notice that only the first column is truncated to 10 characters -- and be careful not to trim your sequence names too much (generally they should be unique):
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
119
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
120 ========== ========== ============ ======== ========================== =======================================
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
121 EYKX4VC02E length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
122 EYKX4VC02D length=60  xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
123 ========== ========== ============ ======== ========================== =======================================
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
124
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
125 Note the sequences have been truncated for display purposes in the above tables.
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
126
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
127 </help>
ae709fd50581 Imported from capsule None
devteam
parents:
diff changeset
128 </tool>