fasta_to_tabular: fasta_to_tabular.xml annotate

annotate fasta_to_tabular.xml @ 1:5cabbe4cfaf4 draft

planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734

author	devteam
date	Tue, 13 Oct 2015 12:20:15 -0400
parents	ae709fd50581
children	ff4751ce764d

rev	line source
0 ae709fd50581 Imported from capsule None devteam parents: diff changeset	1 <tool id="fasta2tab" name="FASTA-to-Tabular" version="1.1.0">
ae709fd50581 Imported from capsule None devteam parents: diff changeset	2 <description>converter</description>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	3 <command interpreter="python">fasta_to_tabular.py $input $output $keep_first $descr_columns</command>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	4 <inputs>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	5 <param name="input" type="data" format="fasta" label="Convert these sequences"/>
1 5cabbe4cfaf4 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 devteam parents: 0 diff changeset	6 <param name="descr_columns" type="integer" value="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and description (rest) as two columns, or 1 to give a single column">
0 ae709fd50581 Imported from capsule None devteam parents: diff changeset	7 <validator type="in_range" min="1" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	8 </param>
1 5cabbe4cfaf4 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 devteam parents: 0 diff changeset	9 <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="Applies only to the first column taken from the title string ('0' = keep the whole thing), useful when your sequence identifiers are all the same length.">
0 ae709fd50581 Imported from capsule None devteam parents: diff changeset	10 <validator type="in_range" min="0" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	11 </param>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	12 </inputs>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	13 <outputs>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	14 <data name="output" format="tabular"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	15 </outputs>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	16 <tests>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	17 <test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	18 <param name="input" value="454.fasta" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	19 <param name="descr_columns" value="1"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	20 <param name="keep_first" value="0"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	21 <output name="output" file="fasta_to_tabular_out1.tabular" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	22 </test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	23
ae709fd50581 Imported from capsule None devteam parents: diff changeset	24 <test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	25 <param name="input" value="4.fasta" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	26 <param name="descr_columns" value="1"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	27 <param name="keep_first" value="0"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	28 <output name="output" file="fasta_to_tabular_out2.tabular" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	29 </test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	30
ae709fd50581 Imported from capsule None devteam parents: diff changeset	31 <test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	32 <param name="input" value="454.fasta" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	33 <param name="descr_columns" value="1"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	34 <param name="keep_first" value="14"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	35 <output name="output" file="fasta_to_tabular_out3.tabular" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	36 </test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	37
ae709fd50581 Imported from capsule None devteam parents: diff changeset	38 <test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	39 <param name="input" value="454.fasta" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	40 <param name="descr_columns" value="2"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	41 <param name="keep_first" value="0"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	42 <output name="output" file="fasta_to_tabular_out4.tabular" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	43 </test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	44
ae709fd50581 Imported from capsule None devteam parents: diff changeset	45 <test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	46 <param name="input" value="454.fasta" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	47 <param name="descr_columns" value="5"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	48 <param name="keep_first" value="0"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	49 <output name="output" file="fasta_to_tabular_out5.tabular" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	50 </test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	51
ae709fd50581 Imported from capsule None devteam parents: diff changeset	52 <test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	53 <param name="input" value="454.fasta" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	54 <param name="descr_columns" value="5"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	55 <param name="keep_first" value="10"/>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	56 <output name="output" file="fasta_to_tabular_out6.tabular" />
ae709fd50581 Imported from capsule None devteam parents: diff changeset	57 </test>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	58
ae709fd50581 Imported from capsule None devteam parents: diff changeset	59 </tests>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	60 <help>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	61
ae709fd50581 Imported from capsule None devteam parents: diff changeset	62 What it does
ae709fd50581 Imported from capsule None devteam parents: diff changeset	63
ae709fd50581 Imported from capsule None devteam parents: diff changeset	64 This tool converts FASTA formatted sequences to TAB-delimited format.
ae709fd50581 Imported from capsule None devteam parents: diff changeset	65
ae709fd50581 Imported from capsule None devteam parents: diff changeset	66 Many tools consider the first word of the FASTA ">" title line to be an identifier, and any remaining text to be a free form description.
ae709fd50581 Imported from capsule None devteam parents: diff changeset	67 It is therefore useful to split this text into two columns in Galaxy (identifier and any description) by setting How many columns to divide title string into? to 2.
ae709fd50581 Imported from capsule None devteam parents: diff changeset	68 In some cases the description can be usefully broken up into more columns -- see the examples below.
ae709fd50581 Imported from capsule None devteam parents: diff changeset	69
ae709fd50581 Imported from capsule None devteam parents: diff changeset	70 The option How many title characters to keep? allows you to keep only a specified number of characters from the beginning of each FASTA title line.
ae709fd50581 Imported from capsule None devteam parents: diff changeset	71 With the introduction of the How many columns to divide title string into? option this setting is of limited use, but does still allow you to truncate the identifier.
ae709fd50581 Imported from capsule None devteam parents: diff changeset	72
ae709fd50581 Imported from capsule None devteam parents: diff changeset	73 -----
ae709fd50581 Imported from capsule None devteam parents: diff changeset	74
ae709fd50581 Imported from capsule None devteam parents: diff changeset	75 Example
ae709fd50581 Imported from capsule None devteam parents: diff changeset	76
ae709fd50581 Imported from capsule None devteam parents: diff changeset	77 Suppose you have the following FASTA formatted sequences from a Roche (454) FLX sequencing run::
ae709fd50581 Imported from capsule None devteam parents: diff changeset	78
ae709fd50581 Imported from capsule None devteam parents: diff changeset	79 >EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_
ae709fd50581 Imported from capsule None devteam parents: diff changeset	80 TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG
ae709fd50581 Imported from capsule None devteam parents: diff changeset	81 TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG
ae709fd50581 Imported from capsule None devteam parents: diff changeset	82 >EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_
ae709fd50581 Imported from capsule None devteam parents: diff changeset	83 AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAA
ae709fd50581 Imported from capsule None devteam parents: diff changeset	84
ae709fd50581 Imported from capsule None devteam parents: diff changeset	85 Running this tool with the default settings will produce this (2 column output):
ae709fd50581 Imported from capsule None devteam parents: diff changeset	86
ae709fd50581 Imported from capsule None devteam parents: diff changeset	87 ========================================================================== =======================================
ae709fd50581 Imported from capsule None devteam parents: diff changeset	88 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
ae709fd50581 Imported from capsule None devteam parents: diff changeset	89 EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
ae709fd50581 Imported from capsule None devteam parents: diff changeset	90 ========================================================================== =======================================
ae709fd50581 Imported from capsule None devteam parents: diff changeset	91
ae709fd50581 Imported from capsule None devteam parents: diff changeset	92 Having the full title line (the FASTA ">" line text) as a column is not always ideal.
ae709fd50581 Imported from capsule None devteam parents: diff changeset	93
ae709fd50581 Imported from capsule None devteam parents: diff changeset	94 The How many title characters to keep? option is useful if your identifiers are all the same length.
ae709fd50581 Imported from capsule None devteam parents: diff changeset	95 In this example the identifier is 14 characters, so setting How many title characters to keep? to 14 (and leaving How many columns to divide title string into? as the default, 1) will produce this (2 column output):
ae709fd50581 Imported from capsule None devteam parents: diff changeset	96
ae709fd50581 Imported from capsule None devteam parents: diff changeset	97 ============== =======================================
ae709fd50581 Imported from capsule None devteam parents: diff changeset	98 EYKX4VC02EQLO5 TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
ae709fd50581 Imported from capsule None devteam parents: diff changeset	99 EYKX4VC02D4GS2 AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
ae709fd50581 Imported from capsule None devteam parents: diff changeset	100 ============== =======================================
ae709fd50581 Imported from capsule None devteam parents: diff changeset	101
ae709fd50581 Imported from capsule None devteam parents: diff changeset	102 If however your FASTA file has identifiers of variable length, it is better to split the text into at least two columns.
ae709fd50581 Imported from capsule None devteam parents: diff changeset	103 Running this tool with How many columns to divide title string into? set to 2 will produce this (3 column output):
ae709fd50581 Imported from capsule None devteam parents: diff changeset	104
ae709fd50581 Imported from capsule None devteam parents: diff changeset	105 ============== =========================================================== =======================================
ae709fd50581 Imported from capsule None devteam parents: diff changeset	106 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
ae709fd50581 Imported from capsule None devteam parents: diff changeset	107 EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
ae709fd50581 Imported from capsule None devteam parents: diff changeset	108 ============== =========================================================== =======================================
ae709fd50581 Imported from capsule None devteam parents: diff changeset	109
ae709fd50581 Imported from capsule None devteam parents: diff changeset	110 Running this tool with How many columns to divide title string into? set to 5 will produce this (6 column output: five title columns plus the sequence):
ae709fd50581 Imported from capsule None devteam parents: diff changeset	111
ae709fd50581 Imported from capsule None devteam parents: diff changeset	112 ============== ========== ============ ======== ========================== =======================================
ae709fd50581 Imported from capsule None devteam parents: diff changeset	113 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
ae709fd50581 Imported from capsule None devteam parents: diff changeset	114 EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
ae709fd50581 Imported from capsule None devteam parents: diff changeset	115 ============== ========== ============ ======== ========================== =======================================
ae709fd50581 Imported from capsule None devteam parents: diff changeset	116
ae709fd50581 Imported from capsule None devteam parents: diff changeset	117 Running this tool with How many columns to divide title string into? set to 5 and How many title characters to keep? set to 10 will produce this (6 column output).
ae709fd50581 Imported from capsule None devteam parents: diff changeset	118 Notice that only the first column is truncated to 10 characters -- and be careful not to trim your sequence names too much (generally they should be unique):
ae709fd50581 Imported from capsule None devteam parents: diff changeset	119
ae709fd50581 Imported from capsule None devteam parents: diff changeset	120 ========== ========== ============ ======== ========================== =======================================
ae709fd50581 Imported from capsule None devteam parents: diff changeset	121 EYKX4VC02E length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
ae709fd50581 Imported from capsule None devteam parents: diff changeset	122 EYKX4VC02D length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
ae709fd50581 Imported from capsule None devteam parents: diff changeset	123 ========== ========== ============ ======== ========================== =======================================
ae709fd50581 Imported from capsule None devteam parents: diff changeset	124
ae709fd50581 Imported from capsule None devteam parents: diff changeset	125 Note the sequences have been truncated for display purposes in the above tables.
ae709fd50581 Imported from capsule None devteam parents: diff changeset	126
ae709fd50581 Imported from capsule None devteam parents: diff changeset	127 </help>
ae709fd50581 Imported from capsule None devteam parents: diff changeset	128 </tool>

Mercurial > repos > devteam > fasta_to_tabular

annotate fasta_to_tabular.xml @ 1:5cabbe4cfaf4 draft