comparison POGs.xml @ 0:b2895c835ea8 draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 38545eb765e0df7fcc6b8130e8e5f87cf4481122
author abims-sbr
date Thu, 13 Apr 2017 05:46:54 -0400
parents
children be2128ad0030
comparison
equal deleted inserted replaced
-1:000000000000 0:b2895c835ea8
1 <tool name="POGs" id="POGs" version="1.0">
2
3 <description>
4 Find orthologous groups
5 </description>
6
7 <macros>
8 <import>macros.xml</import>
9 </macros>
10
11 <requirements>
12 <expand macro="python_required" />
13 <!-- <requirement type="package" version="1.3.1">samtools</requirement> -->
14 </requirements>
15
16 <command>
17 <![CDATA[
18 python $__tool_directory__/scripts/S01_get_locus_ortholog_part1.py ${zip_file}
19 > ${output} &&
20
21 python $__tool_directory__/scripts/S02_get_locus_ortholog_part2.py ${zip}
22 >> ${output};
23 ]]>
24 </command>
25
26 <inputs>
27 <param name="zip" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="Contains the files filter after the tool oase" />
28 <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file " help="Contains homologous sequences in PROTEIC format, ONLY A ZIP FILE" />
29 </inputs>
30
31 <outputs>
32 <data format="txt" name="output" label="POGs" />
33 <data format="no_unzip.zip" name="output_zip" label="POGs_locus_orthologs_unaligned" from_work_dir="POGs_locus_orthologs_unaligned.zip" />
34 </outputs>
35
36 <tests>
37 <test>
38 <param name="zip" ftype="zip" value="filter_oase_results_for_pogs.zip" />
39 <param name="zip_file" ftype="zip" value="test_03_output_Pairwise_PROT_inputPOGS.zip" />
40 <output name="output" value="test_03.out" />
41 </test>
42 </tests>
43
44 <help>
45 ============
46 What it does
47 ============
48
49 | This tool takes a zip archive containing nucleic fasta sequence files and a zip archive containing one file per pairwise comparison with the homologous sequences (in protein format). It searches for orthologous sequences.
50 | There are 2 outputs.
51 |
52 | The script was written by **Eric Fontanillas**.
53 | The wrapper was written by **Julie Baffard**.
54
55
56 --------
57
58 ======
59 Inputs
60 ======
61
62 option **Select a zip file containing the input files** :
63
64 | the input zip file must have the extension .ort.zip
65 | At the beginning, when you upload your input, you have to change the extension .zip to .ort.zip
66
67
68 --------
69
70 =======
71 Outputs
72 =======
73
74 This tool produces the following files :
75
76 **POGs** :
77
78 | is the general output. It gives various information : the number of loci, each locus, ...
79 | For example, it lists all the loci containing 2 species :
80 | 2_sp : [[name_of_sequence,name_of_sequence],...,[name_of_sequence, name_of_sequence]]
81 | The same applies to loci containing 3 species, 4 species, ...
82 |
83
84 **POGs_locus_orthologs_unaligned** :
85
86 | is the output (in zip format) which contains each locus.
87 | Each file corresponds to one locus.
88
89 The sequences of each locus are in nucleotide format.
90
91 .. class:: warningmark
92
93 The zip output has to be downloaded and its files extracted with file archiver software; you cannot view them with the "eye icon" through the interface.
94
95 --------
96
97 ===============
98 Working Example
99 ===============
100
101 ---------------
102 The input files
103 ---------------
104
105
106 | 3 files with 200 nucleic sequences each : Ac.fasta Ap.fasta Pf.fasta.
107 | a zip file with 3 files containing the homologous sequences of each pairwise comparison.
108 |
109
110 ----------------
111 The output files
112 ----------------
113
114 **POGs**
115
116 | Number of locus = 56
117 |
118 | NUMBER OF REMAINING LOCUS AFTER 1RST TREATMENT [INTRA LOCUS] = 55
119 |
120 | NUMBER OF REMAINING LOCUS AFTER 2ND TREATMENT [INTER LOCUS] = 53
121
122 | 2_sp : [['&gt;Ac111_5/5_0.688_714', '&gt;Ap70_3/8_0.688_1127'], ['&gt;Ac99_4/4_0.571_7188', '&gt;Ap74_6/9_0.406_9467'], ['&gt;Ac200_7/8_0.694_1836', '&gt;Ap53_1/1_1.000_938'],
123 | ['&gt;Ac181_6/10_0.432_8240', '&gt;Ap103_3/10_0.420_9102'], ['&gt;Ac4_2/2_1.000_230', '&gt;Ap27_4/6_0.556_8404'], ['&gt;Ac189_9/10_0.839_7194', '&gt;Ap97_3/4_0.452_3023'],
124 | ['&gt;Ac145_4/5_0.538_1334', '&gt;Ap168_5/6_0.824_2007'], ['&gt;Ac178_1/1_1.000_758', '&gt;Ap202_3/3_0.905_2656'], ['&gt;Ac100_1/2_1.000_2338', '&gt;Ap77_5/8_0.405_866'],
125 | ['&gt;Ac86_3/5_0.780_4131', '&gt;Ap41_5/10_0.362_5218'], ['&gt;Ac140_4/4_0.886_637', '&gt;Ap62_1/3_0.667_482'], ['&gt;Ac144_1/1_1.000_4240', '&gt;Ap162_1/1_1.000_4925'],
126 | ['&gt;Ac34_3/8_0.548_7600', '&gt;Ap144_6/10_0.690_9671'], ['&gt;Ac187_1/7_0.618_12870', '&gt;Ap73_7/10_0.343_2424'], ['&gt;Ac120_1/1_1.000_522', '&gt;Pf74_1/1_1.000_506'],
127 | ['&gt;Ac108_2/10_0.395_1982', '&gt;Pf26_1/1_1.000_1717'], ['&gt;Ac88_4/4_0.833_906', '&gt;Pf102_1/1_1.000_1876'], ['&gt;Ac202_1/1_1.000_247', '&gt;Pf36_2/3_0.800_1482'],
128 | ['&gt;Ac121_1/1_1.000_659', '&gt;Pf7_1/1_1.000_2255'], ['&gt;Ac129_1/5_0.636_1798', '&gt;Pf100_2/4_0.625_1698'], ['&gt;Ac172_1/1_1.000_348', '&gt;Pf118_1/1_1.000_1108'],
129 | ['&gt;Ac66_3/4_0.462_1465', '&gt;Pf229_1/1_1.000_1541'], ['&gt;Ac47_2/7_0.412_1703', '&gt;Pf1_1/2_1.000_1890'], ['&gt;Ac171_3/3_0.926_338', '&gt;Pf4_1/1_1.000_1337'],
130 | ['&gt;Ac194_1/5_0.875_275', '&gt;Pf106_1/2_1.000_994'], ['&gt;Ap2_1/2_1.000_264', '&gt;Pf19_1/1_1.000_1879'], ['&gt;Ap35_1/1_1.000_2505', '&gt;Pf70_1/1_1.000_1462'],
131 | ['&gt;Ap116_3/4_0.600_1380', '&gt;Pf78_1/1_1.000_1409'], ['&gt;Ap193_8/9_0.614_1707', '&gt;Pf21_1/8_0.630_1314'], ['&gt;Ap51_1/2_1.000_4292', '&gt;Pf164_1/1_1.000_1021'],
132 | ['&gt;Ap170_1/3_0.778_1303', '&gt;Pf87_2/4_0.700_773'], ['&gt;Ap186_6/10_0.521_4506', '&gt;Pf72_1/1_1.000_2344'],['&gt;Ap114_1/2_1.000_859', '&gt;Pf192_1/1_1.000_957'],
133 | ['&gt;Ap65_3/7_0.278_914', '&gt;Pf94_1/1_1.000_1135'], ['&gt;Ap164_4/5_0.560_219', '&gt;Pf163_4/4_0.556_866'], ['&gt;Ap199_3/3_0.846_958', '&gt;Pf58_1/3_0.667_1387'],
134 | ['&gt;Ap183_7/8_0.326_2702', '&gt;Pf166_1/2_1.000_2528'], ['&gt;Ap89_3/3_0.905_211', '&gt;Pf3_4/4_0.688_1776'], ['&gt;Ap34_1/1_1.000_2528', '&gt;Pf76_3/3_0.500_1714'],
135 | ['&gt;Ap16_1/3_0.714_734', '&gt;Pf139_1/1_1.000_508'], ['&gt;Ap119_9/9_0.595_3402', '&gt;Pf108_1/1_1.000_1932'], ['&gt;Ap120_5/10_0.223_8988', '&gt;Pf57_1/1_1.000_2461'],
136 | ['&gt;Ap43_1/3_0.778_644', '&gt;Pf2_1/1_1.000_582']]
137 |
138 | 3_sp : [['&gt;Ac112_1/2_1.000_2711', '&gt;Ap90_7/10_0.397_2604', '&gt;Pf79_1/1_1.000_4515'], ['&gt;Ac148_1/1_1.000_596', '&gt;Ap32_1/1_1.000_664', '&gt;Pf180_1/2_1.000_2336'],
139 | ['&gt;Ac177_1/2_1.000_1664', '&gt;Ap147_1/2_1.000_1728', '&gt;Pf178_1/1_1.000_1671'], ['&gt;Ac124_3/3_0.375_1572', '&gt;Ap177_1/3_0.667_625', '&gt;Pf115_1/1_1.000_557'],
140 | ['&gt;Ac53_3/4_0.375_999', '&gt;Ap40_1/1_1.000_1011', '&gt;Pf201_1/1_1.000_908'], ['&gt;Ac65_1/2_0.812_306', '&gt;Ap59_7/10_0.227_2379', '&gt;Pf32_3/3_0.818_1014'],
141 | ['&gt;Ac190_7/10_0.319_2573', '&gt;Ap166_7/10_0.279_1859', '&gt;Pf13_7/7_0.514_1779'], ['&gt;Ac1_1/2_1.000_6683', '&gt;Ap55_1/9_1.000_267', '&gt;Pf6_1/2_1.000_3095'],
142 | ['&gt;Ac91_2/2_0.696_4298', '&gt;Ap75_5/5_0.810_1434', '&gt;Pf69_1/1_1.000_1712'], ['&gt;Ac29_6/9_0.639_3877', '&gt;Ap124_4/8_0.267_1557', '&gt;Pf18_43/95_1.000_2422']]
143 |
144 |
145
146 **POGs_locus_orthologs_unaligned**
147
148 | Save as *Galaxy{number}-[POGs_locus_orthologs_unaligned].zip*
149 | If you unzip the file, a number of files are extracted (depending on the number of loci) : locus{nb}_sp{nb_of_species}.fasta
150 | For example the file locus1_sp2.fasta :
151 |
152 | &gt;Ac111_5/5_0.688_714
153 | ATCCGGAATGACCTTCGGCGGAGCCAGACACGCTGCTGACATCCGGGCAGACGACATGCACCAGTCGAGTGCCGGAGTGATCAACCTGCAGATGGGCACCAACCAGGGAGCGACCCAAAGCGGCATGTCCATGGGAGGTCGTCGGGACATCAGTT
154 | AAAAGGGGGCGTTTCACATTTCGTCCAGGATTCTGGAACTGTCCGGATCACAGTGCCACTATCCGTGTTTGATCAATTCAAATCGCTAACTTAAAACGGCCTGTATATAGCTACACTTCATTAACAAAACATTTAGCAATGCCTTATTCTATAACAAGCTACTG
155 | TATCAGCATAATTATGACTTTCCAATACTTCTTTTTAGCAGATCTTTAATTCTTATTCTTTCTTGTTCGCTGCGTACACTTTTTAACAATAACGTAACATTCTGTATTCATTGATCTATGCAGTTTTATATATTTGGAGTACTTGTTAACTTTAAATATCTGTTGAATGTTA
156 | CTTTTGAAATGTTGTCTCTAAATATTCGCATTAATTAACGTCGTGTTCTTCATCACAGTCAATGTTAACAGCAGCCATAGCACTTTAATTTATTTTGTTACTTTGTTTATTACGCATGTTTCTAAAATTATTCTTTCATTAAAGCTATTGTTCGTTGCACGTTGATAAGT
157
158 ATTGTTGTTGCGTTTCGTGATTCTATACATAATATATTCAAAACAATTAAAAA
159
160 | &gt;Ap70_3/8_0.688_1127
161 | GCAGGCCGGACCAACAAGGGCGCTAGCCAATCCGGAATGACCTTCGGCGGAGCCAGACACGCTGCTGACATCCGGGCAGACGACATGCACCAGTCGAGTGCCGGAGTGATCAACCTACAGATGGGCACCAACCAGGGGGCGTCCCAAAGCGGCA
162 | TGTCCATGGGAGGCCGTCGGGACATCAGTTAAAAGGGGGCGTTTCACATTTCGTGCCAGCGTTCTGGAACTGTCCGGATCACAGTGCCACCATCCGAGTGTTTGGTCAATTCAAATCGCTAACTTAAAACGGCCTGTATATAGCTACATTTCATTAACAAA
163 | ACATTTAGCGATGCCTTGTTCTATAACAAATTATTGTATCAGCATAATTATTACTTTCCAGTACTTCTTTATAGCAGATCTTTAATTATTGTTCTTTCTTGTTCGCTGCGTACTTTTTAAAAAGAATACAACATTCTGTATTCATTGATCTATGCAGTTTTATATATTTGGA
164
165 GTACTTGTTAACTTTAAATATATGTTAAATGTTACTTTTGAAATGTTGTCTCTAAATATTCGCATTAATTAACGTCGTGCTCTTCATCACAGTCAAAGTTAACATTTAACATATATTTAAAGTTAACAAGTACTCCAAATATATAAAACTGCATAGATCAATGAATACAGA
166 ATGTTGTATTCTTTTTAAAAAGTACGCAGCGAACAAGAAAGAACAATAATTAAAGATCTGCTATAAAGAAGTACTGGAAAGTAATAATTATGCTGATACAATAATTTGTTATAGAACAAGGCATCGCTAAATGTTTTGTTAATGAAATGTAGCTATATACAGGCCGTTT
167 TAAGTTAGCGATTTGAATTGACCAAACACTCGGATGGTGGCACTGTGATCCGGACAGTTCCAGAACGCTGGCACGAAATGTGAAACGCCCCCTTTTAACTGATGTCCCGACGGCCTCCCATGGACATGCCGCTTTGGGACGCCCCCTGGTTGGTGCCCA
168 TCTGTAGGTTGATCACTCCGGCACTCGACTGGTGCATGTCGTCTGCCCGGATGTCAGCAGCGTGTCTGGCTCCGCCGAAGGTCATTCCGGATTGGCTAGCGCCCTTGTTGGATCCAGCTTGCAGACCGATACGGCCTGGCC
169
170
171 | Another example, the file locus_3_sp_5.fasta
172 |
173
174 &gt;Ac53_3/4_0.375_999
175 CCGATCTTGTCTGGTGTTTTGCTGCCCCCTAGCGAGTGACGACAAACTCGTTGGTACCTTGCTTAGCGCGATAGAAGGGTTGAACATGTTGGCTGCTGGGCTAAGAACTCTAAAAAGCCTTGCCCCTCGGGGTTGCGTAGCGTGGTCGTGTACTTCGGT
176 GCATGCCAAGCATACCCTACCAGACTTACCATATGATTACAATGCCCTGGAGCCACACATCAGTGCTGAAATCATGCTGCTGCATCACACCAAGCATCACCAGACGTATGTCAACAACCTGAATGTTGCAGAGGAGAAGTTTCATGAGGCTACAGAGAAA
177 GGTGATGTAACCACAGCAGTATCACTGATGCCAGCCCTAAGATTTAATGGTGGTGGACACATCAACCATACTATATTTTGGAAGAACATGTCACCAAATGGTGGTGGAGAGCCATCTGGCGAACTGATGGAGGCCATCAAACGTGACTTTGGCTCATTTGAA
178 AACATGAAGAACATGTTGAGTACATCAACCACTGCAGTGCAAGGCTCTGGCTGGGGATGGCTTGGGTACAATAAAAAGATGAAGAAACTAGAAATTGCCACCTGTGCCAACCAAGATCCATTAGAGGGCACTACAGGTCTCGTCCCGTTGTTTGGCATAG
179 ATGTCTGGGAACATGCCTATTACTTGCAGTACAAGAATGTTCGTCCAGACTATGTAAAGGCTATTTGGAATGTGGCCAACTGGGATGACATCATGGAACGTTACAACAATGCCAGAAAATAAACTGTTAAACAAATAATTAATATATTAATGTGTTGCAATTTTTG
180 TCAATTGGTACATACACAATTTTGTTCATAAAGAAAATTGTGATTACTTTCTGGCAACTAGTTCCCAGTGAATACAGCAATTGTTCTGCCAAGGATGCATTTGGATTGAGAAGGCACCCAGAATGGGTTATCCGTGAATGTCTATTGGAATGTGGCACCATACG
181 ATGTTTACTGTATTAGTTACAATTAAAA
182
183 | &gt;Ap40_1/1_1.000_1011
184 | CTGATGTGTGGCCCCCTAGCGAGTGAAGACAAACTCGATAGCACCCAGACAGTTCTGTTGGTTAGATAAAAGGGAGAAACATGCTGGCTGCTGGGCTAAGAACTCTAAAAAGCCTTGCTCCTCGTGGTGGTCTAGCTTGGTCTTGTACCTCGGTACATGC
185 | CAAACACACACTGCCAGACTTGCCGTATGATTATAATGCTCTGGAGCCACACATCAGTGCTGAAATCATGTTGCTGCATCACACAAAACATCACCAGACGTATGTGAACAACCTGAATATTGCAGAGGAGAAGTTTCATGAGGCTACCGAGAAAGGCGATG
186 | TGACCACAGCAGTATCACTGATGCCAGCCCTAAGATTTAATGGTGGTGGACATATCAACCATACTATATTTTGGAAGAACATGTCACCAAATGGTGGTGGAGAACCATCTGGCGAACTGATGGAGGCCATCAAACGTGACTTTGGTTCATTTGAAAACATGA
187 | AGAACATGCTGAGTACAGCAACCACTGCAGTACAAGGCTCTGGCTGGGGATGGCTTGGGTACAATAAAAAGATGAAGAAACTAGAAATTGCCACCTGTGCCAACCAAGATCCATTAGAGGGCACTACAGGTCTTGTCCCACTGTTTGGTATCGATGTCTG
188 | GGAACATGCCTATTATTTACAGTACAAGAATGTTCGTCCAGACTATGTAAAGGCTATTTGGAATGTGGCTAACTGGGATGATATCATGGAGCGTTACAACAATGCCAGAAAATAAACTTAAATACATCATTATTTAGTTAATGTGCGACTTTTGTTTGTTAATCAGT
189 | TCACACACCATTCTATTCACAAAGAAAATGGTGTATTTGCTTTCTGTTCAACTGGTTCCCGGTGAATACAGCAGTTGTTCTGCCAAGGATGTATTTGGATTGAGAAGGCACCAAGAATGGGCTGTCACTGAATGTCAATTGGAATGTAGCCTCAATGTTTACT
190
191 GTATTACCTACAATTAAAATGATTATGATATAACCAAG
192
193 | &gt;Pf201_1/1_1.000_908
194 | CTTCTGGAACGATGTTGGCTGGGTATAGAACTTTGGCGAGGGTTATCCCACGTGGTGTCAGCTCCGCTTGGGCAAGTACAATAGTACATAACAAGCACACCTTGCCAGATTTACCATATGATTATAATGCCTTGGAACCACACATCAGTGCTGAAATAATGC
195
196 TCCTTCATCATACAAAGCATCACCAGACATATGTGAACAATCTGAATGTAGCTGAAGAAAAGTTTCATGAAGCCACGGAGAAAGGTGATGTCACTACAGCTGTGTCGCTAATGCCAGCACTAAGATTTAATGGCGGAGGACACATCAATCACACCATTTTCT
197 GGAAGAACATGTCTCCTAATGGCGGAGGAGAGCCTTCTGGCGAGTTGATGGAAGCCATTAAACGTGATTTTGGTTCATTTGAGAATATGAAAAACATGTTAAGTACAGCTACAACAGCTGTCCAAGGATCTGGCTGGGGATGGCTTGGTTATAACAAAAAG
198 ATGAAAAAGCTCGAGATAGCCACTTGTGCCAACCAGGATCCACTGGAAGGAACAACAGGATTAATTCCACTGTTTGGTATTGACGTCTGGGAGCATGCTTACTATCTGCAATATAAAAATGTACGTCCAGATTATGTTAAAGCTATCTGGAATGTGGCCAACT
199 GGGATGATATTACAGAGCGCTACAACAATGCGAAGAAATAGATTTGCTGGGACATATGAATAGTGCTTGGCAGAGCCTAATGTTGTGTTTATTTCTGTGTTTCTTATCAATCAGTTGATGTATTAGCCCGGTGTATTGACATGAAGAATTGGACAATGATTTCAAA
200 TACATTTGAACAATAAATGTATCATGATCAAGCATTATTTTGTTAGTTGTCTCCTCCACTGAGAAGACCAAGTTTAATAAACAACAAACACAGA
201 </help>
202
203 <expand macro="citations" />
204
205 </tool>