comparison pairwise.xml @ 0:e95d4b20c62d draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 38545eb765e0df7fcc6b8130e8e5f87cf4481122
author abims-sbr
date Thu, 13 Apr 2017 05:46:29 -0400
parents
children 5f68b2fc02c1
comparison
equal deleted inserted replaced
-1:000000000000 0:e95d4b20c62d
1 <?xml version="1.0"?>
2
3 <tool name="Pairwise" id="pairwise" version="1.0">
4
5 <description>
6 Run reciprocal pairwise tblastx comparisons
7 </description>
8
9 <macros>
10 <import>macros.xml</import>
11 </macros>
12
13 <requirements>
14 <expand macro="python_required" />
15 <requirement type="package" version="2.2.22">blast-legacy</requirement>
16 <requirement type="package" version="1.3.1">samtools</requirement>
17 </requirements>
18 <!-- Symlinks helper scripts S02 to S13 into the job working directory, then runs S01_organize_rbh.py with the e-value and the input archive(s); its standard output is redirected to the "output" dataset. Tool-directory and dataset paths are single-quoted so that whitespace in them cannot split the shell arguments. -->
19 <command>
20 <![CDATA[
21 ln -s '$__tool_directory__/scripts/S02_xxx_patron_pipeline.sh' .
22 &&
23 ln -s '$__tool_directory__/scripts/S03_run_blast_with_k_filter.sh' .
24 &&
25 ln -s '$__tool_directory__/scripts/S04_run_blast2_with_k_filter.sh' .
26 &&
27 ln -s '$__tool_directory__/scripts/S05_script_extract_match_v20_blastx.py' .
28 &&
29 ln -s '$__tool_directory__/scripts/S06_post_processing_of_pairwise.py' .
30 &&
31 ln -s '$__tool_directory__/scripts/S07_format_match_get_back_nucleotides.py' .
32 &&
33 ln -s '$__tool_directory__/scripts/S08_script_extract_match_v20_blastx.py' .
34 &&
35 ln -s '$__tool_directory__/scripts/S09_post_processing_of_pairwise.py' .
36 &&
37 ln -s '$__tool_directory__/scripts/S10_compare_list_pairs_for_reciprocal_best_hits_test.py' .
38 &&
39 ln -s '$__tool_directory__/scripts/S11_post_processing_of_pairwise.py' .
40 &&
41 ln -s '$__tool_directory__/scripts/S12_prot2dna.py' .
42 &&
43 ln -s '$__tool_directory__/scripts/S13_zip.py' .
44 &&
45 python '$__tool_directory__/scripts/S01_organize_rbh.py' 8 ${e_value} '${zip}'
46 > '${output}';
47 ]]>
48 </command>
49 <!-- "zip": one or more ZIP archives (multiple="true"), described as the output of the filter tool. "e_value": float, default 1e-5. Both values are passed on the command line to S01_organize_rbh.py. -->
50 <inputs>
51 <param name="zip" type="data" format="no_unzip.zip,zip" multiple="true" label="Choose your ZIP file" help="Contains the output of the filter tool" />
52 <param name="e_value" type="float" value="1e-5" label="e_value" help="By default, it's 1e-5. you can write the e-value like this: 1e-5" />
53 </inputs>
54 <!-- "output" receives the standard output of S01_organize_rbh.py. The two archives are collected from the job working directory (output_file_DNA.zip and output_file_PROT.zip) via from_work_dir. -->
55 <outputs>
56 <data format="txt" name="output" label="Pairwise" />
57 <data format="no_unzip.zip" name="output_zip_DNA" label="Pairwise_zip_DNA" from_work_dir="output_file_DNA.zip" />
58 <data format="no_unzip.zip" name="output_zip_PROT" label="Pairwise_zip_PROT" from_work_dir="output_file_PROT.zip" />
59 </outputs>
60 <!-- Runs the tool on test_02_input_pairwise.zip with e_value 1e-5 and checks that the text output lists all six species pairs and the expected "Number of pairwises parsed" counts. The test parameter is named e_value so that it matches the declared input. -->
61 <tests>
62 <test>
63 <param name="zip" ftype="zip" value="test_02_input_pairwise.zip" />
64 <param name="e_value" value="1e-5" />
65 <output name="output" >
66 <assert_contents>
67 <has_line line="('PfPfiji_Trinity.fasta', 'ApApomp_Trinity.fasta')"/>
68 <has_line line="('PfPfiji_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/>
69 <has_line line="('PfPfiji_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>
70 <has_line line="('ApApomp_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/>
71 <has_line line="('ApApomp_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>
72 <has_line line="('AmAmphi_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>
73
74 <has_text text="Number of pairwises parsed = 2" />
75 <has_text text="Number of pairwises parsed = 3" />
76 <has_text text="Number of pairwises parsed = 0" />
77 <has_text text="Number of pairwises parsed = 5" />
78 <has_text text="Number of pairwises parsed = 1" />
79
80 </assert_contents>
81 </output>
82 </test>
83 </tests>
84
85 <help>
86
87 ============
88 What it does
89 ============
90
91 | This tool takes a zip archive containing nucleic fasta sequence files and searches for homologous genes using pairwise comparisons.
92 | There are 3 outputs.
93 |
94 | The BLAST program was written by the **NCBI**.
95 | The script was written by **Eric Fontanillas**.
96 | The wrapper was written by **Julie Baffard**.
97
98 --------
99
100 ==========
101 Parameters
102 ==========
103
104 The following parameter can be set:
105
106 **-e** :
107 | is the option for the choice of the e-value.
108 | By default it's 1e-5.
109 |
110
111 --------
112
113 =======
114 Outputs
115 =======
116
117 This tool produces the following files:
118
119 **Pairwise** :
120 | is the general output. It gives the information about what the tool is doing (for each pairwise).
121 |
122
123 **Pairwise_zip_DNA.zip** :
124 | is the output which contains the nucleic sequences (of each pairwise) that are homologous. The sequences are given as nucleotides. Shows :
125 | the name of the query sequence
126 | the part of the sequence in nucleotides
127 | the name of the match sequence
128 | the part of the sequence in nucleotides
129 |
130
131 **Pairwise_zip_PROT.zip** :
132 | is the output which contains the protein sequences (of each pairwise) that are homologous. The sequences are given as proteins. Shows :
133 | the name of the query sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence)
134 | the part of the sequence in protein
135 | the name of the match sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence)
136 | the part of the sequence in protein
137
138 .. class:: warningmark
139
140 The two zip outputs have to be downloaded and extracted with file archiver software; you cannot view them with the "eye icon" in the interface.
141
142 --------
143
144 ===============
145 Working Example
146 ===============
147
148 ---------------------------
149 The input files and options
150 ---------------------------
151
152 **Input files**
153 | 3 files with 200 nucleic sequences each : Ap.fasta, Ac.fasta and Pp.fasta
154 |
155 **Parameters**
156 | e-value = 1e-20
157 |
158
159 ----------------
160 The output files
161 ----------------
162
163 **Pairwise**
164
165 | -------------------- Pairwise Pp_Ap --------------------
166 |
167 | database : Pp.fasta
168 | query file : Ap.fasta
169 |
170 | ***** START run BLAST *****
171 | ***** END run BLAST *****
172 |
173 |
174 | database : Ap.fasta
175 | query file : only the sequences of Pp.fasta who matched during the last BLAST
176 |
177 | ***** START run BLAST *****
178 | ***** END run BLAST *****
179 |
180 | [3/5] Get pairs of sequences ...
181 | Get list of fasta name involved in RBH
182 | Number of pairwises parsed = 15
183 | Get subset of Alvinella db
184 | Get subset of Paralvinella db
185 |
186 | -------------------- Pairwise Pp_Ac --------------------
187 |
188 | database : Pp.fasta
189 | query file : Ac.fasta
190 |
191 | ***** START run BLAST *****
192 | ***** END run BLAST *****
193 |
194 |
195 | database : Ac.fasta
196 | query file : only the sequences of Pp.fasta who matched during the last BLAST
197 |
198 | ***** START run BLAST *****
199 | ***** END run BLAST *****
200 |
201 | [3/5] Get pairs of sequences ...
202 | Get list of fasta name involved in RBH
203 | Number of pairwises parsed = 13
204 | Get subset of Alvinella db
205 | Get subset of Paralvinella db
206 |
207 |
208 | -------------------- Pairwise Ap_Ac --------------------
209 |
210 | database : Ap.fasta
211 | query file : Ac.fasta
212 |
213 | ***** START run BLAST *****
214 | ***** END run BLAST *****
215 |
216 |
217 | database : Ac.fasta
218 | query file : only the sequences of Ap.fasta who matched during the last BLAST
219 |
220 | ***** START run BLAST *****
221 | ***** END run BLAST *****
222 |
223 | [3/5] Get pairs of sequences ...
224 | Get list of fasta name involved in RBH
225 | Number of pairwises parsed = 24
226 | Get subset of Alvinella db
227 | Get subset of Paralvinella db
228 |
229 |
230
231 **Pairwise_output_file_PROT**
232
233 | Save as *Galaxy{number}-[Pairwise_output_file_PROT].zip*
234 | If you unzip the file, a number of files appear (one per pairwise) : 19_ReciprocalBestHits_{name_of_pairwise}.fasta
235 | For example the 4 last sequences of the file 19_ReciprocalBestHits_Pp_Ap.fasta
236 |
237 | &gt;Ap123_1/1_1.000_748||254...478||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0
238 | FVRITVGDEMSRRPKFAMITWVGPEVSPMKRAKVSTDKAFVKQIFQNFAKEIQTSERSELEEEYVRQEVMKAGGA
239 | &gt;Pp_146_1/2_1.000_713||259...483||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0
240 | FAYIRCTNEESKRSKFAMITWIGQGVEAMKRAKVSMDKQFLKEIFQNFAREFQTSEKSELDEVCIKHALAIDDGA
241 | &gt;Ap66_1/1_1.000_400||192...398||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0
242 | LSTSLLNWRKHTLCF*GMKLILIILLISFIIPAILFLLSIFTTMRMPESREKFRPYECGFDPNHSARTP
243 | &gt;Pp_201_2/2_1.000_691||14...220||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0
244 | LSTSLLN*RKQPFASEEMKLLILLLFISALIPRILIILSIFTSIRTPKNREKSSPYECGFDPNHSARTP
245 |
246 |
247
248 **Pairwise_output_file_DNA**
249
250 | Save as *Galaxy{number}-[Pairwise_output_file_DNA].zip*
251 | If you unzip the file, a number of files appear (one per pairwise) : 25_DNAalignment_corresponding_to_protein_from_19_RBH_{name_of_pairwise}.fasta
252 | For example the 4 last sequences of the file 25_DNAalignement_corresponding_to_protein_from_19_RBH_Pp_Ap.fasta
253 |
254 | &gt;Ap123_1/1_1.000_748
255 | CCAGTAACAAGCCGCCACGGGTCCGTCGTGTCTTCTCTTCAAGGAAAGGTTGACAGATTCTCGTACGCTAGACGTCGCCACCTACTCGTCCTGGACTCCGGTGCCGTAGGTGGCGCCACCTGCTTTCATCACTTCCTGCCTA
256 | ACGTACTCCTCTTCTAGCTCCGATCTCTCGCTCGTCTGGATCTCTTTGGCAAAGTTCTGGAATATCTGCTTGACGAACGCCTTGTCCGTGCTGACTTTGGCGCGCTTCATTGGGCTCACTTCCGGTCCGACCCACGTGATCA
257 | TGGCGAACTTCGGTCTTCTGCTCATTTCGTCCCCGACGGTAATACGGACAAAGGCGAACGCCCGCTGGTCATCTTGTAGTTTTGATAACAGATCCTCGTATTCGGTTCCTGTAGAGTCCAGTATAATATTGTCGCCATCATA
258 | CGTCACAAACGCCCAGTTTGTCTCCGTCGCGTCGCTCCTGACGTCTTCGTAAGCCTGTCCGATAGCCTCTCTGTCGATGTCTGCCATGCTGCTGGTCCCGCTCTCGACGCTAATGAGCCAATCACGACTTCTGACAGACGAG
259 | TAGACATGCAGACAGCCAGACGGACTGACGGACTGACG
260 | &gt;Pp_146_1/2_1.000_713
261 | CATTAATTGTGTGTCTGGTTGTGGGTGTGTGTTATAAGAGACATCACTTAGTGTATACTGATGTCCACGTGGTAGTTGACCAGCATGTCGAATATGGATAGGGACTCGATCTTGAATGGCTATGAGGAGGTTCGCAACGACGA
262 | CTCGGACATTAACTGGGCTTTCGTAACGTATTCACCTGACAACAAACTAGTACTTGATTCAACTGGCACAGACTACTTCCAGCTCCAGGAGAAATATCAAGATGATATGCGAGGATTTGCTTACATCCGGTGCACTAACGAGG
263 | AGAGTAAACGTTCTAAATTTGCCATGATTACCTGGATTGGACAAGGAGTGGAAGCAATGAAGCGTGCCAAGGTCAGCATGGACAAACAGTTCCTAAAGGAAATCTTCCAGAATTTCGCAAGAGAATTTCAGACGAGTGAAAAG
264 | TCAGAGCTTGATGAGGTCTGTATTAAACACGCGCTTGCCATTGACGATGGAGCTGGTTGCAAAGTGGAAAGCGAGGACACGAGAAAAGGGGCCTTTCTCAGGAAAGAGGATGACACTGAAGTGGAAAGGGAAACTAATGTCAA
265 | CAATGTCTCCGGTGTCGTGGAAGAAGATGATGACGCAAAAAATGCAAATGATTTTAATTACGAAGAGGACTGTAACAATGAATAGGTGCATGTCGATGATTTATATAGAGAACTAGACTTCGCACTCGCTAGGTGGTTGAT
266 | &gt;Ap66_1/1_1.000_400
267 | TGATCGTCTTATAAACCTAACTTGAAAAACCTTCCTACCATTTAGGGCTAGCAGCCCTATTAATTATCACACCTATCGCAGCGCTCTCACTATAATTATAAGTATTGCGCCGGGTTTGAACGGATAGCTCTGATGCTGCTAATT
268 | ACGGGACCTAATAATCCCCAATACTTTATCCTTAGAGAGCTGTACCTCTTAGCACCAGTCTTTTAAACTGGCGAAAGCACACTTTATGCTTCTAAGGAATGAAACTAATTCTTATAATCCTACTAATCTCTTTTATCATCCCCG
269 | CCATTCTATTTTTACTCTCGATCTTTACTACTATGCGCATGCCAGAGAGCCGTGAAAAATTTAGGCCCTACGAGTGCGGGTTTGACCCCAATCACTCGGCCCGAACCCCATT
270 | &gt;Pp_201_2/2_1.000_691
271 | ATCGTAGGGAAAAAGGTGTTCGTGCAGAATGATTGGGGTCAAATCCACATTCGTAGGGGCTAGATTTTTCACGGTTTTTAGGTGTACGAATAGAGGTGAAGATTGATAGGATGATTAAAATTCTTGGGATTAATGCTGAAATAAA
272 | GAGAAGTAGGATTAAAAGTTTCATTTCCTCAGAAGCAAAGGGTTGCTTTCGTCAGTTTAAAAGACTGGTGCTAAGTAGGTACAGCTCTCTAAGGG
273
274 </help>
275
276 <expand macro="citations" />
277
278 </tool>