comparison pairwise.xml @ 4:6709645eff5d draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
author abims-sbr
date Wed, 17 Jan 2018 08:53:53 -0500
parents 5f68b2fc02c1
children 6e44f0c3e7c1
comparison
equal deleted inserted replaced
3:5f68b2fc02c1 4:6709645eff5d
20 ln -s '$input' '$input.element_identifier'; 20 ln -s '$input' '$input.element_identifier';
21 #set $infiles = $infiles + $input.element_identifier + "," 21 #set $infiles = $infiles + $input.element_identifier + ","
22 #end for 22 #end for
23 #set $infiles = $infiles[:-1] 23 #set $infiles = $infiles[:-1]
24 24
25 ln -s $__tool_directory__/scripts/functions.py . &&
25 ln -s $__tool_directory__/scripts/S02_xxx_patron_pipeline.sh . && 26 ln -s $__tool_directory__/scripts/S02_xxx_patron_pipeline.sh . &&
26 ln -s $__tool_directory__/scripts/S03_run_blast_with_k_filter.sh . && 27 ln -s $__tool_directory__/scripts/S03_run_blast_with_k_filter.sh . &&
27 ln -s $__tool_directory__/scripts/S04_run_blast2_with_k_filter.sh . && 28 ln -s $__tool_directory__/scripts/S04_run_blast2_with_k_filter.sh . &&
28 ln -s $__tool_directory__/scripts/S05_script_extract_match_v20_blastx.py . && 29 ln -s $__tool_directory__/scripts/S05_script_extract_match_v20_blastx.py . &&
29 ln -s $__tool_directory__/scripts/S06_post_processing_of_pairwise.py . && 30 ln -s $__tool_directory__/scripts/S06_post_processing_of_pairwise.py . &&
52 <discover_datasets pattern="__name_and_ext__" directory="outputs_prot" /> 53 <discover_datasets pattern="__name_and_ext__" directory="outputs_prot" />
53 </collection> 54 </collection>
54 </outputs> 55 </outputs>
55 56
56 <tests> 57 <tests>
58 <test>
59 <param name="inputs" ftype="fasta" value="inputs2/PfPfiji_trinity.fasta,inputs2/ApApomp_trinity.fasta,inputs2/AmAmphi_trinity.fasta,inputs2/AcAcaud_trinity.fasta" />
60 <param name="e-value" value="1e-5" />
61 <output_collection name="output_fasta_dna" type="list">
62 <element name="DNAalignment_corresponding_to_protein_from_RBH_AmAmphi_AcAcaud" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_AmAmphi_AcAcaud.fasta" />
63 <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AcAcaud" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AcAcaud.fasta" />
64 <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AmAmphi" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AmAmphi.fasta" />
65 <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_ApApomp" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_ApApomp.fasta" />
66 </output_collection>
67 </test>
57 <test> 68 <test>
58 <param name="inputs" ftype="fasta" value="inputs/PfPfiji_Trinity.fasta,inputs/ApApomp_Trinity.fasta,inputs/AmAmphi_Trinity.fasta,inputs/AcAcaud_Trinity.fasta" /> 69 <param name="inputs" ftype="fasta" value="inputs/PfPfiji_Trinity.fasta,inputs/ApApomp_Trinity.fasta,inputs/AmAmphi_Trinity.fasta,inputs/AcAcaud_Trinity.fasta" />
59 <param name="e-value" value="1e-5" /> 70 <param name="e-value" value="1e-5" />
60 <output name="output" > 71 <output name="output" >
61 <assert_contents> 72 <assert_contents>
92 </test> 103 </test>
93 </tests> 104 </tests>
94 105
95 <help> 106 <help>
96 107
97 @HELP_AUTHORS@ 108 @HELPAUTHORS@
109
110 <![CDATA[
111 ---------
98 112
99 ============ 113 **Description**
100 What it does
101 ============
102 114
103 | This tool takes a 'data collection list' containing nucleic fasta sequence files and searches different homologous genes from pairwise comparaisons. 115 This tool searches for different homologous genes from pairwise comparisons between a set of fasta files (one file per species).
104 | There are 3 outputs. 116
105 | 117 --------
118
119 **Parameters**
120
121 - 'Input files' : a collection of fasta files (one file per species)
122 - 'e_value' : the blast e-value. By default it's 1e-5.
106 123
107 -------- 124 --------
108 125
109 ========== 126 **Outputs**
110 Parameters
111 ==========
112 127
113 The choice of parameters is possible : 128 - 'Pairwise' : the general output. It gives the information about what the tool has done for each pairwise.
114 129
115 **-e** : 130 - 'Pairwise_DNA' : the output which contains nucleic sequences (of the pairwise) that are homologous. The sequences are with nucleotides. It shows for both the query and match :
116 | is the option for the choice of the e-value. 131 the name
117 | By default it's 10. 132 the sequence in nucleotides
118 | 133
134 - 'Pairwise_PROT' : the output which contains protein sequences (of the pairwise) that are homologous. The sequences are given as amino acids. It shows :
135 Name, position, length, and part of the sequence in protein for query and match sequences
136 Divergence
137 Number of gaps
138 Real divergence
119 139
120 -------- 140 --------
121 141
122 ======= 142 **The AdaptSearch Pipeline**
123 Outputs
124 =======
125 143
126 This tool, produces the following files : 144 .. image:: ../../adaptsearch_picture_helps.png :height: 593 :width: 852
127 145
128 **Pairwise**: 146 ---------
129 | is the general output. It gives the information about what the tool is doing (for each pairwise).
130 |
131
132 **Pairwise DNA**:
133 | is the output wich contains nucleic sequences (of the pairwise) that are homologues. The sequences are with nucleotides. Shows:
134 | the name of the query sequence
135 | the part of the sequence in nucleotides
136 | the name of the match sequence
137 | the part of the sequence in nucleotides
138 |
139
140 **Pairwise PROT**:
141 | is the output wich contains proteic sequences (of the pairwise) that are homologues. The sequences are with protein. Shows:
142 | the name of the query sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence)
143 | the part of the sequence in protein
144 | the name of the match sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence)
145 | the part of the sequence in protein
146
147 --------
148
149 ===============
150 Working Example
151 ===============
152
153 ---------------------------
154 The input files and options
155 ---------------------------
156
157 **Input files**
158 | 3 files with 200 nucleic sequences each : Ap.fasta, Ac.fasta et Pp.fasta
159 |
160 **Parameters**
161 | e-value = 1e-20
162 |
163
164 ----------------
165 The output files
166 ----------------
167
168 **Pairwise**
169
170 | -------------------- Pairwise Pp_Ap --------------------
171 |
172 | database : Pp.fasta
173 | query file : Ap.fasta
174 |
175 | ***** START run BLAST *****
176 | ***** END run BLAST *****
177 |
178 |
179 | database : Ap.fasta
180 | query file : only the sequences of Pp.fasta who matched during the last BLAST
181 |
182 | ***** START run BLAST *****
183 | ***** END run BLAST *****
184 |
185 | [3/5] Get pairs of sequences ...
186 | Get list of fasta name involved in RBH
187 | Number of pairwises parsed = 15
188 | Get subset of Alvinella db
189 | Get subset of Paralvinella db
190 |
191 | -------------------- Pairwise Pp_Ac --------------------
192 |
193 | database : Pp.fasta
194 | query file : Ac.fasta
195 |
196 | ***** START run BLAST *****
197 | ***** END run BLAST *****
198 |
199 |
200 | database : Ac.fasta
201 | query file : only the sequences of Pp.fasta who matched during the last BLAST
202 |
203 | ***** START run BLAST *****
204 | ***** END run BLAST *****
205 |
206 | [3/5] Get pairs of sequences ...
207 | Get list of fasta name involved in RBH
208 | Number of pairwises parsed = 13
209 | Get subset of Alvinella db
210 | Get subset of Paralvinella db
211 |
212 |
213 | -------------------- Pairwise Ap_Ac --------------------
214 |
215 | database : Ap.fasta
216 | query file : Ac.fasta
217
218 | ***** START run BLAST *****
219 | ***** END run BLAST *****
220 |
221 |
222 | database : Ac.fasta
223 | query file : only the sequences of Ap.fasta who matched during the last BLAST
224 |
225 | ***** START run BLAST *****
226 | ***** END run BLAST *****
227 |
228 | [3/5] Get pairs of sequences ...
229 | Get list of fasta name involved in RBH
230 | Number of pairwises parsed = 24
231 | Get subset of Alvinella db
232 | Get subset of Paralvinella db
233 |
234 |
235
236 **Pairwise_output_file_PROT**
237
238 | For example the 4 last sequences of the file 19_ReciprocalBestHits_Pp_Ap.fasta
239 |
240 | &gt;Ap123_1/1_1.000_748||254...478||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0
241 | FVRITVGDEMSRRPKFAMITWVGPEVSPMKRAKVSTDKAFVKQIFQNFAKEIQTSERSELEEEYVRQEVMKAGGA
242 | &gt;Pp_146_1/2_1.000_713||259...483||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0
243 | FAYIRCTNEESKRSKFAMITWIGQGVEAMKRAKVSMDKQFLKEIFQNFAREFQTSEKSELDEVCIKHALAIDDGA
244 | &gt;Ap66_1/1_1.000_400||192...398||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0
245 | LSTSLLNWRKHTLCF*GMKLILIILLISFIIPAILFLLSIFTTMRMPESREKFRPYECGFDPNHSARTP
246 | &gt;Pp_201_2/2_1.000_691||14...220||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0
247 | LSTSLLN*RKQPFASEEMKLLILLLFISALIPRILIILSIFTSIRTPKNREKSSPYECGFDPNHSARTP
248 |
249 |
250
251 **Pairwise_output_file_DNA**
252
253 | For example the 4 last sequences of the file 25_DNAalignement_corresponding_to_protein_from_19_RBH_Pp_Ap.fasta
254 |
255 | &gt;Ap123_1/1_1.000_748
256 | CCAGTAACAAGCCGCCACGGGTCCGTCGTGTCTTCTCTTCAAGGAAAGGTTGACAGATTCTCGTACGCTAGACGTCGCCACCTACTCGTCCTGGACTCCGGTGCCGTAGGTGGCGCCACCTGCTTTCATCACTTCCTGCCTA
257 | ACGTACTCCTCTTCTAGCTCCGATCTCTCGCTCGTCTGGATCTCTTTGGCAAAGTTCTGGAATATCTGCTTGACGAACGCCTTGTCCGTGCTGACTTTGGCGCGCTTCATTGGGCTCACTTCCGGTCCGACCCACGTGATCA
258 | TGGCGAACTTCGGTCTTCTGCTCATTTCGTCCCCGACGGTAATACGGACAAAGGCGAACGCCCGCTGGTCATCTTGTAGTTTTGATAACAGATCCTCGTATTCGGTTCCTGTAGAGTCCAGTATAATATTGTCGCCATCATA
259 | CGTCACAAACGCCCAGTTTGTCTCCGTCGCGTCGCTCCTGACGTCTTCGTAAGCCTGTCCGATAGCCTCTCTGTCGATGTCTGCCATGCTGCTGGTCCCGCTCTCGACGCTAATGAGCCAATCACGACTTCTGACAGACGAG
260 | TAGACATGCAGACAGCCAGACGGACTGACGGACTGACG
261 | &gt;Pp_146_1/2_1.000_713
262 | CATTAATTGTGTGTCTGGTTGTGGGTGTGTGTTATAAGAGACATCACTTAGTGTATACTGATGTCCACGTGGTAGTTGACCAGCATGTCGAATATGGATAGGGACTCGATCTTGAATGGCTATGAGGAGGTTCGCAACGACGA
263 | CTCGGACATTAACTGGGCTTTCGTAACGTATTCACCTGACAACAAACTAGTACTTGATTCAACTGGCACAGACTACTTCCAGCTCCAGGAGAAATATCAAGATGATATGCGAGGATTTGCTTACATCCGGTGCACTAACGAGG
264 | AGAGTAAACGTTCTAAATTTGCCATGATTACCTGGATTGGACAAGGAGTGGAAGCAATGAAGCGTGCCAAGGTCAGCATGGACAAACAGTTCCTAAAGGAAATCTTCCAGAATTTCGCAAGAGAATTTCAGACGAGTGAAAAG
265 | TCAGAGCTTGATGAGGTCTGTATTAAACACGCGCTTGCCATTGACGATGGAGCTGGTTGCAAAGTGGAAAGCGAGGACACGAGAAAAGGGGCCTTTCTCAGGAAAGAGGATGACACTGAAGTGGAAAGGGAAACTAATGTCAA
266 | CAATGTCTCCGGTGTCGTGGAAGAAGATGATGACGCAAAAAATGCAAATGATTTTAATTACGAAGAGGACTGTAACAATGAATAGGTGCATGTCGATGATTTATATAGAGAACTAGACTTCGCACTCGCTAGGTGGTTGAT
267 | &gt;Ap66_1/1_1.000_400
268 | TGATCGTCTTATAAACCTAACTTGAAAAACCTTCCTACCATTTAGGGCTAGCAGCCCTATTAATTATCACACCTATCGCAGCGCTCTCACTATAATTATAAGTATTGCGCCGGGTTTGAACGGATAGCTCTGATGCTGCTAATT
269 | ACGGGACCTAATAATCCCCAATACTTTATCCTTAGAGAGCTGTACCTCTTAGCACCAGTCTTTTAAACTGGCGAAAGCACACTTTATGCTTCTAAGGAATGAAACTAATTCTTATAATCCTACTAATCTCTTTTATCATCCCCG
270 | CCATTCTATTTTTACTCTCGATCTTTACTACTATGCGCATGCCAGAGAGCCGTGAAAAATTTAGGCCCTACGAGTGCGGGTTTGACCCCAATCACTCGGCCCGAACCCCATT
271 | &gt;Pp_201_2/2_1.000_691
272 | ATCGTAGGGAAAAAGGTGTTCGTGCAGAATGATTGGGGTCAAATCCACATTCGTAGGGGCTAGATTTTTCACGGTTTTTAGGTGTACGAATAGAGGTGAAGATTGATAGGATGATTAAAATTCTTGGGATTAATGCTGAAATAAA
273 | GAGAAGTAGGATTAAAAGTTTCATTTCCTCAGAAGCAAAGGGTTGCTTTCGTCAGTTTAAAAGACTGGTGCTAAGTAGGTACAGCTCTCTAAGGG
274
275 ---------------------------------------------------
276 147
277 Changelog 148 Changelog
278 --------- 149 ---------
279 150
280 **Version 2.0 - 18/04/2017** 151 **Version 2.0 - 18/04/2017**
282 - NEW: Replace the zip between tools by Dataset Collection 153 - NEW: Replace the zip between tools by Dataset Collection
283 154
284 155
285 **Version 1.0 - 13/04/2017** 156 **Version 1.0 - 13/04/2017**
286 157
287 - TEST: Add funtional test with planemo 158 - TEST: Add functional test with planemo
288 159
289 - IMPROVEMENT: Use conda dependencies for blast, samtools and python 160 - IMPROVEMENT: Use conda dependencies for blast, samtools and python
290 161
162 ]]>
291 </help> 163 </help>
292 164
293 <expand macro="citations" /> 165 <expand macro="citations" />
294 166
295 </tool> 167 </tool>