Mercurial > repos > artbio > concatenate_multiple_datasets
comparison catWrapper.xml @ 0:122dbfdf0826 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 75811bd85c9d175d9bc7b2c4fd723adc3a361f0f
author | artbio |
---|---|
date | Tue, 09 Jul 2019 09:51:52 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:122dbfdf0826 |
---|---|
1 <tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.4.1"> | |
2 <description>tail-to-head by specifying how</description> | |
3 <command><![CDATA[ | |
4 #if $headers == 0: | |
5 #set $concat_command = "cat" | |
6 #else: | |
7 #set $concat_command = 'tail -q -n +'+ str(int($headers)+1) | |
8 #end if | |
9 printf "Concatenating files:\n" && ## the command chosen above is cat when no line is skipped, otherwise tail -q -n +K with K = headers + 1 | |
10 #if $global_condition.input_type == "singles": | |
11 ## Single datasets: every selected dataset is appended, in order, to the out_file1 output. | |
12 #if $dataset_names == "No": | |
13 #for $file in $global_condition.inputs | |
14 printf "${file.element_identifier}..." && | |
15 ## gz input is only decompressed when leading lines are skipped; otherwise the gzip streams are appended as they are. | |
16 #if $file.ext and $file.ext[-2:] == "gz" and $headers != 0: | |
17 gzip -dc '$file' | $concat_command | gzip -c >> '$out_file1' && | |
18 #else: | |
19 $concat_command '$file' >> '$out_file1' && | |
20 #end if | |
21 printf "Done\n" && | |
22 #end for | |
23 ## Each command above ends with "&&", so a final command is needed to close the chain. | |
24 sleep 1 | |
25 #else: | |
26 #for $file in $global_condition.inputs | |
27 printf "${file.element_identifier}..." && | |
28 ## The name separator has to be compressed like the data, so gz inputs always take the gzip path here, even when no line is skipped. | |
29 #if $file.ext and $file.ext[-2:] == "gz": | |
30 printf "# ${file.element_identifier}\n" | gzip -c >> '$out_file1' && | |
31 gzip -dc '$file' | $concat_command | gzip -c >> '$out_file1' && | |
32 #else: | |
33 printf "# ${file.element_identifier}\n" >> '$out_file1' && | |
34 $concat_command '$file' >> '$out_file1' && | |
35 #end if | |
36 printf "Done\n" && | |
37 #end for | |
38 sleep 1 | |
39 #end if | |
40 #else if $global_condition.input_type == "simple_collections": | |
41 mkdir concatenated && ## one file per pair of datasets (paired by position with zip) is written to this directory | |
42 #if $dataset_names == "No": | |
43 #for $x, $y in zip($global_condition.input_1, $global_condition.input_2): | |
44 printf "${x.element_identifier} and ${y.element_identifier}..." && | |
45 #if $x.ext[-2:] == "gz" and $headers != 0: | |
46 gzip -dc '$x' | $concat_command | gzip -c > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && | |
47 gzip -dc '$y' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && | |
48 #else: | |
49 $concat_command '$x' '$y' > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && | |
50 #end if | |
51 printf "Done\n" && | |
52 #end for | |
53 sleep 1 ## closes the chain of commands left open by the last "&&" | |
54 #else: | |
55 #for $x, $y in zip($global_condition.input_1, $global_condition.input_2) | |
56 printf "${x.element_identifier} and ${y.element_identifier}..." && | |
57 #if $x.ext[-2:] == "gz": | |
58 printf "# ${x.element_identifier}\n" | gzip -c > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && ## name separators are compressed like the data, also when no line is skipped | |
59 gzip -dc '$x' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && | |
60 printf "# ${y.element_identifier}\n" | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && | |
61 gzip -dc '$y' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && | |
62 #else: | |
63 printf "# ${x.element_identifier}\n" > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && | |
64 $concat_command '$x'>> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && | |
65 printf "# ${y.element_identifier}\n" >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && | |
66 $concat_command '$y' >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && | |
67 #end if | |
68 printf "Done\n" && | |
69 #end for | |
70 sleep 1 | |
71 #end if | |
72 #else if $global_condition.input_type == "paired_collection": | |
73 #if $global_condition.paired_cat_type == "by_strand": | |
74 mkdir concatenated && ## by_strand: all forward datasets are appended to one file and all reverse datasets to another, both in this directory | |
75 #if $dataset_names == "No": | |
76 #for $file in $global_condition.inputs | |
77 printf "${file.element_identifier}- forward and reverse..." && | |
78 #if $file['forward'].ext[-2:] == "gz" and $headers != 0: | |
79 gzip -dc $file['forward'] | $concat_command | gzip -c >> concatenated/forward.listed.${file['forward'].ext}.listed && | |
80 gzip -dc $file['reverse'] | $concat_command | gzip -c >> concatenated/reverse.listed.${file['reverse'].ext}.listed && | |
81 #else: | |
82 $concat_command $file['forward'] >> concatenated/forward.listed.${file['forward'].ext}.listed && | |
83 $concat_command $file['reverse'] >> concatenated/reverse.listed.${file['reverse'].ext}.listed && | |
84 #end if | |
85 printf "Done\n" && | |
86 #end for | |
87 sleep 1 ## closes the chain of commands left open by the last "&&" | |
88 #else: | |
89 #for $file in $global_condition.inputs.keys(): | |
90 printf "$file - forward and reverse..." && | |
91 #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz": | |
92 printf "# ${file}_forward\n" | gzip -c >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && ## name separators are compressed like the data, also when no line is skipped | |
93 gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && | |
94 printf "# ${file}_reverse\n" | gzip -c >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && | |
95 gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && | |
96 #else: | |
97 printf "# ${file}_forward\n" >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && | |
98 $concat_command $global_condition.inputs[$file]['forward'] >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && | |
99 printf "# ${file}_reverse\n" >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && | |
100 $concat_command $global_condition.inputs[$file]['reverse'] >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && | |
101 #end if | |
102 printf "Done\n" && | |
103 #end for | |
104 sleep 1 | |
105 #end if | |
106 #else if $global_condition.paired_cat_type == "by_pair": | |
107 ## by_pair: the forward and reverse datasets of each pair are written to one file per pair in the concatenated directory. | |
108 mkdir concatenated && | |
109 #if $dataset_names == "No": | |
110 #for $file in $global_condition.inputs.keys(): | |
111 printf "$file - forward and reverse..." && | |
112 #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz" and $headers != 0: | |
113 gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
114 gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
115 #else: | |
116 $concat_command $global_condition.inputs[$file]['forward'] | |
117 > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
118 $concat_command $global_condition.inputs[$file]['reverse'] | |
119 >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
120 #end if | |
121 printf "Done\n" && | |
122 #end for | |
123 sleep 1 | |
124 #else: | |
125 #for $file in $global_condition.inputs.keys(): | |
126 printf "$file - forward and reverse..." && | |
127 ## The name separators have to be compressed like the data, so gz inputs always take the gzip path here, even when no line is skipped. | |
128 #if $global_condition.inputs[$file]['reverse'].ext[-2:] == "gz": | |
129 printf "# ${file}_forward\n" | gzip -c > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
130 gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c | |
131 >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
132 printf "# ${file}_reverse\n" | gzip -c >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
133 gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c | |
134 >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
135 #else: | |
136 printf "# ${file}_forward\n" > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
137 $concat_command $global_condition.inputs[$file]['forward'] | |
138 >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
139 printf "# ${file}_reverse\n" >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
140 $concat_command $global_condition.inputs[$file]['reverse'] | |
141 >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
142 #end if | |
143 printf "Done\n" && | |
144 #end for | |
145 sleep 1 | |
146 #end if | |
147 #else if $global_condition.paired_cat_type == "all": | |
148 mkdir concatenated && | |
149 ## all: every forward and reverse dataset is appended, pair after pair, to the single paired_out_file output, | |
150 ## so every redirection below appends; a truncating redirect inside the loop would discard the pairs already written. | |
151 #if $dataset_names == "No": | |
152 #for $file in $global_condition.inputs.keys(): | |
153 printf "$file - forward and reverse..." && | |
154 #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz" and $headers != 0: | |
155 gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> '$paired_out_file' && | |
156 gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> '$paired_out_file' && | |
157 #else: | |
158 $concat_command | |
159 $global_condition.inputs[$file]['forward'] | |
160 >> '$paired_out_file' && | |
161 $concat_command | |
162 $global_condition.inputs[$file]['reverse'] | |
163 >> '$paired_out_file' && | |
164 #end if | |
165 printf "Done\n" && | |
166 #end for | |
167 sleep 1 | |
168 #else: | |
169 #for $file in $global_condition.inputs.keys(): | |
170 printf "$file - forward and reverse..." && | |
171 #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz": | |
172 printf "# ${file}_forward\n" | gzip -c >> '$paired_out_file' && | |
173 gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> '$paired_out_file' && | |
174 printf "# ${file}_reverse\n" | gzip -c >> '$paired_out_file' && | |
175 gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> '$paired_out_file' && | |
176 #else: | |
177 printf "# ${file}_forward\n" >> '$paired_out_file' && | |
178 $concat_command | |
179 $global_condition.inputs[$file]['forward'] | |
180 >> '$paired_out_file' && | |
181 printf "# ${file}_reverse\n" >> '$paired_out_file' && | |
182 $concat_command | |
183 $global_condition.inputs[$file]['reverse'] | |
184 >> '$paired_out_file' && | |
185 #end if | |
186 printf "Done\n" && | |
187 #end for | |
188 sleep 1 | |
189 #end if | |
190 #end if | |
191 #else if $global_condition.input_type == "nested_collection": | |
192 mkdir concatenated && ## one file per sub-list, named after the sub-list identifier, is written to this directory | |
193 #if $dataset_names == "No": | |
194 #for $sub_list in $global_condition.inputs: | |
195 #set $file_base_name=$sub_list.element_identifier | |
196 #for $sub_list_element in $sub_list: | |
197 printf "${file_base_name} - ${sub_list_element.element_identifier}..." && | |
198 #if $sub_list_element.ext[-2:] == "gz" and $headers != 0: | |
199 gzip -dc ${sub_list_element} | $concat_command | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && | |
200 #else: | |
201 $concat_command ${sub_list_element} >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && | |
202 #end if | |
203 printf "Done\n" && | |
204 #end for | |
205 #end for | |
206 sleep 1 ## closes the chain of commands left open by the last "&&" | |
207 #else: | |
208 #for $sub_list in $global_condition.inputs: | |
209 #set $file_base_name=$sub_list.element_identifier | |
210 #for $sub_list_element in $sub_list: | |
211 printf "${file_base_name} - ${sub_list_element.element_identifier}..." && | |
212 #if $sub_list_element.ext[-2:] == "gz": | |
213 printf "# ${sub_list_element.element_identifier}\n" | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && ## name separators are compressed like the data, also when no line is skipped | |
214 gzip -dc ${sub_list_element} | $concat_command | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && | |
215 #else: | |
216 printf "# ${sub_list_element.element_identifier}\n" >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && | |
217 $concat_command ${sub_list_element} >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && | |
218 #end if | |
219 printf "Done\n" && | |
220 #end for | |
221 #end for | |
222 sleep 1 | |
223 #end if | |
224 #end if | |
225 ]]> | |
226 </command> | |
227 <inputs> | |
228 <conditional name="global_condition"> <!-- input_type selects which of the four input layouts below is shown and which branch of the command runs --> | |
229 <param name="input_type" type="select" label="What type of data do you wish to concatenate?" help="Depending on the type of input selected the concatenation options will differ"> | |
230 <option value="singles">Single datasets</option> | |
231 <option value="simple_collections">2 Collections</option> | |
232 <option value="paired_collection">Paired collection</option> | |
233 <option value="nested_collection">Nested collection</option> | |
234 </param> | |
235 <when value="singles"> | |
236 <param name="inputs" type="data" label="Concatenate Datasets" multiple="True" help="All input datasets will be concatenated tail-to-head."/> | |
237 </when> | |
238 <when value="paired_collection"> | |
239 <param name="inputs" type="data_collection" collection_type="list:paired" label="Input paired collection to concatenate"/> | |
240 <param name="paired_cat_type" type="select" label="What type of concatenation do you wish to perform?"> | |
241 <option value="by_strand">Concatenate all datasets of same strand (outputs a single pair of datasets)</option> | |
242 <option value="by_pair">Concatenate pairs of datasets (outputs an unpaired collection of datasets)</option> | |
243 <option value="all">Concatenate all datasets into a single file regardless of strand (outputs a single file)</option> | |
244 </param> | |
245 </when> | |
246 <when value="simple_collections"> | |
247 <param name="input_1" type="data_collection" collection_type="list" label="Input first collection" help="The first collection contains the datasets that will be written first in the concatenated file" /> | |
248 <param name="input_2" type="data_collection" collection_type="list" label="Input second collection" help="The second collection contains the datasets that will be written last in the concatenated file" /> | |
249 </when> | |
250 <when value="nested_collection"> | |
251 <param name="inputs" type="data_collection" collection_type="list:list" label="Input nested collection" help="The nested collection whose items you want to concatenate." /> | |
252 </when> | |
253 </conditional> | |
254 <param name="dataset_names" type="boolean" label="Include dataset names?" truevalue="Yes" falsevalue="No" checked="false" help="If 'Yes' is selected '#name of dataset' will be added when concatenating."/> | |
255 <param name="headers" type="integer" label="Number of lines to skip at the beginning of each concatenation:" value="0" help="This parameter exists so as to not concatenate comments or headers contained at the start of the files."/> | |
256 </inputs> | |
257 <outputs> <!-- each output's filter selects the input_type / paired_cat_type choices that produce it; both collections are discovered from the "concatenated" directory --> | |
258 <data name="out_file1" format_source="inputs" metadata_source="inputs" label="Concatenated datasets"> | |
259 <filter>global_condition['input_type'] == 'singles'</filter> | |
260 </data> | |
261 <data name="paired_out_file" label="${global_condition.inputs.element_identifier}" auto_format="true"> | |
262 <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'all'</filter> | |
263 </data> | |
264 <collection name="paired_output" type="paired" label="Concatenation by strand"> | |
265 <discover_datasets pattern="(?P<name>.*)\.listed\.(?P<ext>.*)\.listed" visible="false" directory="concatenated"/> | |
266 <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_strand'</filter> | |
267 </collection> | |
268 <collection name="list_output" type="list" label="Concatenation by pairs"> | |
269 <discover_datasets pattern="(?P<identifier_0>.*)\.listed\.(?P<ext>.*)\.listed" visible="false" directory="concatenated"/> | |
270 <filter>(global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair') or (global_condition['input_type'] == 'simple_collections') or (global_condition['input_type'] == 'nested_collection')</filter> | |
271 </collection> | |
272 </outputs> | |
273 <tests> | |
274 <!-- Single files concatenation --> | |
275 <test> <!-- Test 2 single files concatenation with no other option --> | |
276 <param name="input_type" value="singles" /> | |
277 <param name="inputs" value="1.bed,2.bed"/> | |
278 <param name="dataset_names" value="No" /> | |
279 <param name="headers" value="0" /> | |
280 <output name="out_file1" file="cat_wrapper_out1.bed"/> | |
281 </test> | |
282 <test> <!-- Test 2 single files concatenation with dataset names activated --> | |
283 <param name="input_type" value="singles" /> | |
284 <param name="inputs" value="1.bed,2.bed"/> | |
285 <param name="dataset_names" value="Yes" /> | |
286 <param name="headers" value="0" /> | |
287 <output name="out_file1" file="cat_wrapper_out2.bed"/> | |
288 </test> | |
289 <test> <!-- Test 2 single files concatenation skipping 1 line --> | |
290 <param name="input_type" value="singles" /> | |
291 <param name="inputs" value="1.bed,2.bed"/> | |
292 <param name="dataset_names" value="No" /> | |
293 <param name="headers" value="1" /> | |
294 <output name="out_file1" file="cat_wrapper_out3.bed"/> | |
295 </test> | |
296 <test> <!-- Test gz handling with no options --> | |
297 <param name="input_type" value="singles" /> | |
298 <param name="inputs" value="1_f.fastq.gz,1_r.fastq.gz"/> | |
299 <param name="dataset_names" value="No" /> | |
300 <param name="headers" value="0" /> | |
301 <output name="out_file1" file="1.fastq.gz" decompress="True"/> | |
302 </test> | |
303 <test> <!-- Test gz handling with options --> | |
304 <param name="input_type" value="singles" /> | |
305 <param name="inputs" value="1_f.fastq.gz,1_r.fastq.gz"/> | |
306 <param name="dataset_names" value="Yes" /> | |
307 <param name="headers" value="4" /> | |
308 <output name="out_file1" file="1_options.fastq.gz" decompress="True"/> | |
309 </test> | |
310 <!-- Test paired options --> | |
311 <test> <!-- Test paired collection concatenation by_pair with no other option --> | |
312 <param name="input_type" value="paired_collection" /> | |
313 <param name="paired_cat_type" value="by_pair"/> | |
314 <param name="inputs"> | |
315 <collection type="list:paired"> | |
316 <element name="2"> | |
317 <collection type="paired"> | |
318 <element name="forward" value="2_f.fastq"/> | |
319 <element name="reverse" value="2_r.fastq"/> | |
320 </collection> | |
321 </element> | |
322 <element name="3"> | |
323 <collection type="paired"> | |
324 <element name="forward" value="3_f.fastq"/> | |
325 <element name="reverse" value="3_r.fastq"/> | |
326 </collection> | |
327 </element> | |
328 <element name="4"> | |
329 <collection type="paired"> | |
330 <element name="forward" value="4_f.fastq"/> | |
331 <element name="reverse" value="4_r.fastq"/> | |
332 </collection> | |
333 </element> | |
334 </collection> | |
335 </param> | |
336 <param name="dataset_names" value="No" /> | |
337 <param name="headers" value="0" /> | |
338 <output_collection name="list_output" type="list" > | |
339 <element name="2" file="2.fastq"/> | |
340 <element name="3" file="3.fastq"/> | |
341 <element name="4" file="4.fastq"/> | |
342 </output_collection> | |
343 </test> | |
344 <test> <!-- Test paired collection concatenation by_strand with no other option --> | |
345 <param name="input_type" value="paired_collection" /> | |
346 <param name="paired_cat_type" value="by_strand"/> | |
347 <param name="inputs"> | |
348 <collection type="list:paired"> | |
349 <element name="2"> | |
350 <collection type="paired"> | |
351 <element name="forward" value="2_f.fastq"/> | |
352 <element name="reverse" value="2_r.fastq"/> | |
353 </collection> | |
354 </element> | |
355 <element name="3"> | |
356 <collection type="paired"> | |
357 <element name="forward" value="3_f.fastq"/> | |
358 <element name="reverse" value="3_r.fastq"/> | |
359 </collection> | |
360 </element> | |
361 <element name="4"> | |
362 <collection type="paired"> | |
363 <element name="forward" value="4_f.fastq"/> | |
364 <element name="reverse" value="4_r.fastq"/> | |
365 </collection> | |
366 </element> | |
367 </collection> | |
368 </param> | |
369 <param name="dataset_names" value="No" /> | |
370 <param name="headers" value="0" /> | |
371 <output_collection name="paired_output" type="paired" > | |
372 <element name="forward" file="f.fastq"/> | |
373 <element name="reverse" file="r.fastq"/> | |
374 </output_collection> | |
375 </test> | |
376 <test> <!-- Test 2 collections concatenation --> | |
377 <param name="input_type" value="simple_collections" /> | |
378 <!-- input_1 and input_2 are the only parameters of this input type; their elements are concatenated pairwise, in order --> | |
379 <param name="input_1"> | |
380 <collection type="list"> | |
381 <element name="2" value="2_f.fastq"/> | |
382 <element name="3" value="3_f.fastq"/> | |
383 <element name="4" value="4_f.fastq"/> | |
384 </collection> | |
385 </param> | |
386 <param name="input_2"> | |
387 <collection type="list"> | |
388 <element name="2" value="2_r.fastq"/> | |
389 <element name="3" value="3_r.fastq"/> | |
390 <element name="4" value="4_r.fastq"/> | |
391 </collection> | |
392 </param> | |
393 <param name="dataset_names" value="No" /> | |
394 <param name="headers" value="0" /> | |
395 <output_collection name="list_output" type="list" count="3" > | |
396 <element name="2" file="2.fastq"/> | |
397 <element name="3" file="3.fastq"/> | |
398 <element name="4" file="4.fastq"/> | |
399 </output_collection> | |
400 </test> | |
401 <test> <!-- Test 2 collections concatenation with other options--> | |
402 <param name="input_type" value="simple_collections" /> | |
403 <!-- input_1 and input_2 are the only parameters of this input type; the output element is named after both input identifiers --> | |
404 <param name="input_1"> | |
405 <collection type="list"> | |
406 <element name="1_f.fastq.gz" value="1_f.fastq.gz"/> | |
407 </collection> | |
408 </param> | |
409 <param name="input_2"> | |
410 <collection type="list"> | |
411 <element name="1_r.fastq.gz" value="1_r.fastq.gz"/> | |
412 </collection> | |
413 </param> | |
414 <param name="dataset_names" value="Yes" /> | |
415 <param name="headers" value="4" /> | |
416 <output_collection name="list_output" type="list" count="1" > | |
417 <element name="1_f.fastq.gz_1_r.fastq.gz" file="1_options.fastq.gz" decompress="True"/> | |
418 </output_collection> | |
419 </test> | |
420 <test> <!-- Test nested collections concatenation --> | |
421 <param name="input_type" value="nested_collection" /> | |
422 <param name="inputs"> | |
423 <collection type="list:list"> | |
424 <element name="2"> | |
425 <collection type="list"> | |
426 <element name="2_f" value="2_f.fastq" ftype="fastq"/> | |
427 <element name="2_r" value="2_r.fastq" ftype="fastq"/> | |
428 </collection> | |
429 </element> | |
430 <element name="3"> | |
431 <collection type="list"> | |
432 <element name="3" value="3.fastq" ftype="fastq"/> | |
433 </collection> | |
434 </element> | |
435 </collection> | |
436 </param> | |
437 <param name="dataset_names" value="No" /> | |
438 <param name="headers" value="0" /> | |
439 <output_collection name="list_output" type="list" count="2" > | |
440 <element name="2" file="2.fastq"/> | |
441 <element name="3" file="3.fastq"/> | |
442 </output_collection> | |
443 </test> | |
444 <test> <!-- Test nested collections concatenation with options and gzip--> | |
445 <param name="input_type" value="nested_collection" /> | |
446 <param name="inputs"> | |
447 <collection type="list:list"> | |
448 <element name="1"> | |
449 <collection type="list"> | |
450 <element name="1_f.fastq.gz" value="1_f.fastq.gz" ftype="fastq.gz"/> | |
451 <element name="1_r.fastq.gz" value="1_r.fastq.gz" ftype="fastq.gz"/> | |
452 </collection> | |
453 </element> | |
454 </collection> | |
455 </param> | |
456 <param name="dataset_names" value="Yes" /> | |
457 <param name="headers" value="4" /> | |
458 <output_collection name="list_output" type="list" count="1" > | |
459 <element name="1" file="1_options.fastq.gz" decompress="True"/> | |
460 </output_collection> | |
461 </test> | |
462 </tests> | |
463 <help> | |
464 | |
465 .. class:: warningmark | |
466 | |
467 **WARNINGS:** | |
468 | |
469 - This tool does not check if the datasets being concatenated are in the same format. | |
470 - When concatenating 2 collections, datasets are paired by position: make sure both collections contain the same number of items, because extra items of the longer collection are ignored. | |
471 - This tool can't handle nested collection deeper than list:list. | |
472 | |
473 ----- | |
474 | |
475 **What it does** | |
476 | |
477 Concatenates datasets and paired collections with multiple options: | |
478 | |
479 - When the input is a paired collection: | |
480 | |
481 - concatenation by strand : forward and reverse datasets are concatenated separately and a list with a single forward - reverse dataset pair is returned | |
482 | |
483 - concatenation by pair : forward - reverse dataset pairs are concatenated and a simple dataset collection is returned | |
484 | |
485 - whole collection concatenation : all datasets in the collection are concatenated and a single dataset is returned | |
486 | |
487 - When the inputs are 2 collections: datasets are concatenated in a pairwise combination and a single dataset collection is returned | |
488 | |
489 - When the input is a nested collection: datasets in each sub-collection are concatenated and a simple dataset collection is returned | |
490 | |
491 - Skipping lines before concatenation to avoid headers | |
492 | |
493 - Add the name of the concatenated files as separator | |
494 | |
495 ----- | |
496 | |
497 **Single datasets concatenation example** | |
498 | |
499 Concatenating Dataset:: | |
500 | |
501 chrX 151087187 151087355 A 0 - | |
502 chrX 151572400 151572481 B 0 + | |
503 | |
504 with Dataset1:: | |
505 | |
506 chr1 151242630 151242955 X 0 + | |
507 chr1 151271715 151271999 Y 0 + | |
508 chr1 151278832 151279227 Z 0 - | |
509 | |
510 and with Dataset2:: | |
511 | |
512 chr2 100000030 200000955 P 0 + | |
513 chr2 100000015 200000999 Q 0 + | |
514 | |
515 will result in the following:: | |
516 | |
517 chrX 151087187 151087355 A 0 - | |
518 chrX 151572400 151572481 B 0 + | |
519 chr1 151242630 151242955 X 0 + | |
520 chr1 151271715 151271999 Y 0 + | |
521 chr1 151278832 151279227 Z 0 - | |
522 chr2 100000030 200000955 P 0 + | |
523 chr2 100000015 200000999 Q 0 + | |
524 | |
525 ----- | |
526 | |
527 **2 Collections concatenation** | |
528 | |
529 1st collection:: | |
530 | |
531 a | |
532 b | |
533 c | |
534 d | |
535 | |
536 2nd collection:: | |
537 | |
538 1 | |
539 2 | |
540 3 | |
541 4 | |
542 | |
543 Concatenation result:: | |
544 | |
545 A single collection containing: | |
546 | |
547 a concatenated with 1 | |
548 b concatenated with 2 | |
549 c concatenated with 3 | |
550 d concatenated with 4 | |
551 | |
552 ----- | |
553 | |
554 **Paired collection concatenation example** | |
555 | |
556 1st pair:: | |
557 | |
558 forward - reverse | |
559 | |
560 2nd pair:: | |
561 | |
562 forward - reverse | |
563 | |
564 - Concatenation by strand:: | |
565 | |
566 concatenates: | |
567 | |
568 1st forward + 2nd forward | |
569 1st reverse + 2nd reverse | |
570 | |
571 outputs: | |
572 | |
573 1 pair | |
574 | |
575 - Concatenation by pair:: | |
576 | |
577 concatenates: | |
578 | |
579 1st forward + 1st reverse | |
580 2nd forward + 2nd reverse | |
581 | |
582 outputs: | |
583 | |
584 2 datasets | |
585 | |
586 - Concatenate all:: | |
587 | |
588 concatenates: | |
589 | |
590 1st forward + 1st reverse + 2nd forward + 2nd reverse | |
591 | |
592 outputs: | |
593 | |
594 1 dataset | |
595 | |
596 ----- | |
597 | |
598 **Nested collection concatenation example** | |
599 | |
600 Nested collection: | |
601 | |
602 - Experiment | |
603 | |
604 - Sample_1 | |
605 | |
606 - Sample_1_file_1 | |
607 - Sample_1_file_2 | |
608 | |
609 - Sample_2 | |
610 | |
611 - Sample_2_file_1 | |
612 - Sample_2_file_2 | |
613 - Sample_2_file_3 | |
614 | |
615 Concatenation result:: | |
616 | |
617 A single collection containing: | |
618 | |
619 - Sample_1: (Sample_1_file_1 + Sample_1_file_2) | |
620 - Sample_2: (Sample_2_file_1 + Sample_2_file_2 + Sample_2_file_3) | |
621 | |
622 ----- | |
623 | |
624 **When selecting "Include dataset names" when concatenating files**: | |
625 | |
626 1st file name="first_tabular":: | |
627 | |
628 chrX 151087187 151087355 A 0 - | |
629 chrX 151572400 151572481 B 0 + | |
630 | |
631 2nd file name="second_tabular":: | |
632 | |
633 chr1 151242630 151242955 X 0 + | |
634 chr1 151271715 151271999 Y 0 + | |
635 chr1 151278832 151279227 Z 0 - | |
636 | |
637 output:: | |
638 | |
639 # first_tabular | |
640 chrX 151087187 151087355 A 0 - | |
641 chrX 151572400 151572481 B 0 + | |
642 # second_tabular | |
643 chr1 151242630 151242955 X 0 + | |
644 chr1 151271715 151271999 Y 0 + | |
645 chr1 151278832 151279227 Z 0 - | |
646 | |
647 ----- | |
648 | |
649 **Skipping lines** | |
650 | |
651 1st file:: | |
652 | |
653 chrX 151087187 151087355 A 0 - | |
654 chrX 151572400 151572481 B 0 + | |
655 | |
656 2nd file:: | |
657 | |
658 chr1 151242630 151242955 X 0 + | |
659 chr1 151271715 151271999 Y 0 + | |
660 chr1 151278832 151279227 Z 0 - | |
661 | |
662 skipping 1 line | |
663 | |
664 output:: | |
665 | |
666 chrX 151572400 151572481 B 0 + | |
667 chr1 151271715 151271999 Y 0 + | |
668 chr1 151278832 151279227 Z 0 - | |
669 | |
670 ----- | |
671 | |
672 Adapted from galaxy's catWrapper.xml to allow multiple input files. | |
673 | |
674 </help> | |
675 </tool> |