Mercurial > repos > artbio > concatenate_multiple_datasets
diff catWrapper.xml @ 0:122dbfdf0826 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 75811bd85c9d175d9bc7b2c4fd723adc3a361f0f
author | artbio |
---|---|
date | Tue, 09 Jul 2019 09:51:52 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/catWrapper.xml Tue Jul 09 09:51:52 2019 -0400 @@ -0,0 +1,675 @@ +<tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.4.1"> + <description>tail-to-head by specifying how</description> + <command><![CDATA[ + #if $headers == 0: + #set $concat_command = "cat" + #else: + #set $concat_command = 'tail -q -n +'+ str(int($headers)+1) + #end if + printf "Concatenating files:\n" && + #if $global_condition.input_type == "singles": + #if $dataset_names == "No": + #for $file in $global_condition.inputs + printf "${file.element_identifier}..." && + #if $file.ext: + #if $file.ext[-2:] == "gz" and $headers != 0: + gzip -dc '$file' | $concat_command | gzip -c >> '$out_file1' && + #else: + $concat_command '$file' >> '$out_file1' && + #end if + #else: + $concat_command '$file' >> '$out_file1' && + #end if + printf "Done\n" && + #end for + sleep 1 + #else: + #for $file in $global_condition.inputs + printf "${file.element_identifier}..." && + #if $file.ext[-2:] == "gz" and $headers != 0: + printf "# ${file.element_identifier}\n" | gzip -c >> '$out_file1' && + gzip -dc "$file" | $concat_command |gzip -c >> '$out_file1' && + #else: + printf "# ${file.element_identifier}\n" >> '$out_file1' && + $concat_command "$file" >> '$out_file1' && + #end if + printf "Done\n" && + #end for + sleep 1 + #end if + #else if $global_condition.input_type == "simple_collections": + mkdir concatenated && + #if $dataset_names == "No": + #for $x, $y in zip($global_condition.input_1, $global_condition.input_2): + printf "${x.element_identifier} and ${y.element_identifier}..." && + #if $x.ext[-2:] == "gz" and $headers != 0: + gzip -dc '$x' | $concat_command | gzip -c > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && + gzip -dc '$y' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && + #else: + $concat_command '$x' '$y' > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && + #end if + printf "Done\n" && + #end for + sleep 1 + #else: + #for $x, $y in zip($global_condition.input_1, $global_condition.input_2) + printf "${x.element_identifier} and ${y.element_identifier}..." && + #if $x.ext[-2:] == "gz" and $headers != 0: + printf "# ${x.element_identifier}\n" | gzip -c > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + gzip -dc '$x' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + printf "# ${y.element_identifier}\n" | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + gzip -dc '$y' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + #else: + printf "# ${x.element_identifier}\n" > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + $concat_command '$x'>> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + printf "# ${y.element_identifier}\n" >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + $concat_command '$y' >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + #end if + printf "Done\n" && + #end for + sleep 1 + #end if + #else if $global_condition.input_type == "paired_collection": + #if $global_condition.paired_cat_type == "by_strand": + mkdir concatenated && + #if $dataset_names == "No": + #for $file in $global_condition.inputs + printf "${file.element_identifier}- forward and reverse..." && + #if $file['forward'].ext[-2:] == "gz" and $headers != 0: + gzip -dc $file['forward'] | $concat_command | gzip -c >> concatenated/forward.listed.${file['forward'].ext}.listed && + gzip -dc $file['reverse'] | $concat_command | gzip -c >> concatenated/reverse.listed.${file['reverse'].ext}.listed && + #else: + $concat_command $file['forward'] >> concatenated/forward.listed.${file['forward'].ext}.listed && + $concat_command $file['reverse'] >> concatenated/reverse.listed.${file['reverse'].ext}.listed && + #end if + printf "Done\n" && + #end for + sleep 1 + #else: + #for $file in $global_condition.inputs.keys(): + printf "$file - forward and reverse..." && + #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz" and $headers != 0: + printf "# ${file}_forward\n" | gzip -c >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && + gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && + printf "# ${file}_reverse\n" | gzip -c >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && + gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && + #else: + printf "# ${file}_forward\n" >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && + $concat_command $global_condition.inputs[$file]['forward'] >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && + printf "# ${file}_reverse\n" >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && + $concat_command $global_condition.inputs[$file]['reverse'] >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && + #end if + printf "Done\n" && + #end for + sleep 1 + #end if + #else if $global_condition.paired_cat_type == "by_pair": + mkdir concatenated && + #if $dataset_names == "No": + #for $file in $global_condition.inputs.keys(): + printf "$file - forward and reverse..." && + #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz" and $headers != 0: + gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c + > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #else: + $concat_command $global_condition.inputs[$file]['forward'] + > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command $global_condition.inputs[$file]['reverse'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #end if + printf "Done\n" && + #end for + sleep 1 + #else: + #for $file in $global_condition.inputs.keys(): + printf "$file - forward and reverse..." && + #if $global_condition.inputs[$file]['reverse'].ext[-2:] == "gz" and $headers != 0: + printf "# ${file}_forward\n" | gzip -c > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + printf "# ${file}_reverse\n" | gzip -c >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #else: + printf "# ${file}_forward\n" > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command $global_condition.inputs[$file]['forward'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + printf "# ${file}_reverse\n" >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command $global_condition.inputs[$file]['reverse'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #end if + printf "Done\n" && + #end for + sleep 1 + #end if + #else if $global_condition.paired_cat_type == "all": + mkdir concatenated && + #set $base_name=$global_condition.inputs.element_identifier + #set $extention=$global_condition.inputs[$global_condition.inputs.keys()[0]]['forward'].ext + #if $dataset_names == "No": + #for $file in $global_condition.inputs.keys(): + printf "$file - forward and reverse..." && + #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz" and $headers != 0: + gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> c'$paired_out_file' && + gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> '$paired_out_file' && + #else: + $concat_command + $global_condition.inputs[$file]['forward'] + >> '$paired_out_file' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> '$paired_out_file' && + #end if + printf "Done\n" && + #end for + sleep 1 + #else: + #for $file in $global_condition.inputs.keys(): + printf "$file - forward and reverse..." && + #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz" and $headers != 0: + printf "# ${file}_forward\n" | gzip -c > '$paired_out_file' && + gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> '$paired_out_file' && + printf "# ${file}_reverse\n" | gzip -c >> '$paired_out_file' && + gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> '$paired_out_file' && + #else: + printf "# ${file}_forward\n" > '$paired_out_file' && + $concat_command + $global_condition.inputs[$file]['forward'] + >> '$paired_out_file' && + printf "# ${file}_reverse\n" >> '$paired_out_file' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> '$paired_out_file' && + #end if + printf "Done\n" && + #end for + sleep 1 + #end if + #end if + #else if $global_condition.input_type == "nested_collection": + mkdir concatenated && + #if $dataset_names == "No": + #for $sub_list in $global_condition.inputs: + #set $file_base_name=$sub_list.element_identifier + #for $sub_list_element in $sub_list: + printf "${file_base_name} - ${sub_list_element.element_identifier}..." && + #if $sub_list_element.ext[-2:] == "gz" and $headers != 0: + gzip -dc ${sub_list_element} | $concat_command | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + #else: + $concat_command ${sub_list_element} >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + #end if + printf "Done\n" && + #end for + #end for + sleep 1 + #else: + #for $sub_list in $global_condition.inputs: + #set $file_base_name=$sub_list.element_identifier + #for $sub_list_element in $sub_list: + printf "${file_base_name} - ${sub_list_element.element_identifier}..." && + #if $sub_list_element.ext[-2:] == "gz" and $headers != 0: + printf "# ${sub_list_element.element_identifier}\n" | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + gzip -dc ${sub_list_element} | $concat_command | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + #else: + printf "# ${sub_list_element.element_identifier}\n" >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + $concat_command ${sub_list_element} >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + #end if + printf "Done\n" && + #end for + #end for + sleep 1 + #end if + #end if + ]]> + </command> + <inputs> + <conditional name="global_condition"> + <param name="input_type" type="select" label="What type of data do you wish to concatenate?" help="Depending on the type of input selected the concatenation options will differ"> + <option value="singles">Single datasets</option> + <option value="simple_collections">2 Collections</option> + <option value="paired_collection">Paired collection</option> + <option value="nested_collection">Nested collection</option> + </param> + <when value="singles"> + <param name="inputs" type="data" label="Concatenate Datasets" multiple="True" help="All inputed datasets will be concatenated tail-to-head."/> + </when> + <when value="paired_collection"> + <param name="inputs" type="data_collection" collection_type="list:paired" label="Input paired collection to concatenate"/> + <param name="paired_cat_type" type="select" label="What type of concatenation do you wish to perform?"> + <option value="by_strand">Concatenate all datsets of same strand (outputs a single pair of datasets)</option> + <option value="by_pair">Concatenate pairs of datasets (outputs an unpaired collection of datasets)</option> + <option value="all">Concatenate all datasets into a single file regardless of strand (outputs a single file)</option> + </param> + </when> + <when value="simple_collections"> + <param name="input_1" type="data_collection" collection_type="list" label="Input first collection" help="The first collection contains the datasets that will be written first in the concatenated file" /> + <param name="input_2" type="data_collection" collection_type="list" label="Input second collection" help="The second collection contains the datasets that will be written last in the concatenated file" /> + </when> + <when value="nested_collection"> + <param name="inputs" type="data_collection" collection_type="list:list" label="Input nested collection" help="The Nested collection which items you want to concatenate." /> + </when> + </conditional> + <param name="dataset_names" type="boolean" label="Include dataset names?" truevalue="Yes" falsevalue="No" checked="false" help="If 'Yes' is selected '#name of dataset' will be added when concatenating."/> + <param name="headers" type="integer" label="Number of lines to skip at the beginning of each concatenation:" value="0" help="This paremeter exists so as to not concatenate comments or headers contained at the start of the files."/> + </inputs> + <outputs> + <data name="out_file1" format_source="inputs" metadata_source="inputs" label="Concatenated datasets"> + <filter>global_condition['input_type'] == 'singles'</filter> + </data> + <data name="paired_out_file" label="${global_condition.inputs.element_identifier}" auto_format="true"> + <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'all'</filter> + </data> + <collection name="paired_output" type="paired" label="Concatenation by strtand"> + <discover_datasets pattern="(?P<name>.*)\.listed\.(?P<ext>.*)\.listed" visible="false" directory="concatenated"/> + <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_strand'</filter> + </collection> + <collection name="list_output" type="list" label="Concatenation by pairs"> + <discover_datasets pattern="(?P<identifier_0>.*)\.listed\.(?P<ext>.*)\.listed" visible="false" directory="concatenated"/> + <filter>(global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair') or (global_condition['input_type'] == 'simple_collections') or (global_condition['input_type'] == 'nested_collection')</filter> + </collection> + </outputs> + <tests> + <!-- Single files concatenation --> + <test> <!-- Test 2 single files concatenation with no other option --> + <param name="input_type" value="singles" /> + <param name="inputs" value="1.bed,2.bed"/> + <param name="dataset_names" value="No" /> + <param name="headers" value="0" /> + <output name="out_file1" file="cat_wrapper_out1.bed"/> + </test> + <test> <!-- Test 2 single files concatenation with dataset names activated --> + <param name="input_type" value="singles" /> + <param name="inputs" value="1.bed,2.bed"/> + <param name="dataset_names" value="Yes" /> + <param name="headers" value="0" /> + <output name="out_file1" file="cat_wrapper_out2.bed"/> + </test> + <test> <!-- Test 2 single files concatenation skipping 1 line --> + <param name="input_type" value="singles" /> + <param name="inputs" value="1.bed,2.bed"/> + <param name="dataset_names" value="No" /> + <param name="headers" value="1" /> + <output name="out_file1" file="cat_wrapper_out3.bed"/> + </test> + <test> <!-- Test gz handling with no options --> + <param name="input_type" value="singles" /> + <param name="inputs" value="1_f.fastq.gz,1_r.fastq.gz"/> + <param name="dataset_names" value="No" /> + <param name="headers" value="0" /> + <output name="out_file1" file="1.fastq.gz" decompress="True"/> + </test> + <test> <!-- Test gz handling with options --> + <param name="input_type" value="singles" /> + <param name="inputs" value="1_f.fastq.gz,1_r.fastq.gz"/> + <param name="dataset_names" value="Yes" /> + <param name="headers" value="4" /> + <output name="out_file1" file="1_options.fastq.gz" decompress="True"/> + </test> + <!-- Test paired options --> + <test> <!-- Test paired collection concatenation by_pair with no other option --> + <param name="input_type" value="paired_collection" /> + <param name="paired_cat_type" value="by_pair"/> + <param name="inputs"> + <collection type="list:paired"> + <element name="2"> + <collection type="paired"> + <element name="forward" value="2_f.fastq"/> + <element name="reverse" value="2_r.fastq"/> + </collection> + </element> + <element name="3"> + <collection type="paired"> + <element name="forward" value="3_f.fastq"/> + <element name="reverse" value="3_r.fastq"/> + </collection> + </element> + <element name="4"> + <collection type="paired"> + <element name="forward" value="4_f.fastq"/> + <element name="reverse" value="4_r.fastq"/> + </collection> + </element> + </collection> + </param> + <param name="dataset_names" value="No" /> + <param name="headers" value="0" /> + <output_collection name="list_output" type="list" > + <element name="2" file="2.fastq"/> + <element name="3" file="3.fastq"/> + <element name="4" file="4.fastq"/> + </output_collection> + </test> + <test> <!-- Test paired collection concatenation by_strand with no other option --> + <param name="input_type" value="paired_collection" /> + <param name="paired_cat_type" value="by_strand"/> + <param name="inputs"> + <collection type="list:paired"> + <element name="2"> + <collection type="paired"> + <element name="forward" value="2_f.fastq"/> + <element name="reverse" value="2_r.fastq"/> + </collection> + </element> + <element name="3"> + <collection type="paired"> + <element name="forward" value="3_f.fastq"/> + <element name="reverse" value="3_r.fastq"/> + </collection> + </element> + <element name="4"> + <collection type="paired"> + <element name="forward" value="4_f.fastq"/> + <element name="reverse" value="4_r.fastq"/> + </collection> + </element> + </collection> + </param> + <param name="dataset_names" value="No" /> + <param name="headers" value="0" /> + <output_collection name="paired_output" type="paired" > + <element name="forward" file="f.fastq"/> + <element name="reverse" file="r.fastq"/> + </output_collection> + </test> + <test> <!-- Test 2 collections concatenation --> + <param name="input_type" value="simple_collections" /> + <param name="collection_cat_type" value="two_collections"/> + <param name="input_1"> + <collection type="list"> + <element name="2" value="2_f.fastq"/> + <element name="3" value="3_f.fastq"/> + <element name="4" value="4_f.fastq"/> + </collection> + </param> + <param name="input_2"> + <collection type="list"> + <element name="2" value="2_r.fastq"/> + <element name="3" value="3_r.fastq"/> + <element name="4" value="4_r.fastq"/> + </collection> + </param> + <param name="dataset_names" value="No" /> + <param name="headers" value="0" /> + <output_collection name="list_output" type="list" count="3" > + <element name="2" file="2.fastq"/> + <element name="3" file="3.fastq"/> + <element name="4" file="4.fastq"/> + </output_collection> + </test> + <test> <!-- Test 2 collections concatenation with other options--> + <param name="input_type" value="simple_collections" /> + <param name="collection_cat_type" value="two_collections"/> + <param name="input_1"> + <collection type="list"> + <element name="1_f.fastq.gz" value="1_f.fastq.gz"/> + </collection> + </param> + <param name="input_2"> + <collection type="list"> + <element name="1_r.fastq.gz" value="1_r.fastq.gz"/> + </collection> + </param> + <param name="dataset_names" value="Yes" /> + <param name="headers" value="4" /> + <output_collection name="list_output" type="list" count="1" > + <element name="1_f.fastq.gz_1_r.fastq.gz" file="1_options.fastq.gz" decompress="True"/> + </output_collection> + </test> + <test> <!-- Test nested collections concatenation --> + <param name="input_type" value="nested_collection" /> + <param name="inputs"> + <collection type="list:list"> + <element name="2"> + <collection type="list"> + <element name="2_f" value="2_f.fastq" ftype="fastq"/> + <element name="2_r" value="2_r.fastq" ftype="fastq"/> + </collection> + </element> + <element name="3"> + <collection type="list"> + <element name="3" value="3.fastq" ftype="fastq"/> + </collection> + </element> + </collection> + </param> + <param name="dataset_names" value="No" /> + <param name="headers" value="0" /> + <output_collection name="list_output" type="list" count="2" > + <element name="2" file="2.fastq"/> + <element name="3" file="3.fastq"/> + </output_collection> + </test> + <test> <!-- Test nested collections concatenation with options and gzip--> + <param name="input_type" value="nested_collection" /> + <param name="inputs"> + <collection type="list:list"> + <element name="1"> + <collection type="list"> + <element name="1_f.fastq.gz" value="1_f.fastq.gz" ftype="fastq.gz"/> + <element name="1_r.fastq.gz" value="1_r.fastq.gz" ftype="fastq.gz"/> + </collection> + </element> + </collection> + </param> + <param name="dataset_names" value="Yes" /> + <param name="headers" value="4" /> + <output_collection name="list_output" type="list" count="1" > + <element name="1" file="1_options.fastq.gz" decompress="True"/> + </output_collection> + </test> + </tests> + <help> + +.. class:: warningmark + +**WARNINGS:** + +- This tool does not check if the datasets being concatenated are in the same format. +- When concatenating 2 collections make sure the first collection is the one with the most items. +- This tool can't handle nested collection deeper than list:list. + +----- + +**What it does** + +Concatenates datasets and paired collections with multiple options: + + - When the input is a paired collection: + + - concatenation by strand : forward and reverse datasets are concatenated separately and a list with a single forward - reverse dataset pair is returned + + - concatenation by pair : forward - reverse dataset pairs are concatenated and a simple dataset collection is returned + + - whole collection concatenation : all datasets in the collection are concatenated and a single dataset is returned + + - When the inputs are 2 collections: datasets are concatenated in a pairwise combination and a single dataset collection is returned + + - When nested collection concatenation: datasets in each sub-collection are concatenated and a simple dataset collection is returned + + - Skipping lines before concatenation to avoid headers + + - Add the name of the concatenated files as separator + +----- + +**Single datasets concatenation example** + +Concatenating Dataset:: + + chrX 151087187 151087355 A 0 - + chrX 151572400 151572481 B 0 + + +with Dataset1:: + + chr1 151242630 151242955 X 0 + + chr1 151271715 151271999 Y 0 + + chr1 151278832 151279227 Z 0 - + +and with Dataset2:: + + chr2 100000030 200000955 P 0 + + chr2 100000015 200000999 Q 0 + + +will result in the following:: + + chrX 151087187 151087355 A 0 - + chrX 151572400 151572481 B 0 + + chr1 151242630 151242955 X 0 + + chr1 151271715 151271999 Y 0 + + chr1 151278832 151279227 Z 0 - + chr2 100000030 200000955 P 0 + + chr2 100000015 200000999 Q 0 + + +----- + +**2 Collections concatenation** + +1rst collection:: + + a + b + c + d + +2nd collection:: + + 1 + 2 + 3 + 4 + +Concatenation result:: + + A single collection containing: + + a concatenated with 1 + b concatenated with 2 + c concatenated with 3 + d concatenated with 4 + +----- + +**Paired collection concatenation example** + +1rst pair:: + + forward - reverse + +2nd pair:: + + forward - reverse + +- Concatenation by strand:: + + concatenates: + + 1rst forward + 2nd forward + 1rst reverse + 2nd reverse + + outputs: + + 1 pair + +- Concatenation by pair:: + + concatenates: + + 1rst forward + 1rst reverse + 2nd forward + 2nd reverse + + outputs: + + 2 datasets + +- Concatenate all:: + + concatenates: + + 1rst forward + 1rst reverse + 2nd forward + 2nd reverse + + outputs: + + 1 dataset + +----- + +**Nested collection concatenation example** + +Nested collection: + + - Experiment + + - Sample_1 + + - Sample_1_file_1 + - Sample_1_file_2 + + - Sample_2 + + - Sample_2_file_1 + - Sample_2_file_2 + - Sample_2_file_3 + +Concatenation result:: + + A single collection containing: + + - Sample_1: (Sample_1_file_1 + Sample_1_file_2) + - Sample_2: (Sample_2_file_1 + Sample_2_file_2 + Sample_2_file_3) + +----- + +**When selecting "Include dataset names" when concatenating files**: + +1rst file name="first_tabular":: + + chrX 151087187 151087355 A 0 - + chrX 151572400 151572481 B 0 + + +2nd file name="second_tabular":: + + chr1 151242630 151242955 X 0 + + chr1 151271715 151271999 Y 0 + + chr1 151278832 151279227 Z 0 - + +output:: + + # first_tabular + chrX 151087187 151087355 A 0 - + chrX 151572400 151572481 B 0 + + # second_tabular + chr1 151242630 151242955 X 0 + + chr1 151271715 151271999 Y 0 + + chr1 151278832 151279227 Z 0 - + +----- + +**Skiping lines** + +1rst file:: + + chrX 151087187 151087355 A 0 - + chrX 151572400 151572481 B 0 + + +2nd file:: + + chr1 151242630 151242955 X 0 + + chr1 151271715 151271999 Y 0 + + chr1 151278832 151279227 Z 0 - + +skipping 1 line + +output:: + + chrX 151572400 151572481 B 0 + + chr1 151271715 151271999 Y 0 + + chr1 151278832 151279227 Z 0 - + +----- + +Adapted from galaxy's catWrapper.xml to allow multiple input files. + + </help> +</tool>