Mercurial > repos > matthias > stacks2_shortreads
diff stacks_shortreads.xml @ 0:061c9e133d1e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 98327d2948ae1ccb5aef5db9ab88605fd74a0de7-dirty
| author | matthias |
|---|---|
| date | Thu, 29 Nov 2018 11:43:50 -0500 |
| parents | |
| children | 02905edf00e4 |
line wrap: on
line diff
<!--
    Galaxy wrapper for the Stacks2 program process_shortreads.
    This is essentially a copy of stacks_procrad minus the unsupported options.
    Requirements, stdio handling, the version command, the barcode-location
    options and the citation are all expanded from macros.xml.
-->
<tool id="stacks2_shortreads" name="Stacks2: process shortreads" version="@WRAPPER_VERSION@">
    <description>fast cleaning of randomly sheared genomic or transcriptomic data</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_cmd"/>
    <command><![CDATA[
        ## Step 1: link the input dataset(s) into the working directory as
        ## R1/R2 with an extension matching their datatype, and remember the
        ## matching input-type keyword for the -i option.
        #if $input_type.options_type_selector == "single"

            #if $input_type.input_single.is_of_type('fastqsanger')
                #set $ext = ".fq"
                #set $inputype = "fastq"
            #else
                #set $ext = ".fq.gz"
                #set $inputype = "gzfastq"
            #end if

            ln -s '$input_type.input_single' R1$ext &&
        #else

            ## For paired input only the first file is inspected; the same
            ## extension and input type are then used for both mates.
            #if $input_type.inputs_paired1.is_of_type('fastqsanger')
                #set $ext = ".fq"
                #set $inputype = "fastq"
            #else
                #set $ext = ".fq.gz"
                #set $inputype = "gzfastq"
            #end if

            ln -s '$input_type.inputs_paired1' R1$ext &&
            ln -s '$input_type.inputs_paired2' R2$ext &&
        #end if

        ## Step 2: create the output directory scanned by the output collections.
        mkdir stacks_outputs

        &&

        ## Step 3: run the program.
        process_shortreads

        #if $input_type.options_type_selector == "single"
            -f R1$ext
        #else
            -1 R1$ext
            -2 R2$ext
        #end if
        ## "auto" means: do not pass -y at all.
        #if str( $outype ) != "auto"
            -y $outype
        #end if
        -i $inputype
        -b '$barcode'
        $options_advanced.remove
        $options_advanced.discard
        $options_advanced.rescue
        #if str($options_advanced.truncate)
            -t $options_advanced.truncate
        #end if
        ## -E not implemented in Galaxy defaults to phred33
        $capture
        -w $options_advanced.sliding
        -s $options_advanced.score
        ##Barcode options:
        $input_type.barcode_encoding
        ##Adapter options:
        ## The adapter sequences are free-text parameters, so they are
        ## single-quoted to reach the program as one literal shell argument each.
        #if str($options_advanced.adapter_1) != "":
            --adapter_1 '$options_advanced.adapter_1'
        #end if
        #if str($options_advanced.adapter_2) != "":
            --adapter_2 '$options_advanced.adapter_2'
        #end if
        #if str($options_advanced.adapter_mm) != "":
            --adapter_mm $options_advanced.adapter_mm
        #end if
        ##Output options:
        $options_advanced.retain_header
        ## --merge not implemented in Galaxy
        ##Advanced options:
        $options_advanced.no_read_trimming
        #if str($options_advanced.len_limit) != "":
            --len_limit $options_advanced.len_limit
        #end if
        $options_advanced.filter_illumina
        #if str($options_advanced.barcode_dist_1) != "":
            --barcode_dist_1 $options_advanced.barcode_dist_1
        #end if
        #if str($options_advanced.barcode_dist_2) != "":
            --barcode_dist_2 $options_advanced.barcode_dist_2
        #end if
        $options_advanced.mate_pair
        $options_advanced.no_overhang

        -o stacks_outputs
        ## bug in short reads: for single end input discarded reads currently
        ## are written to working dir -> move them to be consistent with procrad
        #if $input_type.options_type_selector == "single" and str($capture) != "":
            && mv *discards stacks_outputs/
        #end if
    ]]></command>

    <inputs>
        <!-- Read layout: selects which input datasets are requested and which
             barcode-location macro is offered. -->
        <conditional name="input_type">
            <param name="options_type_selector" type="select" label="Single-end or paired-end reads files">
                <option value="single" selected="True">Single-end files</option>
                <option value="paired">Paired-end files</option>
            </param>
            <when value="single">
                <param name="input_single" argument="-f" format="fastqsanger,fastqsanger.gz" type="data" label="single-end reads infile(s)" help="input files" />

                <param name="barcode_encoding" type="select" label="Barcode location">
                    <expand macro="barcode_encoding_single" />
                </param>
            </when>
            <when value="paired">
                <param name="inputs_paired1" argument="-1" format="fastqsanger,fastqsanger.gz" type="data" label="paired-end reads infile(s) 1" help="Files must have this syntax : name_R1_001.fastq" />
                <param name="inputs_paired2" argument="-2" format="fastqsanger,fastqsanger.gz" type="data" label="paired-end reads infile(s) 2" help="Files must have this syntax : name_R2_001.fastq" />

                <param name="barcode_encoding" type="select" label="Barcode location">
                    <expand macro="barcode_encoding_pair" />
                </param>
            </when>
        </conditional>

        <param name="barcode" argument="-b" type="data" format="tabular,txt" label="Barcode file" help="Barcode file" />

        <!-- Also controls whether the "discarded" output collection is created. -->
        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" />

        <section name="options_advanced" title="advanced options" expanded="False">
            <param name="sliding" type="float" value="0.15" min="0" max="1" argument="-w" label="Set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15)" />
            <param name="score" type="integer" value="10" argument="-s" label="Set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10)" />
            <param name="remove" type="boolean" checked="false" truevalue="-c" falsevalue="" argument="-c" label="Clean data, remove any read with an uncalled base" />
            <param name="discard" type="boolean" checked="false" truevalue="-q" falsevalue="" argument="-q" label="Discard reads with low quality scores"/>
            <param name="rescue" type="boolean" checked="false" truevalue="-r" falsevalue="" argument="-r" label="Rescue barcodes?"/>
            <param name="truncate" type="integer" value="" optional="True" argument="-t" label="Truncate final read length to this value" />

            <param argument="--retain_header" type="boolean" checked="false" truevalue="--retain_header" falsevalue="" label="Retain unmodified FASTQ headers in the output" />

            <param argument="--filter_illumina" type="boolean" checked="false" truevalue="--filter_illumina" falsevalue="" label="discard reads that have been marked by Illumina's chastity/purity filter as failing" />
            <param argument="--len_limit" type="integer" value="" optional="true" label="minimum sequence length" help="useful if your data has already been trimmed (default 31)"/>
            <param argument="--barcode_dist_1" type="integer" value="1" optional="true" label="number of allowed mismatches when rescuing first read barcodes" help="(default 1)"/>
            <param argument="--barcode_dist_2" type="integer" value="1" optional="true" label="number of allowed mismatches when rescuing paired read barcodes" help="(default value for single end barcodes)"/>
            <param argument="--no_read_trimming" type="boolean" checked="false" truevalue="--no_read_trimming" falsevalue="" label="do not trim low quality reads, just discard them" />
            <param name="mate_pair" argument="--mate-pair" type="boolean" checked="false" truevalue="--mate-pair" falsevalue="" label="raw reads are circularized mate-pair data, first read will be reverse complemented" />
            <param argument="--no_overhang" type="boolean" checked="false" truevalue="--no_overhang" falsevalue="" label="data does not contain an overhang nucleotide between barcode and sequence" />

            <param argument="--adapter_1" type="text" value="" optional="true" label="adaptor sequence that may occur on the single-end read" />
            <param argument="--adapter_2" type="text" value="" optional="true" label="adaptor sequence that may occur on the paired-end" />
            <param argument="--adapter_mm" type="integer" value="" optional="true" label="number of mismatches allowed in the adapter sequence"/>
        </section>

        <!-- "auto" is handled in the command template by omitting the -y option. -->
        <param name="outype" argument="-y" type="select" label="Output format" >
            <option value="auto" selected="True">Same as input</option>
            <option value="fastq">fastq</option>
            <option value="fasta">fasta</option>
            <option value="gzfastq">gzipped fastq</option>
        </param>
    </inputs>

    <outputs>
        <data format="txt" name="output_log" label="${tool.name} on ${on_string} log file" from_work_dir="stacks_outputs/process_shortreads.log" />

        <!-- All three collections are discovered by file name inside stacks_outputs.
             NOTE(review): the leading ".+" of the demultiplexed patterns also matches
             names containing ".rem", so files picked up by the "remaining" collection
             appear to match here as well; confirm that this is intended. -->
        <collection name="demultiplexed" type="list" label="${tool.name} on ${on_string} Demultiplexed reads">
            <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fq\.gz$" ext="fastqsanger.gz" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fa$" ext="fasta" directory="stacks_outputs" />
        </collection>
        <collection name="remaining" type="list" label="${tool.name} on ${on_string} Remaining orphan reads">
            <filter>input_type['options_type_selector'] == "paired"</filter>
            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fq\.gz$" ext="fastqsanger.gz" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fa$" ext="fasta" directory="stacks_outputs" />
        </collection>
        <collection name="discarded" type="list" label="${tool.name} on ${on_string} Discarded reads">
            <filter>capture is True</filter>
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fq\.discards$" ext="fastqsanger" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fq\.gz\.discards$" ext="fastqsanger.gz" directory="stacks_outputs" /> <!-- discards are never zipped currently-->
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa\.discards$" ext="fasta" directory="stacks_outputs" /><!-- discards are always called fq even if fasta, so this does not work at the moment -->
        </collection>
    </outputs>

    <tests>
        <!-- single-end, uncompressed fastq input, discarded reads captured -->
        <test>
            <param name="options_type_selector" value="single"/>
            <param name="input_single" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <output name="output_log" file="shortreads/process_shortreads.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01" file="demultiplexed/PopA_01.fq" />
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fq.discards"/>
            </output_collection>
        </test>
        <!-- single-end, gzipped input and gzipped fastq output -->
        <test>
            <param name="options_type_selector" value="single"/>
            <param name="input_single" ftype="fastqsanger.gz" value="procrad/R1.fq.gzip"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <param name="outype" value="gzfastq"/>
            <output name="output_log" file="shortreads/process_shortreads.out" lines_diff="6"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01" ftype="fastqsanger.gz" md5="c7250f50138cbca747b85223aaae9565"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" ftype="fastqsanger" md5="786b30d864332a2d56d9179f0a53add4"/>
            </output_collection>
        </test>
        <!-- paired-end, default output format -->
        <test>
            <param name="options_type_selector" value="paired"/>
            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fq"/>
            </output_collection>
            <output_collection name="remaining">
                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fq"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fq.discards"/>
            </output_collection>
        </test>
        <!-- paired-end, original FASTQ headers retained -->
        <test>
            <param name="options_type_selector" value="paired"/>
            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <param name="retain_header" value="true"/>
            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fq.header"/>
            </output_collection>
            <output_collection name="remaining">
                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fq.header"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fq.discards"/>
            </output_collection>
        </test>
        <!-- paired-end, fasta output -->
        <test>
            <param name="options_type_selector" value="paired"/>
            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <param name="outype" value="fasta"/>
            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fa"/>
            </output_collection>
            <output_collection name="remaining">
                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fa"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fa.discards"/>
            </output_collection>
        </test>
    </tests>



    <help>
<![CDATA[
.. class:: infomark

**What it does**

Performs the same task as process_radtags for fast cleaning of randomly sheared genomic or transcriptomic data, not for RAD data.

**Help**

Input files:

- FASTQ

- Barcode File Format

The barcode file is a very simple format:

======= ===========
Barcode Sample name
======= ===========
ATGGGG  PopA_01
GGGTAA  PopA_02
AGGAAA  PopA_03
TTTAAG  PopA_04
GGTGTG  PopA_05
TGATGT  PopA_06
======= ===========

Combinatorial barcodes are specified, one per column, separated by a tab:

======== ======== ===========
Barcode1 Barcode2 Sample name
======== ======== ===========
CGATA    ACGTA    PopA_01
CGGCG    CGTA     PopA_02
GAAGC    CGTA     PopA_03
GAGAT    CGTA     PopA_04
CGATA    AGCA     PopA_05
CGGCG    AGCA     PopA_06
======== ======== ===========

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation" />
</tool>
