Mercurial > repos > matthias > stacks2_shortreads
view stacks_shortreads.xml @ 0:061c9e133d1e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 98327d2948ae1ccb5aef5db9ab88605fd74a0de7-dirty
author | matthias |
---|---|
date | Thu, 29 Nov 2018 11:43:50 -0500 |
parents | |
children | 02905edf00e4 |
line wrap: on
line source
<!-- this is essentially a copy of stacks_procrad minus the unsupported options -->
<!--
    Galaxy wrapper around the Stacks2 program process_shortreads.
    The requirements/stdio/version_cmd/citation macros, the barcode_encoding_*
    macros and the @WRAPPER_VERSION@ / @STACKS_INFOS@ tokens are imported from
    macros.xml, which is not part of this file.
-->
<tool id="stacks2_shortreads" name="Stacks2: process shortreads" version="@WRAPPER_VERSION@">
    <description>fast cleaning of randomly sheared genomic or transcriptomic data</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_cmd"/>
    <command><![CDATA[
## Link the input file(s) into the working directory under fixed names (R1/R2).
## The extension and the value passed to -i are both derived from the Galaxy
## datatype of the (first) input.
#if $input_type.options_type_selector == "single"
    #if $input_type.input_single.is_of_type('fastqsanger')
        #set $ext = ".fq"
        #set inputype = "fastq"
    #else
        #set $ext = ".fq.gz"
        #set inputype = "gzfastq"
    #end if
    ln -s '$input_type.input_single' R1$ext &&
#else
    #if $input_type.inputs_paired1.is_of_type('fastqsanger')
        #set $ext = ".fq"
        #set inputype = "fastq"
    #else
        #set $ext = ".fq.gz"
        #set inputype = "gzfastq"
    #end if
    ln -s '$input_type.inputs_paired1' R1$ext &&
    ln -s '$input_type.inputs_paired2' R2$ext &&
#end if

mkdir stacks_outputs &&

process_shortreads

#if $input_type.options_type_selector == "single"
    -f R1$ext
#else
    -1 R1$ext
    -2 R2$ext
#end if
## "auto" means: do not pass -y and let the program pick the output type
#if str( $outype ) != "auto"
    -y $outype
#end if

-i $inputype
-b '$barcode'
$options_advanced.remove
$options_advanced.discard
$options_advanced.rescue
#if str($options_advanced.truncate)
    -t $options_advanced.truncate
#end if
## -E not implemented in Galaxy defaults to phred33
$capture
-w $options_advanced.sliding
-s $options_advanced.score

##Barcode options:
$input_type.barcode_encoding
##Adapter options:
## the adapter sequences are free text, hence they are quoted for the shell
#if str($options_advanced.adapter_1) != "":
    --adapter_1 '$options_advanced.adapter_1'
#end if
#if str($options_advanced.adapter_2) != "":
    --adapter_2 '$options_advanced.adapter_2'
#end if
#if str($options_advanced.adapter_mm) != "":
    --adapter_mm $options_advanced.adapter_mm
#end if
##Output options:
$options_advanced.retain_header
## --merge not implemented in Galaxy
##Advanced options:
$options_advanced.no_read_trimming
#if str($options_advanced.len_limit) != "":
    --len_limit $options_advanced.len_limit
#end if
$options_advanced.filter_illumina
#if str($options_advanced.barcode_dist_1) != "":
    --barcode_dist_1 $options_advanced.barcode_dist_1
#end if
#if str($options_advanced.barcode_dist_2) != "":
    --barcode_dist_2 $options_advanced.barcode_dist_2
#end if
$options_advanced.mate_pair
$options_advanced.no_overhang

-o stacks_outputs

## bug in short reads: for single end input discarded reads currently
## are written to working dir -> move them to be consistent with procrad
#if $input_type.options_type_selector == "single" and str($capture) != "":
    && mv *discards stacks_outputs/
#end if
    ]]></command>
    <inputs>
        <!-- single-end vs. paired-end input; each branch has its own barcode location choices -->
        <conditional name="input_type">
            <param name="options_type_selector" type="select" label="Single-end or paired-end reads files">
                <option value="single" selected="True">Single-end files</option>
                <option value="paired">Paired-end files</option>
            </param>
            <when value="single">
                <param name="input_single" argument="-f" format="fastqsanger,fastqsanger.gz" type="data"
                       label="single-end reads infile(s)" help="input files" />
                <param name="barcode_encoding" type="select" label="Barcode location">
                    <expand macro="barcode_encoding_single" />
                </param>
            </when>
            <when value="paired">
                <param name="inputs_paired1" argument="-1" format="fastqsanger,fastqsanger.gz" type="data"
                       label="paired-end reads infile(s) 1"
                       help="Files must have this syntax : name_R1_001.fastq" />
                <param name="inputs_paired2" argument="-2" format="fastqsanger,fastqsanger.gz" type="data"
                       label="paired-end reads infile(s) 2"
                       help="Files must have this syntax : name_R2_001.fastq" />
                <param name="barcode_encoding" type="select" label="Barcode location">
                    <expand macro="barcode_encoding_pair" />
                </param>
            </when>
        </conditional>
        <param name="barcode" argument="-b" type="data" format="tabular,txt" label="Barcode file" help="Barcode file" />
        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D"
               label="Capture discarded reads to a file" />
        <section name="options_advanced" title="advanced options" expanded="False">
            <param name="sliding" type="float" value="0.15" min="0" max="1" argument="-w"
                   label="Set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15)" />
            <param name="score" type="integer" value="10" argument="-s"
                   label="Set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10)" />
            <param name="remove" type="boolean" checked="false" truevalue="-c" falsevalue="" argument="-c"
                   label="Clean data, remove any read with an uncalled base" />
            <param name="discard" type="boolean" checked="false" truevalue="-q" falsevalue="" argument="-q"
                   label="Discard reads with low quality scores"/>
            <param name="rescue" type="boolean" checked="false" truevalue="-r" falsevalue="" argument="-r"
                   label="Rescue barcodes?"/>
            <param name="truncate" type="integer" value="" optional="True" argument="-t"
                   label="Truncate final read length to this value" />
            <param argument="--retain_header" type="boolean" checked="false" truevalue="--retain_header" falsevalue=""
                   label="Retain unmodified FASTQ headers in the output" />
            <param argument="--filter_illumina" type="boolean" checked="false" truevalue="--filter_illumina" falsevalue=""
                   label="discard reads that have been marked by Illumina's chastity/purity filter as failing" />
            <param argument="--len_limit" type="integer" value="" optional="true"
                   label="minimum sequence length"
                   help="useful if your data has already been trimmed (default 31)"/>
            <param argument="--barcode_dist_1" type="integer" value="1" optional="true"
                   label="number of allowed mismatches when rescuing first read barcodes"
                   help="(default 1)"/>
            <param argument="--barcode_dist_2" type="integer" value="1" optional="true"
                   label="number of allowed mismatches when rescuing paired read barcodes"
                   help="(default value for single end barcodes)"/>
            <param argument="--no_read_trimming" type="boolean" checked="false" truevalue="--no_read_trimming" falsevalue=""
                   label="do not trim low quality reads, just discard them" />
            <param name="mate_pair" argument="--mate-pair" type="boolean" checked="false" truevalue="--mate-pair" falsevalue=""
                   label="raw reads are circularized mate-pair data, first read will be reverse complemented" />
            <param argument="--no_overhang" type="boolean" checked="false" truevalue="--no_overhang" falsevalue=""
                   label="data does not contain an overhang nucleotide between barcode and sequence" />
            <param argument="--adapter_1" type="text" value="" optional="true"
                   label="adaptor sequence that may occur on the single-end read" />
            <param argument="--adapter_2" type="text" value="" optional="true"
                   label="adaptor sequence that may occur on the paired-end" />
            <param argument="--adapter_mm" type="integer" value="" optional="true"
                   label="number of mismatches allowed in the adapter sequence"/>
        </section>
        <param name="outype" argument="-y" type="select" label="Output format" >
            <option value="auto" selected="True">Same as input</option>
            <option value="fastq">fastq</option>
            <option value="fasta">fasta</option>
            <option value="gzfastq">gzipped fastq</option>
        </param>
    </inputs>
    <outputs>
        <data format="txt" name="output_log" label="${tool.name} on ${on_string} log file"
              from_work_dir="stacks_outputs/process_shortreads.log" />
        <!-- All collections are discovered in stacks_outputs; the named group "name" becomes the element identifier.
             The angle brackets of the named group are written as entities because a literal less-than sign
             is not allowed inside an XML attribute value. -->
        <collection name="demultiplexed" type="list" label="${tool.name} on ${on_string} Demultiplexed reads">
            <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fq\.gz$" ext="fastqsanger.gz" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fa$" ext="fasta" directory="stacks_outputs" />
        </collection>
        <collection name="remaining" type="list" label="${tool.name} on ${on_string} Remaining orphan reads">
            <filter>input_type['options_type_selector'] == "paired"</filter>
            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fq\.gz$" ext="fastqsanger.gz" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fa$" ext="fasta" directory="stacks_outputs" />
        </collection>
        <collection name="discarded" type="list" label="${tool.name} on ${on_string} Discarded reads">
            <filter>capture is True</filter>
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fq\.discards$" ext="fastqsanger" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fq\.gz\.discards$" ext="fastqsanger.gz" directory="stacks_outputs" /> <!-- discards are never zipped currently-->
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa\.discards$" ext="fasta" directory="stacks_outputs" /><!-- discards are always called fq even if fasta, so this does not work at the moment -->
        </collection>
    </outputs>
    <tests>
        <!-- single-end, uncompressed fastq input -->
        <test>
            <param name="options_type_selector" value="single"/>
            <param name="input_single" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <output name="output_log" file="shortreads/process_shortreads.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01" file="demultiplexed/PopA_01.fq" />
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fq.discards"/>
            </output_collection>
        </test>
        <!-- single-end, gzipped input and gzipped output -->
        <test>
            <param name="options_type_selector" value="single"/>
            <param name="input_single" ftype="fastqsanger.gz" value="procrad/R1.fq.gzip"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <param name="outype" value="gzfastq"/>
            <output name="output_log" file="shortreads/process_shortreads.out" lines_diff="6"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01" ftype="fastqsanger.gz" md5="c7250f50138cbca747b85223aaae9565"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" ftype="fastqsanger" md5="786b30d864332a2d56d9179f0a53add4"/>
            </output_collection>
        </test>
        <!-- paired-end, default output format -->
        <test>
            <param name="options_type_selector" value="paired"/>
            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fq"/>
            </output_collection>
            <output_collection name="remaining">
                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fq"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fq.discards"/>
            </output_collection>
        </test>
        <!-- paired-end, original FASTQ headers retained -->
        <test>
            <param name="options_type_selector" value="paired"/>
            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <param name="retain_header" value="true"/>
            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fq.header"/>
            </output_collection>
            <output_collection name="remaining">
                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fq.header"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fq.discards"/>
            </output_collection>
        </test>
        <!-- paired-end, fasta output -->
        <test>
            <param name="options_type_selector" value="paired"/>
            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <param name="outype" value="fasta"/>
            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fa"/>
            </output_collection>
            <output_collection name="remaining">
                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fa"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fa.discards"/>
            </output_collection>
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

**What it does**

Performs the same task as process_radtags for fast cleaning of randomly sheared genomic or transcriptomic data, not for RAD data.

**Help**

Input files:

- FASTQ

- Barcode File Format

The barcode file is a very simple format:

======= ===========
Barcode Sample name
======= ===========
ATGGGG  PopA_01
GGGTAA  PopA_02
AGGAAA  PopA_03
TTTAAG  PopA_04
GGTGTG  PopA_05
TGATGT  PopA_06
======= ===========

Combinatorial barcodes are specified, one per column, separated by a tab:

======== ======== ===========
Barcode1 Barcode2 Sample name
======== ======== ===========
CGATA    ACGTA    PopA_01
CGGCG    CGTA     PopA_02
GAAGC    CGTA     PopA_03
GAGAT    CGTA     PopA_04
CGATA    AGCA     PopA_05
CGGCG    AGCA     PopA_06
======== ======== ===========

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation" />
</tool>