diff stacks_shortreads.xml @ 0:061c9e133d1e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 98327d2948ae1ccb5aef5db9ab88605fd74a0de7-dirty
author matthias
date Thu, 29 Nov 2018 11:43:50 -0500
parents
children 02905edf00e4
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/stacks_shortreads.xml	Thu Nov 29 11:43:50 2018 -0500
@@ -0,0 +1,318 @@
+<!-- this is essentially a copy of stacks_procrad minus the unsupported options -->
+<tool id="stacks2_shortreads" name="Stacks2: process shortreads" version="@WRAPPER_VERSION@">
+<description>fast cleaning of randomly sheared genomic or transcriptomic data</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_cmd"/>
+    <command><![CDATA[
+        ## Stage the input file(s) under fixed names (R1/R2) with an extension that
+        ## matches their datatype, and record the corresponding value for -i.
+
+        #if $input_type.options_type_selector == "single"
+
+            #if $input_type.input_single.is_of_type('fastqsanger')
+                #set $ext = ".fq"
+                #set $inputype = "fastq"
+            #else
+                #set $ext = ".fq.gz"
+                #set $inputype = "gzfastq"
+            #end if
+
+            ln -s '$input_type.input_single' R1$ext &&
+        #else
+
+            #if $input_type.inputs_paired1.is_of_type('fastqsanger')
+                #set $ext = ".fq"
+                #set $inputype = "fastq"
+            #else
+                #set $ext = ".fq.gz"
+                #set $inputype = "gzfastq"
+            #end if
+
+            ln -s '$input_type.inputs_paired1' R1$ext &&
+            ln -s '$input_type.inputs_paired2' R2$ext &&
+        #end if
+
+        mkdir stacks_outputs
+
+        &&
+
+process_shortreads
+
+#if $input_type.options_type_selector == "single"
+    -f R1$ext
+#else
+    -1 R1$ext
+    -2 R2$ext
+#end if
+## "auto" means: do not pass -y, so the output type follows the input type
+#if str( $outype ) != "auto"
+    -y $outype
+#end if
+-i $inputype
+-b '$barcode'
+$options_advanced.remove
+$options_advanced.discard
+$options_advanced.rescue
+#if str($options_advanced.truncate)
+    -t $options_advanced.truncate
+#end if
+## -E not implemented in Galaxy defaults to phred33
+$capture
+-w $options_advanced.sliding
+-s $options_advanced.score
+##Barcode options:
+$input_type.barcode_encoding
+##Adapter options:
+## adapter sequences are free-text parameters, so they are quoted for the shell
+#if str($options_advanced.adapter_1) != "":
+    --adapter_1 '$options_advanced.adapter_1'
+#end if
+#if str($options_advanced.adapter_2) != "":
+    --adapter_2 '$options_advanced.adapter_2'
+#end if
+#if str($options_advanced.adapter_mm) != "":
+    --adapter_mm $options_advanced.adapter_mm
+#end if
+##Output options:
+$options_advanced.retain_header
+## --merge not implemented in Galaxy
+##Advanced options:
+$options_advanced.no_read_trimming
+#if str($options_advanced.len_limit) != "":
+    --len_limit $options_advanced.len_limit
+#end if
+$options_advanced.filter_illumina
+#if str($options_advanced.barcode_dist_1) != "":
+    --barcode_dist_1 $options_advanced.barcode_dist_1
+#end if
+#if str($options_advanced.barcode_dist_2) != "":
+    --barcode_dist_2 $options_advanced.barcode_dist_2
+#end if
+$options_advanced.mate_pair
+$options_advanced.no_overhang
+
+-o stacks_outputs
+## bug in short reads: for single end input discarded reads currently
+## are written to working dir -> move them to be consistent with procrad
+#if $input_type.options_type_selector == "single" and str($capture) != "":
+&& mv *discards stacks_outputs/
+#end if
+    ]]></command>
+
+    <inputs>
+        <!-- Read layout: decides which input file parameters and which barcode
+             location choices are offered. -->
+        <conditional name="input_type">
+            <param name="options_type_selector" type="select" label="Single-end or paired-end reads files">
+                <option value="single" selected="True">Single-end files</option>
+                <option value="paired">Paired-end files</option>
+            </param>
+            <when value="single">
+                <param name="input_single" argument="-f" format="fastqsanger,fastqsanger.gz" type="data" label="single-end reads infile(s)" help="input files" />
+
+                <param name="barcode_encoding" type="select" label="Barcode location">
+                    <expand macro="barcode_encoding_single" />
+                </param>
+            </when>
+            <when value="paired">
+                <param name="inputs_paired1" argument="-1" format="fastqsanger,fastqsanger.gz" type="data" label="paired-end reads infile(s) 1" help="Files must have this syntax : name_R1_001.fastq" />
+                <param name="inputs_paired2" argument="-2" format="fastqsanger,fastqsanger.gz" type="data" label="paired-end reads infile(s) 2" help="Files must have this syntax : name_R2_001.fastq" />
+
+                <param name="barcode_encoding" type="select" label="Barcode location">
+                    <expand macro="barcode_encoding_pair" />
+                </param>
+            </when>
+        </conditional>
+
+        <param name="barcode" argument="-b" type="data" format="tabular,txt" label="Barcode file" help="Barcode file" />
+
+        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" />
+
+        <!-- Boolean parameters below carry the literal command line flag as truevalue
+             and an empty falsevalue, so the command template can insert them as is.
+             Parameters that only declare an argument attribute are referenced in the
+             command template by the argument name without its leading dashes
+             (e.g. options_advanced.retain_header). -->
+        <section name="options_advanced" title="advanced options" expanded="False">
+            <param name="sliding" type="float" value="0.15" min="0" max="1" argument="-w" label="Set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15)" />
+            <param name="score" type="integer" value="10" argument="-s" label="Set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10)" />
+            <param name="remove" type="boolean" checked="false" truevalue="-c" falsevalue="" argument="-c" label="Clean data, remove any read with an uncalled base" />
+            <param name="discard" type="boolean" checked="false" truevalue="-q" falsevalue="" argument="-q" label="Discard reads with low quality scores"/>
+            <param name="rescue" type="boolean" checked="false" truevalue="-r" falsevalue="" argument="-r" label="Rescue barcodes?"/>
+            <param name="truncate" type="integer" value="" optional="True" argument="-t" label="Truncate final read length to this value" />
+
+            <param argument="--retain_header" type="boolean" checked="false" truevalue="--retain_header" falsevalue="" label="Retain unmodified FASTQ headers in the output" />
+
+            <param argument="--filter_illumina" type="boolean" checked="false" truevalue="--filter_illumina" falsevalue="" label="discard reads that have been marked by Illumina's chastity/purity filter as failing" />
+            <param argument="--len_limit" type="integer" value="" optional="true" label="minimum sequence length" help="useful if your data has already been trimmed (default 31)"/>
+            <param argument="--barcode_dist_1" type="integer" value="1" optional="true" label="number of allowed mismatches when rescuing first read barcodes" help="(default 1)"/>
+            <param argument="--barcode_dist_2" type="integer" value="1" optional="true" label="number of allowed mismatches when rescuing paired read barcodes" help="(default value for single end barcodes)"/>
+            <param argument="--no_read_trimming" type="boolean" checked="false" truevalue="--no_read_trimming" falsevalue="" label="do not trim low quality reads, just discard them" />
+            <param name="mate_pair" argument="--mate-pair" type="boolean" checked="false" truevalue="--mate-pair" falsevalue="" label="raw reads are circularized mate-pair data, first read will be reverse complemented" />
+            <param argument="--no_overhang" type="boolean" checked="false" truevalue="--no_overhang" falsevalue="" label="data does not contain an overhang nucleotide between barcode and sequence" />
+
+            <param argument="--adapter_1" type="text" value="" optional="true" label="adaptor sequence that may occur on the single-end read" />
+            <param argument="--adapter_2" type="text" value="" optional="true" label="adaptor sequence that may occur on the paired-end" />
+            <param argument="--adapter_mm" type="integer" value="" optional="true" label="number of mismatches allowed in the adapter sequence"/>
+        </section>
+
+        <!-- "auto" is handled in the command template by not passing -y at all. -->
+        <param name="outype" argument="-y" type="select" label="Output format" >
+            <option value="auto" selected="True">Same as input</option>
+            <option value="fastq">fastq</option>
+            <option value="fasta">fasta</option>
+            <option value="gzfastq">gzipped fastq</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data format="txt" name="output_log" label="${tool.name} on ${on_string} log file" from_work_dir="stacks_outputs/process_shortreads.log" />
+
+        <!-- All collections are discovered in stacks_outputs. The leading negative
+             lookahead keeps the *.rem[.N] files, which belong to the "remaining"
+             collection, out of this one; without it ".+" would match them as well. -->
+        <collection name="demultiplexed" type="list" label="${tool.name} on ${on_string} Demultiplexed reads">
+            <discover_datasets pattern="(?!.+\.rem(\.[12])?\.fq$)(?P&lt;name&gt;.+(\.[12])?)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
+            <discover_datasets pattern="(?!.+\.rem(\.[12])?\.fq\.gz$)(?P&lt;name&gt;.+(\.[12])?)\.fq\.gz$" ext="fastqsanger.gz" directory="stacks_outputs" />
+            <discover_datasets pattern="(?!.+\.rem(\.[12])?\.fa$)(?P&lt;name&gt;.+(\.[12])?)\.fa$" ext="fasta" directory="stacks_outputs" />
+        </collection>
+        <!-- Only produced for paired-end input. -->
+        <collection name="remaining" type="list" label="${tool.name} on ${on_string} Remaining orphan reads">
+            <filter>input_type['options_type_selector'] == "paired"</filter>
+            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fq\.gz$" ext="fastqsanger.gz" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fa$" ext="fasta" directory="stacks_outputs" />
+        </collection>
+        <!-- Only produced when the capture option is enabled. -->
+        <collection name="discarded" type="list" label="${tool.name} on ${on_string} Discarded reads">
+            <filter>capture is True</filter>
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fq\.discards$" ext="fastqsanger" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fq\.gz\.discards$" ext="fastqsanger.gz" directory="stacks_outputs" /> <!-- discards are never zipped currently-->
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa\.discards$" ext="fasta" directory="stacks_outputs" /><!-- discards are always called fq even if fasta, so this does not work at the moment -->
+        </collection>
+    </outputs>
+
+    <tests>
+        <test><!-- Single-end, uncompressed FASTQ input with the discard (-q) and capture (-D) options enabled; checks the log, one demultiplexed sample and the captured discards. -->
+            <param name="options_type_selector" value="single"/>
+            <param name="input_single" ftype="fastqsanger" value="procrad/R1.fq"/>
+            <param name="barcode" value="procrad/barcodes"/>
+            <param name="discard" value="true"/>
+            <param name="capture" value="true"/>
+            <output name="output_log" file="shortreads/process_shortreads.out" lines_diff="4"/>
+            <output_collection name="demultiplexed">
+                <element name="PopA_01" file="demultiplexed/PopA_01.fq" />
+            </output_collection>
+            <output_collection name="discarded">
+                <element name="R1" file="procrad/R1.fq.discards"/>
+            </output_collection>
+        </test>
+        <test><!-- Single-end, gzipped FASTQ input with gzfastq output; elements are compared by md5. The discarded element is expected as uncompressed fastqsanger. -->
+            <param name="options_type_selector" value="single"/>
+            <param name="input_single" ftype="fastqsanger.gz" value="procrad/R1.fq.gzip"/>
+            <param name="barcode" value="procrad/barcodes"/>
+            <param name="discard" value="true"/>
+            <param name="capture" value="true"/>
+            <param name="outype" value="gzfastq"/>
+            <output name="output_log" file="shortreads/process_shortreads.out" lines_diff="6"/>
+            <output_collection name="demultiplexed">
+                <element name="PopA_01" ftype="fastqsanger.gz" md5="c7250f50138cbca747b85223aaae9565"/>
+            </output_collection>
+            <output_collection name="discarded">
+                <element name="R1" ftype="fastqsanger" md5="786b30d864332a2d56d9179f0a53add4"/>
+            </output_collection>
+        </test>
+        <test><!-- Paired-end, uncompressed FASTQ input; additionally checks one element of the remaining collection. -->
+            <param name="options_type_selector" value="paired"/>
+            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
+            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
+            <param name="barcode" value="procrad/barcodes"/>
+            <param name="discard" value="true"/>
+            <param name="capture" value="true"/>
+            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
+            <output_collection name="demultiplexed">
+                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fq"/>
+            </output_collection>
+            <output_collection name="remaining">
+                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fq"/>
+            </output_collection>
+            <output_collection name="discarded">
+                <element name="R1" file="procrad/R1.fq.discards"/>
+            </output_collection>
+        </test>
+        <test><!-- Same paired-end inputs with retain_header enabled; read outputs are compared against the *.header reference files. -->
+            <param name="options_type_selector" value="paired"/>
+            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
+            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
+            <param name="barcode" value="procrad/barcodes"/>
+            <param name="discard" value="true"/>
+            <param name="capture" value="true"/>
+            <param name="retain_header" value="true"/>
+            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
+            <output_collection name="demultiplexed">
+                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fq.header"/>
+            </output_collection>
+            <output_collection name="remaining">
+                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fq.header"/>
+            </output_collection>
+            <output_collection name="discarded">
+                <element name="R1" file="procrad/R1.fq.discards"/>
+            </output_collection>
+        </test>
+        <test><!-- Same paired-end inputs with fasta output. NOTE(review): the discards reference file is R1.fa.discards while the outputs section states that discards keep the fq name; confirm which discovery pattern yields the R1 element here. -->
+            <param name="options_type_selector" value="paired"/>
+            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
+            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
+            <param name="barcode" value="procrad/barcodes"/>
+            <param name="discard" value="true"/>
+            <param name="capture" value="true"/>
+            <param name="outype" value="fasta"/>
+            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
+            <output_collection name="demultiplexed">
+                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fa"/>
+            </output_collection>
+            <output_collection name="remaining">
+                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fa"/>
+            </output_collection>
+            <output_collection name="discarded">
+                <element name="R1" file="procrad/R1.fa.discards"/>
+            </output_collection>
+        </test>
+    </tests>
+
+
+
+    <help>
+<![CDATA[
+.. class:: infomark
+
+**What it does**
+
+Performs the same task as process_radtags for fast cleaning of randomly sheared genomic or transcriptomic data, not for RAD data.
+
+**Help**
+
+Input files:
+
+- FASTQ
+
+- Barcode File Format
+
+The barcode file is a very simple format:
+
+======= ===========
+Barcode Sample name
+======= ===========
+ATGGGG  PopA_01
+GGGTAA  PopA_02
+AGGAAA  PopA_03
+TTTAAG  PopA_04
+GGTGTG  PopA_05
+TGATGT  PopA_06
+======= ===========
+
+Combinatorial barcodes are specified, one per column, separated by a tab:
+
+======== ======== ===========
+Barcode1 Barcode2 Sample name
+======== ======== ===========
+CGATA    ACGTA    PopA_01
+CGGCG    CGTA     PopA_02
+GAAGC    CGTA     PopA_03
+GAGAT    CGTA     PopA_04
+CGATA    AGCA     PopA_05
+CGGCG    AGCA     PopA_06
+======== ======== ===========
+
+@STACKS_INFOS@
+]]>
+    </help>
+    <expand macro="citation" />
+</tool>