view stacks_shortreads.xml @ 0:061c9e133d1e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 98327d2948ae1ccb5aef5db9ab88605fd74a0de7-dirty
author matthias
date Thu, 29 Nov 2018 11:43:50 -0500
parents
children 02905edf00e4
line wrap: on
line source

<!-- this is essentially a copy of stacks_procrad minus the unsupported options -->
<tool id="stacks2_shortreads" name="Stacks2: process shortreads" version="@WRAPPER_VERSION@">
<description>fast cleaning of randomly sheared genomic or transcriptomic data</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_cmd"/>
    <command><![CDATA[
        ## Link the inputs into the working directory under fixed names.
        ## The file extension and the matching -i value are chosen from the
        ## datatype of the (first) read file.
        #if $input_type.options_type_selector == "single"

            #if $input_type.input_single.is_of_type('fastqsanger')
                #set $ext = ".fq"
                #set $inputype = "fastq"
            #else
                #set $ext = ".fq.gz"
                #set $inputype = "gzfastq"
            #end if

            ln -s '$input_type.input_single' R1$ext &&
        #else

            #if $input_type.inputs_paired1.is_of_type('fastqsanger')
                #set $ext = ".fq"
                #set $inputype = "fastq"
            #else
                #set $ext = ".fq.gz"
                #set $inputype = "gzfastq"
            #end if

            ln -s '$input_type.inputs_paired1' R1$ext &&
            ln -s '$input_type.inputs_paired2' R2$ext &&
        #end if

        mkdir stacks_outputs

        &&

process_shortreads

##Input options:
#if $input_type.options_type_selector == "single"
    -f R1$ext
#else
    -1 R1$ext
    -2 R2$ext
#end if
## "auto" means: do not pass -y, i.e. keep the tool's default output type
#if str( $outype ) != "auto"
    -y $outype
#end if
-i $inputype
-b '$barcode'
$options_advanced.remove
$options_advanced.discard
$options_advanced.rescue
#if str($options_advanced.truncate)
    -t $options_advanced.truncate
#end if
## -E not implemented in Galaxy defaults to phred33
$capture
-w $options_advanced.sliding
-s $options_advanced.score
##Barcode options:
$input_type.barcode_encoding
##Adapter options:
## the adapter sequences are free text, hence they are quoted for the shell
#if str($options_advanced.adapter_1) != ""
    --adapter_1 '$options_advanced.adapter_1'
#end if
#if str($options_advanced.adapter_2) != ""
    --adapter_2 '$options_advanced.adapter_2'
#end if
#if str($options_advanced.adapter_mm) != ""
    --adapter_mm $options_advanced.adapter_mm
#end if
##Output options:
$options_advanced.retain_header
## --merge not implemented in Galaxy
##Advanced options:
$options_advanced.no_read_trimming
#if str($options_advanced.len_limit) != ""
    --len_limit $options_advanced.len_limit
#end if
$options_advanced.filter_illumina
#if str($options_advanced.barcode_dist_1) != ""
    --barcode_dist_1 $options_advanced.barcode_dist_1
#end if
#if str($options_advanced.barcode_dist_2) != ""
    --barcode_dist_2 $options_advanced.barcode_dist_2
#end if
$options_advanced.mate_pair
$options_advanced.no_overhang

-o stacks_outputs
## bug in short reads: for single end input discarded reads currently
## are written to working dir -> move them to be consistent with procrad
#if $input_type.options_type_selector == "single" and str($capture) != ""
&& mv *discards stacks_outputs/
#end if
    ]]></command>

    <inputs>
        <!-- read files; the barcode location choices differ between single and paired input -->
        <conditional name="input_type">
            <param name="options_type_selector" type="select" label="Single-end or paired-end reads files">
                <option value="single" selected="true">Single-end files</option>
                <option value="paired">Paired-end files</option>
            </param>
            <when value="single">
                <param name="input_single" argument="-f" format="fastqsanger,fastqsanger.gz" type="data" label="single-end reads infile(s)" help="input files" />

                <param name="barcode_encoding" type="select" label="Barcode location">
                    <expand macro="barcode_encoding_single" />
                </param>
            </when>
            <when value="paired">
                <param name="inputs_paired1" argument="-1" format="fastqsanger,fastqsanger.gz" type="data" label="paired-end reads infile(s) 1" help="Files must have this syntax : name_R1_001.fastq" />
                <param name="inputs_paired2" argument="-2" format="fastqsanger,fastqsanger.gz" type="data" label="paired-end reads infile(s) 2" help="Files must have this syntax : name_R2_001.fastq" />

                <param name="barcode_encoding" type="select" label="Barcode location">
                    <expand macro="barcode_encoding_pair" />
                </param>
            </when>
        </conditional>

        <param name="barcode" argument="-b" type="data" format="tabular,txt" label="Barcode file" help="Barcode file" />

        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" />

        <section name="options_advanced" title="advanced options" expanded="false">
            <!-- quality filtering and barcode rescue -->
            <param name="sliding" type="float" value="0.15" min="0" max="1" argument="-w" label="Set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15)" />
            <param name="score" type="integer" value="10" argument="-s" label="Set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10)" />
            <param name="remove" type="boolean" checked="false" truevalue="-c" falsevalue="" argument="-c" label="Clean data, remove any read with an uncalled base" />
            <param name="discard" type="boolean" checked="false" truevalue="-q" falsevalue="" argument="-q" label="Discard reads with low quality scores"/>
            <param name="rescue" type="boolean" checked="false" truevalue="-r" falsevalue="" argument="-r" label="Rescue barcodes?"/>
            <param name="truncate" type="integer" value="" optional="true" argument="-t" label="Truncate final read length to this value" />

            <param argument="--retain_header" type="boolean" checked="false" truevalue="--retain_header" falsevalue="" label="Retain unmodified FASTQ headers in the output" />

            <param argument="--filter_illumina" type="boolean" checked="false" truevalue="--filter_illumina" falsevalue="" label="discard reads that have been marked by Illumina's chastity/purity filter as failing" />
            <param argument="--len_limit" type="integer" value="" optional="true" label="minimum sequence length" help="useful if your data has already been trimmed (default 31)"/>
            <param argument="--barcode_dist_1" type="integer" value="1" optional="true" label="number of allowed mismatches when rescuing first read barcodes" help="(default 1)"/>
            <param argument="--barcode_dist_2" type="integer" value="1" optional="true" label="number of allowed mismatches when rescuing paired read barcodes" help="(default value for single end barcodes)"/>
            <param argument="--no_read_trimming" type="boolean" checked="false" truevalue="--no_read_trimming" falsevalue="" label="do not trim low quality reads, just discard them" />
            <param name="mate_pair" argument="--mate-pair" type="boolean" checked="false" truevalue="--mate-pair" falsevalue="" label="raw reads are circularized mate-pair data, first read will be reverse complemented" />
            <param argument="--no_overhang" type="boolean" checked="false" truevalue="--no_overhang" falsevalue="" label="data does not contain an overhang nucleotide between barcode and sequence" />

            <!-- adapter options; each is only passed to the tool when a value is given -->
            <param argument="--adapter_1" type="text" value="" optional="true" label="adaptor sequence that may occur on the single-end read" />
            <param argument="--adapter_2" type="text" value="" optional="true" label="adaptor sequence that may occur on the paired-end" />
            <param argument="--adapter_mm" type="integer" value="" optional="true" label="number of mismatches allowed in the adapter sequence"/>
        </section>

        <!-- "auto" is handled in the command section by omitting the -y argument -->
        <param name="outype" argument="-y" type="select" label="Output format" >
            <option value="auto" selected="true">Same as input</option>
            <option value="fastq">fastq</option>
            <option value="fasta">fasta</option>
            <option value="gzfastq">gzipped fastq</option>
        </param>
    </inputs>

    <outputs>
        <!-- log file taken from the stacks_outputs directory -->
        <data format="txt" name="output_log" label="${tool.name} on ${on_string} log file" from_work_dir="stacks_outputs/process_shortreads.log" />

        <!-- In all patterns below the dots of the file extensions are escaped,
             so that only the literal extensions .fq, .fq.gz and .fa match. -->
        <collection name="demultiplexed" type="list" label="${tool.name} on ${on_string} Demultiplexed reads">
            <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fq\.gz$" ext="fastqsanger.gz" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+(\.[12])?)\.fa$" ext="fasta" directory="stacks_outputs" />
        </collection>
        <!-- only created for paired-end input -->
        <collection name="remaining" type="list" label="${tool.name} on ${on_string} Remaining orphan reads">
            <filter>input_type['options_type_selector'] == "paired"</filter>
            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fq\.gz$" ext="fastqsanger.gz" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fa$" ext="fasta" directory="stacks_outputs" />
        </collection>
        <!-- only created when capturing of discarded reads is enabled -->
        <collection name="discarded" type="list" label="${tool.name} on ${on_string} Discarded reads">
            <filter>capture is True</filter>
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fq\.discards$" ext="fastqsanger" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fq\.gz\.discards$" ext="fastqsanger.gz" directory="stacks_outputs" /> <!-- discards are never zipped currently-->
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa\.discards$" ext="fasta" directory="stacks_outputs" /><!-- discards are always called fq even if fasta, so this does not work at the moment -->
        </collection>
    </outputs>

    <tests>
        <!-- single-end, uncompressed FASTQ input; low quality reads are discarded and captured -->
        <test>
            <param name="options_type_selector" value="single"/>
            <param name="input_single" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <output name="output_log" file="shortreads/process_shortreads.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01" file="demultiplexed/PopA_01.fq" />
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fq.discards"/>
            </output_collection>
        </test>
        <!-- single-end, gzipped input with gzipped FASTQ output requested; outputs are compared by checksum,
             the discarded reads are expected as plain fastqsanger -->
        <test>
            <param name="options_type_selector" value="single"/>
            <param name="input_single" ftype="fastqsanger.gz" value="procrad/R1.fq.gzip"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <param name="outype" value="gzfastq"/>
            <output name="output_log" file="shortreads/process_shortreads.out" lines_diff="6"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01" ftype="fastqsanger.gz" md5="c7250f50138cbca747b85223aaae9565"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" ftype="fastqsanger" md5="786b30d864332a2d56d9179f0a53add4"/>
            </output_collection>
        </test>
        <!-- paired-end, uncompressed FASTQ input; additionally checks the collection of remaining reads -->
        <test>
            <param name="options_type_selector" value="paired"/>
            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fq"/>
            </output_collection>
            <output_collection name="remaining">
                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fq"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fq.discards"/>
            </output_collection>
        </test>
        <!-- same as the paired-end test above, but with retain_header enabled -->
        <test>
            <param name="options_type_selector" value="paired"/>
            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <param name="retain_header" value="true"/>
            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fq.header"/>
            </output_collection>
            <output_collection name="remaining">
                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fq.header"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fq.discards"/>
            </output_collection>
        </test>
        <!-- paired-end input with FASTA output requested -->
        <!-- NOTE(review): the discarded element is compared against R1.fa.discards; presumably this fixture holds
             what the tool writes for FASTA output, confirm against the test data -->
        <test>
            <param name="options_type_selector" value="paired"/>
            <param name="inputs_paired1" ftype="fastqsanger" value="procrad/R1.fq"/>
            <param name="inputs_paired2" ftype="fastqsanger" value="procrad/R2.fq"/>
            <param name="barcode" value="procrad/barcodes"/>
            <param name="discard" value="true"/>
            <param name="capture" value="true"/>
            <param name="outype" value="fasta"/>
            <output name="output_log" file="shortreads/process_shortreads_paired.out" lines_diff="4"/>
            <output_collection name="demultiplexed">
                <element name="PopA_01.1" file="demultiplexed/PopA_01.1.fa"/>
            </output_collection>
            <output_collection name="remaining">
                <element name="PopA_01.rem.2" file="demultiplexed/PopA_01.rem.2.fa"/>
            </output_collection>
            <output_collection name="discarded">
                <element name="R1" file="procrad/R1.fa.discards"/>
            </output_collection>
        </test>
    </tests>



    <help>
<![CDATA[
.. class:: infomark

**What it does**

Performs the same task as process_radtags, but for fast cleaning of randomly sheared genomic or transcriptomic data rather than RAD data.

**Help**

Input files:

- FASTQ

- Barcode File Format

The barcode file is a very simple format:

======= ===========
Barcode Sample name
======= ===========
ATGGGG  PopA_01
GGGTAA  PopA_02
AGGAAA  PopA_03
TTTAAG  PopA_04
GGTGTG  PopA_05
TGATGT  PopA_06
======= ===========

Combinatorial barcodes are specified, one per column, separated by a tab:

======== ======== ===========
Barcode1 Barcode2 Sample name
======== ======== ===========
CGATA    ACGTA    PopA_01
CGGCG    CGTA     PopA_02
GAAGC    CGTA     PopA_03
GAGAT    CGTA     PopA_04
CGATA    AGCA     PopA_05
CGGCG    AGCA     PopA_06
======== ======== ===========

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation" />
</tool>