view stacks_kmerfilter.xml @ 1:36b792bf9dd6 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 8b047549e9e8791a5ca9424b1ef391e8980aba79-dirty
author matthias
date Thu, 29 Nov 2018 12:17:07 -0500
parents ef1485568631
children 324a729c257f
line wrap: on
line source

<tool id="stacks2_kmerfilter" name="Stacks2: kmer filter" version="@WRAPPER_VERSION@">
    <description>Filter reads by rare or abundant k-mers</description>
    <!--
        Galaxy wrapper around the Stacks kmer_filter program.
        Requirements, stdio handling, the version command and the citation
        are expanded from macros.xml.
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_cmd"/>
    <command><![CDATA[

        ## Stage every input as plain FASTQ under a fixed name (R1.fq / R2.fq).
        ## Compressed datasets are decompressed so that the names of the files
        ## written by kmer_filter do not depend on a ".gz" suffix, and each mate
        ## is checked on its own (a pair may mix compressed and plain files).
        #if $data_type.dt_select == "single"
            #set $staged = [($data_type.fname, 'R1.fq')]
        #else
            #set $staged = [($data_type.fwd, 'R1.fq'), ($data_type.rev, 'R2.fq')]
        #end if

        #for $dataset, $link in $staged
            #if $dataset.is_of_type('fastqsanger')
                ln -s '$dataset' $link &&
            #else
                gunzip -c '$dataset' > $link &&
            #end if
        #end for

        mkdir kmer_outputs &&

        #if $options_advanced_input.read_k_freq
            ln -s '$options_advanced_input.read_k_freq' kfreq.in &&
        #end if

        kmer_filter
            #if $data_type.dt_select == "single"
                -f R1.fq
            #else
                -1 R1.fq
                -2 R2.fq
            #end if

            -i fastq
            -o kmer_outputs
            -y fastq
            $capture

            ## Filtering options
            $options_filtering.rare
            $options_filtering.abundant
            --k_len $options_filtering.k_len

            ## Advanced filtering options (empty optional values are not passed,
            ## so the program falls back to its own defaults)
            --max_k_freq $options_advanced_filtering.max_k_freq
            #if str($options_advanced_filtering.min_lim) != ''
                --min_lim $options_advanced_filtering.min_lim
            #end if
            #if str($options_advanced_filtering.max_lim) != ''
                --max_lim $options_advanced_filtering.max_lim
            #end if

            ## Normalization
            #if str($options_normalization.normalize) != ''
                --normalize $options_normalization.normalize
            #end if

            ## Characterizing k-mers
            ## NOTE(review): a target path is passed with --write_k_freq on the
            ## assumption that the option takes one - confirm with kmer_filter -h.
            #if $options_kmer_char.write_k_freq
                --write_k_freq kfreq.out
            #end if
            $options_kmer_char.k_dist

            ## Advanced input options
            #if $options_advanced_input.read_k_freq
                --read_k_freq kfreq.in
            #end if

    ]]></command>
    <inputs>
        <conditional name="data_type">
            <param name="dt_select" type="select" label="Single or Paired-end">
                <option value="single">Single</option>
                <option value="pair">Pair</option>
            </param>
            <when value="single">
                <param name="fname" type="data" format="fastqsanger,fastqsanger.gz" label="FASTQ" />
            </when>
            <when value="pair">
                <param name="fwd" type="data" format="fastqsanger,fastqsanger.gz" label="Forward FASTQ" />
                <param name="rev" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse FASTQ" />
            </when>
        </conditional>
        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" />
        <section name="options_filtering" title="Filtering options" expanded="False">
            <param argument="--rare" type="boolean" checked="false" truevalue="--rare" falsevalue="" label="turn on filtering based on rare k-mers" />
            <param argument="--abundant" type="boolean" checked="false" truevalue="--abundant" falsevalue="" label="turn on filtering based on abundant k-mers" />
            <param argument="--k_len" type="integer" value="15" label="k-mer size" />
        </section>
        <section name="options_advanced_filtering" title="Advanced filtering options" expanded="False">
            <!-- NOTE(review): the draft default of 20 is kept unchanged; confirm it against the kmer_filter built-in default before release. -->
            <param argument="--max_k_freq" type="integer" value="20" label="number of times a kmer must occur to be considered abundant" />
            <param argument="--min_lim" type="integer" value="" optional="true" label="number of rare kmers occurring in a row required to discard a read" help="(0/empty: 80% of the k-mer length)." />
            <param argument="--max_lim" type="integer" value="" optional="true" label="number of abundant kmers required to discard a read" help="(0/empty: 80% of the k-mers in a read)" />
        </section>
        <section name="options_normalization" title="Normalization options" expanded="False">
            <param argument="--normalize" type="integer" value="" optional="true" label="normalize read depth according to k-mer coverage" help="(empty: no normalization)" />
        </section>
        <section name="options_kmer_char" title="Characterizing K-mers options" expanded="False">
            <param argument="--write_k_freq" type="boolean" checked="false" truevalue="--write_k_freq" falsevalue="" label="write kmers along with their frequency of occurrence and exit" />
            <param argument="--k_dist" type="boolean" checked="false" truevalue="--k_dist" falsevalue="" label="print k-mer frequency distribution and exit" />
        </section>
        <section name="options_advanced_input" title="Advanced input options" expanded="False">
            <!-- A dataset is needed here: the option reads a k-mer frequency table, so a checkbox cannot supply it. -->
            <param argument="--read_k_freq" type="data" format="txt" optional="true" label="read a set of kmers along with their frequencies of occurrence instead of reading raw input files" />
        </section>
    </inputs>
    <outputs>
        <!--
            Read outputs are skipped in the two "and exit" modes, which do not filter reads.
            NOTE(review): the ".fil" infix in the file names below is the expected
            kmer_filter naming for inputs staged as R1.fq / R2.fq; confirm on a real run.
            NOTE(review): with the capture option the discarded reads are written to the
            output directory, but no data element collects them yet.
        -->
        <data format="fastqsanger" name="clean" from_work_dir="kmer_outputs/R1.fil.fq" label="${tool.name} on ${on_string}">
            <filter>data_type['dt_select'] == 'single' and not options_kmer_char['write_k_freq'] and not options_kmer_char['k_dist']</filter>
        </data>
        <data format="fastqsanger" name="clean_fwd" from_work_dir="kmer_outputs/R1.fil.fq" label="${tool.name} on ${on_string} Forward reads">
            <filter>data_type['dt_select'] == 'pair' and not options_kmer_char['write_k_freq'] and not options_kmer_char['k_dist']</filter>
        </data>
        <data format="fastqsanger" name="clean_rev" from_work_dir="kmer_outputs/R2.fil.fq" label="${tool.name} on ${on_string} Reverse reads">
            <filter>data_type['dt_select'] == 'pair' and not options_kmer_char['write_k_freq'] and not options_kmer_char['k_dist']</filter>
        </data>
        <data format="txt" name="kfreq" from_work_dir="kfreq.out" label="${tool.name} on ${on_string} k-mer frequencies">
            <filter>options_kmer_char['write_k_freq']</filter>
        </data>
    </outputs>
    <tests>
        <!--
            Smoke tests only: this draft references no kmer_filter reference outputs,
            so the tests assert on content instead of comparing files.
            Abundant filtering is enabled with a threshold far above anything the small
            test inputs can reach, so reads are expected to be retained.
            TODO: add reference outputs generated with kmer_filter and compare against them.
        -->
        <test>
            <conditional name="data_type">
                <param name="dt_select" value="single" />
                <param name="fname" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
            </conditional>
            <section name="options_filtering">
                <param name="abundant" value="true" />
            </section>
            <section name="options_advanced_filtering">
                <param name="max_k_freq" value="1000000" />
            </section>
            <output name="clean" ftype="fastqsanger">
                <assert_contents>
                    <has_line_matching expression="^@.+$" />
                </assert_contents>
            </output>
        </test>
        <test>
            <conditional name="data_type">
                <param name="dt_select" value="pair" />
                <param name="fwd" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
                <param name="rev" ftype="fastqsanger.gz" value="clonefilter/R2_0001.2.fq.gz" />
            </conditional>
            <section name="options_filtering">
                <param name="abundant" value="true" />
            </section>
            <section name="options_advanced_filtering">
                <param name="max_k_freq" value="1000000" />
            </section>
            <output name="clean_fwd" ftype="fastqsanger">
                <assert_contents>
                    <has_line_matching expression="^@.+$" />
                </assert_contents>
            </output>
            <output name="clean_rev" ftype="fastqsanger">
                <assert_contents>
                    <has_line_matching expression="^@.+$" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

Allows paired or single-end reads to be filtered according to the number of rare or abundant kmers they contain. Useful for both RAD datasets as well as randomly sheared genomic or transcriptomic data.

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation" />
</tool>