view dada2_makeSequenceTable.xml @ 4:383501d6aeb0 draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit 5b1603bbcd3f139cad5c876be83fcb39697b5613-dirty
author matthias
date Mon, 29 Apr 2019 09:52:30 -0400
parents c3834c230b0a
children ec4a183cc713
line wrap: on
line source

<tool id="dada2_makeSequenceTable" name="dada2: makeSequenceTable" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@">
    <description>construct a sequence table (analogous to OTU table)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <!-- run the R script rendered from the dada2_script configfile with Rscript;
         errors are detected from the exit code of the command -->
    <command detect_errors="exit_code"><![CDATA[
    Rscript '$dada2_script'
    ]]></command>
    <configfiles>
        <configfile name="dada2_script"><![CDATA[
@READ_FOO@

## Cheetah template rendered to an R script: builds a dada2 sequence table
## from the selected samples, optionally plots the sequence length
## distribution, optionally applies a length filter, and writes the table
## as a tab separated file.
library(dada2, quietly=TRUE)
#if $plot == "yes"
library(ggplot2, quietly=TRUE)
#end if

## a single input is handed to makeSequenceTable as is; several inputs are
## collected in a list named by their element identifiers
samples <- list()
#for $s in $samples:
    #if $len($samples) == 1
    samples <- readRDS('$s')
    #else
    samples[["$s.element_identifier"]] <- readRDS('$s')
    #end if
#end for
## make sequence table
seqtab <- makeSequenceTable(samples, orderBy = "$orderBy")

#if $plot == "yes"
## number of reads per sequence length, computed on the unfiltered table
## (only needed for the plot)
reads.per.seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum)
df <- data.frame(length=as.numeric(names(reads.per.seqlen)), count=reads.per.seqlen)

pdf( '$plot_output' )
seqlen.plot <- ggplot(data=df, aes(x=length, y=count)) +
    geom_col() +
#if $filter_cond.filter_select != "no"
    geom_vline( xintercept=c($filter_cond.min-0.5, $filter_cond.max+0.5) ) +
#end if
    theme_bw()
## print explicitly so that rendering does not rely on top level auto-printing
print(seqlen.plot)
bequiet <- dev.off()
#end if

## filter by seqlengths
#if $filter_cond.filter_select != "no"
    ## drop=FALSE keeps the matrix shape when only one sample or one sequence
    ## is left; without it the table collapses to a vector and is written
    ## without the sample by sequence layout
    seqtab <- seqtab[, nchar(colnames(seqtab)) %in% seq($filter_cond.min, $filter_cond.max), drop=FALSE]
#end if

write.table(seqtab, "$stable", quote=FALSE, sep="\t", row.names = TRUE, col.names = NA)
    ]]></configfile>
    </configfiles>
    <inputs>
        <!-- one or more data sets; the script reads each of them with readRDS
             (accepted formats come from the DADA_UNIQUES token, presumably defined in macros.xml) -->
        <param name="samples" type="data" multiple="true" format="@DADA_UNIQUES@" label="samples" />
        <!-- passed unchanged as the orderBy argument of makeSequenceTable -->
        <param argument="orderBy" type="select" label="Column order">
            <option value="abundance">abundance</option>
            <option value="nsamples">nsamples</option>
        </param>
        <!-- optional length filter: with "minmax" the script keeps only sequences whose
             length lies in seq(min, max) and marks the bounds in the plot -->
        <conditional name="filter_cond">
            <param name="filter_select" type="select" label="Length filter method">
                <option value="no">No filter</option>
                <option value="minmax">Specify minimum and maximum sequence lengths</option>
            </param>
            <when value="no"/>
            <when value="minmax">
                <!-- no default is preset for either bound (value is empty) -->
                <param name="min" type="integer" value="" label="Minimum sequence length"/>
                <param name="max" type="integer" value="" label="Maximum sequence length"/>
            </when>
        </conditional>
        <!-- when enabled the script loads ggplot2 and writes the length distribution PDF;
             the plot_output data set is filtered on this parameter -->
        <param name="plot" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="plot sequence length distribution" />
    </inputs>
    <outputs>
        <!-- sequence table written by the script with write.table (tab separated) -->
        <data name="stable" format="dada2_sequencetable" label="${tool.name} on ${on_string}"/>
        <!-- PDF with the sequence length distribution; only present when the plot parameter is enabled -->
        <data name="plot_output" format="pdf" label="${tool.name} on ${on_string}: sequence length distribution">
            <filter>plot</filter>
        </data>
    </outputs>
    <tests>
        <!-- single mergePairs input, all other parameters left at their defaults;
             only the sequence table is compared, the length filter is not exercised
             and the plot output is not checked -->
        <test>
            <param name="samples" ftype="dada2_mergepairs" value="mergePairs_F3D0.Rdata"/>
            <output name="stable" value="makeSequenceTable_F3D0.tab" ftype="dada2_sequencetable" />
        </test>
    </tests>
    <help><![CDATA[
Description
...........

This function constructs a sequence table -- more precisely an amplicon sequence variant (ASV) table -- a higher-resolution version of the OTU table produced by traditional methods.

The sequence table is a matrix with rows corresponding to (and named by) the samples, and columns corresponding to (and named by) the sequence variants.

Usage
.....

**Input**: The result of derepFastq, dada, or mergePairs.

**Output**: A data set of type dada2_sequencetable, i.e. a tabular data set with a row for each sample and a column for each unique sequence across all the samples. The columns are named by the sequence.

Details
.......

Sequences that are much longer or shorter than expected may be the result of non-specific priming. You can remove non-target-length sequences by applying a length filter. This is analogous to “cutting a band” in-silico to get amplicons of the targeted length.

@HELP_OVERVIEW@
    ]]></help>
    <expand macro="citations"/>
</tool>