view dexseq/dexseq.xml @ 0:7604d324c5aa draft

Uploaded
author pavanvidem
date Fri, 28 Aug 2015 08:37:31 -0400
parents
children
line wrap: on
line source

<tool id="deseq2_single" name="DESeq2" version="2.0.2">
    <description>Determines differentially expressed features from count tables</description>
    <!-- Runtime dependencies: the R interpreter (3.1.2) with its Rscript launcher,
         plus the deseq2 package (1.6.1) and the DESeq2 R module.
         NOTE(review): the command section runs a script named dexseq.R, yet only
         DESeq2 is declared here; confirm whether a DEXSeq requirement is missing. -->
    <requirements>
        <requirement type="package" version="3.1.2">R</requirement>
        <requirement type="binary">Rscript</requirement>
        <requirement type="package" version="1.6.1">deseq2</requirement>
        <requirement type="R-module">DESeq2</requirement>
    </requirements>
    <!-- References the helper script single_helper.py.
         NOTE(review): a second <code> element pointing at dexseq_helper.py appears
         after the outputs section; confirm which helper is actually intended. -->
    <code file="single_helper.py" />
    <command>
        ## Assembles the Rscript call. The factor / level / count-file selection made in
        ## the nested repeats is collected into a list and handed to the script as JSON
        ## through the f option. Resulting structure:
        ##   [[factor name, [{level name: [count files]}, {level name: [count files]}]], ...]
        ##
        ## NOTE(review): the script path is absolute and installation-specific; confirm
        ## whether it should be resolved relative to the tool directory instead.
        ## NOTE(review): the p option is passed twice (the constant 12 and, when the
        ## report is enabled, the report directory); confirm the intended flags against
        ## the option spec of dexseq.R.
        #import json
        Rscript /usr/local/galaxy/galaxy-dist/tools/test/deseq2/dexseq.R
            ## The result dataset is declared under the name dexseq_out in the outputs
            ## section, so that is the name that must be referenced here.
            #set $reportdir = $dexseq_out.files_path
            -o "$dexseq_out"
            -p 12
            #set $temp_factor_names = list()
            #for $factor in $rep_factorName:
                #set $temp_factor = list()
                #for $level in $factor.rep_factorLevel:
                    #set $count_files = list()
                    #for $file in $level.countsFile:
                        ## silent: evaluate the append without emitting its return value
                        #silent $count_files.append(str($file))
                    #end for
                    #silent $temp_factor.append( {str($level.factorLevel): $count_files} )
                #end for
                #silent $temp_factor_names.append([str($factor.factorName), $temp_factor])
            #end for
            -f '#echo json.dumps(temp_factor_names)#'
            -a $gtf
            #if $report:
                -p "$reportdir"
            #end if
            -c $fdr_cutoff
    </command>
    <!-- Error detection: each pattern is searched in both stdout and stderr
         (source="both"); a match is reported at level "fatal" together with the
         given description. -->
    <stdio>
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted." />
        <regex match="Input-Error 01"
               source="both"
               level="fatal"
               description="Error in your input parameters: Make sure you only apply factors to selected samples." />
        <regex match="Error in"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <!-- Parameters shown in the tool form. -->
    <inputs>
        <param name="gtf" type="data" label="GTF file created from dexseq_count tool" />

        <!-- At least one factor; every factor holds exactly two levels (min and max are
             both 2), and every level has a name plus one or more count files. -->
        <repeat name="rep_factorName" title="Factor" min="1">
            <param name="factorName"
                   type="text"
                   value="FactorName"
                   label="Specify a factor name"
                   help="Only letters, numbers and underscores will be retained in this field">
                <sanitizer>
                    <valid initial="string.letters,string.digits">
                        <add value="_" />
                    </valid>
                </sanitizer>
            </param>
            <repeat name="rep_factorLevel" title="Factor level" min="2" max="2" default="2">
                <param name="factorLevel"
                       type="text"
                       value="FactorLevel"
                       label="Specify a factor level"
                       help="Only letters, numbers and underscores will be retained in this field">
                    <sanitizer>
                        <valid initial="string.letters,string.digits">
                            <add value="_" />
                        </valid>
                    </sanitizer>
                </param>
                <param name="countsFile" type="data" multiple="true" label="Counts file" />
            </repeat>
        </repeat>

        <!-- Report switch; truevalue and falsevalue are both empty strings. -->
        <param name="report"
               type="boolean"
               truevalue=""
               falsevalue=""
               checked="true"
               label="Visualise the analysis results?"
               help="output an additional html file" />
        <param name="fdr_cutoff"
               type="float"
               min="0"
               max="1"
               value="0.05"
               label="All the genes under this FDR threshold will be shown in the html report." />
    </inputs>

    <!-- Datasets produced by the tool. -->
    <outputs>
        <!-- Main tabular result. -->
        <data format="tabular" name="dexseq_out" label="DESeq2 result file on ${on_string}"/>
        <!-- Optional PDF. The filter must name an existing input parameter; "report"
             is the only boolean input defined by this tool.
             NOTE(review): the command section never passes this dataset to the
             script; confirm how the PDF is meant to be written. -->
        <data format="pdf" name="plots" label="DESeq2 plots on ${on_string}">
            <filter>report == True</filter>
        </data>
    </outputs>
    <!-- References the helper script dexseq_helper.py.
         NOTE(review): an earlier <code> element, placed before the command section,
         points at single_helper.py; confirm which helper is actually intended. -->
    <code file="dexseq_helper.py" />
    <help>

.. class:: infomark

**What it does** 

Estimates the variance-mean dependence in count data from high-throughput sequencing assays and tests for differential expression based on a model using the negative binomial distribution.


**Inputs**

DESeq2_ takes count tables generated by htseq-count as input. Count tables must be generated for each sample individually. DESeq2 is capable of handling multiple factors that affect your experiment. The first factor you input is considered the primary factor that affects gene expression. You can also input several secondary factors that might influence your experiment; the final output, however, reports the changes in genes due to the primary factor in the presence of the secondary factors. Each factor has two levels/states. You need to select the appropriate count tables from your history for each factor level.

The following table gives some examples of factors and their levels:

========= ============== ===============
Factor    Factor level 1 Factor level 2 
--------- -------------- ---------------
Treatment Treated        Untreated
--------- -------------- ---------------
Condition Knockdown      Wildtype
--------- -------------- ---------------
TimePoint Day4           Day1
--------- -------------- ---------------
SeqType   SingleEnd      PairedEnd
--------- -------------- ---------------
Gender    Female         Male
========= ============== ===============

*Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level 2, so the order of the factor levels is important. For example, for the factor 'Treatment' given in the table above, DESeq2 computes fold changes of 'Treated' samples against 'Untreated', i.e. the values correspond to up- or down-regulation of genes in the Treated samples.

**Output**

DESeq2_ generates a tabular file containing the following columns and, optionally, visualized results as a PDF.

====== ==========================================================
Column Description
------ ----------------------------------------------------------
     1 Gene Identifiers
     2 mean normalised counts, averaged over all samples from both conditions
     3 the logarithm (to basis 2) of the fold change (See the note in inputs section)
     4 standard error estimate for the log2 fold change estimate
     5 Wald statistic
     6 p value for the statistical significance of this change
     7 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
       which controls false discovery rate (FDR)
====== ==========================================================





.. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html


    </help>
    <!-- Publication to cite for this tool, identified by its DOI. -->
    <citations>
        <citation type="doi">10.1101/002832</citation>
    </citations>
</tool>