view deseq2.xml @ 20:bbea9c694b34 draft

Uploaded
author bgruening
date Wed, 19 Feb 2014 06:11:20 -0500
parents 72613128bd65
children d32de046ba31
line wrap: on
line source

<tool id="deseq2" name="DESeq2" version="2.0.1">
    <description>Determines differentially expressed features from count data</description>
    <!-- Dependencies declared by this tool: the Rscript binary (also named as
         the command interpreter), the DESeq2 R module, and the pinned packages
         R_3_0_1 (3.0.1) and deseq2 (1.0.17). -->
    <requirements>
        <requirement type="binary">Rscript</requirement>
        <requirement type="R-module">DESeq2</requirement>
        <requirement type="package" version="3.0.1">R_3_0_1</requirement>
        <requirement type="package" version="1.0.17">deseq2</requirement>
        <!--<requirement type="set_environment">DESEQ2_SCRIPT_PATH</requirement>-->
    </requirements>
    <command interpreter="Rscript">
        ## Builds the deseq2.R command line. Lines starting with '##' are Cheetah
        ## comments and are not emitted into the command.
        #import json
        deseq2.R
            -o "$deseq_out"
            --outfilefiltered "$deseq_out_filtered"

            ## The plot output path is only passed when the 'pdf' checkbox is set.
            #if $pdf:
                -p "$plots"
            #end if

            -i "$input_matrix"

            ## Collect the design as a list of [factorName, {factorLevel: [column, ...]}]
            ## pairs, one pair per "Factor/Condition" repeat.
            #set $temp_factor_name = list()
            #for $factor in $rep_factorName:
                #set $temp_factor = dict()
                #for $level in $factor.rep_factorLevel:
                    ##$temp_factor_list.append( '%s::%s:%s' % ($factor.factorName.replace(' ','_'), $level.factorLevel, $level.factorIndex) )
                    ## list(...) keeps the column indices JSON-serialisable where map()
                    ## returns an iterator (Python 3). The silent directive evaluates the
                    ## call without echoing its return value into the command line.
                    #silent $temp_factor.update({str($level.factorLevel): list(map(int, str($level.factorIndex).split(',')))})
                #end for
                #silent $temp_factor_name.append([str($factor.factorName), $temp_factor])

            #end for

                ##-m "#echo ' '.join( $temp_factor_list )#"
                ## The whole design is handed to the script as one JSON string.
                -m '#echo json.dumps(temp_factor_name)#'
            ##--organism "$organism"
            ##-t "$fittype"
            -c $countthreshold
    </command>
    <!-- Each pattern is searched for in both stdout and stderr; a match is
         reported at level "fatal" with the given description. -->
    <stdio>
        <regex match="Execution halted" 
           source="both" 
           level="fatal" 
           description="Execution halted." />
        <regex match="Input-Error 01" 
           source="both" 
           level="fatal" 
           description="Error in your input parameters: Make sure you only apply factors to selected samples." />
        <regex match="Error in" 
           source="both" 
           level="fatal" 
           description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <inputs>
        <!-- Tabular count matrix. It is passed to deseq2.R via -i, and the
             factorIndex column pickers reference it through data_ref. -->
        <param format="tabular" name="input_matrix" type="data" label="Countmatrix" help="You can create a count matrix with the tool 
            'Count reads in features with htseq-count'"/>

        <!-- One entry per factor (at least one). Each factor holds one or more
             levels, and each level selects the count-matrix columns that belong
             to it. The command template serialises this structure to the JSON
             string passed via -m. -->
        <repeat name="rep_factorName" title="Factor/Condition" min="1">
            <param name="factorName" type="text" value="FactorName" label="Specify a factor name" help="" />
            <repeat name="rep_factorLevel" title="Factor level" min="1">
                <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level" help="" />

                <param name="factorIndex" label="Select columns that are associated with this factor level" type="data_column" data_ref="input_matrix"
                    numerical="True" multiple="true" use_header_names="true" size="120" display="checkboxes">
                    <validator type="no_options" message="Please select at least one column." />
                </param>
            </repeat>
        </repeat>
        <!--
        <param name="control_cols" type="select" display="checkboxes" multiple="true" optional="True" label="Select columns containing first condition" 
            dynamic_options="get_matrix_header( input_dataset=input_matrix )" help="insert useful info here">
            <validator type="no_options" message="Please select at least one column."/>
        </param>            
        <param name="experiement_cols" type="select" display="checkboxes" multiple="true" optional="True" label="Select columns containing second condition" 
            dynamic_options="get_matrix_header( input_dataset=input_matrix )" help="insert useful info here">
            <validator type="no_options" message="Please select at least one column."/>
        </param>             
                 
        <repeat name="factor" title="factor">
            <param name="factor_name" type="text" value="Factor Name" label="Specify a factor name" 
                help="Order of factors you add effects the design formual and hence effects whole analysis. Design formual will be created as follows: ~factor1+factor2+factor3+...+factorN+condition"/>
            <param name="factor_index" type="select" display="checkboxes" multiple="true" optional="True" label="Choose sample to compare with" 
                    dynamic_options="get_matrix_header( input_dataset=input_matrix )" help="Select columns that are associated with a factor">
                <validator type="no_options" message="Please select at least one column."/>
            </param>
        </repeat>
        -->
        <!--param name="organism" size="10" type="select">
            <option value="human">human</option>
            <option value="mouse">mouse</option>
            <option value="fly">fly</option>
            <option value="other">other</option>
        </param-->
        <!-- Passed to deseq2.R as the -c argument. -->
        <param name="countthreshold" size="10" type="float" value="10.0" label="Filter out features with mean normalized counts lower than this value"/>
        <!-- NOTE(review): fittype is not referenced by the active command
             template; its only use (-t "$fittype") is commented out there, so
             this selection currently appears to have no effect. Confirm. -->
        <param name="fittype" type="select" label="Type of fitting of dispersions to the mean intensity">
            <option value="parametric">parametric</option>
            <option value="local">local</option>
            <option value="mean">mean</option>
        </param>
        <!-- Both string values are empty: the command template only tests this
             checkbox for truth, and the "plots" output filter compares it to
             True, so its string form is never placed on the command line. -->
        <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" 
            label="Visualising the analysis results"
            help="Output an additional PDF file" />
    </inputs>

    <!-- Two tabular datasets (the result table and the independent-filtering
         table) are always declared; the PDF of plots is only created when the
         "pdf" checkbox is checked, as expressed by the filter. -->
    <outputs>
        <data format="tabular" name="deseq_out" label="DESeq2 result file on ${on_string}" />
        <data format="tabular" name="deseq_out_filtered" label="Independent filtering result file on ${on_string}" />
        <data format="pdf" name="plots" label="DESeq2 plots on ${on_string}">
            <filter>pdf == True</filter>
        </data>
    </outputs>
    <!-- NOTE(review): the only visible calls to a helper function
         (get_matrix_header, presumably defined in this file) are inside
         commented-out params in the inputs section. Confirm whether this
         code file is still needed. -->
    <code file="deseq_helper.py" />

    <help>

.. class:: infomark

**What it does** 

Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution.


**Inputs**

DESeq2_ requires one count matrix as input file. You can create a count matrix with the tool 'Count reads in features with htseq-count'.



**Output**

DESeq2_ generates a tabular file containing the columns described below and, optionally, a PDF file with visualized results.

====== ==========================================================
Column Description
------ ----------------------------------------------------------
     1 Gene Identifiers
     2 mean normalised counts, averaged over all samples from both conditions
     3 the logarithm (to base 2) of the fold change
     4 standard error estimate for the log2 fold change estimate
     5 p value for the statistical significance of this change
     6 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
       which controls false discovery rate (FDR)
====== ==========================================================


------

**References** 

DESeq2_ Authors: Michael Love (MPIMG Berlin), Simon Anders, Wolfgang Huber (EMBL Heidelberg)

If DESeq2_ is used to obtain results for scientific publications it
should be cited as [1]_. A paper describing DESeq2_ is in preparation.



.. [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`_. 

.. _Differential expression analysis for sequence count data: http://dx.doi.org/10.1186/gb-2010-11-10-r106
.. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html


    </help>
</tool>