diff deseq2.xml @ 14:bb5c80d15e0a draft

Uploaded
author bgruening
date Wed, 04 Sep 2013 11:58:20 -0400
parents 6d17a7d6fe9c
children ff74cd9b0414
line wrap: on
line diff
--- a/deseq2.xml	Mon Sep 02 10:09:37 2013 -0400
+++ b/deseq2.xml	Wed Sep 04 11:58:20 2013 -0400
@@ -9,17 +9,42 @@
     <command interpreter="Rscript">
         deseq2.R
             -o "$deseq_out"
-            -p "$plots" 
+            ## Request the PDF plot collection only when the user enabled it.
+            #if $pdf:
+                -p "$plots"
+            #end if
+
             -i "$input_matrix"
 
         #if $filter_sel.filter_sel_opts == 'all_vs_all':
             -s 'all_vs_all'
         #else:
-            -s ## build a string like '1:2 5:6'
+            -s ## build a string like '1,2 5,6'
             "${filter_sel.control_cols} ${filter_sel.experiement_cols}"
+            ## Collect one 'name:columns' token per factor repeat; spaces in the name become underscores.
+            #set $temp_factor_list = list()
+            #set $is_multi_factor_analysis = False
+            #for $factor in $filter_sel.factor:
+                #set $is_multi_factor_analysis = True
+                #silent $temp_factor_list.append( '%s:%s' % ($factor.factor_name.replace(' ','_'), $factor.factor_index) )
+            #end for
+            ## Pass -f only when at least one factor was defined.
+            #if $is_multi_factor_analysis:
+                -f "#echo ' '.join( $temp_factor_list )#"
+            #end if
         #end if
 
     </command>
+    <stdio>
+        <regex source="both"
+           level="fatal"
+           match="Execution halted"
+           description="Execution halted" />
+        <regex source="both"
+           level="fatal"
+           match="Input-Error 01"
+           description="Error in your input parameters: Make sure you only apply factors to selected samples." />
+    </stdio>
     <inputs>
         <param format="tabular" name="input_matrix" type="data" label="Countmatrix" help="You can create a count matrix with the tool ..."/>
 
@@ -40,16 +65,29 @@
                     <validator type="no_options" message="Please select at least one column."/>
                 </param>
 
+                <repeat title="Include multi-factor analysis" name="factor">
+                    <param type="text" name="factor_name" label="Specify a factor name" value="Factor Name" help=""/>
+                    <!-- Column selector bound to input_matrix via data_ref; several columns may be ticked per factor. -->
+                    <param type="data_column" name="factor_index" data_ref="input_matrix" label="Select columns that are associated with a factor."
+                        display="checkboxes" multiple="true" numerical="True" size="120" use_header_names="true">
+                        <validator message="Please select at least one column." type="no_options"/>
+                    </param>
+                </repeat>
+
             </when>
             <when value="all_v_all" />
         </conditional>
 
+        <!-- Boolean toggle for the optional PDF output; truevalue and falsevalue must differ so the rendered value reflects the state. -->
+        <param name="pdf" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Visualising the analysis results"
+            help="output an additional PDF file" />
     </inputs>
 
   <outputs>
-    <data format="txt" name="deseq_out" label="DESeq2 result file"/>
-    <data format="pdf" name="plots" label="Plot collection"/>
-    <data format="txt" name="log" label="DESeq2 log file"/>
+    <data name="deseq_out" format="tabular" label="DESeq2 result file on ${on_string}"/>
+    <data name="plots" format="pdf" label="DESeq2 plots on ${on_string}">
+        <filter>pdf == True</filter>
+    </data>
   </outputs>
 
   <help>
@@ -58,47 +96,49 @@
 
 **What it does** 
 
-`DESeq` is a tool for differential expression testing of RNA-Seq data.
+Estimates variance-mean dependence in count data from high-throughput sequencing assays and tests for differential expression based on a model using the negative binomial distribution.
 
 
 **Inputs**
 
-`DESeq` requires three input files to run:
+DESeq2_ requires a single count matrix as its input file.
 
-1. Annotation file in GFF3, containing the necessary information about the transcripts that are to be quantified.
-2. The BAM alignment files grouped into replicate groups, each containing several replicates. BAM files store the read alignments in a compressed format. They can be generated using the `SAM-to-BAM` tool in the NGS: SAM Tools section. (The script will also work with only two groups containing only a single replicate each. However, this analysis has less statistical power and is  therefor not recommended.)
+
 
 **Output**
 
-`DESeq` generates a text file containing the gene name and the p-value.
+DESeq2_ generates a tabular file with the columns described below and, optionally, a PDF file visualising the results.
+
+====== ==========================================================
+Column Description
+====== ==========================================================
+     1 Sample ID (corresponds to the header in your count matrix)
+     2 Gene Identifiers
+     3 mean normalised counts, averaged over all samples from both conditions
+     4 the logarithm (to base 2) of the fold change
+     5 standard error estimate for the log2 fold change estimate
+     6 p value for the statistical significance of this change
+     7 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
+       which controls false discovery rate (FDR)
+====== ==========================================================
+
 
 ------
 
-**Licenses**
+**References** 
+
+DESeq2_ Authors: Michael Love (MPIMG Berlin), Simon Anders, Wolfgang Huber (EMBL Heidelberg)
 
-If **DESeq** is used to obtain results for scientific publications it
-should be cited as [1]_.
+If DESeq2_ is used to obtain results for scientific publications it
+should be cited as [1]_. A paper describing DESeq2_ is in preparation.
 
-**References** 
+
 
 .. [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`_. 
 
 .. _Differential expression analysis for sequence count data: http://dx.doi.org/10.1186/gb-2010-11-10-r106
+.. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
 
-------
-
-For more information see http://www.sequenceontology.org/gff3.shtml
 
-**SAM/BAM format** The Sequence Alignment/Map (SAM) format is a
-tab-limited text format that stores large nucleotide sequence
-alignments. BAM is the binary version of a SAM file that allows for
-fast and intensive data processing. The format specification and the
-description of SAMtools can be found on
-http://samtools.sourceforge.net/.
-
-------
-
-DESeq-hts Wrapper Version 0.3 (Feb 2012)
-
-</help>
+  </help>
 </tool>