comparison deseq2.xml @ 22:aad8927093ac draft

Uploaded
author bgruening
date Sat, 15 Nov 2014 05:42:24 -0500
parents d32de046ba31
children f8a59b35c397
comparison
equal deleted inserted replaced
21:d32de046ba31 22:aad8927093ac
1 <tool id="deseq2" name="DESeq2" version="2.0.1"> 1 <tool id="iuc_deseq2" name="DESeq2" version="2.1.6.0">
2 <description>Determines differentially expressed features from count data</description> 2 <description>Determines differentially expressed features from count tables</description>
3 <requirements> 3 <requirements>
4 <requirement type="binary">Rscript</requirement> 4 <requirement type="binary">Rscript</requirement>
5 <requirement type="R-module">DESeq2</requirement> 5 <requirement type="R-module">DESeq2</requirement>
6 <requirement type="package" version="3.0.2">R_3_0_2</requirement> 6 <requirement type="package" version="1.6.1">deseq2</requirement>
7 <requirement type="package" version="1.2.10">deseq2</requirement>
8 <!--<requirement type="set_environment">DESEQ2_SCRIPT_PATH</requirement>-->
9 </requirements> 7 </requirements>
8 <code file="helper.py" />
10 <command interpreter="Rscript"> 9 <command interpreter="Rscript">
11 #import json 10 #import json
12 deseq2.R 11 deseq2.R
13 -o "$deseq_out" 12 -o "$deseq_out"
14 --outfilefiltered "$deseq_out_filtered" 13 --outfilefiltered "$deseq_out_filtered"
15 14
16 #if $pdf: 15 #if $pdf:
17 -p "$plots" 16 -p "$plots"
18 #end if 17 #end if
18
19 #set $temp_factor_names = list()
20 #for $factor in $rep_factorName:
19 21
20 -i "$input_matrix" 22 #set $temp_factor = list()
21
22 #set $temp_factor_name = list()
23 #for $factor in $rep_factorName:
24 #set $temp_factor = dict()
25 #for $level in $factor.rep_factorLevel: 23 #for $level in $factor.rep_factorLevel:
26 ##$temp_factor_list.append( '%s::%s:%s' % ($factor.factorName.replace(' ','_'), $level.factorLevel, $level.factorIndex) ) 24 #set $count_files = list()
27 $temp_factor.update({str($level.factorLevel): map(int, str($level.factorIndex).split(','))}) 25 #for $file in $level.rep_countsFile:
26 $count_files.append(str($file.countsFile))
27 #end for
28 $temp_factor.append( {str($level.factorLevel): $count_files} )
28 #end for 29 #end for
29 $temp_factor_name.append([str($factor.factorName), $temp_factor]) 30 $temp_factor_names.append([str($factor.factorName), $temp_factor])
30 31
31 #end for 32 #end for
32 33
33 ##-m "#echo ' '.join( $temp_factor_list )#" 34 -m '#echo json.dumps(temp_factor_names)#'
34 -m '#echo json.dumps(temp_factor_name)#' 35 #if str($filtermode.mode) == "absolute":
35 ##--organism "$organism" 36 -c $filtermode.count_abs
36 ##-t "$fittype" 37 #elif str($filtermode.mode) == "quantile":
37 -c $countthreshold 38 -c $filtermode.count_quant
39 #end if
40 --filtermode $filtermode.mode
38 </command> 41 </command>
39 <stdio> 42 <stdio>
40 <regex match="Execution halted" 43 <exit_code range="1:" />
41 source="both" 44 <exit_code range=":-1" />
42 level="fatal" 45 <regex match="Error:" />
43 description="Execution halted." /> 46 <regex match="Exception:" />
44 <regex match="Input-Error 01"
45 source="both"
46 level="fatal"
47 description="Error in your input parameters: Make sure you only apply factors to selected samples." />
48 <regex match="Error in"
49 source="both"
50 level="fatal"
51 description="An undefined error occurred, please check your input carefully and contact your administrator." />
52 </stdio> 47 </stdio>
53 <inputs> 48 <inputs>
54 <param format="tabular" name="input_matrix" type="data" label="Countmatrix" help="You can create a count matrix with the tool 49 <repeat name="rep_factorName" title="Factor" min="1">
55 'Count reads in features with htseq-count'"/> 50 <param name="factorName" type="text" value="FactorName" label="Specify a factor name"
56 51 help="Only letters, numbers and underscores will be retained in this field">
57 <repeat name="rep_factorName" title="Factor/Condition" min="1"> 52 <sanitizer>
58 <param name="factorName" type="text" value="FactorName" label="Specify a factor name" help="" /> 53 <valid initial="string.letters,string.digits"><add value="_" /></valid>
59 <repeat name="rep_factorLevel" title="Factor level" min="1"> 54 </sanitizer>
60 <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level" help="" /> 55 </param>
61 56 <repeat name="rep_factorLevel" title="Factor level" min="2" max="2" default="2">
62 <param name="factorIndex" label="Select columns that are associated with this factor level" type="data_column" data_ref="input_matrix" 57 <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level"
63 numerical="True" multiple="true" use_header_names="true" size="120" display="checkboxes"> 58 help="Only letters, numbers and underscores will be retained in this field">
64 <validator type="no_options" message="Please select at least one column." /> 59 <sanitizer>
60 <valid initial="string.letters,string.digits"><add value="_" /></valid>
61 </sanitizer>
65 </param> 62 </param>
63 <repeat name="rep_countsFile" title="Biological replicate" min="1">
64 <param name="countsFile" type="data" label="Counts file"/>
65 </repeat>
66 </repeat> 66 </repeat>
67 </repeat> 67 </repeat>
68 <!-- 68 <conditional name="filtermode">
69 <param name="control_cols" type="select" display="checkboxes" multiple="true" optional="True" label="Select columns containing first condition" 69 <param name="mode" type="select" label="Filter out features with following criteria">
70 dynamic_options="get_matrix_header( input_dataset=input_matrix )" help="insert useful info here"> 70 <option value="absolute">Filter based on absolute normalized mean counts</option>
71 <validator type="no_options" message="Please select at least one column."/> 71 <option value="quantile">Filter based on quantiles</option>
72 </param>
73 <param name="experiement_cols" type="select" display="checkboxes" multiple="true" optional="True" label="Select columns containing second condition"
74 dynamic_options="get_matrix_header( input_dataset=input_matrix )" help="insert useful info here">
75 <validator type="no_options" message="Please select at least one column."/>
76 </param>
77
78 <repeat name="factor" title="factor">
79 <param name="factor_name" type="text" value="Factor Name" label="Specify a factor name"
80 help="The order of the factors you add affects the design formula and hence affects the whole analysis. The design formula will be created as follows: ~factor1+factor2+factor3+...+factorN+condition"/>
81 <param name="factor_index" type="select" display="checkboxes" multiple="true" optional="True" label="Choose sample to compare with"
82 dynamic_options="get_matrix_header( input_dataset=input_matrix )" help="Select columns that are associated with a factor">
83 <validator type="no_options" message="Please select at least one column."/>
84 </param> 72 </param>
85 </repeat> 73 <when value="absolute">
86 --> 74 <param name="count_abs" size="10" type="float" value="10.0" label="Filter out features with mean normalized counts lower than this value"/>
87 <!--param name="organism" size="10" type="select"> 75 </when>
88 <option value="human">human</option> 76 <when value="quantile">
89 <option value="mouse">mouse</option> 77 <param name="count_quant" size="10" type="float" value="10.0" min="0" max="100" label="Filter out features with mean normalized counts lower than this % of values" />
90 <option value="fly">fly</option> 78 </when>
91 <option value="other">other</option> 79 </conditional>
92 </param--> 80 <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" label="Visualising the analysis results"
93 <param name="countthreshold" size="10" type="float" value="10.0" label="Filter out features with mean normalized counts lower than this value"/>
94 <param name="fittype" type="select" label="Type of fitting of dispersions to the mean intensity">
95 <option value="parametric">parametric</option>
96 <option value="local">local</option>
97 <option value="mean">mean</option>
98 </param>
99 <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true"
100 label="Visualising the analysis results"
101 help="output an additional PDF file" /> 81 help="output an additional PDF file" />
102 </inputs> 82 </inputs>
103 83
104 <outputs> 84 <outputs>
105 <data format="tabular" name="deseq_out" label="DESeq2 result file on ${on_string}" /> 85 <data format="tabular" name="deseq_out" label="DESeq2 result file on ${on_string}"/>
106 <data format="tabular" name="deseq_out_filtered" label="Independent filtering result file on ${on_string}" /> 86 <data format="tabular" name="deseq_out_filtered" label="Independent filtering result file on ${on_string}"/>
107 <data format="pdf" name="plots" label="DESeq2 plots on ${on_string}"> 87 <data format="pdf" name="plots" label="DESeq2 plots on ${on_string}">
108 <filter>pdf == True</filter> 88 <filter>pdf == True</filter>
109 </data> 89 </data>
110 </outputs> 90 </outputs>
111 <code file="deseq_helper.py" />
112 91
113 <help> 92 <help>
114 93
115 .. class:: infomark 94 .. class:: infomark
116 95
119 Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution 98 Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution
120 99
121 100
122 **Inputs** 101 **Inputs**
123 102
124 DESeq2_ requires one count matrix as input file. You can use the tool 103 DESeq2_ takes count tables generated by htseq-count as input. Count tables must be generated for each sample individually. DESeq2 is capable of handling multiple factors that affect your experiment. The first factor you input is considered the primary factor that affects gene expression. You can also input several secondary factors that might influence your experiment, but the final output will be the changes in gene expression due to the primary factor in the presence of the secondary factors. Each factor has two levels/states. You need to select the appropriate count tables from your history for each factor level.
125 104
105 The following table gives some examples of factors and their levels:
126 106
107 ========= ============== ===============
108 Factor Factor level 1 Factor level 2
109 --------- -------------- ---------------
110 Treatment Treated Untreated
111 --------- -------------- ---------------
112 Condition Knockdown Wildtype
113 --------- -------------- ---------------
114 TimePoint Day4 Day1
115 --------- -------------- ---------------
116 SeqType SingleEnd PairedEnd
117 --------- -------------- ---------------
118 Gender Female Male
119 ========= ============== ===============
120
121 *Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level 2, so the order of factor levels is important. For example, for the factor 'Treatment' given in the table above, DESeq2 computes fold changes of 'Treated' samples against 'Untreated', i.e. the values correspond to up- or down-regulation of genes in Treated samples.
127 122
128 **Output** 123 **Output**
129 124
130 DESeq2_ generates a tabular file containing the different columns and optional visualized results as PDF. 125 DESeq2_ generates a tabular file containing the different columns and optional visualized results as PDF.
131 126
132 ====== ========================================================== 127 ====== ==========================================================
133 Column Description 128 Column Description
134 ------ ---------------------------------------------------------- 129 ------ ----------------------------------------------------------
135 1 Gene Identifiers 130 1 Gene Identifiers
136 2 mean normalised counts, averaged over all samples from both conditions 131 2 mean normalised counts, averaged over all samples from both conditions
137 3 the logarithm (to base 2) of the fold change 132 3 the logarithm (to base 2) of the fold change (see the note in the Inputs section)
138 4 standard error estimate for the log2 fold change estimate 133 4 standard error estimate for the log2 fold change estimate
139 5 p value for the statistical significance of this change 134 5 p value for the statistical significance of this change
140 6 p value adjusted for multiple testing with the Benjamini-Hochberg procedure 135 6 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
141 which controls false discovery rate (FDR) 136 which controls false discovery rate (FDR)
142 ====== ========================================================== 137 ====== ==========================================================
143 138
144 139
145 ------
146
147 **References**
148
149 DESeq2_ Authors: Michael Love (MPIMG Berlin), Simon Anders, Wolfgang Huber (EMBL Heidelberg)
150
151 If DESeq2_ is used to obtain results for scientific publications it
152 should be cited as [1]_. A paper describing DESeq2_ is in preparation.
153
154
155
156 .. [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`_.
157
158 .. _Differential expression analysis for sequence count data: http://dx.doi.org/10.1186/gb-2010-11-10-r106
159 .. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html 140 .. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
160 141
161
162 </help> 142 </help>
143 <citations>
144 <citation type="doi">10.1101/002832</citation>
145 </citations>
163 </tool> 146 </tool>