<tool id="testing_deseq2" name="testing_deseq2" version="2.11.38">
    <description>Determines differentially expressed features from count tables</description>
    <!-- R packages needed by deseq2.R; only the DESeq2 version is pinned. -->
    <requirements>
        <requirement type="package">r-getopt</requirement>
        <requirement type="package">r-rjson</requirement>
        <requirement type="package">r-gplots</requirement>
        <requirement type="package" version="1.12.4">bioconductor-deseq2</requirement>
    </requirements>
    <!-- Mark the job as failed when any of these messages shows up on stdout or stderr. -->
    <stdio>
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted." />
        <regex match="Error in"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <regex match="Fatal error"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <!-- Reports the R version line followed by the installed DESeq2 package version. -->
    <version_command>
<![CDATA[
echo $(R --version | grep version | grep -v GNU)", DESeq2 version" $(R --vanilla --slave -e "library(DESeq2); cat(sessionInfo()\$otherPkgs\$DESeq2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
]]>
    </version_command>
    <command>
<![CDATA[
## In tximport mode, link the selected mapping file to a fixed local name
## (mapping.gtf or mapping.txt) that is handed to the script via -x below.
#if $tximport.tximport_selector == 'tximport':
    #if $tximport.mapping_format.mapping_format_selector == 'gtf':
        ln -s '$tximport.mapping_format.gtf_file' mapping.gtf &&
    #else:
        ln -s '$tximport.mapping_format.tabular_file' mapping.txt &&
    #end if
#end if

#import json
Rscript '${__tool_directory__}/deseq2.R'
    -o '$deseq_out'
    #if $pdf:
        -p '$plots'
    #end if
    #if $normCounts:
        -n '$counts_out'
    #end if
    ## Collect the factor form into a nested list:
    ##   [[factorName, [{levelName: [count file paths]}, ...]], ...]
    ## The levels of each factor are appended in form order and then reversed.
    #set $temp_factor_names = list()
    #for $factor in $rep_factorName:
        #set $temp_factor = list()
        #for $level in $factor.rep_factorLevel:
            #set $count_files = list()
            #for $file in $level.countsFile:
                $count_files.append(str($file))
            #end for
            $temp_factor.append( {str($level.factorLevel): $count_files} )
        #end for
        $temp_factor.reverse()
        $temp_factor_names.append([str($factor.factorName), $temp_factor])
    #end for
    ## The nested list is passed to the script as a single JSON argument.
    -f '#echo json.dumps(temp_factor_names)#'
    -t '$fit_type'
    ## Each boolean form parameter below maps to one bare flag of the script.
    #if $outlier_replace_off:
        -a
    #end if
    #if $outlier_filter_off:
        -b
    #end if
    #if $auto_mean_filter_off:
        -c
    #end if
    #if $many_contrasts:
        -m
    #end if
    #if $tximport.tximport_selector == 'tximport':
        -i
        #if $tximport.mapping_format.mapping_format_selector == 'gtf':
            -x mapping.gtf
        #else:
            -x mapping.txt
        #end if

    #end if
]]>
    </command>
    <inputs>
        <!-- One entry per experimental factor; each factor holds two or more levels,
             and each level holds the count files belonging to it. -->
        <repeat name="rep_factorName" title="Factor" min="1">
            <param name="factorName" type="text" value="FactorName"
                   label="Specify a factor name, e.g. effects_drug_x or cancer_markers"
                   help="Only letters, numbers and underscores will be retained in this field">
                <sanitizer>
                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
                </sanitizer>
            </param>
            <repeat name="rep_factorLevel" title="Factor level" min="2" default="2">
                <param name="factorLevel" type="text" value="FactorLevel"
                       label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"
                       help="Only letters, numbers and underscores will be retained in this field">
                    <sanitizer>
                        <valid initial="string.letters,string.digits"><add value="_" /></valid>
                    </sanitizer>
                </param>
                <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts file(s)"/>
            </repeat>
        </repeat>

        <!-- Input flavour: plain count tables, or TPM values that additionally need
             a transcript-to-gene mapping (GTF or two-column tabular file). -->
        <conditional name="tximport">
            <param name="tximport_selector" type="select" label="Choice of Input data">
                <option value="count" selected="true">Count data (e.g. from htseq-count or feature-count)</option>
                <option value="tximport">TPM values (e.g. from sailfish or salmon)</option>
            </param>
            <when value="tximport">
                <conditional name="mapping_format">
                    <param name="mapping_format_selector" type="select" label="Gene mapping format">
                        <option value="gtf" selected="true">GTF</option>
                        <option value="tabular">Transcript-ID and Gene-ID mapping file</option>
                    </param>
                    <when value="gtf">
                        <param name="gtf_file" type="data" format="gtf" label="GTF file with Transcript - Gene mapping"/>
                    </when>
                    <when value="tabular">
                        <param name="tabular_file" type="data" format="tabular" label="Tabular file with Transcript - Gene mapping"/>
                    </when>
                </conditional>
            </when>
            <when value="count" />
        </conditional>
        <param name="pdf" type="boolean" truevalue="1" falsevalue="0" checked="true"
               label="Visualising the analysis results"
               help="Output an additional PDF file" />
        <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
               label="Output normalized counts table" />
        <param name="many_contrasts" type="boolean" truevalue="1" falsevalue="0" checked="false"
               label="Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)"
               help="When enabled, the single result file is replaced by a collection containing one result file per comparison of primary factor levels" />
        <param name="fit_type" type="select" label="Fit type">
            <option value="1" selected="true">parametric</option>
            <option value="2">local</option>
            <option value="3">mean</option>
        </param>
        <param name="outlier_replace_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
               label="Turn off outliers replacement (only affects with >6 replicates)"
               help="When there are more than 6 replicates for a given sample, the DESeq2 will automatically replace counts with large Cook’s distance with the trimmed mean over all samples, scaled up by the size factor or normalization factor for that sample" />
        <param name="outlier_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
               label="Turn off outliers filtering (only affects with >2 replicates)"
               help="When there are more than 2 replicates for a given sample, the DESeq2 will automatically filter genes which contain a Cook’s distance above a cutoff" />
        <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
               label="Turn off independent filtering"
               help="DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
    </inputs>
    <outputs>
        <!-- Single result table; only produced when the all-vs-all mode is off. -->
        <data format="tabular" name="deseq_out" label="DESeq2 result file on ${on_string}">
            <filter>many_contrasts is False</filter>
            <actions>
                <action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj" />
            </actions>
        </data>
        <!-- All-vs-all mode: result files are discovered in the working directory.
             The angle brackets of the named regex group are written as entities
             because a literal less-than sign is not allowed in an attribute value.
             NOTE(review): the "None" prefix presumably comes from '$deseq_out'
             rendering as None when that output is filtered out; confirm against deseq2.R. -->
        <collection name="split_output" type="list" label="DESeq2 result files on ${on_string}">
            <filter>many_contrasts is True</filter>
            <discover_datasets pattern="None.(?P&lt;designation&gt;.+_vs_.+)" format="tabular" directory="." visible="false"/>
        </collection>
        <data format="pdf" name="plots" label="DESeq2 plots on ${on_string}">
            <filter>pdf == True</filter>
        </data>
        <data format="tabular" name="counts_out" label="Normalized counts file on ${on_string}">
            <filter>normCounts == True</filter>
        </data>
    </outputs>
    <tests>
        <!-- Count-table input, one factor with two levels, normalized counts requested. -->
        <test>
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="GSM461179_treat_single.counts,GSM461180_treat_paired.counts,GSM461181_treat_paired.counts"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/>
                </repeat>
            </repeat>
            <param name="pdf" value="False"/>
            <param name="normCounts" value="True"/>
            <output name="counts_out" file="normalized_readcounts.tab"/>
            <output name="deseq_out" file="deseq2_out.tab"/>
        </test>
        <!-- tximport input with a GTF transcript-to-gene mapping. -->
        <test>
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="sailfish_quant_result1.tab,sailfish_quant_result2.tab"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="sailfish_quant_result3.tab,sailfish_quant_result4.tab"/>
                </repeat>
            </repeat>
            <param name="pdf" value="False"/>
            <param name="tximport_selector" value="tximport"/>
            <param name="mapping_format_selector" value="gtf"/>
            <param name="gtf_file" value="genes_sub.gtf"/>
            <output name="deseq_out" file="deseq2_tximport_out.tab"/>
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

**What it does**

Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution.


**Inputs**

DESeq2_ takes count tables generated by htseq-count as input. Count tables must be generated for each sample individually. DESeq2 is capable of handling multiple factors that affect your experiment. The first factor you input is considered the primary factor that affects gene expression. You can also input several secondary factors that might influence your experiment, but the final output will be the changes in genes due to the primary factor in the presence of the secondary factors. Each factor has at least two levels/states. You need to select the appropriate count tables from your history for each factor level.

The following table gives some examples of factors and their levels:

========= ============== ===============
Factor    Factor level 1 Factor level 2
--------- -------------- ---------------
Treatment Treated        Untreated
--------- -------------- ---------------
Condition Knockdown      Wildtype
--------- -------------- ---------------
TimePoint Day4           Day1
--------- -------------- ---------------
SeqType   SingleEnd      PairedEnd
--------- -------------- ---------------
Gender    Female         Male
========= ============== ===============

*Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level 2. Here the order of factor levels is important. For example, for the factor 'Treatment' given in the above table, DESeq2 computes fold changes of 'Treated' samples against 'Untreated', i.e. the values correspond to up or down regulations of genes in Treated samples.

**Output**

DESeq2_ generates a tabular file containing the columns described below and, optionally, visualized results as a PDF.

====== ==========================================================
Column Description
------ ----------------------------------------------------------
1      Gene Identifiers
2      mean normalised counts, averaged over all samples from both conditions
3      the logarithm (to basis 2) of the fold change (See the note in inputs section)
4      standard error estimate for the log2 fold change estimate
5      Wald statistic
6      p value for the statistical significance of this change
7      p value adjusted for multiple testing with the Benjamini-Hochberg procedure
       which controls false discovery rate (FDR)
====== ==========================================================


.. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
]]>
    </help>
    <citations>
        <citation type="doi">10.1186/s13059-014-0550-8</citation>
    </citations>
</tool>