25
|
1 <?xml version="1.0" encoding="UTF-8"?>
|
79
|
2 <tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.0.3-latest.a">
|
25
|
3 <description>RNA-Seq gene expression analysis using edgeR (R package)</description>
|
|
4
|
|
5 <requirements>
|
62
|
6 <!--<requirement type="package" version="3.0.1">package_r3_withx</requirement>-->
|
67
|
7 <!--<requirement type="package" version="3.1.0">R</requirement>-->
|
|
8 <requirement type="package" version="3.0.3">R</requirement>
|
77
|
9 <requirement type="package" version="latest">biocLite_edgeR_limma</requirement>
|
72
|
10 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
|
25
|
11 </requirements>
|
|
12
|
79
|
13 <version_command>R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null</version_command>
|
|
14
|
25
|
<command>
    <!--
        This command is a "Cheetah" template:
        http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html

        It (1) runs the generated R script, (2) optionally converts the PDF
        figures to PNG with GraphicsMagick and (3) removes known harmless
        messages from the captured stderr before forwarding it.
    -->

    R --vanilla --slave -f $R_script '--args
        $expression_matrix
        $design_matrix
        $contrast

        $fdr

        $output_count_edgeR
        $output_cpm

        /dev/null
        <!-- Placeholder argument: the FPKM/RPKM output file should come here -->

        ## Optional outputs: pass the dataset path when requested, /dev/null otherwise.
        ## The order must match args[8] .. args[15] in the R script.
        #for $optional_output in [$output_raw_counts, $output_MDSplot, $output_BCVplot, $output_MAplot, $output_PValue_distribution_plot, $output_hierarchical_clustering_plot, $output_heatmap_plot, $output_RData_obj]
            #if $optional_output
                $optional_output
            #else
                /dev/null
            #end if
        #end for

        $output_format_images
    '
    #if $output_R
        > $output_R
    #else
        > /dev/null
    #end if

    2> stderr.txt ;

    #if $output_format_images.value == "png"
        echo "Converting PDF figures to PNG" ;

        ## R writes PDF files for the "png" format; convert each requested plot in place.
        #for $plot in [$output_MDSplot, $output_BCVplot, $output_MAplot, $output_PValue_distribution_plot, $output_hierarchical_clustering_plot, $output_heatmap_plot]
            #if $plot
                #set $plot_png = str($plot) + ".png"

                gm convert $plot $plot_png ;
                mv $plot_png $plot ;
            #end if
        #end for
    #end if

    ## Drop informational edgeR output and locale warnings from the captured stderr.
    #for $noise in ['Calculating library sizes from column', 'During startup - Warning messages', 'Setting LC_TIME failed', 'Setting LC_MONETARY failed', 'Setting LC_PAPER failed', 'Setting LC_MEASUREMENT failed', 'Setting LC_CTYPE failed', 'Setting LC_COLLATE failed']
        grep -v '$noise' stderr.txt > stderr2.txt ; rm stderr.txt ; mv stderr2.txt stderr.txt ;
    #end for

    cat stderr.txt >&2

</command>
|
|
151
|
|
<inputs>
    <!-- Read counts: one row per gene, one column per sample -->
    <param name="expression_matrix" type="data" format="tabular" label="Expression (read count) matrix" />

    <!-- Sample annotation: one row per sample, one column per factor.
         The attribute was misspelled "hepl", so this help text was never displayed. -->
    <param name="design_matrix" type="data" format="tabular" label="Design matrix"
           help="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the contrast matrix using 'edgeR: Design- from Expression matrix'." />

    <param name="contrast" type="text" label="Contrast (biological question)"
           help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info: http://www.bioconductor.org/packages/release/bioc/html/limma.html and http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf." />

    <param name="fdr" type="float" min="0" max="1" value="0.05" label="False Discovery Rate (FDR)" />

    <!-- Each selected value enables the output whose <filter> tests for it -->
    <param name="outputs" type="select" label="Optional desired outputs" multiple="true" display="checkboxes">
        <option value="make_output_raw_counts">Raw counts table</option>
        <option value="make_output_MDSplot">MDS-plot</option>
        <option value="make_output_BCVplot">BCV-plot</option>
        <option value="make_output_MAplot">MA-plot</option>
        <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option>
        <option value="make_output_hierarchical_clustering_plot">Hierarchical clustering</option>
        <option value="make_output_heatmap_plot">Heatmap</option>

        <option value="make_output_R_stdout">R stdout</option>
        <option value="make_output_RData_obj">R Data object</option>
    </param>

    <!-- Also selects the datatype of the plot outputs through their <change_format> rules -->
    <param name="output_format_images" type="select" label="Output format of images" display="radio">
        <option value="png">Portable network graphics (.png)</option>
        <option value="pdf">Portable document format (.pdf)</option>
        <option value="svg">Scalable vector graphics (.svg)</option>
    </param>
</inputs>
|
|
179
|
|
180 <configfiles>
|
|
181 <configfile name="R_script">
|
|
182 library(limma,quietly=TRUE) ## enable quietly to avoid unnecessaity stderr dumping
|
|
183 library(edgeR,quietly=TRUE) ## enable quietly to avoid unnecessaity stderr dumping
|
|
184 library(splines,quietly=TRUE) ## enable quietly to avoid unnecessaity stderr dumping
|
|
185
|
|
## Fetch commandline arguments; their order is fixed by the command template
args <- commandArgs(trailingOnly = TRUE)

expression_matrix_file = args[1]
design_matrix_file = args[2]
contrast = args[3]

## Command line arguments are character strings. Without the conversion the
## test "FDR smaller than fdr" in the MA-plot is a string comparison, in which
## e.g. "1e-10" is not smaller than "0.05".
fdr = as.numeric(args[4])

output_count_edgeR = args[5]
output_cpm = args[6]

output_xpkm = args[7] ##FPKM file - yet to be implemented

## Optional outputs: "/dev/null" when the output was not requested
output_raw_counts = args[8]
output_MDSplot = args[9]
output_BCVplot = args[10]
output_MAplot = args[11]
output_PValue_distribution_plot = args[12]
output_hierarchical_clustering_plot = args[13]
output_heatmap_plot = args[14]
output_RData_obj = args[15]

## One of "png", "pdf" or "svg"
output_format_images = args[16]
|
25
|
209
|
|
210
|
|
211 library(edgeR)
|
|
212 ##raw_data <- read.delim(designmatrix,header=T,stringsAsFactors=T)
|
|
213 ## Obtain read-counts
|
|
214
|
|
215 expression_matrix <- read.delim(expression_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))
|
|
216 design_matrix <- read.delim(design_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))
|
|
217
|
|
218 colnames(design_matrix) <- make.names(colnames(design_matrix))
|
|
219
|
|
220 for(i in 1:ncol(design_matrix)) {
|
|
221 old = design_matrix[,i]
|
|
222 design_matrix[,i] = make.names(design_matrix[,i])
|
|
223 if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) {
|
|
224 print("Renaming of factors:")
|
|
225 print(old)
|
|
226 print("To:")
|
|
227 print(design_matrix[,i])
|
|
228 }
|
45
|
229 ## The following line seems to malfunction the script:
|
|
230 ##design_matrix[,i] <- as.factor(design_matrix[,i])
|
25
|
231 }
|
|
232
|
44
|
## 1) In the expression matrix, you only want to have the samples described in the design matrix.
##    The selected count columns follow the row order of the design matrix.
columns <- match(rownames(design_matrix),colnames(expression_matrix))
columns <- columns[!is.na(columns)]
## drop=FALSE keeps a data frame even if only one sample remains
read_counts <- expression_matrix[,columns,drop=FALSE]

## 2) In the design matrix, you only want to have samples of which you really have the counts.
##    Match against the selected count columns (not the full expression matrix), so that
##    row i of the design matrix describes column i of read_counts. Matching against the
##    expression matrix re-ordered the design matrix and mispaired samples whenever the
##    two files listed the samples in a different order.
columns <- match(colnames(read_counts),rownames(design_matrix))
columns <- columns[!is.na(columns)]
design_matrix <- design_matrix[columns,,drop=FALSE]
|
25
|
242
|
|
## Filter for HTSeq predefined counts:
|
|
244 exclude_HTSeq <- c("no_feature","ambiguous","too_low_aQual","not_aligned","alignment_not_unique")
|
|
245 exclude_DEXSeq <- c("_ambiguous","_empty","_lowaqual","_notaligned")
|
|
246
|
44
|
247 exclude <- match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts))
|
|
248 exclude <- exclude[is.na(exclude)==0]
|
25
|
249 if(length(exclude) != 0) {
|
44
|
250 read_counts <- read_counts[-exclude,]
|
25
|
251 }
|
|
252
|
|
253
|
44
|
254 empty_samples <- apply(read_counts,2,function(x) sum(x) == 0)
|
25
|
255 if(sum(empty_samples) > 0) {
|
|
256 write(paste("There are ",sum(empty_samples)," empty samples found:",sep=""),stderr())
|
|
257 write(colnames(read_counts)[empty_samples],stderr())
|
|
258 } else {
|
|
259
|
|
260 dge <- DGEList(counts=read_counts,genes=rownames(read_counts))
|
|
261
|
|
262 formula <- paste(c("~0",make.names(colnames(design_matrix))),collapse = " + ")
|
|
263 design_matrix_tmp <- design_matrix
|
|
264 colnames(design_matrix_tmp) <- make.names(colnames(design_matrix_tmp))
|
|
265 design <- model.matrix(as.formula(formula),design_matrix_tmp)
|
|
266 rm(design_matrix_tmp)
|
|
267
|
|
268 # Filter prefixes
|
|
269 prefixes = colnames(design_matrix)[attr(design,"assign")]
|
|
270 avoid = nchar(prefixes) == nchar(colnames(design))
|
|
271 replacements = substr(colnames(design),nchar(prefixes)+1,nchar(colnames(design)))
|
|
272 replacements[avoid] = colnames(design)[avoid]
|
|
273 colnames(design) = replacements
|
|
274
|
|
275 # Do normalization
|
|
276 write("Calculating normalization factors...",stdout())
|
|
277 dge <- calcNormFactors(dge)
|
|
278 write("Estimating common dispersion...",stdout())
|
|
279 dge <- estimateGLMCommonDisp(dge,design)
|
|
280 write("Estimating trended dispersion...",stdout())
|
|
281 dge <- estimateGLMTrendedDisp(dge,design)
|
|
282 write("Estimating tagwise dispersion...",stdout())
|
|
283 dge <- estimateGLMTagwiseDisp(dge,design)
|
|
284
|
|
285
|
|
286 if(output_MDSplot != "/dev/null") {
|
|
287 write("Creating MDS plot",stdout())
|
|
288 ##points <- plotMDS(dge,method="bcv",labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot
|
|
289 points <- plotMDS.DGEList(dge,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot
|
|
290 dev.off()# Kill it
|
|
291
|
67
|
292 if(output_format_images == "pdf" || output_format_images == "png") {
|
55
|
293 pdf(output_MDSplot)
|
|
294 } else if(output_format_images == "svg") {
|
|
295 svg(output_MDSplot)
|
70
|
296 }
|
|
297 ## else {
|
67
|
298 ## png(output_MDSplot)
|
|
299 ##}
|
55
|
300
|
25
|
301 diff_x <- abs(max(points\$x)-min(points\$x))
|
|
302 diff_y <-(max(points\$y)-min(points\$y))
|
|
303 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR MDS Plot",type="n", xlab="BCV distance 1", ylab="BCV distance 2")
|
|
304 points(points\$x,points\$y,pch=20)
|
|
305 text(points\$x, points\$y,rownames(dge\$samples),cex=0.7,col="gray",pos=4)
|
|
306 rm(diff_x,diff_y)
|
|
307
|
|
308 dev.off()
|
|
309 }
|
|
310
|
|
311 if(output_BCVplot != "/dev/null") {
|
|
312 write("Creating Biological coefficient of variation plot",stdout())
|
60
|
313
|
67
|
314 if(output_format_images == "pdf" || output_format_images == "png") {
|
60
|
315 pdf(output_BCVplot)
|
|
316 } else if(output_format_images == "svg") {
|
|
317 svg(output_BCVplot)
|
70
|
318 }
|
|
319 ##else {
|
67
|
320 ## png(output_BCVplot)
|
|
321 ##}
|
60
|
322
|
25
|
323 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance")
|
|
324 dev.off()
|
|
325 }
|
|
326
|
|
327
|
|
328 write("Fitting GLM...",stdout())
|
|
329 fit <- glmFit(dge,design)
|
|
330
|
|
331 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout())
|
|
332 cont <- c(contrast)
|
|
333 cont <- makeContrasts(contrasts=cont, levels=design)
|
|
334
|
|
335 lrt <- glmLRT(fit, contrast=cont[,1])
|
|
336 write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout())
|
|
337 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
|
|
338 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA)
|
|
339
|
|
340 ## todo EXPORT FPKM
|
|
341 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA)
|
|
342
|
34
|
343 if(output_MAplot != "/dev/null" || output_PValue_distribution_plot != "/dev/null") {
|
25
|
344 etable <- topTags(lrt, n=nrow(dge))\$table
|
|
345 etable <- etable[order(etable\$FDR), ]
|
32
|
346
|
|
347 if(output_MAplot != "/dev/null") {
|
|
348 write("Creating MA plot...",stdout())
|
60
|
349
|
67
|
350 if(output_format_images == "pdf" || output_format_images == "png") {
|
60
|
351 pdf(output_MAplot)
|
|
352 } else if(output_format_images == "svg") {
|
|
353 svg(output_MAplot)
|
70
|
354 }
|
|
355 ##else {
|
67
|
356 ## png(output_MAplot)
|
|
357 ##}
|
60
|
358
|
32
|
359 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance"))
|
|
360 with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red"))
|
|
361 abline(h=c(-1,1), col="blue")
|
|
362 dev.off()
|
|
363 }
|
25
|
364
|
32
|
365 if(output_PValue_distribution_plot != "/dev/null") {
|
|
366 write("Creating P-value distribution plot...",stdout())
|
60
|
367
|
67
|
368 if(output_format_images == "pdf" || output_format_images == "png") {
|
60
|
369 pdf(output_PValue_distribution_plot)
|
|
370 } else if(output_format_images == "svg") {
|
|
371 svg(output_PValue_distribution_plot)
|
70
|
372 }
|
|
373 ##else {
|
67
|
374 ## png(output_PValue_distribution_plot)
|
|
375 ##}
|
60
|
376
|
32
|
377 expressed_genes <- subset(etable, PValue < 0.99)
|
|
378 h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)")
|
|
379 center <- sum(h\$counts) / length(h\$counts)
|
|
380 lines(c(0,1),c(center,center),lty=2,col="red",lwd=2)
|
|
381 k <- ksmooth(h\$mid, h\$counts)
|
|
382 lines(k\$x,k\$y,col="red",lwd=2)
|
|
383 rmsd <- (h\$counts) - center
|
|
384 rmsd <- rmsd^2
|
|
385 rmsd <- sum(rmsd)
|
|
386 rmsd <- sqrt(rmsd)
|
|
387 text(0,max(h\$counts),paste("e=",round(rmsd,2),sep=""),pos=4,col="blue")
|
|
388 ## change e into epsilon somehow
|
|
389 dev.off()
|
|
390 }
|
40
|
391 }
|
|
392
|
|
## Heatmap of the normalized log-CPM values of the (at most) 100 top ranked genes
if(output_heatmap_plot != "/dev/null") {

  ## For "png" a PDF is written here; the command template converts it afterwards
  if(output_format_images == "pdf" || output_format_images == "png") {
    pdf(output_heatmap_plot,width=10.5)
  } else if(output_format_images == "svg") {
    svg(output_heatmap_plot,width=10.5)
  }
  ## else {
  ##  png(output_heatmap_plot,width=10.5)
  ##}

  etable2 <- topTags(lrt, n=100)\$table
  order <- rownames(etable2)
  cpm_all <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)

  ## Look the top genes up by row name. Converting the row names to numbers only
  ## works when they are row positions; gene identifiers become NA (or, for numeric
  ## identifiers, select unrelated rows). NOTE(review): assumes the topTags row names
  ## equal the row names of the count matrix - confirm for the installed edgeR version.
  heatmap_rows <- match(order,rownames(cpm_all))
  if(any(is.na(heatmap_rows))) {
    ## Fall back to the original positional interpretation of the row names
    heatmap_rows <- as.numeric(order)
  }

  cpm_sub <- cpm_all[heatmap_rows,]
  heatmap(t(cpm_sub))
  dev.off()
}
|
|
410
|
|
## The hierarchical clustering output (args[13]) was accepted but never written,
## which left a requested dataset empty and gave the PDF to PNG conversion in the
## command template nothing valid to convert.
if(output_hierarchical_clustering_plot != "/dev/null") {
  write("Creating hierarchical clustering plot...",stdout())

  ## For "png" a PDF is written here; the command template converts it afterwards
  if(output_format_images == "pdf" || output_format_images == "png") {
    pdf(output_hierarchical_clustering_plot)
  } else if(output_format_images == "svg") {
    svg(output_hierarchical_clustering_plot)
  }

  ## Cluster the samples on the Euclidean distances between their normalized log-CPM profiles
  sample_clustering <- hclust(dist(t(cpm(dge,normalized.lib.sizes=TRUE,log=TRUE))))
  plot(sample_clustering,main="edgeR: Hierarchical clustering of samples",xlab="",sub="")
  dev.off()
}
|
|
412
|
35
|
413 if(output_RData_obj != "/dev/null") {
|
25
|
414 save.image(output_RData_obj)
|
|
415 }
|
|
416
|
|
417 write("Done!",stdout())
|
|
418 }
|
|
419 </configfile>
|
|
420 </configfiles>
|
|
421
|
|
422 <outputs>
|
53
|
423 <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - differentially expressed genes" />
|
25
|
424 <data format="tabular" name="output_cpm" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - CPM" />
|
|
425
|
|
426 <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - raw counts">
|
53
|
427 <filter>outputs and ("make_output_raw_counts" in outputs)</filter>
|
25
|
428 </data>
|
|
429
|
59
|
430 <data format="png" name="output_MDSplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot">
|
53
|
431 <filter>outputs and ("make_output_MDSplot" in outputs)</filter>
|
59
|
432
|
|
433 <change_format>
|
|
434 <when input="output_format_images" value="png" format="png" />
|
|
435 <when input="output_format_images" value="pdf" format="pdf" />
|
|
436 <when input="output_format_images" value="svg" format="svg" />
|
|
437 </change_format>
|
25
|
438 </data>
|
|
439
|
60
|
440 <data format="png" name="output_BCVplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - BCV-plot">
|
53
|
441 <filter>outputs and ("make_output_BCVplot" in outputs)</filter>
|
60
|
442
|
|
443 <change_format>
|
|
444 <when input="output_format_images" value="png" format="png" />
|
|
445 <when input="output_format_images" value="pdf" format="pdf" />
|
|
446 <when input="output_format_images" value="svg" format="svg" />
|
|
447 </change_format>
|
25
|
448 </data>
|
|
449
|
60
|
450 <data format="png" name="output_MAplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MA-plot">
|
53
|
451 <filter>outputs and ("make_output_MAplot" in outputs)</filter>
|
60
|
452
|
|
453 <change_format>
|
|
454 <when input="output_format_images" value="png" format="png" />
|
|
455 <when input="output_format_images" value="pdf" format="pdf" />
|
|
456 <when input="output_format_images" value="svg" format="svg" />
|
|
457 </change_format>
|
25
|
458 </data>
|
|
459
|
60
|
460 <data format="png" name="output_PValue_distribution_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - P-Value distribution">
|
53
|
461 <filter>outputs and ("make_output_PValue_distribution_plot" in outputs)</filter>
|
60
|
462
|
|
463 <change_format>
|
|
464 <when input="output_format_images" value="png" format="png" />
|
|
465 <when input="output_format_images" value="pdf" format="pdf" />
|
|
466 <when input="output_format_images" value="svg" format="svg" />
|
|
467 </change_format>
|
25
|
468 </data>
|
|
469
|
60
|
<!-- Optional plot output; created only when selected in the "outputs" parameter -->
<data format="png" name="output_hierarchical_clustering_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Hierarchical clustering">
    <filter>outputs and ("make_output_hierarchical_clustering_plot" in outputs)</filter>

    <!-- The datatype follows the image format chosen by the user -->
    <change_format>
        <when input="output_format_images" value="png" format="png" />
        <when input="output_format_images" value="pdf" format="pdf" />
        <when input="output_format_images" value="svg" format="svg" />
    </change_format>
</data>
|
|
479
|
60
|
480 <data format="png" name="output_heatmap_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Heatmap">
|
53
|
481 <filter>outputs and ("make_output_heatmap_plot" in outputs)</filter>
|
60
|
482
|
|
483 <change_format>
|
|
484 <when input="output_format_images" value="png" format="png" />
|
|
485 <when input="output_format_images" value="pdf" format="pdf" />
|
|
486 <when input="output_format_images" value="svg" format="svg" />
|
|
487 </change_format>
|
25
|
488 </data>
|
|
489
|
|
490 <data format="RData" name="output_RData_obj" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R data object">
|
53
|
491 <filter>outputs and ("make_output_RData_obj" in outputs)</filter>
|
25
|
492 </data>
|
|
493
|
40
|
494 <data format="txt" name="output_R" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R output (debug)" >
|
53
|
495 <filter>outputs and ("make_output_R_stdout" in outputs)</filter>
|
25
|
496 </data>
|
|
497 </outputs>
|
|
498
|
|
499 <help>
|
|
500 edgeR: Differential Gene(Expression) Analysis
|
36
|
501 #############################################
|
25
|
502
|
36
|
503 Overview
|
|
504 --------
|
|
505 Differential expression analysis of RNA-seq and digital gene expression profiles with biological replication. Uses empirical Bayes estimation and exact tests based on the negative binomial distribution. Also useful for differential signal analysis with other types of genome-scale count data [1].
|
25
|
506
|
|
507 For every experiment, the algorithm requires a design matrix. This matrix describes which samples belong to which groups.
|
36
|
508 More details on this are given in the edgeR manual: http://www.bioconductor.org/packages/2.12/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
|
25
|
509 and the limma manual.
|
|
510
|
|
511 Because the creation of a design matrix can be complex and time consuming, especially if no GUI is used, this package comes with an alternative tool which can help you with it.
|
|
512 This tool is called *edgeR Design Matrix Creator*.
|
|
513 If the appropriate design matrix (with corresponding links to the files) is given,
|
|
514 the correct contrast ( http://en.wikipedia.org/wiki/Contrast_(statistics) ) has to be given.
|
|
515
|
|
516 If you have for example two groups, with an equal weight, you would like to compare either
|
79
|
517 "g1-g2" or "normal-cancer".
|
25
|
518
|
36
|
The test function makes use of a MCF7 dataset used in a study that indicates that a higher sequencing depth is not necessarily more important than a higher number of replicates [2].
|
25
|
520
|
36
|
521 Input
|
|
522 -----
|
|
523 Expression matrix
|
|
524 ^^^^^^^^^^^^^^^^^
|
|
525 ::
|
25
|
526
|
|
527 Geneid "\t" Sample-1 "\t" Sample-2 "\t" Sample-3 "\t" Sample-4 [...] "\n"
|
|
528 SMURF "\t" 123 "\t" 21 "\t" 34545 "\t" 98 ... "\n"
|
|
529 BRCA1 "\t" 435 "\t" 6655 "\t" 45 "\t" 55 ... "\n"
|
|
530 LINK33 "\t" 4 "\t" 645 "\t" 345 "\t" 1 ... "\n"
|
|
531 SNORD78 "\t" 498 "\t" 65 "\t" 98 "\t" 27 ... "\n"
|
|
532 [...]
|
|
533
|
36
|
534 *Note: Make sure the number of columns in the header is identical to the number of columns in the body.*
|
25
|
535
|
36
|
536 Design matrix
|
|
537 ^^^^^^^^^^^^^
|
|
538 ::
|
25
|
539
|
|
540 Sample "\t" Condition "\t" Ethnicity "\t" Patient "\t" Batch "\n"
|
|
541 Sample-1 "\t" Tumor "\t" European "\t" 1 "\t" 1 "\n"
|
|
542 Sample-2 "\t" Normal "\t" European "\t" 1 "\t" 1 "\n"
|
|
543 Sample-3 "\t" Tumor "\t" European "\t" 2 "\t" 1 "\n"
|
|
544 Sample-4 "\t" Normal "\t" European "\t" 2 "\t" 1 "\n"
|
|
545 Sample-5 "\t" Tumor "\t" African "\t" 3 "\t" 1 "\n"
|
|
546 Sample-6 "\t" Normal "\t" African "\t" 3 "\t" 1 "\n"
|
|
547 Sample-7 "\t" Tumor "\t" African "\t" 4 "\t" 2 "\n"
|
|
548 Sample-8 "\t" Normal "\t" African "\t" 4 "\t" 2 "\n"
|
|
549 Sample-9 "\t" Tumor "\t" Asian "\t" 5 "\t" 2 "\n"
|
|
550 Sample-10 "\t" Normal "\t" Asian "\t" 5 "\t" 2 "\n"
|
|
551 Sample-11 "\t" Tumor "\t" Asian "\t" 6 "\t" 2 "\n"
|
|
552 Sample-12 "\t" Normal "\t" Asian "\t" 6 "\t" 2 "\n"
|
|
553
|
36
|
*Note: Avoid factor names that (1) are numerical or (2) contain mathematical symbols; preferably use only letters.*
|
25
|
555
|
36
|
556 Contrast
|
|
557 ^^^^^^^^
|
|
558 The contrast represents the biological question. There can be many questions asked, e.g.:
|
25
|
559
|
36
|
560 - Tumor-Normal
|
|
561 - African-European
|
|
562 - 0.5*(Control+Placebo) / Treated
|
25
|
563
|
36
|
564 Installation
|
|
565 ------------
|
25
|
566
|
|
567 This tool requires no specific configurations. The following dependencies are installed automatically:
|
36
|
568
|
|
569 - R
|
|
570 - Bioconductor
|
79
|
571 - limma
|
|
572 - edgeR
|
25
|
573
|
36
|
574 License
|
|
575 -------
|
|
576 - R
|
79
|
577 - GPL 2 & GPL 3
|
36
|
578 - limma
|
|
579 - GPL (>=2)
|
|
580 - edgeR
|
79
|
581 - GPL (>=2)
|
36
|
582
|
|
583 References
|
|
584 ----------
|
|
585
|
|
586 EdgeR
|
|
587 ^^^^^
|
|
588 **[1] edgeR: a Bioconductor package for differential expression analysis of digital gene expression data.**
|
25
|
589
|
36
|
590 *Mark D. Robinson, Davis J. McCarthy and Gordon K. Smyth* - Bioinformatics (2010) 26 (1): 139-140.
|
|
591
|
|
592 - http://www.bioconductor.org/packages/2.12/bioc/html/edgeR.html
|
|
593 - http://dx.doi.org/10.1093/bioinformatics/btp616
|
|
594 - http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
|
25
|
595
|
36
|
596 Test-data (MCF7)
|
|
597 ^^^^^^^^^^^^^^^^
|
|
598 **[2] RNA-seq differential expression studies: more sequence or more replication?**
|
|
599
|
|
600 *Yuwen Liu, Jie Zhou and Kevin P. White* - Bioinformatics (2014) 30 (3): 301-304.
|
|
601
|
|
602 - http://www.ncbi.nlm.nih.gov/pubmed/24319002
|
|
603 - http://dx.doi.org/10.1093/bioinformatics/btt688
|
|
604
|
|
605 Contact
|
|
606 -------
|
79
|
607
|
|
608 The tool wrapper has been written by Youri Hoogstrate from the Erasmus
|
|
609 Medical Center (Rotterdam, Netherlands) on behalf of the Translational
|
|
610 Research IT (TraIT) project:
|
83
|
611
|
25
|
612 http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch
|
|
613
|
79
|
614 More tools by the Translational Research IT (TraIT) project can be found
|
|
615 in the following toolsheds:
|
83
|
616
|
|
617 http://toolshed.dtls.nl/
|
|
618
|
|
619 http://toolshed.g2.bx.psu.edu
|
|
620
|
|
621 http://testtoolshed.g2.bx.psu.edu/
|
79
|
622
|
36
|
623 I would like to thank Hina Riaz - Naz Khan for her helpful contribution.
|
25
|
624 </help>
|
|
625 </tool>
|