comparison edgeR_Differential_Gene_Expression.xml @ 111:b9abc9d3b429 draft

planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit 097f2974042345a231e431299a031f236f3eb689
author yhoogstrate
date Tue, 01 Sep 2015 07:27:00 -0400
parents 3112b2926266
children 5d7efbca6c75
comparison
equal deleted inserted replaced
110:3112b2926266 111:b9abc9d3b429
216 dge <- estimateGLMTrendedDisp(dge,design) 216 dge <- estimateGLMTrendedDisp(dge,design)
217 write("Estimating tagwise dispersion...",stdout()) 217 write("Estimating tagwise dispersion...",stdout())
218 dge <- estimateGLMTagwiseDisp(dge,design) 218 dge <- estimateGLMTagwiseDisp(dge,design)
219 219
220 220
221 if(output_MDSplot_logFC != "/dev/null") { 221 # hierarchical clustering makes use of the distance of the MDS
222 write("Creating MDS plot (logFC method)",stdout()) 222 if(output_MDSplot_logFC != "/dev/null" || output_hierarchical_clustering_plot != "/dev/null") {
223 points <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of inflexible plot 223 write("Calculating MDS plot (logFC method)",stdout())
224 mds_distance_logFC <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of inflexible plot
224 dev.off()# Kill it 225 dev.off()# Kill it
225 226
226 if(output_format_images == "pdf") { 227 if(output_MDSplot_logFC != "/dev/null") {
227 pdf(output_MDSplot_logFC,height=14,width=14) 228 write("Creating MDS plot (logFC method)",stdout())
228 } else if(output_format_images == "svg") { 229 if(output_format_images == "pdf") {
229 svg(output_MDSplot_logFC,height=14,width=14) 230 pdf(output_MDSplot_logFC,height=14,width=14)
230 } else { 231 } else if(output_format_images == "svg") {
231 ## png(output_MDSplot_logFC) 232 svg(output_MDSplot_logFC,height=14,width=14)
232 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 233 } else {
233 234 ## png(output_MDSplot_logFC)
234 bitmap(output_MDSplot_logFC,type="png16m",height=14,width=14) 235 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
235 } 236
236 237 bitmap(output_MDSplot_logFC,type="png16m",height=7*3,width=7*3)
237 238 }
238 diff_x <- abs(max(points\$x)-min(points\$x)) 239
239 diff_y <-(max(points\$y)-min(points\$y)) 240 diff_x <- abs(max(mds_distance_logFC\$x)-min(mds_distance_logFC\$x))
240 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") 241 diff_y <-(max(mds_distance_logFC\$y)-min(mds_distance_logFC\$y))
241 points(points\$x,points\$y,pch=20) 242 plot(c(min(mds_distance_logFC\$x),max(mds_distance_logFC\$x) + 0.45 * diff_x), c(min(mds_distance_logFC\$y) - 0.05 * diff_y,max(mds_distance_logFC\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2")
242 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) 243 mds_distance_logFC(mds_distance_logFC\$x,mds_distance_logFC\$y,pch=20)
243 rm(diff_x,diff_y) 244 text(mds_distance_logFC\$x, mds_distance_logFC\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
244 245 rm(diff_x,diff_y)
245 dev.off() 246
246 } 247 dev.off()
248 }
249 }
250
247 251
248 if(output_MDSplot_bcv != "/dev/null") { 252 if(output_MDSplot_bcv != "/dev/null") {
249 write("Creating MDS plot (bcv method)",stdout()) 253 write("Creating MDS plot (bcv method)",stdout())
250 254
251 ## 1. First create a virtual plot to obtain the desired coordinates 255 ## 1. First create a virtual plot to obtain the desired coordinates
252 pdf("bcvmds.pdf") 256 pdf("bcvmds.pdf")
253 points <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) 257 mds_distance_BCV <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples)))
254 dev.off()# Kill it 258 dev.off()# Kill it
255 259
256 ## 2. Re-plot the coordinates in a new figure with the size and settings. 260 ## 2. Re-plot the coordinates in a new figure with the size and settings.
257 if(output_format_images == "pdf") { 261 if(output_format_images == "pdf") {
258 pdf(output_MDSplot_bcv,height=14,width=14) 262 pdf(output_MDSplot_bcv,height=14,width=14)
260 svg(output_MDSplot_bcv,height=14,width=14) 264 svg(output_MDSplot_bcv,height=14,width=14)
261 } else { 265 } else {
262 ## png(output_MDSplot_bcv) 266 ## png(output_MDSplot_bcv)
263 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 267 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
264 268
265 bitmap(output_MDSplot_bcv,type="png16m",height=14,width=14) 269 bitmap(output_MDSplot_bcv,type="png16m",height=7*3,width=7*3)
266 } 270 }
267 271
268 diff_x <- abs(max(points\$x)-min(points\$x)) 272 diff_x <- abs(max(mds_distance_BCV\$x)-min(mds_distance_BCV\$x))
269 diff_y <- (max(points\$y)-min(points\$y)) 273 diff_y <- (max(mds_distance_BCV\$y)-min(mds_distance_BCV\$y))
270 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") 274 plot(c(min(mds_distance_BCV\$x),max(mds_distance_BCV\$x) + 0.45 * diff_x), c(min(mds_distance_BCV\$y) - 0.05 * diff_y,max(mds_distance_BCV\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2")
271 points(points\$x,points\$y,pch=20) 275 mds_distance_BCV(mds_distance_BCV\$x,mds_distance_BCV\$y,pch=20)
272 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) 276 text(mds_distance_BCV\$x, mds_distance_BCV\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
273 rm(diff_x,diff_y) 277 rm(diff_x,diff_y)
274 278
275 dev.off() 279 dev.off()
276 } 280 }
277 281
285 svg(output_BCVplot) 289 svg(output_BCVplot)
286 } else { 290 } else {
287 ## png(output_BCVplot) 291 ## png(output_BCVplot)
288 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 292 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
289 293
290 bitmap(output_BCVplot,type="png16m") 294 bitmap(output_BCVplot,type="png16m",width=10.5*3,height=7*3)
291 } 295 }
292 296
293 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance") 297 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance")
294 dev.off() 298 dev.off()
295 } 299 }
301 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout()) 305 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout())
302 cont <- c(contrast) 306 cont <- c(contrast)
303 cont <- makeContrasts(contrasts=cont, levels=design) 307 cont <- makeContrasts(contrasts=cont, levels=design)
304 308
305 lrt <- glmLRT(fit, contrast=cont[,1]) 309 lrt <- glmLRT(fit, contrast=cont[,1])
306 write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout()) 310 write(paste("Exporting DGE results to file...",output_count_edgeR,sep=""),stdout())
307 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA) 311 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
308 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA) 312 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA)
309 313
310 ## todo EXPORT FPKM 314 ## todo EXPORT FPKM
311 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA) 315 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA)
323 svg(output_MAplot) 327 svg(output_MAplot)
324 } else { 328 } else {
325 ## png(output_MAplot) 329 ## png(output_MAplot)
326 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 330 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
327 331
328 bitmap(output_MAplot,type="png16m") 332 bitmap(output_MAplot,type="png16m",width=10.5*3,height=7*3)
329 } 333 }
330 334
331 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) 335 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance"))
332 with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red")) 336 with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red"))
333 abline(h=c(-1,1), col="blue") 337 abline(h=c(-1,1), col="blue")
343 svg(output_PValue_distribution_plot,width=14,height=14) 347 svg(output_PValue_distribution_plot,width=14,height=14)
344 } else { 348 } else {
345 ## png(output_PValue_distribution_plot) 349 ## png(output_PValue_distribution_plot)
346 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 350 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
347 351
348 bitmap(output_PValue_distribution_plot,type="png16m",width=14,height=14) 352 bitmap(output_PValue_distribution_plot,type="png16m",width=7*3,height=7*3)
349 } 353 }
350 354
351 expressed_genes <- subset(etable, PValue < 0.99) 355 expressed_genes <- subset(etable, PValue < 0.99)
352 h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)") 356 h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)")
353 center <- sum(h\$counts) / length(h\$counts) 357 center <- sum(h\$counts) / length(h\$counts)
372 svg(output_heatmap_plot,width=10.5) 376 svg(output_heatmap_plot,width=10.5)
373 } else { 377 } else {
374 ## png(output_heatmap_plot) 378 ## png(output_heatmap_plot)
375 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 379 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
376 380
377 bitmap(output_heatmap_plot,type="png16m",width=10.5) 381 bitmap(output_heatmap_plot,type="png16m",width=10.5*3,height=7*3)
378 } 382 }
379 383
380 etable2 <- topTags(lrt, n=100)\$table 384 etable2 <- topTags(lrt, n=100)\$table
381 order <- rownames(etable2) 385 order <- rownames(etable2)
382 cpm_sub <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),] 386 cpm_sub <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),]
383 heatmap(t(cpm_sub)) 387 heatmap(t(cpm_sub))
384 dev.off() 388 dev.off()
385 } 389 }
386 390
387 ##output_hierarchical_clustering_plot = args[13] 391 if(output_hierarchical_clustering_plot != "/dev/null") {
392 if(output_hierarchical_clustering_plot == "pdf") {
393 pdf(output_hierarchical_clustering_plot,width=10.5)
394 } else if(output_hierarchical_clustering_plot == "svg") {
395 svg(output_hierarchical_clustering_plot,width=10.5)
396 } else {
397 ## png(output_hierarchical_clustering_plot)
398 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
399
400 bitmap(output_hierarchical_clustering_plot,type="png16m",width=10.5*3,height=7*3)
401 }
402
403 mds_distance = as.dist(mds_distance_logFC\$distance.matrix)
404 clustering = hclust(mds_distance)
405 plot(clustering,main=paste("Cluster Dendogram on the ",mds_distance_logFC\$top," TopTags",sep="",sub="\ncomplete linkage on logFC MDS distance"))
406
407 dev.off()
408 }
388 409
389 if(output_RData_obj != "/dev/null") { 410 if(output_RData_obj != "/dev/null") {
390 save.image(output_RData_obj) 411 save.image(output_RData_obj)
391 } 412 }
392 413
409 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option> 430 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option>
410 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option> 431 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option>
411 <option value="make_output_BCVplot">BCV-plot</option> 432 <option value="make_output_BCVplot">BCV-plot</option>
412 <option value="make_output_MAplot">MA-plot</option> 433 <option value="make_output_MAplot">MA-plot</option>
413 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option> 434 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option>
414 <option value="make_output_hierarchical_clustering_plot">Hierarchical clustering (under construction)</option> 435 <option value="make_output_hierarchical_clustering_plot">Hierarchical clustering</option>
415 <option value="make_output_heatmap_plot">Heatmap</option> 436 <option value="make_output_heatmap_plot">Heatmap</option>
416 437
417 <option value="make_output_R_stdout">R stdout</option> 438 <option value="make_output_R_stdout">R stdout</option>
418 <option value="make_output_RData_obj">R Data object</option> 439 <option value="make_output_RData_obj">R Data object</option>
419 </param> 440 </param>