Mercurial > repos > yhoogstrate > edger_with_design_matrix
comparison edgeR_Differential_Gene_Expression.xml @ 111:b9abc9d3b429 draft
planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit 097f2974042345a231e431299a031f236f3eb689
author | yhoogstrate |
---|---|
date | Tue, 01 Sep 2015 07:27:00 -0400 |
parents | 3112b2926266 |
children | 5d7efbca6c75 |
comparison
equal
deleted
inserted
replaced
110:3112b2926266 | 111:b9abc9d3b429 |
---|---|
216 dge <- estimateGLMTrendedDisp(dge,design) | 216 dge <- estimateGLMTrendedDisp(dge,design) |
217 write("Estimating tagwise dispersion...",stdout()) | 217 write("Estimating tagwise dispersion...",stdout()) |
218 dge <- estimateGLMTagwiseDisp(dge,design) | 218 dge <- estimateGLMTagwiseDisp(dge,design) |
219 | 219 |
220 | 220 |
221 if(output_MDSplot_logFC != "/dev/null") { | 221 # hierarchical clustering makes use of the distance of the MDS |
222 write("Creating MDS plot (logFC method)",stdout()) | 222 if(output_MDSplot_logFC != "/dev/null" || output_hierarchical_clustering_plot != "/dev/null") { |
223 points <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot | 223 write("Calculating MDS plot (logFC method)",stdout()) |
| | 224 mds_distance_logFC <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot |
224 dev.off()# Kill it | 225 dev.off()# Kill it |
225 | 226 |
226 if(output_format_images == "pdf") { | 227 if(output_MDSplot_logFC != "/dev/null") { |
227 pdf(output_MDSplot_logFC,height=14,width=14) | 228 write("Creating MDS plot (logFC method)",stdout()) |
228 } else if(output_format_images == "svg") { | 229 if(output_format_images == "pdf") { |
229 svg(output_MDSplot_logFC,height=14,width=14) | 230 pdf(output_MDSplot_logFC,height=14,width=14) |
230 } else { | 231 } else if(output_format_images == "svg") { |
231 ## png(output_MDSplot_logFC) | 232 svg(output_MDSplot_logFC,height=14,width=14) |
232 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 233 } else { |
233 | 234 ## png(output_MDSplot_logFC) |
234 bitmap(output_MDSplot_logFC,type="png16m",height=14,width=14) | 235 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
235 } | 236 |
236 | 237 bitmap(output_MDSplot_logFC,type="png16m",height=7*3,width=7*3) |
237 | 238 } |
238 diff_x <- abs(max(points\$x)-min(points\$x)) | 239 |
239 diff_y <-(max(points\$y)-min(points\$y)) | 240 diff_x <- abs(max(mds_distance_logFC\$x)-min(mds_distance_logFC\$x)) |
240 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") | 241 diff_y <-(max(mds_distance_logFC\$y)-min(mds_distance_logFC\$y)) |
241 points(points\$x,points\$y,pch=20) | 242 plot(c(min(mds_distance_logFC\$x),max(mds_distance_logFC\$x) + 0.45 * diff_x), c(min(mds_distance_logFC\$y) - 0.05 * diff_y,max(mds_distance_logFC\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") |
242 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) | 243 mds_distance_logFC(mds_distance_logFC\$x,mds_distance_logFC\$y,pch=20) |
243 rm(diff_x,diff_y) | 244 text(mds_distance_logFC\$x, mds_distance_logFC\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) |
244 | 245 rm(diff_x,diff_y) |
245 dev.off() | 246 |
246 } | 247 dev.off() |
| | 248 } |
| | 249 } |
| | 250 |
247 | 251 |
248 if(output_MDSplot_bcv != "/dev/null") { | 252 if(output_MDSplot_bcv != "/dev/null") { |
249 write("Creating MDS plot (bcv method)",stdout()) | 253 write("Creating MDS plot (bcv method)",stdout()) |
250 | 254 |
251 ## 1. First create a virtual plot to obtain the desired coordinates | 255 ## 1. First create a virtual plot to obtain the desired coordinates |
252 pdf("bcvmds.pdf") | 256 pdf("bcvmds.pdf") |
253 points <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) | 257 mds_distance_BCV <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) |
254 dev.off()# Kill it | 258 dev.off()# Kill it |
255 | 259 |
256 ## 2. Re-plot the coordinates in a new figure with the size and settings. | 260 ## 2. Re-plot the coordinates in a new figure with the size and settings. |
257 if(output_format_images == "pdf") { | 261 if(output_format_images == "pdf") { |
258 pdf(output_MDSplot_bcv,height=14,width=14) | 262 pdf(output_MDSplot_bcv,height=14,width=14) |
260 svg(output_MDSplot_bcv,height=14,width=14) | 264 svg(output_MDSplot_bcv,height=14,width=14) |
261 } else { | 265 } else { |
262 ## png(output_MDSplot_bcv) | 266 ## png(output_MDSplot_bcv) |
263 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 267 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
264 | 268 |
265 bitmap(output_MDSplot_bcv,type="png16m",height=14,width=14) | 269 bitmap(output_MDSplot_bcv,type="png16m",height=7*3,width=7*3) |
266 } | 270 } |
267 | 271 |
268 diff_x <- abs(max(points\$x)-min(points\$x)) | 272 diff_x <- abs(max(mds_distance_BCV\$x)-min(mds_distance_BCV\$x)) |
269 diff_y <- (max(points\$y)-min(points\$y)) | 273 diff_y <- (max(mds_distance_BCV\$y)-min(mds_distance_BCV\$y)) |
270 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") | 274 plot(c(min(mds_distance_BCV\$x),max(mds_distance_BCV\$x) + 0.45 * diff_x), c(min(mds_distance_BCV\$y) - 0.05 * diff_y,max(mds_distance_BCV\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") |
271 points(points\$x,points\$y,pch=20) | 275 mds_distance_BCV(mds_distance_BCV\$x,mds_distance_BCV\$y,pch=20) |
272 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) | 276 text(mds_distance_BCV\$x, mds_distance_BCV\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) |
273 rm(diff_x,diff_y) | 277 rm(diff_x,diff_y) |
274 | 278 |
275 dev.off() | 279 dev.off() |
276 } | 280 } |
277 | 281 |
285 svg(output_BCVplot) | 289 svg(output_BCVplot) |
286 } else { | 290 } else { |
287 ## png(output_BCVplot) | 291 ## png(output_BCVplot) |
288 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 292 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
289 | 293 |
290 bitmap(output_BCVplot,type="png16m") | 294 bitmap(output_BCVplot,type="png16m",width=10.5*3,height=7*3) |
291 } | 295 } |
292 | 296 |
293 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance") | 297 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance") |
294 dev.off() | 298 dev.off() |
295 } | 299 } |
301 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout()) | 305 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout()) |
302 cont <- c(contrast) | 306 cont <- c(contrast) |
303 cont <- makeContrasts(contrasts=cont, levels=design) | 307 cont <- makeContrasts(contrasts=cont, levels=design) |
304 | 308 |
305 lrt <- glmLRT(fit, contrast=cont[,1]) | 309 lrt <- glmLRT(fit, contrast=cont[,1]) |
306 write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout()) | 310 write(paste("Exporting DGE results to file...",output_count_edgeR,sep=""),stdout()) |
307 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA) | 311 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA) |
308 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA) | 312 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA) |
309 | 313 |
310 ## todo EXPORT FPKM | 314 ## todo EXPORT FPKM |
311 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA) | 315 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA) |
323 svg(output_MAplot) | 327 svg(output_MAplot) |
324 } else { | 328 } else { |
325 ## png(output_MAplot) | 329 ## png(output_MAplot) |
326 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 330 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
327 | 331 |
328 bitmap(output_MAplot,type="png16m") | 332 bitmap(output_MAplot,type="png16m",width=10.5*3,height=7*3) |
329 } | 333 } |
330 | 334 |
331 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) | 335 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) |
332 with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red")) | 336 with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red")) |
333 abline(h=c(-1,1), col="blue") | 337 abline(h=c(-1,1), col="blue") |
343 svg(output_PValue_distribution_plot,width=14,height=14) | 347 svg(output_PValue_distribution_plot,width=14,height=14) |
344 } else { | 348 } else { |
345 ## png(output_PValue_distribution_plot) | 349 ## png(output_PValue_distribution_plot) |
346 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 350 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
347 | 351 |
348 bitmap(output_PValue_distribution_plot,type="png16m",width=14,height=14) | 352 bitmap(output_PValue_distribution_plot,type="png16m",width=7*3,height=7*3) |
349 } | 353 } |
350 | 354 |
351 expressed_genes <- subset(etable, PValue < 0.99) | 355 expressed_genes <- subset(etable, PValue < 0.99) |
352 h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)") | 356 h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)") |
353 center <- sum(h\$counts) / length(h\$counts) | 357 center <- sum(h\$counts) / length(h\$counts) |
372 svg(output_heatmap_plot,width=10.5) | 376 svg(output_heatmap_plot,width=10.5) |
373 } else { | 377 } else { |
374 ## png(output_heatmap_plot) | 378 ## png(output_heatmap_plot) |
375 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 379 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
376 | 380 |
377 bitmap(output_heatmap_plot,type="png16m",width=10.5) | 381 bitmap(output_heatmap_plot,type="png16m",width=10.5*3,height=7*3) |
378 } | 382 } |
379 | 383 |
380 etable2 <- topTags(lrt, n=100)\$table | 384 etable2 <- topTags(lrt, n=100)\$table |
381 order <- rownames(etable2) | 385 order <- rownames(etable2) |
382 cpm_sub <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),] | 386 cpm_sub <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),] |
383 heatmap(t(cpm_sub)) | 387 heatmap(t(cpm_sub)) |
384 dev.off() | 388 dev.off() |
385 } | 389 } |
386 | 390 |
387 ##output_hierarchical_clustering_plot = args[13] | 391 if(output_hierarchical_clustering_plot != "/dev/null") { |
| | 392 if(output_hierarchical_clustering_plot == "pdf") { |
| | 393 pdf(output_hierarchical_clustering_plot,width=10.5) |
| | 394 } else if(output_hierarchical_clustering_plot == "svg") { |
| | 395 svg(output_hierarchical_clustering_plot,width=10.5) |
| | 396 } else { |
| | 397 ## png(output_hierarchical_clustering_plot) |
| | 398 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
| | 399 |
| | 400 bitmap(output_hierarchical_clustering_plot,type="png16m",width=10.5*3,height=7*3) |
| | 401 } |
| | 402 |
| | 403 mds_distance = as.dist(mds_distance_logFC\$distance.matrix) |
| | 404 clustering = hclust(mds_distance) |
| | 405 plot(clustering,main=paste("Cluster Dendogram on the ",mds_distance_logFC\$top," TopTags",sep="",sub="\ncomplete linkage on logFC MDS distance")) |
| | 406 |
| | 407 dev.off() |
| | 408 } |
388 | 409 |
389 if(output_RData_obj != "/dev/null") { | 410 if(output_RData_obj != "/dev/null") { |
390 save.image(output_RData_obj) | 411 save.image(output_RData_obj) |
391 } | 412 } |
392 | 413 |
409 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option> | 430 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option> |
410 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option> | 431 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option> |
411 <option value="make_output_BCVplot">BCV-plot</option> | 432 <option value="make_output_BCVplot">BCV-plot</option> |
412 <option value="make_output_MAplot">MA-plot</option> | 433 <option value="make_output_MAplot">MA-plot</option> |
413 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option> | 434 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option> |
414 <option value="make_output_hierarchical_clustering_plot">Hierarchical custering (under contstruction)</option> | 435 <option value="make_output_hierarchical_clustering_plot">Hierarchical custering</option> |
415 <option value="make_output_heatmap_plot">Heatmap</option> | 436 <option value="make_output_heatmap_plot">Heatmap</option> |
416 | 437 |
417 <option value="make_output_R_stdout">R stdout</option> | 438 <option value="make_output_R_stdout">R stdout</option> |
418 <option value="make_output_RData_obj">R Data object</option> | 439 <option value="make_output_RData_obj">R Data object</option> |
419 </param> | 440 </param> |