comparison rgedgeRpaired_nocamera.xml @ 111:9f2e0ec3e826 draft

Uploaded
author fubar
date Wed, 22 Oct 2014 23:37:31 -0400
parents d7e2a0c0cce9
children badcd3b0e708
comparison
equal deleted inserted replaced
110:d7e2a0c0cce9 111:9f2e0ec3e826
1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.25"> 1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.26">
2 <description>models using BioConductor packages</description> 2 <description>models using BioConductor packages</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="2.14">biocbasics</requirement> 4 <requirement type="package" version="2.14">biocbasics</requirement>
5 <requirement type="package" version="3.0.3">R</requirement> 5 <requirement type="package" version="3.1.1">R_3_1_1</requirement>
6 <requirement type="package" version="1.3.18">graphicsmagick</requirement> 6 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
7 <requirement type="package" version="9.10">ghostscript</requirement> 7 <requirement type="package" version="9.10">ghostscript</requirement>
8 </requirements> 8 </requirements>
9 9
10 <command interpreter="python"> 10 <command interpreter="python">
191 # 1 - Output Dir 191 # 1 - Output Dir
192 192
193 # Original edgeR code by: S.Lunke and A.Kaspi 193 # Original edgeR code by: S.Lunke and A.Kaspi
194 reallybig = log10(.Machine\$double.xmax) 194 reallybig = log10(.Machine\$double.xmax)
195 reallysmall = log10(.Machine\$double.xmin) 195 reallysmall = log10(.Machine\$double.xmin)
196 library('stringr') 196 library("stringr")
197 library('gplots') 197 library("gplots")
198 library('edgeR') 198 library("edgeR")
199 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here') 199 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')
200 { 200 {
201 # Perform clustering for significant pvalues after controlling FWER 201 # Perform clustering for significant pvalues after controlling FWER
202 samples = colnames(cmat) 202 samples = colnames(cmat)
203 gu = unique(group) 203 gu = unique(group)
261 lines(e,e,col="red") 261 lines(e,e,col="red")
262 grid(col = "lightgray", lty = "dotted") 262 grid(col = "lightgray", lty = "dotted")
263 dev.off() 263 dev.off()
264 } 264 }
265 265
266 smearPlot = function(DGEList,deTags, outSmear, outMain) 266 smearPlot = function(myDGEList,deTags, outSmear, outMain)
267 { 267 {
268 pdf(outSmear) 268 pdf(outSmear)
269 plotSmear(DGEList,de.tags=deTags,main=outMain) 269 plotSmear(myDGEList,de.tags=deTags,main=outMain)
270 grid(col="lightgray", lty="dotted") 270 grid(col="lightgray", lty="dotted")
271 dev.off() 271 dev.off()
272 } 272 }
273 273
274 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname) 274 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
511 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F) 511 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
512 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:')) 512 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
513 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) 513 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
514 sink() 514 sink()
515 } 515 }
516 516
517 517
518 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5, 518 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_Voom=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
519 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F, 519 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
520 filterquantile=0.2, subjects=c(),mydesign=NULL, 520 filterquantile=0.2, subjects=c(),mydesign=NULL,
521 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19', 521 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
522 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", 522 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
523 doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary') 523 doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary')
524 { 524 {
525
526
527 run_edgeR = function(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR)
528 {
529 sink('edgeR.log')
530 #### Setup myDGEList object
531 myDGEList = DGEList(counts=workCM, group = group)
532 myDGEList = calcNormFactors(myDGEList)
533 if (robust_meth == 'ordinary') {
534 myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
535 myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign)
536 if (priordf > 0) { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign,prior.df = priordf)
537 } else { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) }
538 comdisp = myDGEList\$common.dispersion
539 estpriorn = getPriorN(myDGEList)
540 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
541 } else {
542 myDGEList = estimateGLMRobustDisp(myDGEList,design=mydesign, prior.df = priordf, maxit = 6, residual.type = robust_meth)
543 }
544
545
546 DGLM = glmFit(myDGEList,design=mydesign)
547 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
548 efflib = myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors
549 normData = (1e+06*myDGEList\$counts/efflib)
550 uoutput = cbind(
551 Name=as.character(rownames(myDGEList\$counts)),
552 DE\$table,
553 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
554 Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
555 myDGEList\$counts
556 )
557 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
558 goodness = gof(DGLM, pcutoff=fdrthresh)
559 if (sum(goodness\$outlier) > 0) {
560 print.noquote('GLM outliers:')
561 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
562 } else {
563 print('No GLM fit outlier genes found\n')
564 }
565 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
566 pdf(paste("edgeR",mt,"GoodnessofFit.pdf",sep='_'))
567 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
568 abline(0,1,lwd=3)
569 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
570 dev.off()
571 uniqueg = unique(group)
572 #### Plot MDS
573 sample_colors = match(group,levels(group))
574 sampleTypes = levels(factor(group))
575 print.noquote(sampleTypes)
576 pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_'))
577 plotMDS.DGEList(myDGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
578 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
579 grid(col="blue")
580 dev.off()
581 colnames(normData) = paste( colnames(normData),'N',sep="_")
582 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
583 nzd = data.frame(log(nonzerod + 1e-2,10))
584 try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_') ))
585 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
586 tt = cbind(
587 Name=as.character(rownames(myDGEList)),
588 DE\$table,
589 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
590 Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums
591 )
592 print.noquote("# edgeR Top tags\n")
593 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
594 tt = tt[order(DE\$table\$PValue),]
595 print.noquote(tt[1:50,])
596 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
597 nsig = length(deTags)
598 print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
599 deColours = ifelse(deTags,'red','black')
600 pdf(paste("edgeR",mt,"BCV_vs_abundance.pdf",sep="_"))
601 plotBCV(myDGEList, cex=0.3, main="Biological CV vs abundance")
602 dev.off()
603 dg = myDGEList[order(DE\$table\$PValue),]
604 #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
605 outpdfname= paste("edgeR",mt,"top_100_heatmap.pdf",sep="_")
606 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste(myTitle,'Heatmap'))
607 outSmear = paste("edgeR",mt,"smearplot.pdf",sep="_")
608 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
609 smearPlot(myDGEList=myDGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
610 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf=paste('edgeR',mt,'qqplot.pdf',sep='_'))
611 norm.factor = myDGEList\$samples\$norm.factors
612 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
613 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
614 edgeRcounts = rep(0, length(allgenes))
615 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits
616 sink()
617 return(list(myDGEList=myDGEList,edgeRcounts=edgeRcounts))
618 } ### run_edgeR
619
620
621 run_DESeq2 = function(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType)
622
623 {
624 sink("DESeq2.log")
625 # DESeq2
626 require('DESeq2')
627 library('RColorBrewer')
628 if (length(subjects) == 0)
629 {
630 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
631 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx))
632 } else {
633 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
634 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx))
635 }
636 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
637 #rDESeq = results(DESeq2)
638 #newCountDataSet(workCM, group)
639 deSeqDatsizefac = estimateSizeFactors(deSEQds)
640 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
641 resDESeq = nbinomWaldTest(deSeqDatdisp)
642 rDESeq = as.data.frame(results(resDESeq))
643 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
644 srDESeq = rDESeq[order(rDESeq\$pvalue),]
645 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf=paste('DESeq2',mt,'qqplot.pdf',sep="_"))
646 cat("# DESeq top 50\n")
647 print.noquote(srDESeq[1:50,])
648 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
649 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
650 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
651 DESeqcounts = rep(0, length(allgenes))
652 DESeqcounts[DESeqcountsindex] = 1
653 pdf(paste("DESeq2",mt,"dispersion_estimates.pdf",sep='_'))
654 plotDispEsts(resDESeq)
655 dev.off()
656 ysmall = abs(min(rDESeq\$log2FoldChange))
657 ybig = abs(max(rDESeq\$log2FoldChange))
658 ylimit = min(4,ysmall,ybig)
659 pdf(paste("DESeq2",mt,"MA_plot.pdf",sep="_"))
660 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
661 dev.off()
662 rlogres = rlogTransformation(resDESeq)
663 sampledists = dist( t( assay(rlogres) ) )
664 sdmat = as.matrix(sampledists)
665 pdf(paste("DESeq2",mt,"sample_distance_plot.pdf",sep="_"))
666 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
667 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
668 dev.off()
669 ###outpdfname=paste("DESeq2",mt,"top50_heatmap.pdf",sep="_")
670 ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
671 sink()
672 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
673 if ("try-error" %in% class(result)) {
674 print.noquote('DESeq2 plotPCA failed.')
675 } else {
676 pdf(paste("DESeq2",mt,"PCA_plot.pdf",sep="_"))
677 #### wtf - print? Seems needed to get this to work
678 print(ppca)
679 dev.off()
680 }
681 return(DESeqcounts)
682 }
683
684
685 run_Voom = function(workCM,pdata,subjects,group,mydesign,mt,out_Voom)
686 {
687 sink('VOOM.log')
688 if (doedgeR == F) {
689 #### Setup myDGEList object
690 myDGEList = DGEList(counts=workCM, group = group)
691 myDGEList = calcNormFactors(myDGEList)
692 myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
693 myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign)
694 myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign)
695 }
696 pdf(paste("VOOM",mt,"mean_variance_plot.pdf",sep='_'))
697 dat.voomed <- voom(myDGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL)
698 dev.off()
699 # Use limma to fit data
700 fit = lmFit(dat.voomed, mydesign)
701 fit = eBayes(fit)
702 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
703 qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf=paste('VOOM',mt,'qqplot.pdf',sep='_'))
704 rownames(rvoom) = rownames(workCM)
705 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
706 srvoom = rvoom[order(rvoom\$P.Value),]
707 cat("# VOOM top 50\n")
708 print(srvoom[1:50,])
709 write.table(srvoom,file=out_Voom, quote=FALSE, sep="\t",row.names=F)
710 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
711 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
712 voomcountsindex <- which(allgenes %in% rownames(topresults.voom))
713 voomcounts = rep(0, length(allgenes))
714 voomcounts[voomcountsindex] = 1
715 sink()
716 return(voomcounts)
717 }
718
719
720 #### data cleaning and analsis control starts here
721
525 # Error handling 722 # Error handling
526 if (length(unique(group))!=2){ 723 nugroup = length(unique(group))
724 if (nugroup!=2){
527 print("Number of conditions identified in experiment does not equal 2") 725 print("Number of conditions identified in experiment does not equal 2")
528 q() 726 q()
529 } 727 }
530 require(edgeR) 728 require(edgeR)
531 options(width = 512) 729 options(width = 512)
532 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ") 730 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
533 allN = nrow(Count_Matrix) 731 allN = nrow(Count_Matrix)
534 nscut = round(ncol(Count_Matrix)/2) 732 nscut = round(ncol(Count_Matrix)/2) # half samples
535 colTotmillionreads = colSums(Count_Matrix)/1e6 733 colTotmillionreads = colSums(Count_Matrix)/1e6
536 counts.dataframe = as.data.frame(c()) 734 counts.dataframe = as.data.frame(c())
537 rawrs = rowSums(Count_Matrix) 735 rawrs = rowSums(Count_Matrix)
538 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes 736 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
539 nzN = nrow(nonzerod) 737 nzN = nrow(nonzerod)
561 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F) 759 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
562 maint = paste('Filter below',filterquantile,'quantile') 760 maint = paste('Filter below',filterquantile,'quantile')
563 } 761 }
564 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle) 762 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
565 allgenes = rownames(workCM) 763 allgenes = rownames(workCM)
566 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" 764 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" # ucsc chr:start-end regexp
567 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/" 765 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
568 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='') 766 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
569 testreg = str_match(allgenes,reg) 767 testreg = str_match(allgenes,reg)
570 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string 768 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string
571 { 769 {
573 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>") 771 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>")
574 } else { 772 } else {
575 print("@@ using genecards substitution for urls") 773 print("@@ using genecards substitution for urls")
576 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>") 774 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
577 } 775 }
578 print.noquote("# urls") 776 print.noquote("# urls sample")
579 print.noquote(head(contigurls)) 777 print.noquote(head(contigurls))
580 print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F) 778 print(paste("# Total low count contigs per sample = ",table(lo)),quote=F)
581 cmrowsums = rowSums(workCM) 779 cmrowsums = rowSums(workCM)
582 TName=unique(group)[1] 780 TName=unique(group)[1]
583 CName=unique(group)[2] 781 CName=unique(group)[2]
584 if (is.null(mydesign)) { 782 if (is.null(mydesign)) {
585 if (length(subjects) == 0) 783 if (length(subjects) == 0)
593 } 791 }
594 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=','))) 792 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
595 print.noquote('Using design matrix:') 793 print.noquote('Using design matrix:')
596 print.noquote(mydesign) 794 print.noquote(mydesign)
597 if (doedgeR == T) { 795 if (doedgeR == T) {
598 sink('edgeR.log') 796 eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR)
599 #### Setup DGEList object 797 myDGEList = eres\$myDGEList
600 DGEList = DGEList(counts=workCM, group = group) 798 edgeRcounts = eres\$edgeRcounts
601 DGEList = calcNormFactors(DGEList) 799 }
602 if (robust_meth == 'ordinary') { 800 if (doDESeq2 == T) { DESeqcounts = run_DESeq2(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) }
603 DGEList = estimateGLMCommonDisp(DGEList,mydesign) 801 if (doVoom == T) { voomcounts = run_Voom(workCM,pdata,subjects,group,mydesign,mt,out_Voom) }
604 DGEList = estimateGLMTrendedDisp(DGEList,mydesign) 802
605 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
606
607 comdisp = DGEList\$common.dispersion
608 estpriorn = getPriorN(DGEList)
609 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
610 } else {
611 DGEList = estimateGLMRobustDisp(DGEList,design=mydesign, prior.df = edgeR_priordf, maxit = 6, residual.type = robust_meth)
612 }
613
614
615 DGLM = glmFit(DGEList,design=mydesign)
616 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
617 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
618 normData = (1e+06*DGEList\$counts/efflib)
619 uoutput = cbind(
620 Name=as.character(rownames(DGEList\$counts)),
621 DE\$table,
622 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
623 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
624 DGEList\$counts
625 )
626 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
627 goodness = gof(DGLM, pcutoff=fdrthresh)
628 if (sum(goodness\$outlier) > 0) {
629 print.noquote('GLM outliers:')
630 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
631 } else {
632 print('No GLM fit outlier genes found\n')
633 }
634 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
635 pdf("edgeR_GoodnessofFit.pdf")
636 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
637 abline(0,1,lwd=3)
638 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
639 dev.off()
640 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
641 normData = (1e+06*DGEList\$counts/efflib)
642 uniqueg = unique(group)
643 #### Plot MDS
644 sample_colors = match(group,levels(group))
645 sampleTypes = levels(factor(group))
646 print.noquote(sampleTypes)
647 pdf("edgeR_MDSplot.pdf")
648 plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
649 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
650 grid(col="blue")
651 dev.off()
652 colnames(normData) = paste( colnames(normData),'N',sep="_")
653 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
654 nzd = data.frame(log(nonzerod + 1e-2,10))
655 try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") )
656 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
657 tt = cbind(
658 Name=as.character(rownames(DGEList\$counts)),
659 DE\$table,
660 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
661 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
662 )
663 print.noquote("# edgeR Top tags\n")
664 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
665 tt = tt[order(DE\$table\$PValue),]
666 print.noquote(tt[1:50,])
667 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
668 nsig = length(deTags)
669 print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
670 deColours = ifelse(deTags,'red','black')
671 pdf("edgeR_BCV_vs_abundance.pdf")
672 plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance")
673 dev.off()
674 dg = DGEList[order(DE\$table\$PValue),]
675 #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
676 efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
677 normData = (1e+06*dg\$counts/efflib)
678 outpdfname="edgeR_top_100_heatmap.pdf"
679 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle))
680 outSmear = "edgeR_smearplot.pdf"
681 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
682 smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
683 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
684 norm.factor = DGEList\$samples\$norm.factors
685 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
686 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
687 edgeRcounts = rep(0, length(allgenes))
688 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits
689 sink()
690 } ### doedgeR
691 if (doDESeq2 == T)
692 {
693 sink("DESeq2.log")
694 # DESeq2
695 require('DESeq2')
696 library('RColorBrewer')
697 if (length(subjects) == 0)
698 {
699 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
700 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx))
701 } else {
702 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
703 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx))
704 }
705 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
706 #rDESeq = results(DESeq2)
707 #newCountDataSet(workCM, group)
708 deSeqDatsizefac = estimateSizeFactors(deSEQds)
709 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
710 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
711 rDESeq = as.data.frame(results(resDESeq))
712 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
713 srDESeq = rDESeq[order(rDESeq\$pvalue),]
714 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
715 cat("# DESeq top 50\n")
716 print.noquote(srDESeq[1:50,])
717 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
718 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
719 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
720 DESeqcounts = rep(0, length(allgenes))
721 DESeqcounts[DESeqcountsindex] = 1
722 pdf("DESeq2_dispersion_estimates.pdf")
723 plotDispEsts(resDESeq)
724 dev.off()
725 ysmall = abs(min(rDESeq\$log2FoldChange))
726 ybig = abs(max(rDESeq\$log2FoldChange))
727 ylimit = min(4,ysmall,ybig)
728 pdf("DESeq2_MA_plot.pdf")
729 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
730 dev.off()
731 rlogres = rlogTransformation(resDESeq)
732 sampledists = dist( t( assay(rlogres) ) )
733 sdmat = as.matrix(sampledists)
734 pdf("DESeq2_sample_distance_plot.pdf")
735 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
736 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
737 dev.off()
738 ###outpdfname="DESeq2_top50_heatmap.pdf"
739 ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
740 sink()
741 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
742 if ("try-error" %in% class(result)) {
743 print.noquote('DESeq2 plotPCA failed.')
744 } else {
745 pdf("DESeq2_PCA_plot.pdf")
746 #### wtf - print? Seems needed to get this to work
747 print(ppca)
748 dev.off()
749 }
750 }
751
752 if (doVoom == T) {
753 sink('VOOM.log')
754 if (doedgeR == F) {
755 #### Setup DGEList object
756 DGEList = DGEList(counts=workCM, group = group)
757 DGEList = estimateGLMCommonDisp(DGEList,mydesign)
758 DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
759 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
760 }
761 calcNormFactors(DGEList)
762 ls = colSums(DGEList\$counts) * DGEList\$samples\$norm.factors
763 pdf("VOOM_mean_variance_plot.pdf")
764 #dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = ls)
765 dat.voomed <- voom(DGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL)
766 dev.off()
767 # Use limma to fit data
768 fit = lmFit(dat.voomed, mydesign)
769 fit = eBayes(fit)
770 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
771 qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf')
772 rownames(rvoom) = rownames(workCM)
773 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
774 srvoom = rvoom[order(rvoom\$P.Value),]
775 cat("# VOOM top 50\n")
776 print(srvoom[1:50,])
777 write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F)
778 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
779 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
780 voomcountsindex <- which(allgenes %in% rownames(topresults.voom))
781 voomcounts = rep(0, length(allgenes))
782 voomcounts[voomcountsindex] = 1
783 sink()
784 }
785 803
786 if (doCamera) { 804 if (doCamera) {
787 doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle, 805 doGSEA(y=myDGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
788 outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype) 806 outfname=paste("GSEA_Camera",mt,"table.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
789 } 807 }
790 counts.dataframe = c() 808 counts.dataframe = c()
791 vennmain = 'no venn' 809 vennmain = 'no venn'
792 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) { 810 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
793 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) { 811 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
802 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes) 820 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
803 } 821 }
804 822
805 if (nrow(counts.dataframe > 1)) { 823 if (nrow(counts.dataframe > 1)) {
806 counts.venn = vennCounts(counts.dataframe) 824 counts.venn = vennCounts(counts.dataframe)
807 vennf = "Venn_significant_genes_overlap.pdf" 825 vennf = paste("Venn",mt,"significant_genes_overlap.pdf",sep="_")
808 pdf(vennf) 826 pdf(vennf)
809 vennDiagram(counts.venn,main=vennmain,col="maroon") 827 vennDiagram(counts.venn,main=vennmain,col="maroon")
810 dev.off() 828 dev.off()
811 } 829 }
812 } #### doDESeq2 or doVoom 830 } #### doDESeq2 or doVoom
818 builtin_gmt = "" 836 builtin_gmt = ""
819 history_gmt = "" 837 history_gmt = ""
820 history_gmt_name = "" 838 history_gmt_name = ""
821 out_edgeR = F 839 out_edgeR = F
822 out_DESeq2 = F 840 out_DESeq2 = F
823 out_VOOM = "$out_VOOM" 841 out_Voom = "$out_VOOM"
824 edgeR_robust_meth = "ordinary" # control robust deviance options 842 edgeR_robust_meth = "ordinary" # control robust deviance options
825 doDESeq2 = $DESeq2.doDESeq2 843 doDESeq2 = $DESeq2.doDESeq2
826 doVoom = $doVoom 844 doVoom = $doVoom
827 doCamera = F 845 doCamera = F
828 doedgeR = $edgeR.doedgeR 846 doedgeR = $edgeR.doedgeR
829 edgeR_priordf = 10 847 edgeR_priordf = 10
830 848
831 849
832 #if $doVoom == "T": 850 #if $doVoom == "T":
833 out_VOOM = "$out_VOOM" 851 out_Voom = "$out_VOOM"
834 #end if 852 #end if
835 853
836 #if $DESeq2.doDESeq2 == "T": 854 #if $DESeq2.doDESeq2 == "T":
837 out_DESeq2 = "$out_DESeq2" 855 out_DESeq2 = "$out_DESeq2"
838 doDESeq2 = T 856 doDESeq2 = T
894 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first 912 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
895 Count_Matrix = Count_Matrix[subset(rn,! islib),] 913 Count_Matrix = Count_Matrix[subset(rn,! islib),]
896 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor 914 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor
897 group = factor(group, levels=c(ControlName,TreatmentName)) 915 group = factor(group, levels=c(ControlName,TreatmentName))
898 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns 916 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns
899 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2, 917 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_Voom=out_Voom, out_DESeq2=out_DESeq2,
900 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', 918 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
901 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, 919 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,
902 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org, 920 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
903 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robust_meth=edgeR_robust_meth) 921 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robust_meth=edgeR_robust_meth)
904 sessionInfo() 922 sessionInfo()