Mercurial > repos > fubar > differential_count_models
comparison rgedgeRpaired_nocamera.xml @ 111:9f2e0ec3e826 draft
Uploaded
author | fubar |
---|---|
date | Wed, 22 Oct 2014 23:37:31 -0400 |
parents | d7e2a0c0cce9 |
children | badcd3b0e708 |
comparison
equal
deleted
inserted
replaced
110:d7e2a0c0cce9 | 111:9f2e0ec3e826 |
---|---|
1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.25"> | 1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.26"> |
2 <description>models using BioConductor packages</description> | 2 <description>models using BioConductor packages</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="2.14">biocbasics</requirement> | 4 <requirement type="package" version="2.14">biocbasics</requirement> |
5 <requirement type="package" version="3.0.3">R</requirement> | 5 <requirement type="package" version="3.1.1">R_3_1_1</requirement> |
6 <requirement type="package" version="1.3.18">graphicsmagick</requirement> | 6 <requirement type="package" version="1.3.18">graphicsmagick</requirement> |
7 <requirement type="package" version="9.10">ghostscript</requirement> | 7 <requirement type="package" version="9.10">ghostscript</requirement> |
8 </requirements> | 8 </requirements> |
9 | 9 |
10 <command interpreter="python"> | 10 <command interpreter="python"> |
191 # 1 - Output Dir | 191 # 1 - Output Dir |
192 | 192 |
193 # Original edgeR code by: S.Lunke and A.Kaspi | 193 # Original edgeR code by: S.Lunke and A.Kaspi |
194 reallybig = log10(.Machine\$double.xmax) | 194 reallybig = log10(.Machine\$double.xmax) |
195 reallysmall = log10(.Machine\$double.xmin) | 195 reallysmall = log10(.Machine\$double.xmin) |
196 library('stringr') | 196 library("stringr") |
197 library('gplots') | 197 library("gplots") |
198 library('edgeR') | 198 library("edgeR") |
199 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here') | 199 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here') |
200 { | 200 { |
201 # Perform clustering for significant pvalues after controlling FWER | 201 # Perform clustering for significant pvalues after controlling FWER |
202 samples = colnames(cmat) | 202 samples = colnames(cmat) |
203 gu = unique(group) | 203 gu = unique(group) |
261 lines(e,e,col="red") | 261 lines(e,e,col="red") |
262 grid(col = "lightgray", lty = "dotted") | 262 grid(col = "lightgray", lty = "dotted") |
263 dev.off() | 263 dev.off() |
264 } | 264 } |
265 | 265 |
266 smearPlot = function(DGEList,deTags, outSmear, outMain) | 266 smearPlot = function(myDGEList,deTags, outSmear, outMain) |
267 { | 267 { |
268 pdf(outSmear) | 268 pdf(outSmear) |
269 plotSmear(DGEList,de.tags=deTags,main=outMain) | 269 plotSmear(myDGEList,de.tags=deTags,main=outMain) |
270 grid(col="lightgray", lty="dotted") | 270 grid(col="lightgray", lty="dotted") |
271 dev.off() | 271 dev.off() |
272 } | 272 } |
273 | 273 |
274 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname) | 274 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname) |
511 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F) | 511 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F) |
512 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:')) | 512 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:')) |
513 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) | 513 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) |
514 sink() | 514 sink() |
515 } | 515 } |
516 | 516 |
517 | 517 |
518 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5, | 518 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_Voom=F,out_DESeq2=F,fdrtype='fdr',priordf=5, |
519 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F, | 519 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F, |
520 filterquantile=0.2, subjects=c(),mydesign=NULL, | 520 filterquantile=0.2, subjects=c(),mydesign=NULL, |
521 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19', | 521 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19', |
522 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", | 522 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", |
523 doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary') | 523 doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary') |
524 { | 524 { |
525 | |
526 | |
527 run_edgeR = function(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR) | |
528 { | |
529 sink('edgeR.log') | |
530 #### Setup myDGEList object | |
531 myDGEList = DGEList(counts=workCM, group = group) | |
532 myDGEList = calcNormFactors(myDGEList) | |
533 if (robust_meth == 'ordinary') { | |
534 myDGEList = estimateGLMCommonDisp(myDGEList,mydesign) | |
535 myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign) | |
536 if (priordf > 0) { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign,prior.df = priordf) | |
537 } else { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) } | |
538 comdisp = myDGEList\$common.dispersion | |
539 estpriorn = getPriorN(myDGEList) | |
540 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F) | |
541 } else { | |
542 myDGEList = estimateGLMRobustDisp(myDGEList,design=mydesign, prior.df = priordf, maxit = 6, residual.type = robust_meth) | |
543 } | |
544 | |
545 | |
546 DGLM = glmFit(myDGEList,design=mydesign) | |
547 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed | |
548 efflib = myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors | |
549 normData = (1e+06*myDGEList\$counts/efflib) | |
550 uoutput = cbind( | |
551 Name=as.character(rownames(myDGEList\$counts)), | |
552 DE\$table, | |
553 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), | |
554 Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums,normData, | |
555 myDGEList\$counts | |
556 ) | |
557 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable | |
558 goodness = gof(DGLM, pcutoff=fdrthresh) | |
559 if (sum(goodness\$outlier) > 0) { | |
560 print.noquote('GLM outliers:') | |
561 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F) | |
562 } else { | |
563 print('No GLM fit outlier genes found\n') | |
564 } | |
565 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2) | |
566 pdf(paste("edgeR",mt,"GoodnessofFit.pdf",sep='_')) | |
567 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion") | |
568 abline(0,1,lwd=3) | |
569 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon") | |
570 dev.off() | |
571 uniqueg = unique(group) | |
572 #### Plot MDS | |
573 sample_colors = match(group,levels(group)) | |
574 sampleTypes = levels(factor(group)) | |
575 print.noquote(sampleTypes) | |
576 pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_')) | |
577 plotMDS.DGEList(myDGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors) | |
578 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19) | |
579 grid(col="blue") | |
580 dev.off() | |
581 colnames(normData) = paste( colnames(normData),'N',sep="_") | |
582 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=','))) | |
583 nzd = data.frame(log(nonzerod + 1e-2,10)) | |
584 try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_') )) | |
585 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F) | |
586 tt = cbind( | |
587 Name=as.character(rownames(myDGEList)), | |
588 DE\$table, | |
589 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), | |
590 Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums | |
591 ) | |
592 print.noquote("# edgeR Top tags\n") | |
593 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely | |
594 tt = tt[order(DE\$table\$PValue),] | |
595 print.noquote(tt[1:50,]) | |
596 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,]) | |
597 nsig = length(deTags) | |
598 print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F) | |
599 deColours = ifelse(deTags,'red','black') | |
600 pdf(paste("edgeR",mt,"BCV_vs_abundance.pdf",sep="_")) | |
601 plotBCV(myDGEList, cex=0.3, main="Biological CV vs abundance") | |
602 dev.off() | |
603 dg = myDGEList[order(DE\$table\$PValue),] | |
604 #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg))) | |
605 outpdfname= paste("edgeR",mt,"top_100_heatmap.pdf",sep="_") | |
606 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste(myTitle,'Heatmap')) | |
607 outSmear = paste("edgeR",mt,"smearplot.pdf",sep="_") | |
608 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='') | |
609 smearPlot(myDGEList=myDGEList,deTags=deTags, outSmear=outSmear, outMain = outMain) | |
610 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf=paste('edgeR',mt,'qqplot.pdf',sep='_')) | |
611 norm.factor = myDGEList\$samples\$norm.factors | |
612 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ] | |
613 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR)) | |
614 edgeRcounts = rep(0, length(allgenes)) | |
615 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits | |
616 sink() | |
617 return(list(myDGEList=myDGEList,edgeRcounts=edgeRcounts)) | |
618 } ### run_edgeR | |
619 | |
620 | |
621 run_DESeq2 = function(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) | |
622 | |
623 { | |
624 sink("DESeq2.log") | |
625 # DESeq2 | |
626 require('DESeq2') | |
627 library('RColorBrewer') | |
628 if (length(subjects) == 0) | |
629 { | |
630 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM)) | |
631 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx)) | |
632 } else { | |
633 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM)) | |
634 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx)) | |
635 } | |
636 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype) | |
637 #rDESeq = results(DESeq2) | |
638 #newCountDataSet(workCM, group) | |
639 deSeqDatsizefac = estimateSizeFactors(deSEQds) | |
640 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType) | |
641 resDESeq = nbinomWaldTest(deSeqDatdisp) | |
642 rDESeq = as.data.frame(results(resDESeq)) | |
643 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) | |
644 srDESeq = rDESeq[order(rDESeq\$pvalue),] | |
645 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf=paste('DESeq2',mt,'qqplot.pdf',sep="_")) | |
646 cat("# DESeq top 50\n") | |
647 print.noquote(srDESeq[1:50,]) | |
648 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F) | |
649 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ] | |
650 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq)) | |
651 DESeqcounts = rep(0, length(allgenes)) | |
652 DESeqcounts[DESeqcountsindex] = 1 | |
653 pdf(paste("DESeq2",mt,"dispersion_estimates.pdf",sep='_')) | |
654 plotDispEsts(resDESeq) | |
655 dev.off() | |
656 ysmall = abs(min(rDESeq\$log2FoldChange)) | |
657 ybig = abs(max(rDESeq\$log2FoldChange)) | |
658 ylimit = min(4,ysmall,ybig) | |
659 pdf(paste("DESeq2",mt,"MA_plot.pdf",sep="_")) | |
660 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit)) | |
661 dev.off() | |
662 rlogres = rlogTransformation(resDESeq) | |
663 sampledists = dist( t( assay(rlogres) ) ) | |
664 sdmat = as.matrix(sampledists) | |
665 pdf(paste("DESeq2",mt,"sample_distance_plot.pdf",sep="_")) | |
666 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"), | |
667 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255)) | |
668 dev.off() | |
669 ###outpdfname=paste("DESeq2",mt,"top50_heatmap.pdf",sep="_") | |
670 ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle)) | |
671 sink() | |
672 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) ) | |
673 if ("try-error" %in% class(result)) { | |
674 print.noquote('DESeq2 plotPCA failed.') | |
675 } else { | |
676 pdf(paste("DESeq2",mt,"PCA_plot.pdf",sep="_")) | |
677 #### wtf - print? Seems needed to get this to work | |
678 print(ppca) | |
679 dev.off() | |
680 } | |
681 return(DESeqcounts) | |
682 } | |
683 | |
684 | |
685 run_Voom = function(workCM,pdata,subjects,group,mydesign,mt,out_Voom) | |
686 { | |
687 sink('VOOM.log') | |
688 if (doedgeR == F) { | |
689 #### Setup myDGEList object | |
690 myDGEList = DGEList(counts=workCM, group = group) | |
691 myDGEList = calcNormFactors(myDGEList) | |
692 myDGEList = estimateGLMCommonDisp(myDGEList,mydesign) | |
693 myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign) | |
694 myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) | |
695 } | |
696 pdf(paste("VOOM",mt,"mean_variance_plot.pdf",sep='_')) | |
697 dat.voomed <- voom(myDGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL) | |
698 dev.off() | |
699 # Use limma to fit data | |
700 fit = lmFit(dat.voomed, mydesign) | |
701 fit = eBayes(fit) | |
702 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none") | |
703 qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf=paste('VOOM',mt,'qqplot.pdf',sep='_')) | |
704 rownames(rvoom) = rownames(workCM) | |
705 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls) | |
706 srvoom = rvoom[order(rvoom\$P.Value),] | |
707 cat("# VOOM top 50\n") | |
708 print(srvoom[1:50,]) | |
709 write.table(srvoom,file=out_Voom, quote=FALSE, sep="\t",row.names=F) | |
710 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma | |
711 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ] | |
712 voomcountsindex <- which(allgenes %in% rownames(topresults.voom)) | |
713 voomcounts = rep(0, length(allgenes)) | |
714 voomcounts[voomcountsindex] = 1 | |
715 sink() | |
716 return(voomcounts) | |
717 } | |
718 | |
719 | |
720 #### data cleaning and analsis control starts here | |
721 | |
525 # Error handling | 722 # Error handling |
526 if (length(unique(group))!=2){ | 723 nugroup = length(unique(group)) |
724 if (nugroup!=2){ | |
527 print("Number of conditions identified in experiment does not equal 2") | 725 print("Number of conditions identified in experiment does not equal 2") |
528 q() | 726 q() |
529 } | 727 } |
530 require(edgeR) | 728 require(edgeR) |
531 options(width = 512) | 729 options(width = 512) |
532 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ") | 730 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ") |
533 allN = nrow(Count_Matrix) | 731 allN = nrow(Count_Matrix) |
534 nscut = round(ncol(Count_Matrix)/2) | 732 nscut = round(ncol(Count_Matrix)/2) # half samples |
535 colTotmillionreads = colSums(Count_Matrix)/1e6 | 733 colTotmillionreads = colSums(Count_Matrix)/1e6 |
536 counts.dataframe = as.data.frame(c()) | 734 counts.dataframe = as.data.frame(c()) |
537 rawrs = rowSums(Count_Matrix) | 735 rawrs = rowSums(Count_Matrix) |
538 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes | 736 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes |
539 nzN = nrow(nonzerod) | 737 nzN = nrow(nonzerod) |
561 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F) | 759 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F) |
562 maint = paste('Filter below',filterquantile,'quantile') | 760 maint = paste('Filter below',filterquantile,'quantile') |
563 } | 761 } |
564 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle) | 762 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle) |
565 allgenes = rownames(workCM) | 763 allgenes = rownames(workCM) |
566 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" | 764 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" # ucsc chr:start-end regexp |
567 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/" | 765 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/" |
568 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='') | 766 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='') |
569 testreg = str_match(allgenes,reg) | 767 testreg = str_match(allgenes,reg) |
570 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string | 768 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string |
571 { | 769 { |
573 contigurls = paste0(ucsc,"&position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>") | 771 contigurls = paste0(ucsc,"&position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>") |
574 } else { | 772 } else { |
575 print("@@ using genecards substitution for urls") | 773 print("@@ using genecards substitution for urls") |
576 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>") | 774 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>") |
577 } | 775 } |
578 print.noquote("# urls") | 776 print.noquote("# urls sample") |
579 print.noquote(head(contigurls)) | 777 print.noquote(head(contigurls)) |
580 print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F) | 778 print(paste("# Total low count contigs per sample = ",table(lo)),quote=F) |
581 cmrowsums = rowSums(workCM) | 779 cmrowsums = rowSums(workCM) |
582 TName=unique(group)[1] | 780 TName=unique(group)[1] |
583 CName=unique(group)[2] | 781 CName=unique(group)[2] |
584 if (is.null(mydesign)) { | 782 if (is.null(mydesign)) { |
585 if (length(subjects) == 0) | 783 if (length(subjects) == 0) |
593 } | 791 } |
594 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=','))) | 792 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=','))) |
595 print.noquote('Using design matrix:') | 793 print.noquote('Using design matrix:') |
596 print.noquote(mydesign) | 794 print.noquote(mydesign) |
597 if (doedgeR == T) { | 795 if (doedgeR == T) { |
598 sink('edgeR.log') | 796 eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR) |
599 #### Setup DGEList object | 797 myDGEList = eres\$myDGEList |
600 DGEList = DGEList(counts=workCM, group = group) | 798 edgeRcounts = eres\$edgeRcounts |
601 DGEList = calcNormFactors(DGEList) | 799 } |
602 if (robust_meth == 'ordinary') { | 800 if (doDESeq2 == T) { DESeqcounts = run_DESeq2(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) } |
603 DGEList = estimateGLMCommonDisp(DGEList,mydesign) | 801 if (doVoom == T) { voomcounts = run_Voom(workCM,pdata,subjects,group,mydesign,mt,out_Voom) } |
604 DGEList = estimateGLMTrendedDisp(DGEList,mydesign) | 802 |
605 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf) | |
606 | |
607 comdisp = DGEList\$common.dispersion | |
608 estpriorn = getPriorN(DGEList) | |
609 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F) | |
610 } else { | |
611 DGEList = estimateGLMRobustDisp(DGEList,design=mydesign, prior.df = edgeR_priordf, maxit = 6, residual.type = robust_meth) | |
612 } | |
613 | |
614 | |
615 DGLM = glmFit(DGEList,design=mydesign) | |
616 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed | |
617 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors | |
618 normData = (1e+06*DGEList\$counts/efflib) | |
619 uoutput = cbind( | |
620 Name=as.character(rownames(DGEList\$counts)), | |
621 DE\$table, | |
622 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), | |
623 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData, | |
624 DGEList\$counts | |
625 ) | |
626 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable | |
627 goodness = gof(DGLM, pcutoff=fdrthresh) | |
628 if (sum(goodness\$outlier) > 0) { | |
629 print.noquote('GLM outliers:') | |
630 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F) | |
631 } else { | |
632 print('No GLM fit outlier genes found\n') | |
633 } | |
634 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2) | |
635 pdf("edgeR_GoodnessofFit.pdf") | |
636 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion") | |
637 abline(0,1,lwd=3) | |
638 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon") | |
639 dev.off() | |
640 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors | |
641 normData = (1e+06*DGEList\$counts/efflib) | |
642 uniqueg = unique(group) | |
643 #### Plot MDS | |
644 sample_colors = match(group,levels(group)) | |
645 sampleTypes = levels(factor(group)) | |
646 print.noquote(sampleTypes) | |
647 pdf("edgeR_MDSplot.pdf") | |
648 plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors) | |
649 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19) | |
650 grid(col="blue") | |
651 dev.off() | |
652 colnames(normData) = paste( colnames(normData),'N',sep="_") | |
653 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=','))) | |
654 nzd = data.frame(log(nonzerod + 1e-2,10)) | |
655 try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") ) | |
656 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F) | |
657 tt = cbind( | |
658 Name=as.character(rownames(DGEList\$counts)), | |
659 DE\$table, | |
660 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), | |
661 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums | |
662 ) | |
663 print.noquote("# edgeR Top tags\n") | |
664 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely | |
665 tt = tt[order(DE\$table\$PValue),] | |
666 print.noquote(tt[1:50,]) | |
667 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,]) | |
668 nsig = length(deTags) | |
669 print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F) | |
670 deColours = ifelse(deTags,'red','black') | |
671 pdf("edgeR_BCV_vs_abundance.pdf") | |
672 plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance") | |
673 dev.off() | |
674 dg = DGEList[order(DE\$table\$PValue),] | |
675 #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg))) | |
676 efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors | |
677 normData = (1e+06*dg\$counts/efflib) | |
678 outpdfname="edgeR_top_100_heatmap.pdf" | |
679 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle)) | |
680 outSmear = "edgeR_smearplot.pdf" | |
681 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='') | |
682 smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain) | |
683 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf') | |
684 norm.factor = DGEList\$samples\$norm.factors | |
685 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ] | |
686 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR)) | |
687 edgeRcounts = rep(0, length(allgenes)) | |
688 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits | |
689 sink() | |
690 } ### doedgeR | |
691 if (doDESeq2 == T) | |
692 { | |
693 sink("DESeq2.log") | |
694 # DESeq2 | |
695 require('DESeq2') | |
696 library('RColorBrewer') | |
697 if (length(subjects) == 0) | |
698 { | |
699 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM)) | |
700 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx)) | |
701 } else { | |
702 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM)) | |
703 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx)) | |
704 } | |
705 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype) | |
706 #rDESeq = results(DESeq2) | |
707 #newCountDataSet(workCM, group) | |
708 deSeqDatsizefac = estimateSizeFactors(deSEQds) | |
709 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType) | |
710 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype) | |
711 rDESeq = as.data.frame(results(resDESeq)) | |
712 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) | |
713 srDESeq = rDESeq[order(rDESeq\$pvalue),] | |
714 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf') | |
715 cat("# DESeq top 50\n") | |
716 print.noquote(srDESeq[1:50,]) | |
717 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F) | |
718 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ] | |
719 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq)) | |
720 DESeqcounts = rep(0, length(allgenes)) | |
721 DESeqcounts[DESeqcountsindex] = 1 | |
722 pdf("DESeq2_dispersion_estimates.pdf") | |
723 plotDispEsts(resDESeq) | |
724 dev.off() | |
725 ysmall = abs(min(rDESeq\$log2FoldChange)) | |
726 ybig = abs(max(rDESeq\$log2FoldChange)) | |
727 ylimit = min(4,ysmall,ybig) | |
728 pdf("DESeq2_MA_plot.pdf") | |
729 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit)) | |
730 dev.off() | |
731 rlogres = rlogTransformation(resDESeq) | |
732 sampledists = dist( t( assay(rlogres) ) ) | |
733 sdmat = as.matrix(sampledists) | |
734 pdf("DESeq2_sample_distance_plot.pdf") | |
735 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"), | |
736 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255)) | |
737 dev.off() | |
738 ###outpdfname="DESeq2_top50_heatmap.pdf" | |
739 ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle)) | |
740 sink() | |
741 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) ) | |
742 if ("try-error" %in% class(result)) { | |
743 print.noquote('DESeq2 plotPCA failed.') | |
744 } else { | |
745 pdf("DESeq2_PCA_plot.pdf") | |
746 #### wtf - print? Seems needed to get this to work | |
747 print(ppca) | |
748 dev.off() | |
749 } | |
750 } | |
751 | |
752 if (doVoom == T) { | |
753 sink('VOOM.log') | |
754 if (doedgeR == F) { | |
755 #### Setup DGEList object | |
756 DGEList = DGEList(counts=workCM, group = group) | |
757 DGEList = estimateGLMCommonDisp(DGEList,mydesign) | |
758 DGEList = estimateGLMTrendedDisp(DGEList,mydesign) | |
759 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) | |
760 } | |
761 calcNormFactors(DGEList) | |
762 ls = colSums(DGEList\$counts) * DGEList\$samples\$norm.factors | |
763 pdf("VOOM_mean_variance_plot.pdf") | |
764 #dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = ls) | |
765 dat.voomed <- voom(DGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL) | |
766 dev.off() | |
767 # Use limma to fit data | |
768 fit = lmFit(dat.voomed, mydesign) | |
769 fit = eBayes(fit) | |
770 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none") | |
771 qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf') | |
772 rownames(rvoom) = rownames(workCM) | |
773 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls) | |
774 srvoom = rvoom[order(rvoom\$P.Value),] | |
775 cat("# VOOM top 50\n") | |
776 print(srvoom[1:50,]) | |
777 write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F) | |
778 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma | |
779 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ] | |
780 voomcountsindex <- which(allgenes %in% rownames(topresults.voom)) | |
781 voomcounts = rep(0, length(allgenes)) | |
782 voomcounts[voomcountsindex] = 1 | |
783 sink() | |
784 } | |
785 | 803 |
786 if (doCamera) { | 804 if (doCamera) { |
787 doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle, | 805 doGSEA(y=myDGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle, |
788 outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype) | 806 outfname=paste("GSEA_Camera",mt,"table.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype) |
789 } | 807 } |
790 counts.dataframe = c() | 808 counts.dataframe = c() |
791 vennmain = 'no venn' | 809 vennmain = 'no venn' |
792 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) { | 810 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) { |
793 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) { | 811 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) { |
802 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes) | 820 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes) |
803 } | 821 } |
804 | 822 |
805 if (nrow(counts.dataframe > 1)) { | 823 if (nrow(counts.dataframe > 1)) { |
806 counts.venn = vennCounts(counts.dataframe) | 824 counts.venn = vennCounts(counts.dataframe) |
807 vennf = "Venn_significant_genes_overlap.pdf" | 825 vennf = paste("Venn",mt,"significant_genes_overlap.pdf",sep="_") |
808 pdf(vennf) | 826 pdf(vennf) |
809 vennDiagram(counts.venn,main=vennmain,col="maroon") | 827 vennDiagram(counts.venn,main=vennmain,col="maroon") |
810 dev.off() | 828 dev.off() |
811 } | 829 } |
812 } #### doDESeq2 or doVoom | 830 } #### doDESeq2 or doVoom |
818 builtin_gmt = "" | 836 builtin_gmt = "" |
819 history_gmt = "" | 837 history_gmt = "" |
820 history_gmt_name = "" | 838 history_gmt_name = "" |
821 out_edgeR = F | 839 out_edgeR = F |
822 out_DESeq2 = F | 840 out_DESeq2 = F |
823 out_VOOM = "$out_VOOM" | 841 out_Voom = "$out_VOOM" |
824 edgeR_robust_meth = "ordinary" # control robust deviance options | 842 edgeR_robust_meth = "ordinary" # control robust deviance options |
825 doDESeq2 = $DESeq2.doDESeq2 | 843 doDESeq2 = $DESeq2.doDESeq2 |
826 doVoom = $doVoom | 844 doVoom = $doVoom |
827 doCamera = F | 845 doCamera = F |
828 doedgeR = $edgeR.doedgeR | 846 doedgeR = $edgeR.doedgeR |
829 edgeR_priordf = 10 | 847 edgeR_priordf = 10 |
830 | 848 |
831 | 849 |
832 #if $doVoom == "T": | 850 #if $doVoom == "T": |
833 out_VOOM = "$out_VOOM" | 851 out_Voom = "$out_VOOM" |
834 #end if | 852 #end if |
835 | 853 |
836 #if $DESeq2.doDESeq2 == "T": | 854 #if $DESeq2.doDESeq2 == "T": |
837 out_DESeq2 = "$out_DESeq2" | 855 out_DESeq2 = "$out_DESeq2" |
838 doDESeq2 = T | 856 doDESeq2 = T |
894 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first | 912 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first |
895 Count_Matrix = Count_Matrix[subset(rn,! islib),] | 913 Count_Matrix = Count_Matrix[subset(rn,! islib),] |
896 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor | 914 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor |
897 group = factor(group, levels=c(ControlName,TreatmentName)) | 915 group = factor(group, levels=c(ControlName,TreatmentName)) |
898 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns | 916 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns |
899 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2, | 917 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_Voom=out_Voom, out_DESeq2=out_DESeq2, |
900 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', | 918 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', |
901 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, | 919 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, |
902 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org, | 920 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org, |
903 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robust_meth=edgeR_robust_meth) | 921 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robust_meth=edgeR_robust_meth) |
904 sessionInfo() | 922 sessionInfo() |