Mercurial > repos > fubar > differential_count_models
diff rgedgeRpaired_nocamera.xml @ 111:9f2e0ec3e826 draft
Uploaded
author | fubar |
---|---|
date | Wed, 22 Oct 2014 23:37:31 -0400 |
parents | d7e2a0c0cce9 |
children | badcd3b0e708 |
line wrap: on
line diff
--- a/rgedgeRpaired_nocamera.xml Sun Jul 06 21:00:54 2014 -0400 +++ b/rgedgeRpaired_nocamera.xml Wed Oct 22 23:37:31 2014 -0400 @@ -1,8 +1,8 @@ -<tool id="rgDifferentialCount" name="Differential_Count" version="0.25"> +<tool id="rgDifferentialCount" name="Differential_Count" version="0.26"> <description>models using BioConductor packages</description> <requirements> <requirement type="package" version="2.14">biocbasics</requirement> - <requirement type="package" version="3.0.3">R</requirement> + <requirement type="package" version="3.1.1">R_3_1_1</requirement> <requirement type="package" version="1.3.18">graphicsmagick</requirement> <requirement type="package" version="9.10">ghostscript</requirement> </requirements> @@ -193,9 +193,9 @@ # Original edgeR code by: S.Lunke and A.Kaspi reallybig = log10(.Machine\$double.xmax) reallysmall = log10(.Machine\$double.xmin) -library('stringr') -library('gplots') -library('edgeR') +library("stringr") +library("gplots") +library("edgeR") hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here') { # Perform clustering for significant pvalues after controlling FWER @@ -263,10 +263,10 @@ dev.off() } -smearPlot = function(DGEList,deTags, outSmear, outMain) +smearPlot = function(myDGEList,deTags, outSmear, outMain) { pdf(outSmear) - plotSmear(DGEList,de.tags=deTags,main=outMain) + plotSmear(myDGEList,de.tags=deTags,main=outMain) grid(col="lightgray", lty="dotted") dev.off() } @@ -513,17 +513,215 @@ write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) sink() } - + -edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5, +edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_Voom=F,out_DESeq2=F,fdrtype='fdr',priordf=5, fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F, filterquantile=0.2, subjects=c(),mydesign=NULL, doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19', histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary') { + + +run_edgeR = function(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR) +{ + sink('edgeR.log') + #### Setup myDGEList object + myDGEList = DGEList(counts=workCM, group = group) + myDGEList = calcNormFactors(myDGEList) + if (robust_meth == 'ordinary') { + myDGEList = estimateGLMCommonDisp(myDGEList,mydesign) + myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign) + if (priordf > 0) { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign,prior.df = priordf) + } else { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) } + comdisp = myDGEList\$common.dispersion + estpriorn = getPriorN(myDGEList) + print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F) + } else { + myDGEList = estimateGLMRobustDisp(myDGEList,design=mydesign, prior.df = priordf, maxit = 6, residual.type = robust_meth) + } + + + DGLM = glmFit(myDGEList,design=mydesign) + DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed + efflib = myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors + normData = (1e+06*myDGEList\$counts/efflib) + uoutput = cbind( + Name=as.character(rownames(myDGEList\$counts)), + DE\$table, + adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), + Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums,normData, + myDGEList\$counts + ) + soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable + goodness = gof(DGLM, pcutoff=fdrthresh) + if (sum(goodness\$outlier) > 0) { + print.noquote('GLM outliers:') + print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F) + } else { + print('No GLM fit outlier genes found\n') + } + z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2) + pdf(paste("edgeR",mt,"GoodnessofFit.pdf",sep='_')) + qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion") + abline(0,1,lwd=3) + points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon") + dev.off() + uniqueg = unique(group) + #### Plot MDS + sample_colors = match(group,levels(group)) + sampleTypes = levels(factor(group)) + print.noquote(sampleTypes) + pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_')) + plotMDS.DGEList(myDGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors) + legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19) + grid(col="blue") + dev.off() + colnames(normData) = paste( colnames(normData),'N',sep="_") + print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=','))) + nzd = data.frame(log(nonzerod + 1e-2,10)) + try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_') )) + write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F) + tt = cbind( + Name=as.character(rownames(myDGEList)), + DE\$table, + adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), + Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums + ) + print.noquote("# edgeR Top tags\n") + tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely + tt = tt[order(DE\$table\$PValue),] + print.noquote(tt[1:50,]) + deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,]) + nsig = length(deTags) + print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F) + deColours = ifelse(deTags,'red','black') + pdf(paste("edgeR",mt,"BCV_vs_abundance.pdf",sep="_")) + plotBCV(myDGEList, cex=0.3, main="Biological CV vs abundance") + dev.off() + dg = myDGEList[order(DE\$table\$PValue),] + #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg))) + outpdfname= paste("edgeR",mt,"top_100_heatmap.pdf",sep="_") + hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste(myTitle,'Heatmap')) + outSmear = paste("edgeR",mt,"smearplot.pdf",sep="_") + outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='') + smearPlot(myDGEList=myDGEList,deTags=deTags, outSmear=outSmear, outMain = outMain) + qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf=paste('edgeR',mt,'qqplot.pdf',sep='_')) + norm.factor = myDGEList\$samples\$norm.factors + topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ] + edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR)) + edgeRcounts = rep(0, length(allgenes)) + edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits + sink() + return(list(myDGEList=myDGEList,edgeRcounts=edgeRcounts)) +} ### run_edgeR + + +run_DESeq2 = function(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) + + { + sink("DESeq2.log") + # DESeq2 + require('DESeq2') + library('RColorBrewer') + if (length(subjects) == 0) + { + pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM)) + deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx)) + } else { + pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM)) + deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx)) + } + #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype) + #rDESeq = results(DESeq2) + #newCountDataSet(workCM, group) + deSeqDatsizefac = estimateSizeFactors(deSEQds) + deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType) + resDESeq = nbinomWaldTest(deSeqDatdisp) + rDESeq = as.data.frame(results(resDESeq)) + rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) + srDESeq = rDESeq[order(rDESeq\$pvalue),] + qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf=paste('DESeq2',mt,'qqplot.pdf',sep="_")) + cat("# DESeq top 50\n") + print.noquote(srDESeq[1:50,]) + write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F) + topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ] + DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq)) + DESeqcounts = rep(0, length(allgenes)) + DESeqcounts[DESeqcountsindex] = 1 + pdf(paste("DESeq2",mt,"dispersion_estimates.pdf",sep='_')) + plotDispEsts(resDESeq) + dev.off() + ysmall = abs(min(rDESeq\$log2FoldChange)) + ybig = abs(max(rDESeq\$log2FoldChange)) + ylimit = min(4,ysmall,ybig) + pdf(paste("DESeq2",mt,"MA_plot.pdf",sep="_")) + plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit)) + dev.off() + rlogres = rlogTransformation(resDESeq) + sampledists = dist( t( assay(rlogres) ) ) + sdmat = as.matrix(sampledists) + pdf(paste("DESeq2",mt,"sample_distance_plot.pdf",sep="_")) + heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"), + col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255)) + dev.off() + ###outpdfname=paste("DESeq2",mt,"top50_heatmap.pdf",sep="_") + ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle)) + sink() + result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) ) + if ("try-error" %in% class(result)) { + print.noquote('DESeq2 plotPCA failed.') + } else { + pdf(paste("DESeq2",mt,"PCA_plot.pdf",sep="_")) + #### wtf - print? Seems needed to get this to work + print(ppca) + dev.off() + } + return(DESeqcounts) + } + + +run_Voom = function(workCM,pdata,subjects,group,mydesign,mt,out_Voom) + { + sink('VOOM.log') + if (doedgeR == F) { + #### Setup myDGEList object + myDGEList = DGEList(counts=workCM, group = group) + myDGEList = calcNormFactors(myDGEList) + myDGEList = estimateGLMCommonDisp(myDGEList,mydesign) + myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign) + myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) + } + pdf(paste("VOOM",mt,"mean_variance_plot.pdf",sep='_')) + dat.voomed <- voom(myDGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL) + dev.off() + # Use limma to fit data + fit = lmFit(dat.voomed, mydesign) + fit = eBayes(fit) + rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none") + qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf=paste('VOOM',mt,'qqplot.pdf',sep='_')) + rownames(rvoom) = rownames(workCM) + rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls) + srvoom = rvoom[order(rvoom\$P.Value),] + cat("# VOOM top 50\n") + print(srvoom[1:50,]) + write.table(srvoom,file=out_Voom, quote=FALSE, sep="\t",row.names=F) + # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma + topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ] + voomcountsindex <- which(allgenes %in% rownames(topresults.voom)) + voomcounts = rep(0, length(allgenes)) + voomcounts[voomcountsindex] = 1 + sink() + return(voomcounts) + } + + +#### data cleaning and analsis control starts here + # Error handling - if (length(unique(group))!=2){ + nugroup = length(unique(group)) + if (nugroup!=2){ print("Number of conditions identified in experiment does not equal 2") q() } @@ -531,7 +729,7 @@ options(width = 512) mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ") allN = nrow(Count_Matrix) - nscut = round(ncol(Count_Matrix)/2) + nscut = round(ncol(Count_Matrix)/2) # half samples colTotmillionreads = colSums(Count_Matrix)/1e6 counts.dataframe = as.data.frame(c()) rawrs = rowSums(Count_Matrix) @@ -563,7 +761,7 @@ } cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle) allgenes = rownames(workCM) - reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" + reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" # ucsc chr:start-end regexp genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/" ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='') testreg = str_match(allgenes,reg) @@ -575,9 +773,9 @@ print("@@ using genecards substitution for urls") contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>") } - print.noquote("# urls") + print.noquote("# urls sample") print.noquote(head(contigurls)) - print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F) + print(paste("# Total low count contigs per sample = ",table(lo)),quote=F) cmrowsums = rowSums(workCM) TName=unique(group)[1] CName=unique(group)[2] @@ -595,197 +793,17 @@ print.noquote('Using design matrix:') print.noquote(mydesign) if (doedgeR == T) { - sink('edgeR.log') - #### Setup DGEList object - DGEList = DGEList(counts=workCM, group = group) - DGEList = calcNormFactors(DGEList) - if (robust_meth == 'ordinary') { - DGEList = estimateGLMCommonDisp(DGEList,mydesign) - DGEList = estimateGLMTrendedDisp(DGEList,mydesign) - DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf) + eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR) + myDGEList = eres\$myDGEList + edgeRcounts = eres\$edgeRcounts + } + if (doDESeq2 == T) { DESeqcounts = run_DESeq2(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) } + if (doVoom == T) { voomcounts = run_Voom(workCM,pdata,subjects,group,mydesign,mt,out_Voom) } - comdisp = DGEList\$common.dispersion - estpriorn = getPriorN(DGEList) - print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F) - } else { - DGEList = estimateGLMRobustDisp(DGEList,design=mydesign, prior.df = edgeR_priordf, maxit = 6, residual.type = robust_meth) - } - - - DGLM = glmFit(DGEList,design=mydesign) - DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed - efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors - normData = (1e+06*DGEList\$counts/efflib) - uoutput = cbind( - Name=as.character(rownames(DGEList\$counts)), - DE\$table, - adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), - Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData, - DGEList\$counts - ) - soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable - goodness = gof(DGLM, pcutoff=fdrthresh) - if (sum(goodness\$outlier) > 0) { - print.noquote('GLM outliers:') - print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F) - } else { - print('No GLM fit outlier genes found\n') - } - z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2) - pdf("edgeR_GoodnessofFit.pdf") - qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion") - abline(0,1,lwd=3) - points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon") - dev.off() - efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors - normData = (1e+06*DGEList\$counts/efflib) - uniqueg = unique(group) - #### Plot MDS - sample_colors = match(group,levels(group)) - sampleTypes = levels(factor(group)) - print.noquote(sampleTypes) - pdf("edgeR_MDSplot.pdf") - plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors) - legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19) - grid(col="blue") - dev.off() - colnames(normData) = paste( colnames(normData),'N',sep="_") - print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=','))) - nzd = data.frame(log(nonzerod + 1e-2,10)) - try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") ) - write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F) - tt = cbind( - Name=as.character(rownames(DGEList\$counts)), - DE\$table, - adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), - Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums - ) - print.noquote("# edgeR Top tags\n") - tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely - tt = tt[order(DE\$table\$PValue),] - print.noquote(tt[1:50,]) - deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,]) - nsig = length(deTags) - print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F) - deColours = ifelse(deTags,'red','black') - pdf("edgeR_BCV_vs_abundance.pdf") - plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance") - dev.off() - dg = DGEList[order(DE\$table\$PValue),] - #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg))) - efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors - normData = (1e+06*dg\$counts/efflib) - outpdfname="edgeR_top_100_heatmap.pdf" - hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle)) - outSmear = "edgeR_smearplot.pdf" - outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='') - smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain) - qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf') - norm.factor = DGEList\$samples\$norm.factors - topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ] - edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR)) - edgeRcounts = rep(0, length(allgenes)) - edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits - sink() - } ### doedgeR - if (doDESeq2 == T) - { - sink("DESeq2.log") - # DESeq2 - require('DESeq2') - library('RColorBrewer') - if (length(subjects) == 0) - { - pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM)) - deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx)) - } else { - pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM)) - deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx)) - } - #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype) - #rDESeq = results(DESeq2) - #newCountDataSet(workCM, group) - deSeqDatsizefac = estimateSizeFactors(deSEQds) - deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType) - resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype) - rDESeq = as.data.frame(results(resDESeq)) - rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) - srDESeq = rDESeq[order(rDESeq\$pvalue),] - qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf') - cat("# DESeq top 50\n") - print.noquote(srDESeq[1:50,]) - write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F) - topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ] - DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq)) - DESeqcounts = rep(0, length(allgenes)) - DESeqcounts[DESeqcountsindex] = 1 - pdf("DESeq2_dispersion_estimates.pdf") - plotDispEsts(resDESeq) - dev.off() - ysmall = abs(min(rDESeq\$log2FoldChange)) - ybig = abs(max(rDESeq\$log2FoldChange)) - ylimit = min(4,ysmall,ybig) - pdf("DESeq2_MA_plot.pdf") - plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit)) - dev.off() - rlogres = rlogTransformation(resDESeq) - sampledists = dist( t( assay(rlogres) ) ) - sdmat = as.matrix(sampledists) - pdf("DESeq2_sample_distance_plot.pdf") - heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"), - col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255)) - dev.off() - ###outpdfname="DESeq2_top50_heatmap.pdf" - ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle)) - sink() - result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) ) - if ("try-error" %in% class(result)) { - print.noquote('DESeq2 plotPCA failed.') - } else { - pdf("DESeq2_PCA_plot.pdf") - #### wtf - print? Seems needed to get this to work - print(ppca) - dev.off() - } - } - - if (doVoom == T) { - sink('VOOM.log') - if (doedgeR == F) { - #### Setup DGEList object - DGEList = DGEList(counts=workCM, group = group) - DGEList = estimateGLMCommonDisp(DGEList,mydesign) - DGEList = estimateGLMTrendedDisp(DGEList,mydesign) - DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) - } - calcNormFactors(DGEList) - ls = colSums(DGEList\$counts) * DGEList\$samples\$norm.factors - pdf("VOOM_mean_variance_plot.pdf") - #dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = ls) - dat.voomed <- voom(DGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL) - dev.off() - # Use limma to fit data - fit = lmFit(dat.voomed, mydesign) - fit = eBayes(fit) - rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none") - qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf') - rownames(rvoom) = rownames(workCM) - rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls) - srvoom = rvoom[order(rvoom\$P.Value),] - cat("# VOOM top 50\n") - print(srvoom[1:50,]) - write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F) - # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma - topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ] - voomcountsindex <- which(allgenes %in% rownames(topresults.voom)) - voomcounts = rep(0, length(allgenes)) - voomcounts[voomcountsindex] = 1 - sink() - } if (doCamera) { - doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle, - outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype) + doGSEA(y=myDGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle, + outfname=paste("GSEA_Camera",mt,"table.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype) } counts.dataframe = c() vennmain = 'no venn' @@ -804,7 +822,7 @@ if (nrow(counts.dataframe > 1)) { counts.venn = vennCounts(counts.dataframe) - vennf = "Venn_significant_genes_overlap.pdf" + vennf = paste("Venn",mt,"significant_genes_overlap.pdf",sep="_") pdf(vennf) vennDiagram(counts.venn,main=vennmain,col="maroon") dev.off() @@ -820,7 +838,7 @@ history_gmt_name = "" out_edgeR = F out_DESeq2 = F -out_VOOM = "$out_VOOM" +out_Voom = "$out_VOOM" edgeR_robust_meth = "ordinary" # control robust deviance options doDESeq2 = $DESeq2.doDESeq2 doVoom = $doVoom @@ -830,7 +848,7 @@ #if $doVoom == "T": - out_VOOM = "$out_VOOM" + out_Voom = "$out_VOOM" #end if #if $DESeq2.doDESeq2 == "T": @@ -896,7 +914,7 @@ group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor group = factor(group, levels=c(ControlName,TreatmentName)) colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns -results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2, +results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_Voom=out_Voom, out_DESeq2=out_DESeq2, fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,