differential_count_models: rgedgeRpaired

comparison rgedgeRpaired_nocamera.xml @ 111:9f2e0ec3e826 draft

Uploaded

author	fubar
date	Wed, 22 Oct 2014 23:37:31 -0400
parents	d7e2a0c0cce9
children	badcd3b0e708

comparison

equal deleted inserted replaced

-:d7e2a0c0cce9
+:9f2e0ec3e826
-<tool id="rgDifferentialCount" name="Differential_Count" version="0.25">
+<tool id="rgDifferentialCount" name="Differential_Count" version="0.26">
 <description>models using BioConductor packages</description>
 <requirements>
 <requirement type="package" version="2.14">biocbasics</requirement>
-<requirement type="package" version="3.0.3">R</requirement>
+<requirement type="package" version="3.1.1">R_3_1_1</requirement>
 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
 <requirement type="package" version="9.10">ghostscript</requirement>
 </requirements>
 <command interpreter="python">
 # 1 - Output Dir
 # Original edgeR code by: S.Lunke and A.Kaspi
 reallybig = log10(.Machine\$double.xmax)
 reallysmall = log10(.Machine\$double.xmin)
-library('stringr')
+library("stringr")
-library('gplots')
+library("gplots")
-library('edgeR')
+library("edgeR")
 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')
 {
 # Perform clustering for significant pvalues after controlling FWER
 samples = colnames(cmat)
 gu = unique(group)
 lines(e,e,col="red")
 grid(col = "lightgray", lty = "dotted")
 dev.off()
 }
-smearPlot = function(DGEList,deTags, outSmear, outMain)
+smearPlot = function(myDGEList,deTags, outSmear, outMain) # write an edgeR smear plot of myDGEList to the PDF file outSmear, titled outMain, passing deTags as de.tags
 {
 pdf(outSmear) # open a PDF device on outSmear
-plotSmear(DGEList,de.tags=deTags,main=outMain)
+plotSmear(myDGEList,de.tags=deTags,main=outMain)
 grid(col="lightgray", lty="dotted") # dotted light gray grid drawn over the plot
 dev.off() # close the device so the PDF is written
 }
 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
 sink()
 }
-edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
+edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_Voom=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
 filterquantile=0.2, subjects=c(),mydesign=NULL,
 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
 doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary')
 {
+run_edgeR = function(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR) # edgeR GLM analysis of workCM; writes a p value sorted table to out_edgeR plus diagnostic PDFs and returns list(myDGEList, edgeRcounts)
+{ # NOTE: pdata and subjects are accepted but not referenced in this body; fdrtype, fdrthresh, myTitle, nonzerod, contigurls, allgenes, TName and CName are not parameters - presumably resolved from the enclosing edgeIt scope
+sink('edgeR.log') # divert printed output to edgeR.log until the matching sink() near the end
+#### Setup myDGEList object
+myDGEList = DGEList(counts=workCM, group = group)
+myDGEList = calcNormFactors(myDGEList)
+if (robust_meth == 'ordinary') { # ordinary path: common, then trended, then tagwise dispersion estimates
+myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
+myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign)
+if (priordf > 0) {  myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign,prior.df = priordf)
+} else { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) }
+comdisp = myDGEList\$common.dispersion
+estpriorn = getPriorN(myDGEList)
+print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
+} else { # any other robust_meth value is handed to estimateGLMRobustDisp as residual.type
+myDGEList = estimateGLMRobustDisp(myDGEList,design=mydesign, prior.df = priordf, maxit = 6, residual.type = robust_meth)
+}
+DGLM = glmFit(myDGEList,design=mydesign)
+DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
+efflib = myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors # library size times normalisation factor, one value per sample
+normData = (1e+06*myDGEList\$counts/efflib) # counts scaled to per million; NOTE(review): a matrix divided by a vector recycles the vector down rows, not across sample columns - confirm this gives the intended per-sample scaling
+uoutput = cbind(
+Name=as.character(rownames(myDGEList\$counts)),
+DE\$table,
+adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
+Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
+myDGEList\$counts
+)
+soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
+goodness = gof(DGLM, pcutoff=fdrthresh) # fdrthresh is reused here as the gof outlier cutoff
+if (sum(goodness\$outlier) > 0) {
+print.noquote('GLM outliers:')
+print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
+} else {
+print('No GLM fit outlier genes found\n')
+}
+z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
+pdf(paste("edgeR",mt,"GoodnessofFit.pdf",sep='_')) # normal QQ plot of the gof z scores, flagged outliers overplotted in maroon
+qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
+abline(0,1,lwd=3)
+points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
+dev.off()
+uniqueg = unique(group) # not referenced again within this function
+#### Plot MDS
+sample_colors =  match(group,levels(group))
+sampleTypes = levels(factor(group))
+print.noquote(sampleTypes)
+pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_'))
+plotMDS.DGEList(myDGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
+legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
+grid(col="blue")
+dev.off()
+colnames(normData) = paste( colnames(normData),'N',sep="_") # rename with an _N suffix; uoutput was already built above, so only later uses see the new names
+print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
+nzd = data.frame(log(nonzerod + 1e-2,10)) # log10 of raw counts, offset by 0.01 so zeros stay finite
+try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_') )) # wrapped in try so a boxPlot failure does not abort the run
+write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
+tt = cbind(
+Name=as.character(rownames(myDGEList)),
+DE\$table,
+adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
+Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums
+)
+print.noquote("# edgeR Top tags\n")
+tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
+tt = tt[order(DE\$table\$PValue),]
+print.noquote(tt[1:50,]) # first 50 rows; NOTE(review): indexing past the last row yields NA rows when there are fewer than 50 genes
+deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
+nsig = length(deTags)
+print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
+deColours = ifelse(deTags,'red','black') # NOTE(review): deTags holds row names (character) and deColours is not used again here - looks vestigial
+pdf(paste("edgeR",mt,"BCV_vs_abundance.pdf",sep="_"))
+plotBCV(myDGEList, cex=0.3, main="Biological CV vs abundance")
+dev.off()
+dg = myDGEList[order(DE\$table\$PValue),] # only referenced by the commented-out line below
+#normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
+outpdfname= paste("edgeR",mt,"top_100_heatmap.pdf",sep="_")
+hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste(myTitle,'Heatmap'))
+outSmear = paste("edgeR",mt,"smearplot.pdf",sep="_")
+outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
+smearPlot(myDGEList=myDGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
+qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf=paste('edgeR',mt,'qqplot.pdf',sep='_'))
+norm.factor = myDGEList\$samples\$norm.factors # not referenced again within this function
+topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
+edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
+edgeRcounts = rep(0, length(allgenes))
+edgeRcounts[edgeRcountsindex] = 1  # 0/1 flag per entry of allgenes, 1 where edgeR adj p is below fdrthresh; feeds the venn diagram of hits
+sink() # stop diverting output to edgeR.log
+return(list(myDGEList=myDGEList,edgeRcounts=edgeRcounts))
+} ### run_edgeR
+run_DESeq2 = function(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) # DESeq2 Wald test analysis of workCM; writes a p value sorted table to out_DESeq2 plus diagnostic PDFs and returns a 0/1 significance flag per entry of allgenes
+{ # NOTE: the pdata argument is overwritten below before it is read; cmrowsums, contigurls, fdrthresh, myTitle and allgenes are not parameters - presumably resolved from the enclosing edgeIt scope
+sink("DESeq2.log") # divert printed output to DESeq2.log until the sink() call further down
+# DESeq2
+require('DESeq2')
+library('RColorBrewer')
+if (length(subjects) == 0) # no subjects supplied: the design formula uses Rx alone
+{
+pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
+deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ Rx))
+} else { # subjects supplied: the design formula is subjects + Rx
+pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
+deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ subjects + Rx))
+}
+#DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
+#rDESeq = results(DESeq2)
+#newCountDataSet(workCM, group)
+deSeqDatsizefac = estimateSizeFactors(deSEQds) # the three steps are run individually: size factors, dispersions with the requested fitType, then the Wald test
+deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
+resDESeq = nbinomWaldTest(deSeqDatdisp)
+rDESeq = as.data.frame(results(resDESeq))
+rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
+srDESeq = rDESeq[order(rDESeq\$pvalue),] # copy sorted by raw p value, used for the printed top 50 and the output file
+qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf=paste('DESeq2',mt,'qqplot.pdf',sep="_"))
+cat("# DESeq top 50\n")
+print.noquote(srDESeq[1:50,])
+write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
+topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ] # which() drops rows whose padj is NA
+DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
+DESeqcounts = rep(0, length(allgenes))
+DESeqcounts[DESeqcountsindex] = 1
+pdf(paste("DESeq2",mt,"dispersion_estimates.pdf",sep='_'))
+plotDispEsts(resDESeq)
+dev.off()
+ysmall = abs(min(rDESeq\$log2FoldChange)) # NOTE(review): no na.rm here or on the next line, so any NA fold change makes ylimit NA
+ybig = abs(max(rDESeq\$log2FoldChange))
+ylimit = min(4,ysmall,ybig) # symmetric y range for the MA plot, capped at 4
+pdf(paste("DESeq2",mt,"MA_plot.pdf",sep="_"))
+plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
+dev.off()
+rlogres = rlogTransformation(resDESeq)
+sampledists = dist( t( assay(rlogres) ) ) # distances between samples: the transformed matrix is transposed so samples are rows
+sdmat = as.matrix(sampledists)
+pdf(paste("DESeq2",mt,"sample_distance_plot.pdf",sep="_"))
+heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
+col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
+dev.off()
+###outpdfname=paste("DESeq2",mt,"top50_heatmap.pdf",sep="_")
+###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
+sink() # diversion ends here, so anything printed by the PCA step below is no longer sent to DESeq2.log
+result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) ) # try() keeps a plotPCA failure from aborting the run; ppca is assigned as a side effect
+if ("try-error" %in% class(result)) {
+print.noquote('DESeq2 plotPCA failed.')
+} else {
+pdf(paste("DESeq2",mt,"PCA_plot.pdf",sep="_"))
+#### wtf - print? Seems needed to get this to work
+print(ppca)
+dev.off()
+}
+return(DESeqcounts)
+}
+run_Voom = function(workCM,pdata,subjects,group,mydesign,mt,out_Voom) # voom plus limma analysis; writes a p value sorted table to out_Voom and returns a 0/1 significance flag per entry of allgenes
+{ # NOTE: pdata and subjects are accepted but not referenced in this body; doedgeR, fdrtype, fdrthresh, myTitle, cmrowsums, contigurls, allgenes (and myDGEList when doedgeR is true) are not parameters - presumably resolved from the enclosing edgeIt scope
+sink('VOOM.log') # divert printed output to VOOM.log until the matching sink() at the end
+if (doedgeR == F) { # build a normalised DGEList locally only when the edgeR step did not run
+#### Setup myDGEList object
+myDGEList = DGEList(counts=workCM, group = group)
+myDGEList = calcNormFactors(myDGEList)
+myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
+myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign)
+myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign)
+}
+pdf(paste("VOOM",mt,"mean_variance_plot.pdf",sep='_')) # the device captures the plot that voom draws because plot = TRUE
+dat.voomed <- voom(myDGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL) # NOTE(review): "quantil" is not spelled out as "quantile" - confirm voom accepts the abbreviation
+dev.off()
+# Use limma to fit data
+fit = lmFit(dat.voomed, mydesign)
+fit = eBayes(fit)
+rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none") # tests the last column of mydesign and requests every row with sort "none"
+qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf=paste('VOOM',mt,'qqplot.pdf',sep='_'))
+rownames(rvoom) = rownames(workCM) # relies on topTable rows being in workCM order - presumably why sort is "none" above
+rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
+srvoom = rvoom[order(rvoom\$P.Value),]
+cat("# VOOM top 50\n")
+print(srvoom[1:50,])
+write.table(srvoom,file=out_Voom, quote=FALSE, sep="\t",row.names=F)
+# Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
+topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
+voomcountsindex <- which(allgenes %in% rownames(topresults.voom))
+voomcounts = rep(0, length(allgenes))
+voomcounts[voomcountsindex] = 1
+sink() # stop diverting output to VOOM.log
+return(voomcounts)
+}
+#### data cleaning and analysis control starts here
 # Error handling
-if (length(unique(group))!=2){
+nugroup = length(unique(group))
+if (nugroup!=2){
 print("Number of conditions identified in experiment does not equal 2")
 q()
 }
 require(edgeR)
 options(width = 512)
 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
 allN = nrow(Count_Matrix)
-nscut = round(ncol(Count_Matrix)/2)
+nscut = round(ncol(Count_Matrix)/2) # half samples
 colTotmillionreads = colSums(Count_Matrix)/1e6
 counts.dataframe = as.data.frame(c())
 rawrs = rowSums(Count_Matrix)
 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
 nzN = nrow(nonzerod)
 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
 maint = paste('Filter below',filterquantile,'quantile')
 }
 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
 allgenes = rownames(workCM)
-reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
+reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" # ucsc chr:start-end regexp
 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
 testreg = str_match(allgenes,reg)
 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string
 {
 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>")
 } else {
 print("@@ using genecards substitution for urls")
 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
 }
-print.noquote("# urls")
+print.noquote("# urls sample")
 print.noquote(head(contigurls))
-print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F)
+print(paste("# Total low count contigs per sample = ",table(lo)),quote=F)
 cmrowsums = rowSums(workCM)
 TName=unique(group)[1]
 CName=unique(group)[2]
 if (is.null(mydesign)) {
 if (length(subjects) == 0)
 }
 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
 print.noquote('Using design matrix:')
 print.noquote(mydesign)
 if (doedgeR == T) {
-sink('edgeR.log')
+eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR)
-#### Setup DGEList object
+myDGEList = eres\$myDGEList
-DGEList = DGEList(counts=workCM, group = group)
+edgeRcounts = eres\$edgeRcounts
-DGEList = calcNormFactors(DGEList)
+}
-if (robust_meth == 'ordinary') {
+if (doDESeq2 == T) {  DESeqcounts = run_DESeq2(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) }
-DGEList = estimateGLMCommonDisp(DGEList,mydesign)
+if (doVoom == T) { voomcounts = run_Voom(workCM,pdata,subjects,group,mydesign,mt,out_Voom) }
-DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
-DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
-comdisp = DGEList\$common.dispersion
-estpriorn = getPriorN(DGEList)
-print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
-} else {
-DGEList = estimateGLMRobustDisp(DGEList,design=mydesign, prior.df = edgeR_priordf, maxit = 6, residual.type = robust_meth)
-}
-DGLM = glmFit(DGEList,design=mydesign)
-DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
-efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
-normData = (1e+06*DGEList\$counts/efflib)
-uoutput = cbind(
-Name=as.character(rownames(DGEList\$counts)),
-DE\$table,
-adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
-Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
-DGEList\$counts
-)
-soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
-goodness = gof(DGLM, pcutoff=fdrthresh)
-if (sum(goodness\$outlier) > 0) {
-print.noquote('GLM outliers:')
-print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
-} else {
-print('No GLM fit outlier genes found\n')
-}
-z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
-pdf("edgeR_GoodnessofFit.pdf")
-qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
-abline(0,1,lwd=3)
-points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
-dev.off()
-efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
-normData = (1e+06*DGEList\$counts/efflib)
-uniqueg = unique(group)
-#### Plot MDS
-sample_colors =  match(group,levels(group))
-sampleTypes = levels(factor(group))
-print.noquote(sampleTypes)
-pdf("edgeR_MDSplot.pdf")
-plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
-legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
-grid(col="blue")
-dev.off()
-colnames(normData) = paste( colnames(normData),'N',sep="_")
-print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
-nzd = data.frame(log(nonzerod + 1e-2,10))
-try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") )
-write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
-tt = cbind(
-Name=as.character(rownames(DGEList\$counts)),
-DE\$table,
-adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
-Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
-)
-print.noquote("# edgeR Top tags\n")
-tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
-tt = tt[order(DE\$table\$PValue),]
-print.noquote(tt[1:50,])
-deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
-nsig = length(deTags)
-print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
-deColours = ifelse(deTags,'red','black')
-pdf("edgeR_BCV_vs_abundance.pdf")
-plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance")
-dev.off()
-dg = DGEList[order(DE\$table\$PValue),]
-#normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
-efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
-normData = (1e+06*dg\$counts/efflib)
-outpdfname="edgeR_top_100_heatmap.pdf"
-hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle))
-outSmear = "edgeR_smearplot.pdf"
-outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
-smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
-qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
-norm.factor = DGEList\$samples\$norm.factors
-topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
-edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
-edgeRcounts = rep(0, length(allgenes))
-edgeRcounts[edgeRcountsindex] = 1  # Create venn diagram of hits
-sink()
-} ### doedgeR
-if (doDESeq2 == T)
-{
-sink("DESeq2.log")
-# DESeq2
-require('DESeq2')
-library('RColorBrewer')
-if (length(subjects) == 0)
-{
-pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
-deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ Rx))
-} else {
-pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
-deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ subjects + Rx))
-}
-#DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
-#rDESeq = results(DESeq2)
-#newCountDataSet(workCM, group)
-deSeqDatsizefac = estimateSizeFactors(deSEQds)
-deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
-resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
-rDESeq = as.data.frame(results(resDESeq))
-rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
-srDESeq = rDESeq[order(rDESeq\$pvalue),]
-qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
-cat("# DESeq top 50\n")
-print.noquote(srDESeq[1:50,])
-write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
-topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
-DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
-DESeqcounts = rep(0, length(allgenes))
-DESeqcounts[DESeqcountsindex] = 1
-pdf("DESeq2_dispersion_estimates.pdf")
-plotDispEsts(resDESeq)
-dev.off()
-ysmall = abs(min(rDESeq\$log2FoldChange))
-ybig = abs(max(rDESeq\$log2FoldChange))
-ylimit = min(4,ysmall,ybig)
-pdf("DESeq2_MA_plot.pdf")
-plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
-dev.off()
-rlogres = rlogTransformation(resDESeq)
-sampledists = dist( t( assay(rlogres) ) )
-sdmat = as.matrix(sampledists)
-pdf("DESeq2_sample_distance_plot.pdf")
-heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
-col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
-dev.off()
-###outpdfname="DESeq2_top50_heatmap.pdf"
-###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
-sink()
-result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
-if ("try-error" %in% class(result)) {
-print.noquote('DESeq2 plotPCA failed.')
-} else {
-pdf("DESeq2_PCA_plot.pdf")
-#### wtf - print? Seems needed to get this to work
-print(ppca)
-dev.off()
-}
-}
-if (doVoom == T) {
-sink('VOOM.log')
-if (doedgeR == F) {
-#### Setup DGEList object
-DGEList = DGEList(counts=workCM, group = group)
-DGEList = estimateGLMCommonDisp(DGEList,mydesign)
-DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
-DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
-}
-calcNormFactors(DGEList)
-ls = colSums(DGEList\$counts) * DGEList\$samples\$norm.factors
-pdf("VOOM_mean_variance_plot.pdf")
-#dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = ls)
-dat.voomed <- voom(DGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL)
-dev.off()
-# Use limma to fit data
-fit = lmFit(dat.voomed, mydesign)
-fit = eBayes(fit)
-rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
-qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf')
-rownames(rvoom) = rownames(workCM)
-rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
-srvoom = rvoom[order(rvoom\$P.Value),]
-cat("# VOOM top 50\n")
-print(srvoom[1:50,])
-write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F)
-# Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
-topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
-voomcountsindex <- which(allgenes %in% rownames(topresults.voom))
-voomcounts = rep(0, length(allgenes))
-voomcounts[voomcountsindex] = 1
-sink()
-}
 if (doCamera) {
-doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
+doGSEA(y=myDGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
-outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
+outfname=paste("GSEA_Camera",mt,"table.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
 }
 counts.dataframe = c()
 vennmain = 'no venn'
 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
 }
 if (nrow(counts.dataframe > 1)) {
 counts.venn = vennCounts(counts.dataframe)
-vennf = "Venn_significant_genes_overlap.pdf"
+vennf = paste("Venn",mt,"significant_genes_overlap.pdf",sep="_")
 pdf(vennf)
 vennDiagram(counts.venn,main=vennmain,col="maroon")
 dev.off()
 }
 } #### doDESeq2 or doVoom
 builtin_gmt = ""
 history_gmt = ""
 history_gmt_name = ""
 out_edgeR = F
 out_DESeq2 = F
-out_VOOM = "$out_VOOM"
+out_Voom = "$out_VOOM"
 edgeR_robust_meth = "ordinary" # control robust deviance options
 doDESeq2 = $DESeq2.doDESeq2
 doVoom = $doVoom
 doCamera = F
 doedgeR = $edgeR.doedgeR
 edgeR_priordf = 10
 #if $doVoom == "T":
-out_VOOM = "$out_VOOM"
+out_Voom = "$out_VOOM"
 #end if
 #if $DESeq2.doDESeq2 == "T":
 out_DESeq2 = "$out_DESeq2"
 doDESeq2 = T
 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
 Count_Matrix = Count_Matrix[subset(rn,! islib),]
 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) )             #Build a group descriptor
 group = factor(group, levels=c(ControlName,TreatmentName))
 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_")                   #Relable columns
-results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2,
+results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_Voom=out_Voom, out_DESeq2=out_DESeq2,
 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,
 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robust_meth=edgeR_robust_meth)
 sessionInfo()

Mercurial > repos > fubar > differential_count_models

comparison rgedgeRpaired_nocamera.xml @ 111:9f2e0ec3e826 draft