Mercurial > repos > fubar > differential_count_models

diff rgedgeRpaired_nocamera.xml @ 111:9f2e0ec3e826 draft
Uploaded
author: fubar
date: Wed, 22 Oct 2014 23:37:31 -0400
parents: d7e2a0c0cce9
children: badcd3b0e708
--- a/rgedgeRpaired_nocamera.xml	Sun Jul 06 21:00:54 2014 -0400
+++ b/rgedgeRpaired_nocamera.xml	Wed Oct 22 23:37:31 2014 -0400
@@ -1,8 +1,8 @@
-<tool id="rgDifferentialCount" name="Differential_Count" version="0.25">
+<tool id="rgDifferentialCount" name="Differential_Count" version="0.26">
   <description>models using BioConductor packages</description>
   <requirements>
       <requirement type="package" version="2.14">biocbasics</requirement>
-      <requirement type="package" version="3.0.3">R</requirement>
+      <requirement type="package" version="3.1.1">R_3_1_1</requirement>
       <requirement type="package" version="1.3.18">graphicsmagick</requirement>
       <requirement type="package" version="9.10">ghostscript</requirement>
   </requirements>
@@ -193,9 +193,9 @@
 # Original edgeR code by: S.Lunke and A.Kaspi
 reallybig = log10(.Machine\$double.xmax)
 reallysmall = log10(.Machine\$double.xmin)
-library('stringr')
-library('gplots')
-library('edgeR')
+library("stringr")
+library("gplots")
+library("edgeR")
 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')
 {
 # Perform clustering for significant pvalues after controlling FWER
@@ -263,10 +263,10 @@
     dev.off()
 }
 
-smearPlot = function(DGEList,deTags, outSmear, outMain)
+smearPlot = function(myDGEList,deTags, outSmear, outMain)
         {
         pdf(outSmear)
-        plotSmear(DGEList,de.tags=deTags,main=outMain)
+        plotSmear(myDGEList,de.tags=deTags,main=outMain)
         grid(col="lightgray", lty="dotted")
         dev.off()
         }
@@ -513,17 +513,215 @@
   write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
   sink()
   }
- 
+
 
-edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5, 
+edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_Voom=F,out_DESeq2=F,fdrtype='fdr',priordf=5, 
         fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
         filterquantile=0.2, subjects=c(),mydesign=NULL,
         doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
         histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
         doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary') 
 {
+
+
+run_edgeR = function(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR)
+{
+  # Fit an edgeR GLM to workCM, test the last coefficient of mydesign, write the p value sorted table
+  # to out_edgeR and save diagnostic PDFs named edgeR_{mt}_*.pdf; printed output is sunk to edgeR.log.
+  # fdrtype, fdrthresh, myTitle, nonzerod, contigurls, TName, CName and allgenes are not arguments -
+  # they are read from the enclosing edgeIt scope. pdata and subjects are accepted but unused here.
+  # Returns list(myDGEList = fitted DGEList, edgeRcounts = 0/1 per allgenes entry, 1 if adj p is below fdrthresh).
+  sink('edgeR.log')
+  #### Setup myDGEList object
+  myDGEList = DGEList(counts=workCM, group = group)
+  myDGEList = calcNormFactors(myDGEList)
+  if (robust_meth == 'ordinary') {
+       myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
+       myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign)
+       if (priordf > 0) {  myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign,prior.df = priordf) 
+       } else { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) }
+       comdisp = myDGEList\$common.dispersion
+       estpriorn = getPriorN(myDGEList)
+       print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
+     } else { 
+       myDGEList = estimateGLMRobustDisp(myDGEList,design=mydesign, prior.df = priordf, maxit = 6, residual.type = robust_meth)
+          }
+  DGLM = glmFit(myDGEList,design=mydesign)
+  DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
+  efflib = myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors
+  # counts is genes x samples, so scale column-wise: a bare "/" would recycle efflib down the rows
+  normData = 1e+06*sweep(myDGEList\$counts,2,efflib,"/")
+  uoutput = cbind( 
+    Name=as.character(rownames(myDGEList\$counts)),
+    DE\$table,
+    adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
+    Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
+    myDGEList\$counts
+  )
+  soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
+  goodness = gof(DGLM, pcutoff=fdrthresh)
+  if (sum(goodness\$outlier) > 0) {
+    print.noquote('GLM outliers:')
+    print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
+    } else { 
+      print('No GLM fit outlier genes found\n')
+    }
+  z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
+  pdf(paste("edgeR",mt,"GoodnessofFit.pdf",sep='_'))
+  qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
+  abline(0,1,lwd=3)
+  points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
+  dev.off()
+  #### Plot MDS
+  sample_colors =  match(group,levels(group))
+  sampleTypes = levels(factor(group))
+  print.noquote(sampleTypes)
+  pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_'))
+  plotMDS.DGEList(myDGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
+  legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
+  grid(col="blue")
+  dev.off()
+  colnames(normData) = paste( colnames(normData),'N',sep="_")
+  print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
+  nzd = data.frame(log(nonzerod + 1e-2,10))
+  try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_') ))
+  write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
+  tt = cbind( 
+    Name=as.character(rownames(myDGEList)),
+    DE\$table,
+    adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
+    Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums
+  )
+  print.noquote("# edgeR Top tags\n")
+  tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
+  tt = tt[order(DE\$table\$PValue),] 
+  print.noquote(head(tt,50)) # head() avoids NA padded rows when there are fewer than 50 tags
+  deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
+  nsig = length(deTags)
+  print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
+  pdf(paste("edgeR",mt,"BCV_vs_abundance.pdf",sep="_"))
+  plotBCV(myDGEList, cex=0.3, main="Biological CV vs abundance")
+  dev.off()
+  outpdfname= paste("edgeR",mt,"top_100_heatmap.pdf",sep="_")
+  # rows are passed in p value order so the top 100 are the top tags - NOTE(review): confirm hmap2 keeps the leading rows
+  hmap2(normData[order(DE\$table\$PValue),],nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste(myTitle,'Heatmap'))
+  outSmear = paste("edgeR",mt,"smearplot.pdf",sep="_")
+  outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
+  smearPlot(myDGEList=myDGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
+  qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf=paste('edgeR',mt,'qqplot.pdf',sep='_'))
+  topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
+  edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
+  edgeRcounts = rep(0, length(allgenes))
+  edgeRcounts[edgeRcountsindex] = 1  # Create venn diagram of hits
+  sink()
+  return(list(myDGEList=myDGEList,edgeRcounts=edgeRcounts))
+} ### run_edgeR
+
+
+run_DESeq2 = function(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType)
+ {
+    # Run DESeq2 on workCM with design ~ Rx, or ~ subjects + Rx when subjects is non-empty, write the
+    # p value sorted results to out_DESeq2 and save PDFs named DESeq2_{mt}_*.pdf. The pdata argument is
+    # overwritten locally; fdrthresh, myTitle, cmrowsums, contigurls and allgenes come from the enclosing
+    # edgeIt scope. Returns a 0/1 vector aligned with allgenes, 1 where padj is below fdrthresh.
+    sink("DESeq2.log")
+    library('DESeq2') # library() stops here if the package is missing; require() would only warn
+    library('RColorBrewer')
+    if (length(subjects) == 0)
+        {
+        pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
+        deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ Rx))
+        } else {
+        pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
+        deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ subjects + Rx))
+        }
+    # explicit three step pipeline: size factors, dispersions (using DESeq_fitType), then the Wald test
+    deSeqDatsizefac = estimateSizeFactors(deSEQds)
+    deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
+    resDESeq = nbinomWaldTest(deSeqDatdisp)
+    rDESeq = as.data.frame(results(resDESeq))
+    rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
+    srDESeq = rDESeq[order(rDESeq\$pvalue),]
+    qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf=paste('DESeq2',mt,'qqplot.pdf',sep="_"))
+    cat("# DESeq top 50\n")
+    print.noquote(head(srDESeq,50)) # head() avoids NA padded rows when there are fewer than 50 contigs
+    write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
+    topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
+    DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
+    DESeqcounts = rep(0, length(allgenes))
+    DESeqcounts[DESeqcountsindex] = 1
+    pdf(paste("DESeq2",mt,"dispersion_estimates.pdf",sep='_'))
+    plotDispEsts(resDESeq)
+    dev.off()
+    ysmall = abs(min(rDESeq\$log2FoldChange,na.rm=TRUE)) # na.rm: an NA fold change would otherwise make ylim NA
+    ybig = abs(max(rDESeq\$log2FoldChange,na.rm=TRUE))
+    ylimit = min(4,ysmall,ybig)
+    pdf(paste("DESeq2",mt,"MA_plot.pdf",sep="_"))
+    plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
+    dev.off()
+    rlogres = rlogTransformation(resDESeq)
+    sampledists = dist( t( assay(rlogres) ) )
+    sdmat = as.matrix(sampledists)
+    pdf(paste("DESeq2",mt,"sample_distance_plot.pdf",sep="_"))
+    heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
+         col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
+    dev.off()
+    ###outpdfname=paste("DESeq2",mt,"top50_heatmap.pdf",sep="_")
+    ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
+    sink()
+    result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
+    if ("try-error" %in% class(result)) {
+         print.noquote('DESeq2 plotPCA failed.')
+         } else {
+         pdf(paste("DESeq2",mt,"PCA_plot.pdf",sep="_"))
+         # explicit print() - presumably plotPCA returns a plot object that is only drawn when printed (TODO confirm)
+         print(ppca)
+         dev.off()
+        }
+    return(DESeqcounts)
+  }
+
+
+run_Voom = function(workCM,pdata,subjects,group,mydesign,mt,out_Voom)
+  {
+      # voom transform then limma fit of the last mydesign coefficient; writes the p value sorted table to
+      # out_Voom and PDFs named VOOM_{mt}_*.pdf. doedgeR, fdrtype, fdrthresh, myTitle, cmrowsums, contigurls and
+      # allgenes come from the enclosing edgeIt scope. Returns 0/1 per allgenes entry (1 = adj.P.Val below fdrthresh).
+      sink('VOOM.log')
+      if (doedgeR == F) {
+         # build myDGEList here; otherwise the one edgeIt stored from run_edgeR is found through lexical scoping
+         myDGEList = DGEList(counts=workCM, group = group)
+         myDGEList = calcNormFactors(myDGEList)
+         myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
+         myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign) 
+         myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign)
+         }
+      pdf(paste("VOOM",mt,"mean_variance_plot.pdf",sep='_'))
+      dat.voomed <- voom(myDGEList, mydesign, plot = TRUE, normalize.method="quantile", lib.size = NULL)
+      dev.off()
+      # Use limma to fit data; argument names are spelled out rather than relying on partial matching
+      fit = eBayes(lmFit(dat.voomed, mydesign))
+      rvoom = topTable(fit, coef = length(colnames(mydesign)), adjust.method = fdrtype, number = Inf, sort.by = "none")
+      qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf=paste('VOOM',mt,'qqplot.pdf',sep='_'))
+      rownames(rvoom) = rownames(workCM)
+      rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
+      srvoom = rvoom[order(rvoom\$P.Value),]
+      cat("# VOOM top 50\n")
+      print(head(srvoom,50)) # head() avoids NA padded rows when there are fewer than 50 contigs
+      write.table(srvoom,file=out_Voom, quote=FALSE, sep="\t",row.names=F)
+      # Use an FDR cutoff to flag significant contigs, in the same 0/1 form as the edgeR and DESeq2 flags
+      topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
+      voomcounts = as.numeric(allgenes %in% rownames(topresults.voom))
+      sink()
+      return(voomcounts)
+  }
+
+
+#### data cleaning and analysis control starts here
+
   # Error handling
-  if (length(unique(group))!=2){
+  nugroup = length(unique(group))
+  if (nugroup!=2){
     print("Number of conditions identified in experiment does not equal 2")
     q()
     }
@@ -531,7 +729,7 @@
   options(width = 512) 
   mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
   allN = nrow(Count_Matrix)
-  nscut = round(ncol(Count_Matrix)/2)
+  nscut = round(ncol(Count_Matrix)/2) # half samples
   colTotmillionreads = colSums(Count_Matrix)/1e6
   counts.dataframe = as.data.frame(c()) 
   rawrs = rowSums(Count_Matrix)
@@ -563,7 +761,7 @@
   }
   cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
   allgenes = rownames(workCM)
-  reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
+  reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" # ucsc chr:start-end regexp
   genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
   ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
   testreg = str_match(allgenes,reg)
@@ -575,9 +773,9 @@
     print("@@ using genecards substitution for urls")
     contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
   }
-  print.noquote("# urls")
+  print.noquote("# urls sample")
   print.noquote(head(contigurls))
-  print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F) 
+  print(paste("# Total low count contigs per sample = ",table(lo)),quote=F) 
   cmrowsums = rowSums(workCM)
   TName=unique(group)[1]
   CName=unique(group)[2]
@@ -595,197 +793,17 @@
   print.noquote('Using design matrix:')
   print.noquote(mydesign)
   if (doedgeR == T) {
-  sink('edgeR.log')
-  #### Setup DGEList object
-  DGEList = DGEList(counts=workCM, group = group)
-  DGEList = calcNormFactors(DGEList)
-  if (robust_meth == 'ordinary') {
-       DGEList = estimateGLMCommonDisp(DGEList,mydesign)
-       DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
-       DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
+      eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR)
+      myDGEList = eres\$myDGEList
+      edgeRcounts = eres\$edgeRcounts
+   }
+  if (doDESeq2 == T) {  DESeqcounts = run_DESeq2(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) }
+  if (doVoom == T) { voomcounts = run_Voom(workCM,pdata,subjects,group,mydesign,mt,out_Voom) }
 
-       comdisp = DGEList\$common.dispersion
-       estpriorn = getPriorN(DGEList)
-       print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
-     } else { 
-       DGEList = estimateGLMRobustDisp(DGEList,design=mydesign, prior.df = edgeR_priordf, maxit = 6, residual.type = robust_meth)
-          }
-    
-  
-  DGLM = glmFit(DGEList,design=mydesign)
-  DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
-  efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
-  normData = (1e+06*DGEList\$counts/efflib)
-  uoutput = cbind( 
-    Name=as.character(rownames(DGEList\$counts)),
-    DE\$table,
-    adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
-    Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
-    DGEList\$counts
-  )
-  soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
-  goodness = gof(DGLM, pcutoff=fdrthresh)
-  if (sum(goodness\$outlier) > 0) {
-    print.noquote('GLM outliers:')
-    print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
-    } else { 
-      print('No GLM fit outlier genes found\n')
-    }
-  z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
-  pdf("edgeR_GoodnessofFit.pdf")
-  qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
-  abline(0,1,lwd=3)
-  points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
-  dev.off()
-  efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
-  normData = (1e+06*DGEList\$counts/efflib)
-  uniqueg = unique(group)
-  #### Plot MDS
-  sample_colors =  match(group,levels(group))
-  sampleTypes = levels(factor(group))
-  print.noquote(sampleTypes)
-  pdf("edgeR_MDSplot.pdf")
-  plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
-  legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
-  grid(col="blue")
-  dev.off()
-  colnames(normData) = paste( colnames(normData),'N',sep="_")
-  print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
-  nzd = data.frame(log(nonzerod + 1e-2,10))
-  try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") )
-  write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
-  tt = cbind( 
-    Name=as.character(rownames(DGEList\$counts)),
-    DE\$table,
-    adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
-    Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
-  )
-  print.noquote("# edgeR Top tags\n")
-  tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
-  tt = tt[order(DE\$table\$PValue),] 
-  print.noquote(tt[1:50,])
-  deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
-  nsig = length(deTags)
-  print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
-  deColours = ifelse(deTags,'red','black')
-  pdf("edgeR_BCV_vs_abundance.pdf")
-  plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance")
-  dev.off()
-  dg = DGEList[order(DE\$table\$PValue),]
-  #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
-  efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
-  normData = (1e+06*dg\$counts/efflib)
-  outpdfname="edgeR_top_100_heatmap.pdf"
-  hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle))
-  outSmear = "edgeR_smearplot.pdf"
-  outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
-  smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
-  qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
-  norm.factor = DGEList\$samples\$norm.factors
-  topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
-  edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
-  edgeRcounts = rep(0, length(allgenes))
-  edgeRcounts[edgeRcountsindex] = 1  # Create venn diagram of hits
-  sink()
-  } ### doedgeR
-  if (doDESeq2 == T)
-  {
-    sink("DESeq2.log")
-    # DESeq2
-    require('DESeq2')
-    library('RColorBrewer')
-    if (length(subjects) == 0)
-        {
-        pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
-        deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ Rx))
-        } else {
-        pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
-        deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ subjects + Rx))
-        }
-    #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
-    #rDESeq = results(DESeq2)
-    #newCountDataSet(workCM, group)
-    deSeqDatsizefac = estimateSizeFactors(deSEQds)
-    deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
-    resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
-    rDESeq = as.data.frame(results(resDESeq))
-    rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
-    srDESeq = rDESeq[order(rDESeq\$pvalue),]
-    qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
-    cat("# DESeq top 50\n")
-    print.noquote(srDESeq[1:50,])
-    write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
-    topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
-    DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
-    DESeqcounts = rep(0, length(allgenes))
-    DESeqcounts[DESeqcountsindex] = 1
-    pdf("DESeq2_dispersion_estimates.pdf")
-    plotDispEsts(resDESeq)
-    dev.off()
-    ysmall = abs(min(rDESeq\$log2FoldChange))
-    ybig = abs(max(rDESeq\$log2FoldChange))
-    ylimit = min(4,ysmall,ybig)
-    pdf("DESeq2_MA_plot.pdf")
-    plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
-    dev.off()
-    rlogres = rlogTransformation(resDESeq)
-    sampledists = dist( t( assay(rlogres) ) )
-    sdmat = as.matrix(sampledists)
-    pdf("DESeq2_sample_distance_plot.pdf")
-    heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
-         col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
-    dev.off()
-    ###outpdfname="DESeq2_top50_heatmap.pdf"
-    ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
-    sink()
-    result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
-    if ("try-error" %in% class(result)) {
-         print.noquote('DESeq2 plotPCA failed.')
-         } else {
-         pdf("DESeq2_PCA_plot.pdf")
-         #### wtf - print? Seems needed to get this to work
-         print(ppca)
-         dev.off()
-        }
-  }
-
-  if (doVoom == T) {
-      sink('VOOM.log')
-      if (doedgeR == F) {
-         #### Setup DGEList object
-         DGEList = DGEList(counts=workCM, group = group)
-         DGEList = estimateGLMCommonDisp(DGEList,mydesign)
-         DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
-         DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
-         }
-      calcNormFactors(DGEList) 
-      ls = colSums(DGEList\$counts) * DGEList\$samples\$norm.factors
-      pdf("VOOM_mean_variance_plot.pdf")
-      #dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = ls)
-      dat.voomed <- voom(DGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL)
-      dev.off()
-      # Use limma to fit data
-      fit = lmFit(dat.voomed, mydesign)
-      fit = eBayes(fit)
-      rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
-      qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf')
-      rownames(rvoom) = rownames(workCM)
-      rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
-      srvoom = rvoom[order(rvoom\$P.Value),]
-      cat("# VOOM top 50\n")
-      print(srvoom[1:50,])
-      write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F)
-      # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
-      topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
-      voomcountsindex <- which(allgenes %in% rownames(topresults.voom))
-      voomcounts = rep(0, length(allgenes))
-      voomcounts[voomcountsindex] = 1
-      sink()
-  }
 
   if (doCamera) {
-  doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
-    outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
+  doGSEA(y=myDGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
+    outfname=paste("GSEA_Camera",mt,"table.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
   }
   counts.dataframe = c()
   vennmain = 'no venn'
@@ -804,7 +822,7 @@
     
     if (nrow(counts.dataframe > 1)) {
       counts.venn = vennCounts(counts.dataframe)
-      vennf = "Venn_significant_genes_overlap.pdf" 
+      vennf = paste("Venn",mt,"significant_genes_overlap.pdf",sep="_") 
       pdf(vennf)
       vennDiagram(counts.venn,main=vennmain,col="maroon")
       dev.off()
@@ -820,7 +838,7 @@
 history_gmt_name = ""
 out_edgeR = F
 out_DESeq2 = F
-out_VOOM = "$out_VOOM"
+out_Voom = "$out_VOOM"
 edgeR_robust_meth = "ordinary" # control robust deviance options
 doDESeq2 = $DESeq2.doDESeq2
 doVoom = $doVoom
@@ -830,7 +848,7 @@
 
 
 #if $doVoom == "T":
-  out_VOOM = "$out_VOOM"
+  out_Voom = "$out_VOOM"
 #end if
 
 #if $DESeq2.doDESeq2 == "T":
@@ -896,7 +914,7 @@
 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) )             #Build a group descriptor
 group = factor(group, levels=c(ControlName,TreatmentName))
 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_")                   #Relable columns
-results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2,
+results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_Voom=out_Voom, out_DESeq2=out_DESeq2,
                  fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
                  myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,
                  doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
author	fubar
date	Wed, 22 Oct 2014 23:37:31 -0400
parents	d7e2a0c0cce9
children	badcd3b0e708