Mercurial > repos > fubar > differential_count_models

--- a/rgedgeRpaired_nocamera.xml	Tue Feb 25 23:54:15 2014 -0500
+++ b/rgedgeRpaired_nocamera.xml	Tue Feb 25 23:54:59 2014 -0500
@@ -1,10 +1,10 @@
-<tool id="rgDifferentialCount" name="Differential_Count" version="0.31">
+<tool id="rgDifferentialCount" name="Differential_Count" version="0.22">
   <description>models using BioConductor packages</description>
   <requirements>
       <requirement type="package" version="2.14">biocbasics</requirement>
       <requirement type="package" version="3.0.2">r302</requirement>
       <requirement type="package" version="1.3.18">graphicsmagick</requirement>
-      <requirement type="package" version="9.10">ghostscript</requirement>
+      <requirement type="package" version="9.07">ghostscript</requirement>
   </requirements>

   <command interpreter="python">
@@ -50,9 +50,16 @@
           <option value="T" selected="true">Run edgeR</option>
          </param>
          <when value="T">
-          <param name="edgeR_priordf" type="integer" value="20" size="3"
-           label="prior.df for tagwise dispersion - lower value = more emphasis on each tag's variance. Replaces prior.n  and prior.df = prior.n * residual.df"
-           help="0 = Use edgeR default. Use a small value to 'smooth' small samples. See edgeR docs and note below"/>
+          <param name="edgeR_priordf" type="integer" value="10" size="3"
+           label="prior.df for tagwise dispersion - larger value = more squeezing of tag dispersions to common dispersion. Replaces prior.n  and prior.df = prior.n * residual.df"
+           help="10 = edgeR default. Use a larger value to 'smooth' small samples. See edgeR docs and note below"/>
+          <param name="edgeR_robust" type="select" value="20" size="3"
+           label="Use robust dispersion method"
+           help="Use ordinary, anscombe or deviance robust deviance estimates">
+             <option value="ordinary" selected="true">Use ordinary deviance estimates</option>
+             <option value="deviance">Use robust deviance estimates</option>
+             <option value="anscombe">use Anscombe robust deviance estimates</option>
+          </param>
          </when>
          <when value="F"></when>
     </conditional>
@@ -159,6 +166,7 @@
  <param name='doDESeq2' value='T' />
  <param name='fdrtype' value='fdr' />
  <param name='edgeR_priordf' value="8" />
+ <param name='edgeR_robust' value="ordinary" />
  <param name='fdrthresh' value="0.05" />
  <param name='control_name' value='heart' />
  <param name='subjectids' value='' />
@@ -174,6 +182,7 @@
 <![CDATA[
 #
 # edgeR.Rscript
+# updated feb 2014 adding outlier-robust deviance estimate options by ross for R 3.0.2/bioc 2.13
 # updated npv 2011 for R 2.14.0 and edgeR 2.4.0 by ross
 # Performs DGE on a count table containing n replicates of two conditions
 #
@@ -511,7 +520,7 @@
         filterquantile=0.2, subjects=c(),mydesign=NULL,
         doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
         histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
-        doCook=F,DESeq_fitType="parameteric")
+        doCook=F,DESeq_fitType="parameteric",robustmeth='ordinary')
 {
   # Error handling
   if (length(unique(group))!=2){
@@ -590,15 +599,17 @@
   #### Setup DGEList object
   DGEList = DGEList(counts=workCM, group = group)
   DGEList = calcNormFactors(DGEList)
+  if (robust_meth == 'ordinary') {
+       DGEList = estimateGLMCommonDisp(DGEList,mydesign)
+       DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
+       DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)

-  DGEList = estimateGLMCommonDisp(DGEList,mydesign)
-  comdisp = DGEList\$common.dispersion
-  DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
-  if (edgeR_priordf > 0) {
-    print.noquote(paste("prior.df =",edgeR_priordf))
-    DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
-  } else {
-    DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
+       comdisp = DGEList\$common.dispersion
+       estpriorn = getPriorN(DGEList)
+       print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
+     } else {
+       DGEList = estimateGLMRobustDisp(DGEList,design=mydesign, prior.df = edgeR_priordf, maxit = 6, residual.type = robust_meth)
+          }
   }
   DGLM = glmFit(DGEList,design=mydesign)
   DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
@@ -625,8 +636,6 @@
   abline(0,1,lwd=3)
   points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
   dev.off()
-  estpriorn = getPriorN(DGEList)
-  print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
   efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
   normData = (1e+06*DGEList\$counts/efflib)
   uniqueg = unique(group)
@@ -697,7 +706,7 @@
     #newCountDataSet(workCM, group)
     deSeqDatsizefac = estimateSizeFactors(deSEQds)
     deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
-    resDESeq = nbinomWaldTest(deSeqDatdisp)
+    resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
     rDESeq = as.data.frame(results(resDESeq))
     rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
     srDESeq = rDESeq[order(rDESeq\$pvalue),]
@@ -744,13 +753,11 @@
       if (doedgeR == F) {
          #### Setup DGEList object
          DGEList = DGEList(counts=workCM, group = group)
-         DGEList = calcNormFactors(DGEList)
          DGEList = estimateGLMCommonDisp(DGEList,mydesign)
          DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
          DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
-         DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
-         norm.factor = DGEList\$samples\$norm.factors
          }
+      norm.factor = calcNormFactors(DGEList)
       pdf("VOOM_mean_variance_plot.pdf")
       dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor)
       dev.off()
@@ -810,11 +817,12 @@
 out_edgeR = F
 out_DESeq2 = F
 out_VOOM = "$out_VOOM"
+edgeR_robust_meth = $edgeR_robust # control robust deviance options
 doDESeq2 = $DESeq2.doDESeq2 # make these T or F
 doVoom = $doVoom
 doCamera = F
 doedgeR = $edgeR.doedgeR
-edgeR_priordf = 0
+edgeR_priordf = 10


 #if $doVoom == "T":
@@ -886,7 +894,7 @@
                  fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
                  myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,
                  doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
-                 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType)
+                 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robustmeth=edgeR_robust_meth)
 sessionInfo()
 ]]>
 </configfile>