Mercurial > repos > fubar > differential_count_models
comparison rgedgeRpaired_nocamera.xml @ 77:4a2e7a9725b2 draft
Uploaded
author | fubar |
---|---|
date | Tue, 25 Feb 2014 23:54:59 -0500 |
parents | 151bf55e018a |
children | 340d5460f3ff |
comparison
equal
deleted
inserted
replaced
76:2a377f98ab76 | 77:4a2e7a9725b2 |
---|---|
1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.31"> | 1 <tool id="rgDifferentialCount" name="Differential_Count" version="0.22"> |
2 <description>models using BioConductor packages</description> | 2 <description>models using BioConductor packages</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="2.14">biocbasics</requirement> | 4 <requirement type="package" version="2.14">biocbasics</requirement> |
5 <requirement type="package" version="3.0.2">r302</requirement> | 5 <requirement type="package" version="3.0.2">r302</requirement> |
6 <requirement type="package" version="1.3.18">graphicsmagick</requirement> | 6 <requirement type="package" version="1.3.18">graphicsmagick</requirement> |
7 <requirement type="package" version="9.10">ghostscript</requirement> | 7 <requirement type="package" version="9.07">ghostscript</requirement> |
8 </requirements> | 8 </requirements> |
9 | 9 |
10 <command interpreter="python"> | 10 <command interpreter="python"> |
11 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts" | 11 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts" |
12 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" | 12 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" |
48 help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates"> | 48 help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates"> |
49 <option value="F">Do not run edgeR</option> | 49 <option value="F">Do not run edgeR</option> |
50 <option value="T" selected="true">Run edgeR</option> | 50 <option value="T" selected="true">Run edgeR</option> |
51 </param> | 51 </param> |
52 <when value="T"> | 52 <when value="T"> |
53 <param name="edgeR_priordf" type="integer" value="20" size="3" | 53 <param name="edgeR_priordf" type="integer" value="10" size="3" |
54 label="prior.df for tagwise dispersion - lower value = more emphasis on each tag's variance. Replaces prior.n and prior.df = prior.n * residual.df" | 54 label="prior.df for tagwise dispersion - larger value = more squeezing of tag dispersions to common dispersion. Replaces prior.n and prior.df = prior.n * residual.df" |
55 help="0 = Use edgeR default. Use a small value to 'smooth' small samples. See edgeR docs and note below"/> | 55 help="10 = edgeR default. Use a larger value to 'smooth' small samples. See edgeR docs and note below"/> |
56 <param name="edgeR_robust" type="select" value="20" size="3" | |
57 label="Use robust dispersion method" | |
58 help="Use ordinary, anscombe or deviance robust deviance estimates"> | |
59 <option value="ordinary" selected="true">Use ordinary deviance estimates</option> | |
60 <option value="deviance">Use robust deviance estimates</option> | |
61 <option value="anscombe">use Anscombe robust deviance estimates</option> | |
62 </param> | |
56 </when> | 63 </when> |
57 <when value="F"></when> | 64 <when value="F"></when> |
58 </conditional> | 65 </conditional> |
59 <conditional name="DESeq2"> | 66 <conditional name="DESeq2"> |
60 <param name="doDESeq2" type="select" | 67 <param name="doDESeq2" type="select" |
157 <param name='doedgeR' value='T' /> | 164 <param name='doedgeR' value='T' /> |
158 <param name='doVoom' value='T' /> | 165 <param name='doVoom' value='T' /> |
159 <param name='doDESeq2' value='T' /> | 166 <param name='doDESeq2' value='T' /> |
160 <param name='fdrtype' value='fdr' /> | 167 <param name='fdrtype' value='fdr' /> |
161 <param name='edgeR_priordf' value="8" /> | 168 <param name='edgeR_priordf' value="8" /> |
169 <param name='edgeR_robust' value="ordinary" /> | |
162 <param name='fdrthresh' value="0.05" /> | 170 <param name='fdrthresh' value="0.05" /> |
163 <param name='control_name' value='heart' /> | 171 <param name='control_name' value='heart' /> |
164 <param name='subjectids' value='' /> | 172 <param name='subjectids' value='' /> |
165 <param name='Control_cols' value='3,4,5,9' /> | 173 <param name='Control_cols' value='3,4,5,9' /> |
166 <param name='Treat_cols' value='2,6,7,8' /> | 174 <param name='Treat_cols' value='2,6,7,8' /> |
172 <configfiles> | 180 <configfiles> |
173 <configfile name="runme"> | 181 <configfile name="runme"> |
174 <![CDATA[ | 182 <![CDATA[ |
175 # | 183 # |
176 # edgeR.Rscript | 184 # edgeR.Rscript |
185 # updated feb 2014 adding outlier-robust deviance estimate options by ross for R 3.0.2/bioc 2.13 | |
177 # updated nov 2011 for R 2.14.0 and edgeR 2.4.0 by ross | 186 # updated nov 2011 for R 2.14.0 and edgeR 2.4.0 by ross |
178 # Performs DGE on a count table containing n replicates of two conditions | 187 # Performs DGE on a count table containing n replicates of two conditions |
179 # | 188 # |
180 # Parameters | 189 # Parameters |
181 # | 190 # |
509 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5, | 518 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5, |
510 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F, | 519 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F, |
511 filterquantile=0.2, subjects=c(),mydesign=NULL, | 520 filterquantile=0.2, subjects=c(),mydesign=NULL, |
512 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19', | 521 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19', |
513 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", | 522 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", |
514 doCook=F,DESeq_fitType="parameteric") | 523 doCook=F,DESeq_fitType="parameteric",robustmeth='ordinary') |
515 { | 524 { |
516 # Error handling | 525 # Error handling |
517 if (length(unique(group))!=2){ | 526 if (length(unique(group))!=2){ |
518 print("Number of conditions identified in experiment does not equal 2") | 527 print("Number of conditions identified in experiment does not equal 2") |
519 q() | 528 q() |
588 if (doedgeR) { | 597 if (doedgeR) { |
589 sink('edgeR.log') | 598 sink('edgeR.log') |
590 #### Setup DGEList object | 599 #### Setup DGEList object |
591 DGEList = DGEList(counts=workCM, group = group) | 600 DGEList = DGEList(counts=workCM, group = group) |
592 DGEList = calcNormFactors(DGEList) | 601 DGEList = calcNormFactors(DGEList) |
593 | 602 if (robust_meth == 'ordinary') { |
594 DGEList = estimateGLMCommonDisp(DGEList,mydesign) | 603 DGEList = estimateGLMCommonDisp(DGEList,mydesign) |
595 comdisp = DGEList\$common.dispersion | 604 DGEList = estimateGLMTrendedDisp(DGEList,mydesign) |
596 DGEList = estimateGLMTrendedDisp(DGEList,mydesign) | 605 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf) |
597 if (edgeR_priordf > 0) { | 606 |
598 print.noquote(paste("prior.df =",edgeR_priordf)) | 607 comdisp = DGEList\$common.dispersion |
599 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf) | 608 estpriorn = getPriorN(DGEList) |
600 } else { | 609 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F) |
601 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) | 610 } else { |
611 DGEList = estimateGLMRobustDisp(DGEList,design=mydesign, prior.df = edgeR_priordf, maxit = 6, residual.type = robust_meth) | |
612 } | |
602 } | 613 } |
603 DGLM = glmFit(DGEList,design=mydesign) | 614 DGLM = glmFit(DGEList,design=mydesign) |
604 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed | 615 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed |
605 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors | 616 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors |
606 normData = (1e+06*DGEList\$counts/efflib) | 617 normData = (1e+06*DGEList\$counts/efflib) |
623 pdf("edgeR_GoodnessofFit.pdf") | 634 pdf("edgeR_GoodnessofFit.pdf") |
624 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion") | 635 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion") |
625 abline(0,1,lwd=3) | 636 abline(0,1,lwd=3) |
626 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon") | 637 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon") |
627 dev.off() | 638 dev.off() |
628 estpriorn = getPriorN(DGEList) | |
629 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F) | |
630 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors | 639 efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors |
631 normData = (1e+06*DGEList\$counts/efflib) | 640 normData = (1e+06*DGEList\$counts/efflib) |
632 uniqueg = unique(group) | 641 uniqueg = unique(group) |
633 #### Plot MDS | 642 #### Plot MDS |
634 sample_colors = match(group,levels(group)) | 643 sample_colors = match(group,levels(group)) |
695 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype) | 704 #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype) |
696 #rDESeq = results(DESeq2) | 705 #rDESeq = results(DESeq2) |
697 #newCountDataSet(workCM, group) | 706 #newCountDataSet(workCM, group) |
698 deSeqDatsizefac = estimateSizeFactors(deSEQds) | 707 deSeqDatsizefac = estimateSizeFactors(deSEQds) |
699 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType) | 708 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType) |
700 resDESeq = nbinomWaldTest(deSeqDatdisp) | 709 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype) |
701 rDESeq = as.data.frame(results(resDESeq)) | 710 rDESeq = as.data.frame(results(resDESeq)) |
702 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) | 711 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) |
703 srDESeq = rDESeq[order(rDESeq\$pvalue),] | 712 srDESeq = rDESeq[order(rDESeq\$pvalue),] |
704 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf') | 713 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf') |
705 cat("# DESeq top 50\n") | 714 cat("# DESeq top 50\n") |
742 if (doVoom == T) { | 751 if (doVoom == T) { |
743 sink('VOOM.log') | 752 sink('VOOM.log') |
744 if (doedgeR == F) { | 753 if (doedgeR == F) { |
745 #### Setup DGEList object | 754 #### Setup DGEList object |
746 DGEList = DGEList(counts=workCM, group = group) | 755 DGEList = DGEList(counts=workCM, group = group) |
747 DGEList = calcNormFactors(DGEList) | |
748 DGEList = estimateGLMCommonDisp(DGEList,mydesign) | 756 DGEList = estimateGLMCommonDisp(DGEList,mydesign) |
749 DGEList = estimateGLMTrendedDisp(DGEList,mydesign) | 757 DGEList = estimateGLMTrendedDisp(DGEList,mydesign) |
750 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) | 758 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) |
751 DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) | |
752 norm.factor = DGEList\$samples\$norm.factors | |
753 } | 759 } |
760 norm.factor = calcNormFactors(DGEList) | |
754 pdf("VOOM_mean_variance_plot.pdf") | 761 pdf("VOOM_mean_variance_plot.pdf") |
755 dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor) | 762 dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor) |
756 dev.off() | 763 dev.off() |
757 # Use limma to fit data | 764 # Use limma to fit data |
758 fit = lmFit(dat.voomed, mydesign) | 765 fit = lmFit(dat.voomed, mydesign) |
808 history_gmt = "" | 815 history_gmt = "" |
809 history_gmt_name = "" | 816 history_gmt_name = "" |
810 out_edgeR = F | 817 out_edgeR = F |
811 out_DESeq2 = F | 818 out_DESeq2 = F |
812 out_VOOM = "$out_VOOM" | 819 out_VOOM = "$out_VOOM" |
820 edgeR_robust_meth = $edgeR_robust # control robust deviance options | |
813 doDESeq2 = $DESeq2.doDESeq2 # make these T or F | 821 doDESeq2 = $DESeq2.doDESeq2 # make these T or F |
814 doVoom = $doVoom | 822 doVoom = $doVoom |
815 doCamera = F | 823 doCamera = F |
816 doedgeR = $edgeR.doedgeR | 824 doedgeR = $edgeR.doedgeR |
817 edgeR_priordf = 0 | 825 edgeR_priordf = 10 |
818 | 826 |
819 | 827 |
820 #if $doVoom == "T": | 828 #if $doVoom == "T": |
821 out_VOOM = "$out_VOOM" | 829 out_VOOM = "$out_VOOM" |
822 #end if | 830 #end if |
884 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns | 892 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns |
885 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2, | 893 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2, |
886 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', | 894 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', |
887 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, | 895 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, |
888 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org, | 896 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org, |
889 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType) | 897 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robustmeth=edgeR_robust_meth) |
890 sessionInfo() | 898 sessionInfo() |
891 ]]> | 899 ]]> |
892 </configfile> | 900 </configfile> |
893 </configfiles> | 901 </configfiles> |
894 <help> | 902 <help> |