comparison rgedgeR/rgedgeRpaired.xml @ 16:cddf60746340 draft

Uploaded
author fubar
date Sat, 27 Jul 2013 04:19:51 -0400
parents 993d35bcf98c
children b1cf0745bde5
comparison
equal deleted inserted replaced
15:993d35bcf98c 16:cddf60746340
537 outpdfname="edgeR_heatmap.pdf" 537 outpdfname="edgeR_heatmap.pdf"
538 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=myTitle) 538 hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=myTitle)
539 outSmear = "edgeR_smearplot.pdf" 539 outSmear = "edgeR_smearplot.pdf"
540 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='') 540 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
541 smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain) 541 smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
542 qqPlot(descr=paste(myTitle,'edgeR QQ plot'),pvector=DE\$table\$PValue,outpdf='edgeR_qqplot.pdf') 542 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
543 norm.factor = DGEList\$samples\$norm.factors 543 norm.factor = DGEList\$samples\$norm.factors
544 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ] 544 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
545 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR)) 545 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
546 edgeRcounts = rep(0, length(allgenes)) 546 edgeRcounts = rep(0, length(allgenes))
547 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits 547 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits
562 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType) 562 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
563 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype) 563 resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
564 rDESeq = as.data.frame(results(resDESeq)) 564 rDESeq = as.data.frame(results(resDESeq))
565 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) 565 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
566 srDESeq = rDESeq[order(rDESeq\$pvalue),] 566 srDESeq = rDESeq[order(rDESeq\$pvalue),]
567 qqPlot(descr=paste(myTitle,'DESeq2 qqplot'),pvector=rDESeq\$pvalue,outpdf='DESeq2_qqplot.pdf') 567 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
568 cat("# DESeq top 50\n") 568 cat("# DESeq top 50\n")
569 print.noquote(srDESeq[1:50,]) 569 print.noquote(srDESeq[1:50,])
570 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F) 570 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
571 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ] 571 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
572 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq)) 572 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
617 dev.off() 617 dev.off()
618 # Use limma to fit data 618 # Use limma to fit data
619 fit = lmFit(dat.voomed, mydesign) 619 fit = lmFit(dat.voomed, mydesign)
620 fit = eBayes(fit) 620 fit = eBayes(fit)
621 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none") 621 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
622 qqPlot(descr=paste(myTitle,'VOOM-limma QQ plot'),pvector=rvoom\$P.Value,outpdf='VOOM_qqplot.pdf') 622 qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf')
623 rownames(rvoom) = rownames(workCM) 623 rownames(rvoom) = rownames(workCM)
624 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls) 624 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
625 srvoom = rvoom[order(rvoom\$P.Value),] 625 srvoom = rvoom[order(rvoom\$P.Value),]
626 cat("# VOOM top 50\n") 626 cat("# VOOM top 50\n")
627 print(srvoom[1:50,]) 627 print(srvoom[1:50,])
802 802
803 Some helpful plots and analysis results. Note that most of these are produced using R code 803 Some helpful plots and analysis results. Note that most of these are produced using R code
804 suggested by the excellent documentation and vignettes for the Bioconductor 804 suggested by the excellent documentation and vignettes for the Bioconductor
805 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy. 805 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
806 806
807 **Note on Voom**
808
809 The R help page for voom in limma version 3.16.6 includes the following text from the authors - but you should read the paper to interpret this method.
810
811 This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
812
813 voom is an acronym for mean-variance modelling at the observational level.
814 The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
815 Count data almost always show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
816 This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
817 The weights are then used in the linear modelling process to adjust for heteroscedasticity.
818
819 In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
820 The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
821 The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
822 Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
823 Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
824 This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
825
826
827 Author(s)
828
829 Charity Law and Gordon Smyth
830
831 References
832
833 Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
834
835 Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
836 Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research, Melbourne, Australia.
837 http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
838
839 See Also
840
841 A voom case study is given in the edgeR User's Guide.
842
843 vooma is a similar function but for microarrays instead of RNA-seq.
844
845
807 ***old rant on changes to Bioconductor package variable names between versions*** 846 ***old rant on changes to Bioconductor package variable names between versions***
808 847
809 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue) 848 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue)
810 breaking this and all other code that assumed the old name for this variable, 849 breaking this and all other code that assumed the old name for this variable,
811 between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing). 850 between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing).