Mercurial > repos > bgruening > deseq2
view deseq2.R @ 26:bac4ebab1122 draft
Uploaded
author | bgruening |
---|---|
date | Wed, 26 Nov 2014 12:06:14 -0500 |
parents | 2f52c1a27205 |
children |
line wrap: on
line source
## Setup R error handling to go to stderr options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) # we need that to not crash galaxy with an UTF8 error on German LC settings. Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") library('getopt') library('rjson') library('DESeq2') library("RColorBrewer") library("gplots") options(stringAsfactors = FALSE, useFancyQuotes = FALSE) args <- commandArgs(trailingOnly = TRUE) #get options, using the spec as defined by the enclosed list. #we read the options from the default: commandArgs(TRUE). spec = matrix(c( 'verbose', 'v', 2, "integer", 'help' , 'h', 0, "logical", 'outfile' , 'o', 1, "character", 'outfilefiltered' , 'f', 1, "character", 'plots' , 'p', 2, "character", 'factors', 'm', 2, "character", 'filtermode', 'l', 2, "character", 'threshold', 'c', 2, "double", 'organism', 'g', 2, "character" ), byrow=TRUE, ncol=4); opt = getopt(spec); # if help was asked for print a friendly message # and exit with a non-zero error code if ( !is.null(opt$help) ) { cat(getopt(spec, usage=TRUE)); q(status=1); } if( is.null(opt$filtermode)) opt$filtermode = "absolute" trim <- function (x) gsub("^\\s+|\\s+$", "", x) opt$samples <- trim(opt$samples) opt$factors <- trim(opt$factors) generatePlots <-function(dds, title_prefix){ plotDispEsts(dds, main= paste(title_prefix, "Dispersion estimate plot")) plotMA(dds, main= paste(title_prefix, "MA-plot")) select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30] hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100) rld <- rlogTransformation(dds, blind=TRUE) vsd <- varianceStabilizingTransformation(dds, blind=TRUE) distsRL <- dist(t(assay(rld))) mat <- as.matrix(distsRL) heatmap.2(mat, trace="none", col = rev(hmcol), main = paste(title_prefix, "Sample-to-sample distances"), margin=c(13,13)) } parser <- newJSONParser() parser$addData( opt$factors ) factorsList <- parser$getObject() sampleTable<-data.frame() factorNames<-c() primaryfactor = TRUE for(factor in factorsList){ factorName<-factor[[1]] factorNames<-append(factorNames, factorName) factorValuesMapList<-factor[[2]] c = length(factorValuesMapList) for (factorValuesMap in factorValuesMapList){ for(files in factorValuesMap){ fvc = 0 for(file in files){ fvc = fvc+1 if(primaryfactor) { sampleTable[basename(file),"sampleName"]<-paste(fvc,names(factorValuesMap),sep="_") } sampleTable[basename(file),"fileName"]<-file sampleTable[basename(file),factorName]<-paste(c,names(factorValuesMap),sep="_") } } c = c-1 } primaryfactor = FALSE } factorNames<-rev(factorNames) designFormula <- as.formula(paste("", paste(factorNames, collapse=" + "), sep=" ~ ")) ddsHTSeq = DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, directory = "", design = designFormula) ddsHTSeq <- DESeq(ddsHTSeq) #ddsHTSeq <- nbinomWaldTest(ddsHTSeq, cooksCutoff=FALSE) deseqRes <- results(ddsHTSeq) mcols(deseqRes)$description resSorted <- deseqRes[order(deseqRes$padj),] head(resSorted) write.table(as.data.frame(resSorted), file = opt$outfile, sep="\t", quote = FALSE, append=TRUE, col.names = FALSE) if(opt$filtermode == "absolute"){ use = (deseqRes$baseMean > opt$threshold & !is.na(deseqRes$pvalue)) } else if(opt$filtermode == "quantile"){ opt$threshold = opt$threshold/100 print(paste("Threshold:", opt$threshold)) print(quantile(unique(deseqRes$baseMean), probs = opt$threshold)) use = (deseqRes$baseMean > quantile(unique(deseqRes$baseMean), probs = opt$threshold) & (!is.na(deseqRes$pvalue))) } else{ print(paste("Unknown filtermode:", opt$filtermode)) q("no", 1, FALSE) } ddsHTSeqFilt <- ddsHTSeq[use, ] ddsHTSeqFilt <- DESeq(ddsHTSeqFilt) #ddsHTSeqFilt <- nbinomWaldTest(ddsHTSeqFilt, cooksCutoff=FALSE) deseqResFilt <- results(ddsHTSeqFilt) mcols(deseqResFilt)$description resFiltSorted <- deseqResFilt[order(deseqResFilt$padj),] head(resFiltSorted) write.table(as.data.frame(resFiltSorted), file = opt$outfilefiltered, sep="\t", quote = FALSE, append=TRUE, col.names = FALSE) if(!is.null(opt$plots)){ pdf(opt$plots) generatePlots(ddsHTSeq, "Complete: ") generatePlots(ddsHTSeqFilt, "Filtered: ") ## p-value distribution h1 <- hist(deseqRes$pvalue[!use], breaks=50, plot=FALSE) h2 <- hist(deseqRes$pvalue[use], breaks=50, plot=FALSE) colori <- c(`filtered out`="khaki", `complete`="powderblue") barplot(height = rbind(h1$counts, h2$counts), beside = FALSE, col = colori, space = 0, main = "Distribution of p-values", ylab="frequency") text(x = c(0, length(h1$counts)), y = 0, label = paste(c(0,1)), adj = c(0.5,1.7), xpd=NA) legend("topright", fill=rev(colori), legend=rev(names(colori))) dev.off() } sessionInfo()