testing_deseq2: deseq2.R annotate

annotate deseq2.R @ 2:11342b3cd769 draft default tip

Uploaded

author	moheydarian
date	Thu, 30 Mar 2017 12:04:28 -0400
parents	f961752f5419
children

rev	line source
1 f961752f5419 Uploaded moheydarian parents: diff changeset	1 #!/usr/bin/env Rscript
f961752f5419 Uploaded moheydarian parents: diff changeset	2
f961752f5419 Uploaded moheydarian parents: diff changeset	3 # A command-line interface to DESeq2 for use with Galaxy
f961752f5419 Uploaded moheydarian parents: diff changeset	4 # written by Bjoern Gruening and modified by Michael Love 2016.03.30
f961752f5419 Uploaded moheydarian parents: diff changeset	5 #
f961752f5419 Uploaded moheydarian parents: diff changeset	6 # one of these arguments is required:
f961752f5419 Uploaded moheydarian parents: diff changeset	7 #
f961752f5419 Uploaded moheydarian parents: diff changeset	8 # 'factors' a JSON list object from Galaxy
f961752f5419 Uploaded moheydarian parents: diff changeset	9 #
f961752f5419 Uploaded moheydarian parents: diff changeset	10 # 'sample_table' is a sample table as described in ?DESeqDataSetFromHTSeq
f961752f5419 Uploaded moheydarian parents: diff changeset	11 # with columns: sample name, filename, then factors (variables)
f961752f5419 Uploaded moheydarian parents: diff changeset	12 #
f961752f5419 Uploaded moheydarian parents: diff changeset	13 # the output file has columns:
f961752f5419 Uploaded moheydarian parents: diff changeset	14 #
f961752f5419 Uploaded moheydarian parents: diff changeset	15 # baseMean (mean normalized count)
f961752f5419 Uploaded moheydarian parents: diff changeset	16 # log2FoldChange (by default a moderated LFC estimate)
f961752f5419 Uploaded moheydarian parents: diff changeset	17 # lfcSE (the standard error)
f961752f5419 Uploaded moheydarian parents: diff changeset	18 # stat (the Wald statistic)
f961752f5419 Uploaded moheydarian parents: diff changeset	19 # pvalue (p-value from comparison of Wald statistic to a standard Normal)
f961752f5419 Uploaded moheydarian parents: diff changeset	20 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter)
f961752f5419 Uploaded moheydarian parents: diff changeset	21 #
f961752f5419 Uploaded moheydarian parents: diff changeset	22 # the first variable in 'factors' or the first factor column (the third column) in 'sample_table' will be the primary factor.
f961752f5419 Uploaded moheydarian parents: diff changeset	23 # the levels of the primary factor are used in the order of appearance in factors or in sample_table.
f961752f5419 Uploaded moheydarian parents: diff changeset	24 #
f961752f5419 Uploaded moheydarian parents: diff changeset	25 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile'
f961752f5419 Uploaded moheydarian parents: diff changeset	26 #
f961752f5419 Uploaded moheydarian parents: diff changeset	27 # for the 'many_contrasts' flag, levels in the order A,B,C produces comparisons C vs A, B vs A, C vs B,
f961752f5419 Uploaded moheydarian parents: diff changeset	28 # to a number of files using the 'outfile' prefix: 'outfile.condition_C_vs_A' etc.
f961752f5419 Uploaded moheydarian parents: diff changeset	29 # all plots will still be sent to a single PDF, named by the arg 'plots', with extra pages.
f961752f5419 Uploaded moheydarian parents: diff changeset	30 #
f961752f5419 Uploaded moheydarian parents: diff changeset	31 # fit_type is an integer valued argument, with the options from ?estimateDispersions
f961752f5419 Uploaded moheydarian parents: diff changeset	32 # 1 "parametric"
f961752f5419 Uploaded moheydarian parents: diff changeset	33 # 2 "local"
f961752f5419 Uploaded moheydarian parents: diff changeset	34 # 3 "mean"
f961752f5419 Uploaded moheydarian parents: diff changeset	35
# setup R error handling to go to stderr: on any error the message is written
# to stderr and the session quits with status 1 without saving the workspace
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q(save = "no", status = 1, runLast = FALSE)
  }
)

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

library("getopt")
library("tools")
# the option is spelled 'stringsAsFactors'; a misspelled name is stored by
# options() without complaint but never consulted by data.frame()/read.table()
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
args <- commandArgs(trailingOnly = TRUE)
f961752f5419 Uploaded moheydarian parents: diff changeset	46
# Option specification handed to getopt, one row per option:
# long name, short flag, argument mask, value type.
# The options themselves are read from the default source, commandArgs(TRUE).
spec <- rbind(
  c("quiet", "q", "0", "logical"),
  c("help", "h", "0", "logical"),
  c("outfile", "o", "1", "character"),
  c("countsfile", "n", "1", "character"),
  c("factors", "f", "1", "character"),
  c("plots", "p", "1", "character"),
  c("sample_table", "s", "1", "character"),
  c("tximport", "i", "0", "logical"),
  # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
  c("tx2gene", "x", "1", "character"),
  c("fit_type", "t", "1", "integer"),
  c("many_contrasts", "m", "0", "logical"),
  c("outlier_replace_off", "a", "0", "logical"),
  c("outlier_filter_off", "b", "0", "logical"),
  c("auto_mean_filter_off", "c", "0", "logical"),
  c("beta_prior_off", "d", "0", "logical")
)
opt <- getopt(spec)
f961752f5419 Uploaded moheydarian parents: diff changeset	67
# if help was asked for print a friendly message
# and exit with a non-zero error code
if (!is.null(opt$help)) {
  cat(getopt(spec, usage = TRUE))
  q(status = 1)
}

# enforce the following required arguments
if (is.null(opt$outfile)) {
  cat("'outfile' is required\n")
  q(status = 1)
}
# scalar condition: use the short-circuiting '&&' rather than elementwise '&'
if (is.null(opt$sample_table) && is.null(opt$factors)) {
  cat("'factors' or 'sample_table' is required\n")
  q(status = 1)
}

# progress messages are printed unless the 'quiet' flag was given
verbose <- is.null(opt$quiet)
f961752f5419 Uploaded moheydarian parents: diff changeset	90
# tximport mode is enabled by the 'tximport' flag and needs 'tx2gene', which
# is either a GTF file (detected by its extension, case-insensitively) or a
# headerless transcript-to-gene table that is read right away.
useTXI <- !is.null(opt$tximport)
if (useTXI) {
  if (is.null(opt$tx2gene)) {
    stop("A transcript-to-gene map or a GTF file is required for tximport")
  }
  isGtf <- tolower(file_ext(opt$tx2gene)) == "gtf"
  gtfFile <- if (isGtf) opt$tx2gene else NULL
  if (!isGtf) {
    tx2gene <- read.table(opt$tx2gene, header = FALSE)
  }
}
f961752f5419 Uploaded moheydarian parents: diff changeset	103
f961752f5419 Uploaded moheydarian parents: diff changeset	104 suppressPackageStartupMessages({
f961752f5419 Uploaded moheydarian parents: diff changeset	105 library("DESeq2")
f961752f5419 Uploaded moheydarian parents: diff changeset	106 library("RColorBrewer")
f961752f5419 Uploaded moheydarian parents: diff changeset	107 library("gplots")
f961752f5419 Uploaded moheydarian parents: diff changeset	108 })
f961752f5419 Uploaded moheydarian parents: diff changeset	109
f961752f5419 Uploaded moheydarian parents: diff changeset	110 # build or read sample table
f961752f5419 Uploaded moheydarian parents: diff changeset	111
# Strip leading and trailing whitespace from every element of `x`.
# The two anchored patterns are joined with a plain regex alternation '|';
# writing it as '\|' inside an R string literal is an unrecognized escape.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
f961752f5419 Uploaded moheydarian parents: diff changeset	113
# switch on if 'factors' was provided:
if (!is.null(opt$factors)) {
  library("rjson")
  parser <- newJSONParser()
  parser$addData(opt$factors)
  factorList <- parser$getObject()
  # each entry of factorList is used as: [[1]] the factor name, [[2]] a list of
  # single-entry mappings from a level name to the files of that level.
  # Names are trimmed here so they match the trimmed column names built below.
  factors <- trim(vapply(factorList, function(x) x[[1]], character(1)))
  filenamesIn <- unname(unlist(factorList[[1]][[2]]))
  # rows are keyed by full file path while the factor columns are filled in
  sampleTable <- data.frame(sample = basename(filenamesIn),
                            filename = filenamesIn,
                            row.names = filenamesIn,
                            stringsAsFactors = FALSE)
  for (factorSpec in factorList) {
    factorName <- trim(factorSpec[[1]])
    levelEntries <- factorSpec[[2]]
    # trim the level names once, so that the cell values and the levels handed
    # to factor() below are guaranteed to agree
    lvls <- trim(vapply(levelEntries, function(x) names(x), character(1)))
    sampleTable[[factorName]] <- character(nrow(sampleTable))
    for (i in seq_along(levelEntries)) {
      files <- levelEntries[[i]][[1]]
      sampleTable[files, factorName] <- lvls[i]
    }
    sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels = lvls)
  }
  rownames(sampleTable) <- sampleTable$sample
} else {
  # read the sample_table argument
  # this table is described in ?DESeqDataSet
  # one column for the sample name, one for the filename, and
  # the remaining columns for factors in the analysis
  sampleTable <- read.delim(opt$sample_table, stringsAsFactors = FALSE)
  factors <- colnames(sampleTable)[-c(1:2)]
  for (factorName in factors) {
    # levels keep their order of first appearance in the table
    lvls <- unique(as.character(sampleTable[[factorName]]))
    sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels = lvls)
  }
}
f961752f5419 Uploaded moheydarian parents: diff changeset	150
# The first factor is the primary one; the design formula lists the factors
# in reverse order, so the primary factor becomes its last term.
primaryFactor <- factors[1]
designTerms <- paste(rev(factors), collapse = " + ")
designFormula <- as.formula(paste("~", designTerms))

if (verbose) {
  cat("DESeq2 run information\n\n")
  cat("sample table:\n")
  # only the factor columns are shown (sample name and filename are dropped)
  factorColumns <- sampleTable[, -c(1:2), drop = FALSE]
  print(factorColumns)
  cat("\ndesign formula:\n")
  print(designFormula)
  cat("\n\n")
}
f961752f5419 Uploaded moheydarian parents: diff changeset	162
# these are plots which are made once for each analysis:
# a PCA plot, a sample-to-sample distance heatmap and the dispersion estimates.
#   dds      the DESeqDataSet passed to rlog() and plotDispEsts()
#   factors  names of the colData columns used for grouping and labelling
generateGenericPlots <- function(dds, factors) {
  rld <- rlog(dds)
  pcaData <- plotPCA(rld, intgroup = rev(factors), returnData = TRUE)
  # one label per sample: running index followed by its factor values
  sampleInfo <- as.list(colData(dds)[factors])
  labs <- paste0(seq_len(ncol(dds)), ": ", do.call(paste, sampleInfo))
  library(ggplot2)
  # 'labs' is a local variable, hence the explicit environment for aes()
  pcaPlot <- ggplot(pcaData,
                    aes(x = PC1, y = PC2, col = group, label = factor(labs)),
                    environment = environment()) +
    geom_point() +
    geom_text(size = 3)
  print(pcaPlot)

  # distances between samples on the rlog scale, clustered for the heatmap
  rlogValues <- assay(rld)
  colnames(rlogValues) <- labs
  sampleDists <- dist(t(rlogValues))
  distMatrix <- as.matrix(sampleDists)
  sampleTree <- hclust(sampleDists)
  palette100 <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
  heatmap.2(distMatrix, Rowv = as.dendrogram(sampleTree), symm = TRUE,
            trace = "none", col = rev(palette100),
            main = "Sample-to-sample distances", margin = c(13, 13))

  plotDispEsts(dds, main = "Dispersion estimates")
}
f961752f5419 Uploaded moheydarian parents: diff changeset	180
# these are plots which can be made for each comparison, e.g.
# once for C vs A and once for B vs A: a p-value histogram and an MA-plot.
#   res           results table with baseMean, pvalue and log2FoldChange
#   threshold     baseMean value; genes not above it are drawn as "filtered"
#   title_suffix  text appended to both plot titles
generateSpecificPlots <- function(res, threshold, title_suffix) {
  pBreaks <- 0:50 / 50
  histTitle <- paste("Histogram of p-values for", title_suffix)
  aboveThreshold <- res$baseMean > threshold

  if (sum(!aboveThreshold) == 0) {
    # nothing was filtered: a single-colour histogram of all p-values
    allCounts <- hist(res$pvalue, breaks = pBreaks, plot = FALSE)$counts
    barplot(height = allCounts,
            col = "powderblue", space = 0, xlab = "p-values", ylab = "frequency",
            main = histTitle)
    text(x = c(0, length(allCounts)), y = 0, label = paste(c(0, 1)),
         adj = c(0.5, 1.7), xpd = NA)
  } else {
    # stacked histogram: filtered genes at the bottom, the rest on top
    lowCounts <- hist(res$pvalue[!aboveThreshold], breaks = pBreaks, plot = FALSE)$counts
    keptCounts <- hist(res$pvalue[aboveThreshold], breaks = pBreaks, plot = FALSE)$counts
    colori <- c("filtered (low count)" = "khaki", "not filtered" = "powderblue")
    barplot(height = rbind(lowCounts, keptCounts), beside = FALSE,
            col = colori, space = 0, xlab = "p-values", ylab = "frequency",
            main = histTitle)
    text(x = c(0, length(lowCounts)), y = 0, label = paste(c(0, 1)),
         adj = c(0.5, 1.7), xpd = NA)
    legend("topright", fill = rev(colori), legend = rev(names(colori)), bg = "white")
  }

  # the y-axis spans the full range of the observed log2 fold changes
  lfcRange <- range(res$log2FoldChange, na.rm = TRUE)
  plotMA(res, main = paste("MA-plot for", title_suffix), ylim = lfcRange)
}
f961752f5419 Uploaded moheydarian parents: diff changeset	203
# report which factor is the primary one and which others are in the design
if (verbose) {
  cat(paste("primary factor:",primaryFactor,"\n"))
  if (length(factors) > 1) {
    # the primary factor is factors[1], so the other factors are factors[-1]
    # (dropping the last element instead would list the primary factor here)
    cat(paste("other factors in design:",paste(factors[-1],collapse=","),"\n"))
  }
  cat("\n---------------------\n")
}
f961752f5419 Uploaded moheydarian parents: diff changeset	211
# count files named in the JSON 'factors' input from Galaxy are absolute
# paths, so no directory is prepended; files named in a sample_table are
# assumed to be relative to the current directory
dir <- if (!is.null(opt$factors)) "" else "."
f961752f5419 Uploaded moheydarian parents: diff changeset	219
if (useTXI) {
  # construct the object using tximport
  # first need to make the tx2gene table
  # this takes ~2-3 minutes using Bioconductor functions
  if (!is.null(gtfFile)) {
    suppressPackageStartupMessages({
      library("GenomicFeatures")
    })
    txdb <- makeTxDbFromGFF(gtfFile, format = "gtf")
    geneIds <- keys(txdb, keytype = "GENEID")
    geneToTx <- select(txdb, keys = geneIds, keytype = "GENEID", columns = "TXNAME")
    tx2gene <- geneToTx[, 2:1] # tx ID, then gene ID
  }
  library("tximport")
  # quantification files (column 2) named by sample (column 1)
  txiFiles <- setNames(as.character(sampleTable[, 2]),
                       as.character(sampleTable[, 1]))
  txi <- tximport(txiFiles, type = "sailfish", tx2gene = tx2gene)
  # only the factor columns are passed on as sample annotation
  factorColumns <- sampleTable[, 3:ncol(sampleTable), drop = FALSE]
  dds <- DESeqDataSetFromTximport(txi, factorColumns, designFormula)
} else {
  # construct the object from HTSeq files
  dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
                                    directory = dir,
                                    design = designFormula)
}
f961752f5419 Uploaded moheydarian parents: diff changeset	246
if (verbose) cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n"))

# Each '*_off' flag disables a behaviour that is otherwise on; when verbose,
# a note is printed for every behaviour that was switched off.

# optional outlier behavior
minRep <- if (is.null(opt$outlier_replace_off)) 7 else Inf
if (verbose && is.infinite(minRep)) cat("outlier replacement off\n")

cooksCutoff <- is.null(opt$outlier_filter_off)
if (verbose && !cooksCutoff) cat("outlier filtering off\n")

# optional automatic mean filtering
independentFiltering <- is.null(opt$auto_mean_filter_off)
if (verbose && !independentFiltering) cat("automatic filtering on the mean off\n")

# shrinkage of LFCs
betaPrior <- is.null(opt$beta_prior_off)
if (verbose && !betaPrior) cat("beta prior off\n")
f961752f5419 Uploaded moheydarian parents: diff changeset	277
# dispersion fit type: 'fit_type' is a 1-based index into fitTypes and
# defaults to the first entry when the option is not given
fitTypes <- c("parametric", "local", "mean")
if (is.null(opt$fit_type)) {
  fitType <- fitTypes[1]
} else {
  # an index outside 1..3 would otherwise yield NA or an empty vector and
  # only fail later with an unrelated-looking message
  if (!isTRUE(opt$fit_type %in% seq_along(fitTypes))) {
    stop("'fit_type' must be 1 (parametric), 2 (local) or 3 (mean)")
  }
  fitType <- fitTypes[opt$fit_type]
}

if (verbose) cat(paste("using disperion fit type:",fitType,"\n"))

# run the analysis
dds <- DESeq(dds, fitType = fitType, betaPrior = betaPrior,
             minReplicatesForReplace = minRep)
f961752f5419 Uploaded moheydarian parents: diff changeset	289
# create the generic plots and leave the device open
# (the contrast-specific plots are added to the same PDF later on)
if (!is.null(opt$plots)) {
  if (verbose) cat("creating plots\n")
  pdf(opt$plots)
  generateGenericPlots(dds, factors)
}

# levels of the primary factor, in their stored order, and how many there are
primaryValues <- colData(dds)[[primaryFactor]]
allLevels <- levels(primaryValues)
n <- length(allLevels)

# optionally write the normalized counts, one column per sample, with each
# column labelled by the sample's index and its factor values
if (!is.null(opt$countsfile)) {
  sampleInfo <- as.list(colData(dds)[factors])
  labs <- paste0(seq_len(ncol(dds)), ": ", do.call(paste, sampleInfo))
  normalizedCounts <- counts(dds, normalized = TRUE)
  colnames(normalizedCounts) <- labs
  write.table(normalizedCounts, file = opt$countsfile, sep = "\t",
              col.names = NA, quote = FALSE)
}
f961752f5419 Uploaded moheydarian parents: diff changeset	306
# Compute the results for one contrast of the primary factor (lvl vs ref),
# write them sorted by adjusted p-value to `filename` as a headerless
# tab-separated table, and optionally add the contrast-specific plots to the
# currently open device.
#   dds                   the fitted DESeqDataSet
#   primaryFactor         name of the factor being contrasted
#   lvl, ref              numerator and reference level of the contrast
#   filename              path of the output table
#   cooksCutoff           passed on to results()
#   independentFiltering  passed on to results(); also selects the threshold
#                         used to colour the p-value histogram
#   makePlots             whether to call generateSpecificPlots()
#   showSummary           whether to print a summary of the results first
runContrast <- function(dds, primaryFactor, lvl, ref, filename,
                        cooksCutoff, independentFiltering,
                        makePlots, showSummary = FALSE) {
  res <- results(dds, contrast = c(primaryFactor, lvl, ref),
                 cooksCutoff = cooksCutoff,
                 independentFiltering = independentFiltering)
  if (showSummary) {
    cat("summary of results\n")
    cat(paste0(primaryFactor,": ",lvl," vs ",ref,"\n"))
    print(summary(res))
  }
  resSorted <- res[order(res$padj), ]
  outDF <- as.data.frame(resSorted)
  outDF$geneID <- rownames(outDF)
  outDF <- outDF[, c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
  write.table(outDF, file = filename, sep = "\t", quote = FALSE,
              row.names = FALSE, col.names = FALSE)
  if (makePlots) {
    threshold <- if (independentFiltering) {
      unname(attr(res, "filterThreshold"))
    } else {
      0
    }
    title_suffix <- paste0(primaryFactor,": ",lvl," vs ",ref)
    generateSpecificPlots(res, threshold, title_suffix)
  }
}

makePlots <- !is.null(opt$plots)

if (is.null(opt$many_contrasts)) {
  # only contrast the first and second level of the primary factor
  runContrast(dds, primaryFactor,
              lvl = allLevels[2], ref = allLevels[1],
              filename = opt$outfile,
              cooksCutoff = cooksCutoff,
              independentFiltering = independentFiltering,
              makePlots = makePlots,
              showSummary = verbose)
} else {
  # rotate through the possible contrasts of the primary factor
  # write out a sorted table of results with the contrast as a suffix
  # add contrast specific plots to the device
  for (i in seq_len(n - 1)) {
    ref <- allLevels[i]
    for (lvl in allLevels[(i + 1):n]) {
      runContrast(dds, primaryFactor,
                  lvl = lvl, ref = ref,
                  filename = paste0(opt$outfile,".",primaryFactor,"_",lvl,"_vs_",ref),
                  cooksCutoff = cooksCutoff,
                  independentFiltering = independentFiltering,
                  makePlots = makePlots)
    }
  }
}
f961752f5419 Uploaded moheydarian parents: diff changeset	363
# close the plot device (it is only open when a plots file was requested)
plotsRequested <- !is.null(opt$plots)
if (plotsRequested) {
  cat("closing plot device\n")
  dev.off()
}

# record the R version and package versions used for this run
cat("Session information:\n\n")
print(sessionInfo())
f961752f5419 Uploaded moheydarian parents: diff changeset	373

Mercurial > repos > moheydarian > testing_deseq2

annotate deseq2.R @ 2:11342b3cd769 draft default tip