view deseq2.R @ 16:1d2a02bc2208 draft

Uploaded
author bgruening
date Mon, 09 Sep 2013 07:09:38 -0400
parents bb5c80d15e0a
children a05999fd6e26
line wrap: on
line source

## Setup R error handling to go to stderr
options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
# we need that to not crash galaxy with an UTF8 error on German LC settings.
Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

library('getopt');
options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
args <- commandArgs(trailingOnly = TRUE)

#get options, using the spec as defined by the enclosed list.
#we read the options from the default: commandArgs(TRUE).
spec = matrix(c(
  'verbose', 'v', 2, "integer",
  'help' , 'h', 0, "logical",
  'outputfile' , 'o', 1, "character",
  'plots' , 'p', 2, "character",
  'input' , 'i', 1, "character",
  'samples', 's', 1, "character",
  'factors', 'f', 2, "character",
  'fittype', 't', 2, "character"
), byrow=TRUE, ncol=4);
opt = getopt(spec);


# if help was asked for print a friendly message
# and exit with a non-zero error code
if ( !is.null(opt$help) ) {
  cat(getopt(spec, usage=TRUE));
  q(status=1);
}

if( is.null(opt$fittype))
	opt$fittype = "parametric"

trim <- function (x) gsub("^\\s+|\\s+$", "", x)
opt$samples <- trim(opt$samples)
opt$factors <- trim(opt$factors)

htseqCountTable = read.table(opt$input, sep="\t", comment="", as.is=T)

colnames(htseqCountTable) <- htseqCountTable[2,]
names(htseqCountTable)<-colnames(htseqCountTable)
conditions <- htseqCountTable[1,]
conditions <- unlist(conditions[,-1])
tagnames <- htseqCountTable[-(1:2), 1]
htseqCountTable <- htseqCountTable[-(1:2),-1]
htseqCountTable[,1] <- gsub(",", ".", htseqCountTable[,1])

l <- unique(c(conditions))

library( "DESeq2" )

if ( !is.null(opt$plots) ) {
    pdf(opt$plots)
}
if (opt$samples=="all_vs_all"){
  # all versus all
  for (i in seq(1, length(l), by=1)) {
    k=i+1
    if(k<=length(l)){
      for (j in seq(k, length(l), by=1)) {
        currentColmuns <- which(conditions %in% c(l[[i]], l[[j]]))
            sampleNames <- names(htseqCountTable[currentColmuns])

        currentPairTable <- htseqCountTable[currentColmuns]
        ncondition1cols <- length(which(conditions %in% c(l[[i]])))
        ncondition2cols <- length(which(conditions %in% c(l[[i]])))

        ntabcols <- length(currentPairTable)
            currentPairTable <- as.integer(unlist(currentPairTable))
        currentPairTable <- matrix(unlist(currentPairTable), ncol = ntabcols, byrow = FALSE)
            colnames(currentPairTable) <- sampleNames

        pdata = data.frame(condition=factor(conditions[currentColmuns]),row.names=colnames(currentPairTable))
        print(pdata)

        dds = DESeqDataSetFromMatrix(countData = currentPairTable,  colData = pdata, design = ~ condition)

        dds <- DESeq(dds, fitType= opt$fittype)
        sizeFactors(dds)
        res <- results(dds)
        resCols <- colnames(res)

        cond <- c(rep(paste(unique(conditions[currentColmuns]),collapse="_"), nrow(res)))
        res[, "condition"] <- cond
        res[, "geneIds"] <- tagnames
        
        resSorted <- res[order(res$padj),]
        write.table(as.data.frame(resSorted[,c("condition", "geneIds", resCols)]), file = opt$outputfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE, col.names = FALSE)
        
        if ( !is.null(opt$plots) ) {
            plotDispEsts(dds)
            plotMA(dds)
            
			library("RColorBrewer")
			library("gplots")
			select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30]
			hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
			
			rld <- rlogTransformation(dds, blind=TRUE)
			vsd <- varianceStabilizingTransformation(dds, blind=TRUE)
			distsRL <- dist(t(assay(rld)))
			mat <- as.matrix(distsRL)
			heatmap.2(mat, trace="none", col = rev(hmcol), main = "Sample-to-sample distances")            
        }        
      }
    }
  }
}else{

    sampleSets <- unlist(strsplit(opt$samples, " "))
    samplesControl <- {}
    samplesExperiment <- {}
    samplesControl <- unlist(strsplit(sampleSets[1], ","))
    samplesExperiment <- unlist(strsplit(sampleSets[2], ","))

    # the minus one is needed because we get column indizes including the first column
    sampleColumns <- c()
    for (i in samplesControl) sampleColumns[[length(sampleColumns) + 1]] <- as.integer(i) - 1
    for (i in samplesExperiment) sampleColumns[[length(sampleColumns) + 1]] <- as.integer(i) - 1
    htseqSubCountTable <- htseqCountTable[,sampleColumns]

    ntabcols <- length(htseqSubCountTable)
    htseqSubCountTable <- as.integer(unlist(htseqSubCountTable))
    htseqSubCountTable <- matrix(unlist(htseqSubCountTable), ncol = ntabcols, byrow = FALSE)
    colnames(htseqSubCountTable) <- names(htseqCountTable)[sampleColumns]

    pdata = data.frame(row.names=colnames(htseqSubCountTable))

    if(!is.null(opt$factors)){
        factorSets <- unlist(strsplit(opt$factors, " "))
        for (factorSet in factorSets) {
            currentFactor <- unlist(strsplit(factorSet, ":"))
            for (factor in currentFactor[2]) {
                factoredCols <- unlist(strsplit(factor, ","))
                factoredCols <- as.numeric(factoredCols)
                factors <- c()
                for (i in sampleColumns){
                    j=i+1
                    if(j %in% factoredCols)
                        factors[[length(factors) + 1]] <- "yes"
                    else
                        factors[[length(factors) + 1]] <- "no"
                }
                # only add factor if factor is applied to the selected samples
                if((length(unique(factors))) >= 2){
                        pdata[[currentFactor[1]]]<-factor(factors)
                }else{
                    write("Input-Error 01: You can only apply factors to selected samples.", stderr())
                }
            }
        }
    }
    pdata$condition<-factor(conditions[sampleColumns])
    print(pdata)

    designFormula <- as.formula(paste("", paste(names(pdata), collapse=" + "), sep=" ~ "))
    dds = DESeqDataSetFromMatrix(countData = htseqSubCountTable,  colData = pdata, design = designFormula)

    dds <- DESeq(dds, fitType= opt$fittype)
    sizeFactors(dds)
    res <- results(dds)
    resCols <- colnames(res)

    cond <- c(rep(paste(unique(conditions[sampleColumns]),collapse="_"), nrow(res)))
    res[, "condition"] <- cond
    res[, "geneIds"] <- tagnames

    resSorted <- res[order(res$padj),]
    write.table(as.data.frame(resSorted[,c("condition", "geneIds", resCols)]), file = opt$outputfile, sep="\t", quote = FALSE, row.names = FALSE, col.names = FALSE)

    if ( !is.null(opt$plots) ) {
        plotDispEsts(dds)
        plotMA(dds)

		library("RColorBrewer")
		library("gplots")
		select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30]
		hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
        
		rld <- rlogTransformation(dds, blind=TRUE)
		vsd <- varianceStabilizingTransformation(dds, blind=TRUE)
		distsRL <- dist(t(assay(rld)))
		mat <- as.matrix(distsRL)
		heatmap.2(mat, trace="none", col = rev(hmcol), main = "Sample-to-sample distances")
    }    
}
dev.off()