diff deseq2.R @ 13:6d17a7d6fe9c draft

Uploaded
author bgruening
date Mon, 02 Sep 2013 10:09:37 -0400
parents
children bb5c80d15e0a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq2.R	Mon Sep 02 10:09:37 2013 -0400
@@ -0,0 +1,121 @@
+library('getopt');
+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
+args <- commandArgs(trailingOnly = TRUE)
+
+#get options, using the spec as defined by the enclosed list.
+#we read the options from the default: commandArgs(TRUE).
+spec = matrix(c(
+  'verbose', 'v', 2, "integer",
+  'help' , 'h', 0, "logical",
+  'outputfile' , 'o', 1, "character",
+  'plots' , 'p', 1, "character",
+  'input' , 'i', 1, "character",
+  'samples', 's', 1, "character"
+), byrow=TRUE, ncol=4);
+opt = getopt(spec);
+
+
+# if help was asked for print a friendly message
+# and exit with a non-zero error code
+if ( !is.null(opt$help) ) {
+  cat(getopt(spec, usage=TRUE));
+  q(status=1);
+}
+
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+opt$samples<-trim(opt$samples)
+
+
+htseqCountTable = read.table(opt$input, sep="\t", comment="", as.is=T)
+
+l<-unique(c(htseqCountTable[1,-1]))
+#print(l)
+
+colnames(htseqCountTable) <- htseqCountTable[1,]
+names(htseqCountTable)<-colnames(htseqCountTable)
+tagnames <- htseqCountTable[-(1:2), 1]
+htseqCountTable <- htseqCountTable[-(1:2),-1]
+htseqCountTable[,1] <- gsub(",", ".", htseqCountTable[,1])
+
+
+library( "DESeq2" )
+
+opt$samples
+
+print('----------------')
+
+pdf(opt$plots)
+
+if (opt$samples=="all_vs_all"){
+
+  for (i in seq(1, length(l), by=1)) {
+    k=i+1
+    if(k<=length(l)){
+      for (j in seq(k, length(l), by=1)) {
+        
+        colData<-names(htseqCountTable[, which(gsub("[.][0-9]+","",names(htseqCountTable)) %in% c(l[[i]], l[[j]]))])
+        currentPairTable<-htseqCountTable[ , which(gsub("[.][0-9]+","",names(htseqCountTable)) %in% c(l[[i]], l[[j]]))]
+        #      rownames(currentPairTable)<-tagnames
+        
+        write.table(currentPairTable, file=paste(l[[i]],"_", l[[j]],".txt"))
+        currentPairTable<-read.table(paste(l[[i]],"_", l[[j]],".txt"),row.names=1,header=T)
+        
+        currentPairTable <- as.data.frame(currentPairTable,stringsAsFactors=F)
+        #print(currentPairTable)
+        pdata = data.frame(condition=factor(c(rep(l[[i]], 2), rep(l[[j]], 2))),row.names=colnames(currentPairTable))
+        dds = DESeqDataSetFromMatrix(countData = currentPairTable,  colData = pdata, design = ~ condition)
+        
+        #      colData(dds)$condition
+        dds <- DESeq(dds)
+        sizeFactors(dds)
+        plotDispEsts(dds)
+        plotMA(dds)
+        
+        res <- results(dds)
+        rownames(res)<-tagnames
+        resSorted <- res[order(res$padj),];
+        write.csv( as.data.frame(resSorted), file = opt$outputfile )
+      }
+    }
+  }
+}else{
+
+  sampleSets<-unlist(strsplit(opt$samples, " "))
+  sampleSets
+  samplesControl <- {}
+  samplesExperiment <- {}
+  sampleNames <- {}
+
+  samplesControl<-unlist(strsplit(sampleSets[1], ","))
+  samplesExperiment<-unlist(strsplit(sampleSets[2], ","))
+
+  print(samplesControl)
+  print(samplesExperiment)
+
+  # the minus one is needed because we get column indizes including the first column
+  samplecolumns <- c()
+  for (i in samplesControl) samplecolumns[[length(samplecolumns) + 1]] <- as.integer(i) - 1
+  for (i in samplesExperiment) samplecolumns[[length(samplecolumns) + 1]] <- as.integer(i) - 1
+  
+print(samplecolumns)
+  
+  htseqSubCountTable <- htseqCountTable[,samplecolumns]
+
+  #TODO: pavan hack :)
+  #write.table(htseqSubCountTable, file=paste(gsub(" ","_",opt$samples),".txt"))
+  #htseqSubCountTable<-read.table(paste(gsub(" ","_",opt$samples),".txt"),row.names=1,header=T)
+
+  pdata = data.frame(condition=factor(c(names( htseqSubCountTable )[ samplecolumns ])),row.names=colnames(htseqSubCountTable))
+  dds = DESeqDataSetFromMatrix(countData = bla,  colData = pdata, design = ~ condition)
+  
+  dds <- DESeq(dds)
+  sizeFactors(dds)
+  plotDispEsts(dds)
+  plotMA(dds)
+  
+  res <- results(dds)
+  rownames(res)<-tagnames
+  resSorted <- res[order(res$padj),];
+  write.csv(as.data.frame(resSorted), file = opt$outputfile)  
+}
+dev.off()