comparison deseq2.R @ 13:6d17a7d6fe9c draft

Uploaded
author bgruening
date Mon, 02 Sep 2013 10:09:37 -0400
parents
children bb5c80d15e0a
comparison
equal deleted inserted replaced
12:01114c547afb 13:6d17a7d6fe9c
1 library('getopt');
2 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
3 args <- commandArgs(trailingOnly = TRUE)
4
5 #get options, using the spec as defined by the enclosed list.
6 #we read the options from the default: commandArgs(TRUE).
7 spec = matrix(c(
8 'verbose', 'v', 2, "integer",
9 'help' , 'h', 0, "logical",
10 'outputfile' , 'o', 1, "character",
11 'plots' , 'p', 1, "character",
12 'input' , 'i', 1, "character",
13 'samples', 's', 1, "character"
14 ), byrow=TRUE, ncol=4);
15 opt = getopt(spec);
16
17
18 # if help was asked for print a friendly message
19 # and exit with a non-zero error code
20 if ( !is.null(opt$help) ) {
21 cat(getopt(spec, usage=TRUE));
22 q(status=1);
23 }
24
25 trim <- function (x) gsub("^\\s+|\\s+$", "", x)
26 opt$samples<-trim(opt$samples)
27
28
29 htseqCountTable = read.table(opt$input, sep="\t", comment="", as.is=T)
30
31 l<-unique(c(htseqCountTable[1,-1]))
32 #print(l)
33
34 colnames(htseqCountTable) <- htseqCountTable[1,]
35 names(htseqCountTable)<-colnames(htseqCountTable)
36 tagnames <- htseqCountTable[-(1:2), 1]
37 htseqCountTable <- htseqCountTable[-(1:2),-1]
38 htseqCountTable[,1] <- gsub(",", ".", htseqCountTable[,1])
39
40
41 library( "DESeq2" )
42
43 opt$samples
44
45 print('----------------')
46
47 pdf(opt$plots)
48
49 if (opt$samples=="all_vs_all"){
50
51 for (i in seq(1, length(l), by=1)) {
52 k=i+1
53 if(k<=length(l)){
54 for (j in seq(k, length(l), by=1)) {
55
56 colData<-names(htseqCountTable[, which(gsub("[.][0-9]+","",names(htseqCountTable)) %in% c(l[[i]], l[[j]]))])
57 currentPairTable<-htseqCountTable[ , which(gsub("[.][0-9]+","",names(htseqCountTable)) %in% c(l[[i]], l[[j]]))]
58 # rownames(currentPairTable)<-tagnames
59
60 write.table(currentPairTable, file=paste(l[[i]],"_", l[[j]],".txt"))
61 currentPairTable<-read.table(paste(l[[i]],"_", l[[j]],".txt"),row.names=1,header=T)
62
63 currentPairTable <- as.data.frame(currentPairTable,stringsAsFactors=F)
64 #print(currentPairTable)
65 pdata = data.frame(condition=factor(c(rep(l[[i]], 2), rep(l[[j]], 2))),row.names=colnames(currentPairTable))
66 dds = DESeqDataSetFromMatrix(countData = currentPairTable, colData = pdata, design = ~ condition)
67
68 # colData(dds)$condition
69 dds <- DESeq(dds)
70 sizeFactors(dds)
71 plotDispEsts(dds)
72 plotMA(dds)
73
74 res <- results(dds)
75 rownames(res)<-tagnames
76 resSorted <- res[order(res$padj),];
77 write.csv( as.data.frame(resSorted), file = opt$outputfile )
78 }
79 }
80 }
81 }else{
82
83 sampleSets<-unlist(strsplit(opt$samples, " "))
84 sampleSets
85 samplesControl <- {}
86 samplesExperiment <- {}
87 sampleNames <- {}
88
89 samplesControl<-unlist(strsplit(sampleSets[1], ","))
90 samplesExperiment<-unlist(strsplit(sampleSets[2], ","))
91
92 print(samplesControl)
93 print(samplesExperiment)
94
95 # the minus one is needed because we get column indizes including the first column
96 samplecolumns <- c()
97 for (i in samplesControl) samplecolumns[[length(samplecolumns) + 1]] <- as.integer(i) - 1
98 for (i in samplesExperiment) samplecolumns[[length(samplecolumns) + 1]] <- as.integer(i) - 1
99
100 print(samplecolumns)
101
102 htseqSubCountTable <- htseqCountTable[,samplecolumns]
103
104 #TODO: pavan hack :)
105 #write.table(htseqSubCountTable, file=paste(gsub(" ","_",opt$samples),".txt"))
106 #htseqSubCountTable<-read.table(paste(gsub(" ","_",opt$samples),".txt"),row.names=1,header=T)
107
108 pdata = data.frame(condition=factor(c(names( htseqSubCountTable )[ samplecolumns ])),row.names=colnames(htseqSubCountTable))
109 dds = DESeqDataSetFromMatrix(countData = bla, colData = pdata, design = ~ condition)
110
111 dds <- DESeq(dds)
112 sizeFactors(dds)
113 plotDispEsts(dds)
114 plotMA(dds)
115
116 res <- results(dds)
117 rownames(res)<-tagnames
118 resSorted <- res[order(res$padj),];
119 write.csv(as.data.frame(resSorted), file = opt$outputfile)
120 }
121 dev.off()