annotate dexseq.R @ 18:ce3f79d2feb5 draft

Uploaded
author pavanvidem
date Tue, 01 Sep 2015 11:08:29 -0400
parents b7e9bf50295c
children b7235a9b1881
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
1 ## Setup R error handling to go to stderr
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
2 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
3 # we need that to not crash galaxy with an UTF8 error on German LC settings.
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
4 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
5
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
6 library("DEXSeq")
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
7 library('getopt')
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
8 library('rjson')
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
9
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
10
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
11 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
12 args <- commandArgs(trailingOnly = TRUE)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
13
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
14 #get options, using the spec as defined by the enclosed list.
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
15 #we read the options from the default: commandArgs(TRUE).
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
16 spec = matrix(c(
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
17 'verbose', 'v', 2, "integer",
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
18 'help', 'h', 0, "logical",
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
19 'gtf', 'a', 1, "character",
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
20 'outfile', 'o', 1, "character",
15
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
21 'reportdir', 'r', 1, "character",
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
22 'htmlfile', 'x', 1, "character",
1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
23 'factors', 'f', 1, "character",
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
24 'threads', 'p', 1, "integer",
15
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
25 'fdr', 'c', 1, "double"
1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
26 ), byrow=TRUE, ncol=4);
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
27 opt = getopt(spec);
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
28
15
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
29
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
30 #reportdir <- gsub(".dat", "_files", opt$outfile)
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
31
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
32 dir.create(file.path(opt$reportdir))
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
33 setwd(opt$reportdir)
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
34 getwd()
1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
35
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
36 # if help was asked for print a friendly message
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
37 # and exit with a non-zero error code
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
38 if ( !is.null(opt$help) ) {
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
39 cat(getopt(spec, usage=TRUE));
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
40 q(status=1);
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
41 }
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
42
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
43 trim <- function (x) gsub("^\\s+|\\s+$", "", x)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
44 opt$samples <- trim(opt$samples)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
45 opt$factors <- trim(opt$factors)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
46
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
47 parser <- newJSONParser()
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
48 parser$addData( opt$factors )
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
49 factorsList <- parser$getObject()
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
50
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
51 sampleTable<-data.frame()
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
52 countFiles<-c()
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
53 factorNames<-c()
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
54 primaryFactor<-""
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
55 for(factor in factorsList){
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
56 factorName<-factor[[1]]
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
57 factorNames<-append(factorNames, paste(factorName,"exon",sep=":"))
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
58 factorValuesMapList<-factor[[2]]
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
59 c = length(factorValuesMapList)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
60 for (factorValuesMap in factorValuesMapList){
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
61 for(files in factorValuesMap){
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
62 for(file in files){
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
63 if(primaryFactor == "") {
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
64 countFiles<-append(countFiles,file)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
65 }
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
66 sampleTable[basename(file),factorName]<-paste(c,names(factorValuesMap),sep="_")
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
67 }
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
68 }
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
69 c = c-1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
70 }
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
71 if(primaryFactor == ""){
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
72 primaryFactor <- factorName
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
73 }
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
74 }
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
75
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
76 factorNames<-append(factorNames,"exon")
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
77 factorNames<-append(factorNames,"sample")
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
78 factorNames<-rev(factorNames)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
79 formulaFullModel <- as.formula(paste("", paste(factorNames, collapse=" + "), sep=" ~ "))
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
80 factorNames <- head(factorNames,-1)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
81 formulaReducedModel <- as.formula(paste("", paste(factorNames, collapse=" + "), sep=" ~ "))
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
82
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
83 sampleTable
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
84 formulaFullModel
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
85 formulaReducedModel
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
86 primaryFactor
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
87 countFiles
15
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
88 opt$reportdir
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
89 opt$htmlfile
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
90 opt$threads
1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
91 getwd()
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
92
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
93 dxd = DEXSeqDataSetFromHTSeq(countFiles, sampleData=sampleTable, design= formulaFullModel, flattenedfile=opt$gtf)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
94
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
95 colData(dxd)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
96 dxd <- estimateSizeFactors(dxd)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
97 sizeFactors(dxd)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
98 BPPARAM=MulticoreParam(workers=opt$threads)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
99 dxd <- estimateDispersions(dxd, formula=formulaFullModel, BPPARAM=BPPARAM)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
100 dxd <- testForDEU(dxd, reducedModel=formulaReducedModel, fullModel=formulaFullModel, BPPARAM=BPPARAM)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
101 dxd <- estimateExonFoldChanges(dxd, fitExpToVar=primaryFactor, BPPARAM=BPPARAM)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
102 res <- DEXSeqResults(dxd)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
103 table(res$padj <= opt$fdr)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
104 resSorted <- res[order(res$padj),]
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
105 head(resSorted)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
106
15
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
107 write.table(as.data.frame(resSorted), file = opt$outfile, sep="\t", quote = FALSE, col.names = FALSE)
1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
108
15
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
109 if ( !is.null(opt$reportdir) ) {
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
110 save(dxd, resSorted, file = file.path(opt$reportdir,"DEXSeq_analysis.RData"))
1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
111 save.image()
15
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
112 DEXSeqHTML(res, path=opt$reportdir, FDR=opt$fdr, color=c("#C3EEE7","#B7FEA0","#F1E7A1","#CEAEFF","#FF8F43","#EDC3C5","#AAA8AA","#FF0000","#637EE9","#FBFBFB"))
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
113 unlink(file.path(opt$reportdir,"DEXSeq_analysis.RData"))
1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
114 }
15
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
115 file.remove(opt$htmlfile)
b7e9bf50295c Uploaded
pavanvidem
parents: 1
diff changeset
116 file.symlink(file.path(opt$reportdir,"testForDEU.html"), opt$htmlfile)
1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
117 sessionInfo()