annotate dexseq.R @ 4:aa92ffbce394 draft

Uploaded
author pavanvidem
date Fri, 28 Aug 2015 09:38:27 -0400
parents bc7eab5753a8
children b7e9bf50295c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
## Route R error messages to stderr and quit with exit status 1 without
## saving the workspace. The default error printing is switched off because
## the handler below writes the message itself.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)
# Force English messages: needed to not crash Galaxy with a UTF-8 error on
# German LC settings.
Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
5
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
6 library("DEXSeq")
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
7 library('getopt')
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
8 library('rjson')
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
9
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
10
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
# Keep strings as character vectors and use plain (non-typographic) quotes.
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
args <- commandArgs(trailingOnly = TRUE)

# Get options, using the spec as defined by the enclosed matrix: one row per
# option with the columns long name, short flag, argument mask and data type.
# getopt() reads the options from its default source, commandArgs(TRUE).
spec <- matrix(c(
  "verbose", "v", 2, "integer",
  "help", "h", 0, "logical",
  "gtf", "a", 1, "character",
  "outfile", "o", 1, "character",
  "report", "r", 1, "character",
  "factors", "f", 1, "character",
  "threads", "p", 1, "integer",
  "fdr", "c", 1, "double"
), byrow = TRUE, ncol = 4)
opt <- getopt(spec)

# If help was asked for, print a friendly message and exit with a non-zero
# error code. This is checked before the working directory is touched so
# that -h also works when no report directory was supplied.
if (!is.null(opt$help)) {
  cat(getopt(spec, usage = TRUE))
  q(status = 1)
}

# Work inside the report directory when one was given. The path is made
# absolute first, so that later uses of opt$report still point at the same
# directory after the working directory has changed.
if (!is.null(opt$report)) {
  opt$report <- normalizePath(opt$report, mustWork = TRUE)
  setwd(opt$report)
}
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
36
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
# Remove leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
# The factor description is mandatory: stop with a clear message instead of
# handing an empty value to the JSON parser.
if (is.null(opt$factors)) {
  stop("the --factors option (JSON description of the factors) is required",
       call. = FALSE)
}
# Strip surrounding whitespace from the JSON text before parsing it.
opt$factors <- trim(opt$factors)

# Parse the JSON text into nested R lists.
parser <- newJSONParser()
parser$addData(opt$factors)
factorsList <- parser$getObject()
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
44
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
# Build the sample annotation from the parsed factor description.
#
# Each entry of factorsList is read as: element 1 = factor name, element 2 =
# list of level maps, where a level map is a named list whose names are level
# names and whose values are count file paths.
# NOTE(review): this shape is inferred from how the entries are indexed
# below; confirm against the caller that produces the JSON.
#
# Results:
#   sampleTable   - one row per count file (row name = basename of the file),
#                   one column per factor, cell = "<rank>_<level name>"
#   countFiles    - count file paths, collected from the first factor only
#   factorNames   - one "<factor>:exon" term per factor, in input order
#   primaryFactor - name of the first factor
sampleTable <- data.frame()
countFiles <- c()
factorNames <- c()
primaryFactor <- ""
for (factorSpec in factorsList) {
  factorName <- factorSpec[[1]]
  factorNames <- append(factorNames, paste(factorName, "exon", sep = ":"))
  levelMaps <- factorSpec[[2]]
  # The rank counts down from the number of level maps, so the first map of
  # a factor gets the highest numeric prefix.
  levelRank <- length(levelMaps)
  for (levelMap in levelMaps) {
    # Walk the entries by position so that each level name is paired with
    # its own files, even if a map carries more than one level.
    for (entry in seq_along(levelMap)) {
      levelLabel <- paste(levelRank, names(levelMap)[entry], sep = "_")
      for (countFile in levelMap[[entry]]) {
        # Files are only collected while the first factor is processed;
        # later factors refer to the same files again.
        if (primaryFactor == "") {
          countFiles <- append(countFiles, countFile)
        }
        sampleTable[basename(countFile), factorName] <- levelLabel
      }
    }
    levelRank <- levelRank - 1
  }
  # The first factor seen becomes the primary factor.
  if (primaryFactor == "") {
    primaryFactor <- factorName
  }
}
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
69
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
# Model terms in formula order: "sample", "exon", then the terms collected
# so far in reverse order.
factorNames <- rev(c(factorNames, "exon", "sample"))
formulaFullModel <- as.formula(
  paste("", paste(factorNames, collapse = " + "), sep = " ~ ")
)

# The reduced model is the full model without its final term, i.e. without
# the term that was collected first.
factorNames <- factorNames[-length(factorNames)]
formulaReducedModel <- as.formula(
  paste("", paste(factorNames, collapse = " + "), sep = " ~ ")
)

# Diagnostics: these bare top-level expressions are auto-printed when the
# script is run, so the inputs of the analysis show up in the tool output.
sampleTable
formulaFullModel
formulaReducedModel
primaryFactor
countFiles
opt$report
file.path(opt$report, "DEXSeq_analysis.RData")
getwd()
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
85
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
# Build the DEXSeq data set from the count files, the sample annotation, the
# full model design and the annotation file given with --gtf.
dxd <- DEXSeqDataSetFromHTSeq(
  countFiles,
  sampleData = sampleTable,
  design = formulaFullModel,
  flattenedfile = opt$gtf
)

# Bare expressions below are auto-printed as diagnostics.
colData(dxd)
dxd <- estimateSizeFactors(dxd)
sizeFactors(dxd)

# Parallel back-end shared by the remaining steps; fall back to a single
# worker when --threads was not given.
BPPARAM <- MulticoreParam(
  workers = if (is.null(opt$threads)) 1L else opt$threads
)
dxd <- estimateDispersions(dxd, formula = formulaFullModel, BPPARAM = BPPARAM)
dxd <- testForDEU(
  dxd,
  reducedModel = formulaReducedModel,
  fullModel = formulaFullModel,
  BPPARAM = BPPARAM
)
dxd <- estimateExonFoldChanges(dxd, fitExpToVar = primaryFactor, BPPARAM = BPPARAM)
res <- DEXSeqResults(dxd)

# Count of results at or below the FDR threshold, then the results ordered
# by adjusted p-value.
table(res$padj <= opt$fdr)
resSorted <- res[order(res$padj), ]
head(resSorted)

# write.csv() cannot encode list columns, so any list column is collapsed to
# one comma-separated string per row before the table is written. Tables
# without list columns are written unchanged.
exportTable <- as.data.frame(resSorted)
listColumns <- vapply(exportTable, is.list, logical(1))
if (any(listColumns)) {
  exportTable[listColumns] <- lapply(
    exportTable[listColumns],
    function(column) {
      vapply(
        column,
        function(entry) paste(entry, collapse = ", "),
        character(1),
        USE.NAMES = FALSE
      )
    }
  )
}
write.csv(exportTable, file = opt$outfile)
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
101
bc7eab5753a8 Uploaded
pavanvidem
parents:
diff changeset
# Produce the HTML report when a report directory was requested.
if (!is.null(opt$report)) {
  # The script has already changed into the report directory (setwd() near
  # the top of the script), so the temporary RData file is addressed relative
  # to the working directory. Prefixing it with opt$report again would point
  # to a nested, non-existent path whenever opt$report is relative.
  rdataFile <- "DEXSeq_analysis.RData"
  save(dxd, resSorted, file = rdataFile)
  # Also stores the whole workspace in the working directory.
  save.image()
  DEXSeqHTML(
    res,
    FDR = opt$fdr,
    color = c(
      "#C3EEE7", "#B7FEA0", "#F1E7A1", "#CEAEFF", "#FF8F43",
      "#EDC3C5", "#AAA8AA", "#FF0000", "#637EE9", "#FBFBFB"
    ),
    BPPARAM = BPPARAM
  )
  # The RData file is only needed while the report is generated.
  unlink(rdataFile)
}
# Record R and package versions in the tool output.
sessionInfo()