annotate Analyse.R @ 11:f9732f6bf218 draft

planemo upload commit 3cad4bb49300f86625d62d40ea7e2ea2ba40e314-dirty
author sblanck
date Thu, 11 May 2017 10:20:35 -0400
parents 56267e3293b2
children 7c49b4ec8cff
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
1 #!/usr/bin/env Rscript
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
2 # setup R error handling to go to stderr
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
3 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
4
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
5 # we need that to not crash galaxy with an UTF8 error on German LC settings.
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
6 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
7
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
8 library("optparse")
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
9
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
10 ##### Read options
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
11 option_list=list(
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
12 make_option("--rdatainput",type="character",default="NULL",help="rdata object containing eset object"),
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
13 make_option("--conditions",type="character",default=NULL,help="Text file summarizing conditions of the experiment (required)"),
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
14 make_option("--selectcondition1",type="character",default=NULL,help="log2 transformation"),
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
15 # make_option("--condition1",type="character",default=NULL,help="A table containing the expression data"),
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
16 make_option("--selectcondition2",type="character",default="NULL",help="rdata object containing eset object"),
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
17 # make_option("--condition2",type="character",default=NULL,help="Text file summarizing conditions of the experiment (required)"),
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
18 make_option("--nbresult",type="character",default=NULL,help="number of result displayed results"),
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
19 make_option("--rdataoutput",type="character",default="NULL",help="output rdata object containing eset object"),
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
20 make_option("--htmloutput",type="character",default=NULL,help="Output html report"),
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
21 make_option("--htmloutputpath",type="character",default="NULL",help="Path of output html report"),
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
22 make_option("--tabularoutput",type="character",default=NULL,help="Output text file"),
11
f9732f6bf218 planemo upload commit 3cad4bb49300f86625d62d40ea7e2ea2ba40e314-dirty
sblanck
parents: 10
diff changeset
23 make_option("--htmltemplate",type="character",default=NULL,help="html template)"),
f9732f6bf218 planemo upload commit 3cad4bb49300f86625d62d40ea7e2ea2ba40e314-dirty
sblanck
parents: 10
diff changeset
24 make_option("--tooldirectory",type="character",default=NULL,help="tool directory)")
6
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
25
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
26
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
27 );
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
28
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
29 opt_parser = OptionParser(option_list=option_list);
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
30 opt = parse_args(opt_parser);
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
31
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
32 if(is.null(opt$rdatainput)){
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
33 print_help(opt_parser)
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
34 stop("rData input required.", call.=FALSE)
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
35 }
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
36
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
37 if(is.null(opt$conditions)){
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
38 print_help(opt_parser)
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
39 stop("conditions input required.", call.=FALSE)
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
40 }
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
41
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
42
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
43 #loading libraries
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
44 suppressPackageStartupMessages(require(GEOquery))
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
45
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
46 suppressPackageStartupMessages(require(Biobase))
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
47 suppressPackageStartupMessages(require(GEOquery))
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
48 suppressPackageStartupMessages(require(GEOmetadb))
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
49 suppressPackageStartupMessages(require(limma))
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
50 suppressPackageStartupMessages(require(jsonlite))
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
51 suppressPackageStartupMessages(require(affy))
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
52 suppressPackageStartupMessages(require(dplyr))
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
53
6
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
54 load(opt$rdatainput)
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
55 targetFile=opt$conditions
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
56 condition1Name=opt$selectcondition1
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
57 #condition1=opt$condition1
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
58 condition2Name=opt$selectcondition2
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
59 #condition2=opt$condition2
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
60 nbresult=opt$nbresult
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
61 result_export_eset=opt$rdataoutput
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
62 result=opt$htmloutput
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
63 result.path=opt$htmloutputpath
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
64 result.tabular=opt$tabularoutput
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
65 result.template=opt$htmltemplate
11
f9732f6bf218 planemo upload commit 3cad4bb49300f86625d62d40ea7e2ea2ba40e314-dirty
sblanck
parents: 10
diff changeset
66 tooldirectory=opt$tooldirectory
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
67
93451f832736 Uploaded
sblanck
parents:
diff changeset
68 #file.copy(targetFile,"./targetFile.txt")
93451f832736 Uploaded
sblanck
parents:
diff changeset
69
6
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
70 targets <- read.table(targetFile,sep="\t",stringsAsFactors=FALSE)
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
71
6
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
72 #condition1_tmp <- strsplit(condition1,",")
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
73 condition1 <-targets[which(targets$V2==condition1Name),1]
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
74
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
75 #condition2_tmp <- strsplit(condition2,",")
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
76 #condition2<-unlist(condition2_tmp)
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
77 condition2 <-targets[which(targets$V2==condition2Name),1]
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
78
93451f832736 Uploaded
sblanck
parents:
diff changeset
79 conditions=c(condition1,condition2)
93451f832736 Uploaded
sblanck
parents:
diff changeset
80
93451f832736 Uploaded
sblanck
parents:
diff changeset
81 #nbresult=1000
93451f832736 Uploaded
sblanck
parents:
diff changeset
82 dir.create(result.path, showWarnings = TRUE, recursive = FALSE)
93451f832736 Uploaded
sblanck
parents:
diff changeset
83
6
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
84 eset=eset[,which(rownames(eset@phenoData@data) %in% conditions)]
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
85
6
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
86 rownames(eset@phenoData@data)
a2b8c2aabeb0 Uploaded
sblanck
parents: 2
diff changeset
87 which(rownames(eset@phenoData@data) %in% conditions)
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
88 #condition1Name=make.names(condition1Name)
93451f832736 Uploaded
sblanck
parents:
diff changeset
89 #condition2Name=make.names(condition2Name)
93451f832736 Uploaded
sblanck
parents:
diff changeset
90 #condition1Name=gsub("_","",condition1Name)
93451f832736 Uploaded
sblanck
parents:
diff changeset
91 #condition2Name=gsub("_","",condition2Name)
93451f832736 Uploaded
sblanck
parents:
diff changeset
92 #condition1Name
93451f832736 Uploaded
sblanck
parents:
diff changeset
93 #condition2Name
93451f832736 Uploaded
sblanck
parents:
diff changeset
94
93451f832736 Uploaded
sblanck
parents:
diff changeset
95 eset@phenoData@data$source_name_ch1=""
93451f832736 Uploaded
sblanck
parents:
diff changeset
96 eset@phenoData@data$source_name_ch1[which(rownames(eset@phenoData@data) %in% condition1)]=condition1Name
93451f832736 Uploaded
sblanck
parents:
diff changeset
97 eset@phenoData@data$source_name_ch1[which(rownames(eset@phenoData@data) %in% condition2)]=condition2Name
93451f832736 Uploaded
sblanck
parents:
diff changeset
98 #condition1Name
93451f832736 Uploaded
sblanck
parents:
diff changeset
99 #condition2Name
93451f832736 Uploaded
sblanck
parents:
diff changeset
100
93451f832736 Uploaded
sblanck
parents:
diff changeset
101 condNames=paste0("G",as.numeric(as.character(pData(eset)["source_name_ch1"][,1])!=condition1Name))
93451f832736 Uploaded
sblanck
parents:
diff changeset
102 #condNames=make.names(targets[,2])
93451f832736 Uploaded
sblanck
parents:
diff changeset
103 #condNames=gsub("_","",condNames)
93451f832736 Uploaded
sblanck
parents:
diff changeset
104 f <- as.factor(condNames)
93451f832736 Uploaded
sblanck
parents:
diff changeset
105 #eset$description <- factors
93451f832736 Uploaded
sblanck
parents:
diff changeset
106 design <- model.matrix(~ 0+f)
93451f832736 Uploaded
sblanck
parents:
diff changeset
107
93451f832736 Uploaded
sblanck
parents:
diff changeset
108 colnames(design) <- levels(f)
11
f9732f6bf218 planemo upload commit 3cad4bb49300f86625d62d40ea7e2ea2ba40e314-dirty
sblanck
parents: 10
diff changeset
109 #colnames(design)
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
110 fit <- lmFit(eset, design)
11
f9732f6bf218 planemo upload commit 3cad4bb49300f86625d62d40ea7e2ea2ba40e314-dirty
sblanck
parents: 10
diff changeset
111
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
112 #cont.matrix <- makeContrasts(C1=paste0(condition1Name,"-",condition2Name), levels=design)
93451f832736 Uploaded
sblanck
parents:
diff changeset
113 cont.matrix <- makeContrasts(G0-G1, levels=design)
11
f9732f6bf218 planemo upload commit 3cad4bb49300f86625d62d40ea7e2ea2ba40e314-dirty
sblanck
parents: 10
diff changeset
114 #cont.matrix
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
115 fit2 <- contrasts.fit(fit, cont.matrix)
93451f832736 Uploaded
sblanck
parents:
diff changeset
116 fit2 <- eBayes(fit2)
11
f9732f6bf218 planemo upload commit 3cad4bb49300f86625d62d40ea7e2ea2ba40e314-dirty
sblanck
parents: 10
diff changeset
117
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
118 tT <- topTable(fit2, adjust="fdr", sort.by="B", number=nbresult)
93451f832736 Uploaded
sblanck
parents:
diff changeset
119
93451f832736 Uploaded
sblanck
parents:
diff changeset
120 #head(exprs(eset))
93451f832736 Uploaded
sblanck
parents:
diff changeset
121
93451f832736 Uploaded
sblanck
parents:
diff changeset
122 gpl <- annotation(eset)
93451f832736 Uploaded
sblanck
parents:
diff changeset
123 if (substr(x = gpl,1,3)!="GPL"){
10
56267e3293b2 planemo upload commit 0ad899de37d891f387912b7abd63506605a25976-dirty
sblanck
parents: 7
diff changeset
124 #if the annotation info does not start with "GPL" we retrieve the corresponding GPL annotation
11
f9732f6bf218 planemo upload commit 3cad4bb49300f86625d62d40ea7e2ea2ba40e314-dirty
sblanck
parents: 10
diff changeset
125 print(getwd())
f9732f6bf218 planemo upload commit 3cad4bb49300f86625d62d40ea7e2ea2ba40e314-dirty
sblanck
parents: 10
diff changeset
126 mapping=read.csv(paste0(tooldirectory,"/gplToBioc.csv"),stringsAsFactors=FALSE)
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
127 gpl=mapping[which(mapping$bioc_package==annotation(eset)),]$gpl
93451f832736 Uploaded
sblanck
parents:
diff changeset
128 gpl=gpl[1]
93451f832736 Uploaded
sblanck
parents:
diff changeset
129
93451f832736 Uploaded
sblanck
parents:
diff changeset
130 annotation(eset)=gpl
93451f832736 Uploaded
sblanck
parents:
diff changeset
131
93451f832736 Uploaded
sblanck
parents:
diff changeset
132 platf <- getGEO(gpl, AnnotGPL=TRUE)
93451f832736 Uploaded
sblanck
parents:
diff changeset
133 ncbifd <- data.frame(attr(dataTable(platf), "table"))
93451f832736 Uploaded
sblanck
parents:
diff changeset
134
93451f832736 Uploaded
sblanck
parents:
diff changeset
135 fData(eset)["ID"]=row.names(fData(eset))
93451f832736 Uploaded
sblanck
parents:
diff changeset
136 fData(eset)=merge(x=fData(eset),y=ncbifd,all.x = TRUE, by = "ID")
93451f832736 Uploaded
sblanck
parents:
diff changeset
137 colnames(fData(eset))[4]="ENTREZ_GENE_ID"
93451f832736 Uploaded
sblanck
parents:
diff changeset
138 row.names(fData(eset))=fData(eset)[,"ID"]
93451f832736 Uploaded
sblanck
parents:
diff changeset
139
93451f832736 Uploaded
sblanck
parents:
diff changeset
140 tT <- add_rownames(tT, "ID")
93451f832736 Uploaded
sblanck
parents:
diff changeset
141
93451f832736 Uploaded
sblanck
parents:
diff changeset
142 } else {
93451f832736 Uploaded
sblanck
parents:
diff changeset
143
93451f832736 Uploaded
sblanck
parents:
diff changeset
144 gpl <- annotation(eset)
93451f832736 Uploaded
sblanck
parents:
diff changeset
145 platf <- getGEO(gpl, AnnotGPL=TRUE)
93451f832736 Uploaded
sblanck
parents:
diff changeset
146 ncbifd <- data.frame(attr(dataTable(platf), "table"))
93451f832736 Uploaded
sblanck
parents:
diff changeset
147
93451f832736 Uploaded
sblanck
parents:
diff changeset
148 if (!("ID" %in% colnames(tT))){
93451f832736 Uploaded
sblanck
parents:
diff changeset
149 tT <- add_rownames(tT, "ID")}
93451f832736 Uploaded
sblanck
parents:
diff changeset
150
93451f832736 Uploaded
sblanck
parents:
diff changeset
151 }
93451f832736 Uploaded
sblanck
parents:
diff changeset
152
93451f832736 Uploaded
sblanck
parents:
diff changeset
153 tT <- merge(tT, ncbifd, by="ID")
93451f832736 Uploaded
sblanck
parents:
diff changeset
154 tT <- tT[order(tT$P.Value), ]
93451f832736 Uploaded
sblanck
parents:
diff changeset
155 tT <- subset(tT, select=c("Platform_SPOTID","ID","adj.P.Val","P.Value","t","B","logFC","Gene.symbol","Gene.title","Gene.ID","Chromosome.annotation","GO.Function.ID"))
93451f832736 Uploaded
sblanck
parents:
diff changeset
156 tT<-format(tT, digits=2, nsmall=2)
93451f832736 Uploaded
sblanck
parents:
diff changeset
157 head(tT)
93451f832736 Uploaded
sblanck
parents:
diff changeset
158 colnames(tT)=gsub(pattern = "\\.",replacement = "_",colnames(tT))
93451f832736 Uploaded
sblanck
parents:
diff changeset
159 matrixtT=as.matrix(tT)
93451f832736 Uploaded
sblanck
parents:
diff changeset
160 datajson=toJSON(matrixtT,pretty = TRUE)
93451f832736 Uploaded
sblanck
parents:
diff changeset
161
93451f832736 Uploaded
sblanck
parents:
diff changeset
162 htmlfile=readChar(result.template, file.info(result.template)$size)
93451f832736 Uploaded
sblanck
parents:
diff changeset
163 htmlfile=gsub(x=htmlfile,pattern = "###DATAJSON###",replacement = datajson, fixed = TRUE)
93451f832736 Uploaded
sblanck
parents:
diff changeset
164 dir.create(result.path, showWarnings = TRUE, recursive = FALSE)
93451f832736 Uploaded
sblanck
parents:
diff changeset
165
93451f832736 Uploaded
sblanck
parents:
diff changeset
166 boxplot="boxplot.png"
93451f832736 Uploaded
sblanck
parents:
diff changeset
167 png(boxplot,width=800,height = 400)
93451f832736 Uploaded
sblanck
parents:
diff changeset
168 par(mar=c(7,5,1,1))
93451f832736 Uploaded
sblanck
parents:
diff changeset
169 boxplot(exprs(eset),las=2,outline=FALSE)
93451f832736 Uploaded
sblanck
parents:
diff changeset
170 dev.off()
93451f832736 Uploaded
sblanck
parents:
diff changeset
171 htmlfile=gsub(x=htmlfile,pattern = "###BOXPLOT###",replacement = boxplot, fixed = TRUE)
93451f832736 Uploaded
sblanck
parents:
diff changeset
172 file.copy(boxplot,result.path)
93451f832736 Uploaded
sblanck
parents:
diff changeset
173
93451f832736 Uploaded
sblanck
parents:
diff changeset
174 histopvalue="histopvalue.png"
93451f832736 Uploaded
sblanck
parents:
diff changeset
175
93451f832736 Uploaded
sblanck
parents:
diff changeset
176 png(histopvalue,width=800,height = 400)
93451f832736 Uploaded
sblanck
parents:
diff changeset
177 par(mfrow=c(1,2))
93451f832736 Uploaded
sblanck
parents:
diff changeset
178 hist(fit2$F.p.value,nclass=100,main="Histogram of p-values", xlab="p-values",ylab="frequency")
93451f832736 Uploaded
sblanck
parents:
diff changeset
179 volcanoplot(fit2,coef=1,highlight=10,main="Volcano plot")
93451f832736 Uploaded
sblanck
parents:
diff changeset
180 htmlfile=gsub(x=htmlfile,pattern = "###HIST###",replacement = histopvalue, fixed = TRUE)
93451f832736 Uploaded
sblanck
parents:
diff changeset
181 dev.off()
93451f832736 Uploaded
sblanck
parents:
diff changeset
182 file.copy(histopvalue,result.path)
93451f832736 Uploaded
sblanck
parents:
diff changeset
183
93451f832736 Uploaded
sblanck
parents:
diff changeset
184 #write.table(tolower(c(condition1Name,condition2Name)),quote = FALSE,col.names = FALSE, row.names=FALSE,file=result_export_conditions)
93451f832736 Uploaded
sblanck
parents:
diff changeset
185 saveConditions=c(condition1Name,condition2Name)
93451f832736 Uploaded
sblanck
parents:
diff changeset
186 save(eset,saveConditions,file=result_export_eset)
93451f832736 Uploaded
sblanck
parents:
diff changeset
187 write.table(x=tT[,-1],file=result.tabular,quote=FALSE,row.names=FALSE,col.names=TRUE,sep="\t")
93451f832736 Uploaded
sblanck
parents:
diff changeset
188 write(htmlfile,result)
93451f832736 Uploaded
sblanck
parents:
diff changeset
189