annotate GEOQuery.R @ 3:b01c4f260085 draft

use optparse
author sblanck
date Wed, 12 Apr 2017 03:44:02 -0400
parents 93451f832736
children 2c9e44ff68dc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
93451f832736 Uploaded
sblanck
parents:
diff changeset
1
93451f832736 Uploaded
sblanck
parents:
diff changeset
2 library(GEOquery)
93451f832736 Uploaded
sblanck
parents:
diff changeset
3
93451f832736 Uploaded
sblanck
parents:
diff changeset
4
93451f832736 Uploaded
sblanck
parents:
diff changeset
# Read the positional arguments supplied to the script after "--args":
#   1. GEO accession passed to getGEO()
#   2. output path of the tab-separated expression matrix
#   3. output path of the saved RData file
#   4. output path of the tab-separated condition file
#   5. log2 transformation mode ("auto", "yes", anything else means none)
# trailingOnly = TRUE returns only what follows "--args" (possibly nothing),
# so a missing "--args" no longer produces a cryptic indexing error.
cargs <- commandArgs(trailingOnly = TRUE)
if (length(cargs) < 5) {
  stop(
    "Expected 5 arguments (GEO ID, data file, RData file, condition file, ",
    "transformation) but got ", length(cargs),
    call. = FALSE
  )
}

GEOQueryID <- cargs[[1]]
GEOQueryData <- cargs[[2]]
GEOQueryRData <- cargs[[3]]
conditionFile <- cargs[[4]]
transformation <- cargs[[5]]

# Fetch the GEO record and keep its first element as the working data set.
data1 <- getGEO(GEOQueryID)
eset <- data1[[1]]
93451f832736 Uploaded
sblanck
parents:
diff changeset
16
93451f832736 Uploaded
sblanck
parents:
diff changeset
# Return the expression matrix of `data`, log2-transformed when needed.
#
# Quantiles of the expression values are used to guess whether the values are
# already in log2 space. If they look untransformed, values <= 0 are replaced
# by NaN and the matrix is log2-transformed; otherwise it is returned as is.
#
# data: object whose expression matrix is retrieved with exprs().
# Returns the (possibly log2-transformed) expression matrix.
normalization <- function(data) {
  ex <- exprs(data)
  # qx: min, 1st quartile, median, 3rd quartile, 99th percentile, max.
  qx <- as.numeric(
    quantile(ex, c(0, 0.25, 0.5, 0.75, 0.99, 1), na.rm = TRUE)
  )
  needs_log <- (qx[5] > 100) ||
    (qx[6] - qx[1] > 50 && qx[2] > 0) ||
    (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)

  # The quantiles are NA when no non-missing value exists, which makes the
  # test NA; isTRUE() then leaves the matrix untouched instead of failing
  # inside if().
  if (isTRUE(needs_log)) {
    ex[which(ex <= 0)] <- NaN
    ex <- log2(ex)
  }
  ex
}
93451f832736 Uploaded
sblanck
parents:
diff changeset
29
93451f832736 Uploaded
sblanck
parents:
diff changeset
# Apply the requested log2 transformation to the expression values:
#   "auto" - normalization() decides whether log2 is applied
#   "yes"  - log2 is always applied
#   other  - the values are left unchanged
if (transformation == "auto") {
  exprs(eset) <- normalization(eset)
} else if (transformation == "yes") {
  exprs(eset) <- log2(exprs(eset))
}

# Export the expression matrix as a tab-separated file, with row names and a
# header line.
matrixData <- exprs(eset)
write.table(
  matrixData,
  file = GEOQueryData,
  sep = "\t",
  row.names = TRUE,
  col.names = NA
)
93451f832736 Uploaded
sblanck
parents:
diff changeset
38
93451f832736 Uploaded
sblanck
parents:
diff changeset
# Construction of the condition file.
# When the "source_name_ch1" field holds more than one distinct value
# (ignoring case) it is used as the condition; otherwise the "description"
# field is used instead.
sampleInfo <- pData(data1[[1]])
sourceNames <- sampleInfo[, "source_name_ch1"]
if (length(unique(tolower(sourceNames))) > 1) {
  conditionField <- "source_name_ch1"
} else {
  conditionField <- "description"
}
conditions <- sampleInfo[conditionField]

# One label per sample: "<geo_accession> <title> <condition>", built before
# the condition values are lower-cased.
description <- paste(
  as.vector(sampleInfo[, "geo_accession"]),
  as.vector(sampleInfo[, "title"]),
  as.vector(conditions[, 1]),
  sep = " "
)

# Store the lower-cased condition in the eset under "source_name_ch1".
conditions[, 1] <- tolower(conditions[, 1])
pData(eset)["source_name_ch1"] <- conditions

# Write the row names, the condition and the label as unquoted,
# tab-separated columns without a header.
write.table(
  cbind(conditions, description),
  file = conditionFile,
  sep = "\t",
  quote = FALSE,
  row.names = TRUE,
  col.names = FALSE
)

# Save the data set and its conditions to the RData output file.
save(eset, conditions, file = GEOQueryRData)