3
|
# ---------------------------------------------------------------------------
# Command-line interface
#   arg1: CSV file with the compounds to score (first column = compound id)
#   arg2: .RData file holding the fitted model (loaded inside pre())
#   arg3: path of the tab-separated prediction table to write
# ---------------------------------------------------------------------------
# `trailingOnly = TRUE` is spelled out instead of the reassignable alias `T`,
# so only the user-supplied arguments (not the Rscript options) are returned.
args <- commandArgs(trailingOnly = TRUE)
arg1 <- args[1]
arg2 <- args[2]
arg3 <- args[3]

# Legacy invocation, kept for reference:
# source("~/galaxy-dist/tools/mpdstoolsV2/tool3/Preold.R")
# pre(arg1, arg2, arg3)

# Fixed seed so the small random noise added inside pre() is reproducible.
set.seed(1234)
|
|
9
|
|
# Convert one descriptor column to numeric and impute its unusable entries.
#
# Factor / character input is converted through its text representation.
# Entries that are NA, NaN or infinite are replaced by the median of the
# finite entries. The median is taken over finite values only, so an Inf can
# never be "replaced" by Inf. A column without any finite value is filled
# with 0, matching how pre() treats entirely missing columns.
.impute_column <- function(x) {
  x <- as.numeric(as.character(x))
  unusable <- !is.finite(x)
  if (any(unusable)) {
    x[unusable] <- if (all(unusable)) 0 else stats::median(x[!unusable])
  }
  x
}

# Draw three uniform values in [0, upper] and recycle them to `n_values`.
#
# Exactly three draws are consumed, and they are recycled in order. This
# reproduces the values that implicit recycling of `runif(3, ...)` would give,
# without the length-mismatch warning/error that implicit recycling raises
# when `n_values` is not a multiple of three or is smaller than three.
.recycled_noise <- function(n_values, upper) {
  rep_len(stats::runif(3, 0, upper), n_values)
}

# Determine which input columns the fitted model expects.
#
# `finalModel$xNames` is used when it is available. For the kernlab SVM
# methods, and for any other S4 final model (where `$` cannot be used), the
# names are taken from `trainingData` minus the outcome column. The outcome is
# assumed to be the column named ".outcome"; if no such column exists the last
# column is dropped instead.
.required_columns <- function(fit) {
  final_model <- fit$finalModel
  is_svm <- isTRUE(fit$method %in% c("svmRadial", "svmLinear"))
  predictors <- if (is_svm || isS4(final_model)) NULL else final_model$xNames

  if (is.null(predictors)) {
    training_cols <- colnames(fit$trainingData)
    predictors <- if (".outcome" %in% training_cols) {
      training_cols[training_cols != ".outcome"]
    } else {
      training_cols[-length(training_cols)]
    }
  }

  if (length(predictors) == 0) {
    stop("Could not determine the predictor columns used by the model",
         call. = FALSE)
  }
  predictors
}

# Select the required columns and return them as a clean numeric matrix.
#
# The result always has one row per compound and one column per required
# descriptor, also for single-row or single-column input. Columns are
# converted one by one, so a non-numeric column cannot force the numeric
# columns through a lossy text conversion.
.prepare_descriptors <- function(newdata, reqcol) {
  absent <- setdiff(reqcol, colnames(newdata))
  if (length(absent) > 0) {
    stop("Input file lacks columns required by the model: ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  selected <- newdata[, reqcol, drop = FALSE]
  cleaned <- lapply(selected, .impute_column)
  descriptors <- matrix(
    as.numeric(unlist(cleaned, use.names = FALSE)),
    nrow = nrow(selected),
    ncol = ncol(selected),
    dimnames = list(NULL, colnames(selected))
  )

  # Tiny noise, intended to avoid near-zero-variance problems downstream.
  descriptors + .recycled_noise(length(descriptors), 0.0001)
}

#' Predict the activity of new compounds with a previously saved model
#'
#' Reads the descriptor table, loads the saved model, cleans the descriptors,
#' applies the saved pre-processing (when present) and writes the predicted
#' class together with the class probabilities.
#'
#' The input CSV must contain every column that was used to build the model;
#' extra columns are ignored. Entirely missing columns are filled with values
#' very close to zero, other missing or non-finite entries with the column
#' median.
#'
#' The former "There is something wrong in data or model" output branches
#' could never be reached and were removed; problems are reported as errors.
#'
#' @param args1 Path of the CSV file to score. The first column holds the
#'   compound identifiers, the first line the column names.
#' @param args2 Path of an .RData file containing an object named `Fit`
#'   (the fitted model) and optionally `ppInfo` (pre-processing object whose
#'   `predict()` method transforms the descriptors).
#' @param args3 Path of the tab-separated output file. Columns: `COMPOUND`,
#'   one probability column per class, `PREDICTED ACTIVITY`.
#' @return `NULL`, invisibly. Called for its side effect of writing `args3`.
pre <- function(args1, args2, args3) {
  # Makes the predict() methods for the saved objects available.
  library(caret)

  nTrain <- read.csv(args1, row.names = 1, header = TRUE)
  if (nrow(nTrain) == 0) {
    stop("Input file contains no compounds: ", args1, call. = FALSE)
  }

  # Load into a private environment so that only objects stored in the file
  # are seen; a stale `ppInfo` or `Fit` elsewhere cannot be picked up.
  model_env <- new.env(parent = emptyenv())
  load(args2, envir = model_env)
  if (!exists("Fit", envir = model_env, inherits = FALSE)) {
    stop("Model file does not contain an object named 'Fit': ", args2,
         call. = FALSE)
  }
  Fit <- get("Fit", envir = model_env, inherits = FALSE)
  ppInfo <- if (exists("ppInfo", envir = model_env, inherits = FALSE)) {
    get("ppInfo", envir = model_env, inherits = FALSE)
  } else {
    NULL
  }

  # Columns without a single observed value: replace by (almost) zero.
  newdata <- nTrain
  is_empty <- vapply(newdata, function(column) all(is.na(column)), logical(1))
  empty_cols <- unname(which(is_empty))
  if (length(empty_cols) > 0) {
    fill <- matrix(
      .recycled_noise(nrow(newdata) * length(empty_cols), 0.00001),
      nrow = nrow(newdata)
    )
    for (j in seq_along(empty_cols)) {
      newdata[[empty_cols[j]]] <- fill[, j]
    }
  }

  descriptors <- .prepare_descriptors(newdata, .required_columns(Fit))
  if (!is.null(ppInfo)) {
    # Apply the pre-processing that was stored alongside the model.
    descriptors <- stats::predict(ppInfo, descriptors)
  }

  testpredict <- stats::predict(Fit, descriptors)
  probpredict <- as.data.frame(stats::predict(Fit, descriptors, type = "prob"))

  # Label the probability columns with the class levels. Works for any number
  # of classes; the names are left untouched if the counts do not match.
  class_labels <- levels(testpredict)
  if (length(class_labels) == ncol(probpredict)) {
    colnames(probpredict) <- class_labels
  }

  compound <- data.frame(COMPOUND = rownames(nTrain))
  activity <- data.frame(testpredict)
  colnames(activity) <- "PREDICTED ACTIVITY"

  dw <- format(cbind(compound, probpredict, activity), justify = "centre")
  write.table(dw, file = args3, row.names = FALSE, sep = "\t")
  invisible(NULL)
}
|
|
# Script entry point: score the input file with the saved model.
pre(args1 = arg1, args2 = arg2, args3 = arg3)
|