0
|
1 ##########
|
|
2 args <- commandArgs(T)
|
|
3 arg1 <- args[1]
|
|
4 arg2 <- args[2]
|
|
5 arg3 <- args[3]
|
|
6 #source("~/galaxy-dist/tools/mpdstoolsV2/tool3/Preold.R")
|
|
7 #pre(arg1,arg2,arg3)
|
|
8 set.seed(1)
|
|
9 pre <- function(args1,args2,args3){
|
|
10 #args <- commandArgs(TRUE)
|
|
11 nTrain <- read.csv(args1,row.names= 1, header = T) # example nTrain.csv file of unknown activity
|
|
12 #save(nTrain,file = "nTrain.RData")
|
|
13 #load("nTrain.RData")
|
|
14 load(args2) # model generated from previous programn
|
|
15 newdata <- nTrain
|
|
16 modelFit <- Fit
|
|
17 ###########
|
|
18 # input csv file must contaion the exact same column as used in model building #
|
|
19 # Also do pre-proccessing by means of centering and scaling
|
|
20 ## problem in s4 object so first check that the given model has s4 object in
|
|
21 ## >isS4(Fit$finalmodel) if it is s4 than add in with elseif loop
|
|
22 ## eg . isS4(plsFit$finalModel) == TRUE
|
|
23 f=function(x){
|
|
24 x<-as.numeric(as.character(x)) #first convert each column into numeric if it is from factor
|
|
25 x[is.na(x)] =median(x, na.rm=TRUE) #convert the item with NA to median value from the column
|
|
26 x #display the column
|
|
27 }
|
|
28
|
|
29 f2=function(x){
|
|
30 all(is.na(x))
|
|
31 }
|
|
32
|
|
33
|
|
34 fop <- apply(newdata,2,f2)
|
|
35 allcolumnmissing <- which(fop)
|
|
36 if (length(allcolumnmissing) > 0){
|
|
37 newdata[,allcolumnmissing] <- 0
|
|
38 newdata[,allcolumnmissing] <- newdata[,allcolumnmissing] + runif(3,0,0.00000000000000000000000000000001) ### add noise}
|
|
39 }
|
|
40
|
|
41 library(caret)
|
|
42
|
|
43 #if(as.character(!isS4(Fit$finalModel == "TRUE")))
|
|
44 if((Fit$method != "svmRadial") && (Fit$method != "svmLinear"))
|
|
45 {
|
|
46 reqcol <- Fit$finalModel$xNames
|
|
47 newdata <- newdata[,reqcol]
|
|
48 newdata <- apply(newdata,2,f)
|
|
49 newdata <- newdata + runif(3,0,0.01) ### add noise to overcome from NZV error
|
|
50 newdata1 <- preProcess(newdata, method = c("center", "scale"))
|
|
51 newdata11 <- predict(newdata1,newdata)
|
|
52 ###########
|
|
53 library(stats)
|
|
54 testpredict <- predict(modelFit,newdata11)
|
|
55 Label <- levels(testpredict)
|
|
56 a1 <- Label[1]
|
|
57 a2 <- Label[2]
|
|
58 probpredict <- predict(modelFit,newdata11,type="prob")
|
|
59 names <- as.data.frame(rownames(nTrain))
|
|
60 colnames(names) <- "COMPOUND"
|
|
61 activity <- as.data.frame(testpredict)
|
|
62 colnames(activity) <- "PREDICTED ACTIVITY"
|
|
63 colnames(probpredict) <- c(eval(a1),eval(a2))
|
|
64 Prob <- as.data.frame(probpredict)
|
|
65 dw <- format(cbind(names,Prob,activity),justify="centre")
|
|
66 write.table(dw,file=args3,row.names=FALSE,sep="\t")
|
|
67 } else if((Fit$method == "svmRadial") | (Fit$method == "svmLinear")){
|
|
68 library(stats)
|
|
69 reqcol <- colnames(Fit$trainingData)
|
|
70 reqcol <- reqcol[1:length(reqcol)-1]
|
|
71 newdata <- newdata[,reqcol]
|
|
72
|
|
73 newdata <- apply(newdata,2,f)
|
|
74 newdata <- newdata + runif(3,0,0.01) ### add little noise to overcome from NZV problem
|
|
75 newdata1 <- preProcess(newdata, method = c("center", "scale"))
|
|
76 newdata11 <- predict(newdata1,newdata)
|
|
77 testpredict <- predict(modelFit,newdata11)
|
|
78 Label <- levels(testpredict)
|
|
79 a1 <- Label[1]
|
|
80 a2 <- Label[2]
|
|
81 probpredict <- predict(modelFit,newdata11,type="prob")
|
|
82 names <- as.data.frame(rownames(nTrain))
|
|
83 colnames(names) <- "COMPOUND"
|
|
84 activity <- as.data.frame(testpredict)
|
|
85 colnames(activity) <- "PREDICTED ACTIVITY"
|
|
86 colnames(probpredict) <- c(eval(a1),eval(a2))
|
|
87 Prob <- as.data.frame(probpredict)
|
|
88 dw <- format(cbind(names,Prob,activity),justify="centre")
|
|
89 write.table(dw,file=args3,row.names=FALSE,sep="\t")
|
|
90 }else {
|
|
91 dw <- "There is something wrong in data or model"
|
|
92 write.csv(dw,file=args3,row.names=FALSE)
|
|
93 }
|
|
94 }
|
|
95 pre(arg1,arg2,arg3)
|