annotate caret_regression/tool3/predict.R @ 0:a4a2ad5a214e draft default tip

Uploaded
author deepakjadmin
date Thu, 05 Nov 2015 02:37:56 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1 ##########
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
2 args <- commandArgs(T)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
3 arg1 <- args[1]
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
4 arg2 <- args[2]
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
5 arg3 <- args[3]
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
6 #source("~/galaxy-dist/tools/mpdstoolsV2/tool3/Preold.R")
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
7 #pre(arg1,arg2,arg3)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
8 set.seed(1)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
9 pre <- function(args1,args2,args3){
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
10 #args <- commandArgs(TRUE)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
11 nTrain <- read.csv(args1,row.names= 1, header = T) # example nTrain.csv file of unknown activity
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
12 #save(nTrain,file = "nTrain.RData")
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
13 #load("nTrain.RData")
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
14 load(args2) # model generated from previous program
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
15 newdata <- nTrain
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
16 modelFit <- Fit
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
17 ###########
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
18 # input csv file must contain exactly the same columns as used in model building #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
19 # Also do pre-processing by means of centering and scaling
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
20 ## problem in s4 object so first check that the given model has s4 object in
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
21 ## >isS4(Fit$finalModel) if it is S4 then add it in with an else-if branch
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
22 ## e.g. isS4(plsFit$finalModel) == TRUE
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
23 f=function(x){
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
24 x<-as.numeric(as.character(x)) #first convert each column into numeric if it is from factor
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
25 x[is.na(x)] =median(x, na.rm=TRUE) #convert the item with NA to median value from the column
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
26 x #display the column
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
27 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
28
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
29 f2=function(x){
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
30 all(is.na(x))
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
31 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
32
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
33 fop <- apply(newdata,2,f2)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
34 allcolumnmissing <- which(fop)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
35 if (length(allcolumnmissing) > 0){
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
36 newdata[,allcolumnmissing] <- 0
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
37 newdata[,allcolumnmissing] <- newdata[,allcolumnmissing] + runif(3,0,0.000000000000000001) ### add noise}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
38 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
39
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
40 library(caret)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
41
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
42 #if(as.character(!isS4(Fit$finalModel == "TRUE")))
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
43 if((Fit$method != "svmRadial") && (Fit$method != "svmLinear") )
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
44 {
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
45 reqcol <- Fit$finalModel$xNames
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
46 newdata <- newdata[,reqcol]
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
47 newdata <- apply(newdata,2,f)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
48 newdata <- newdata + runif(3,0,0.01) ### add noise to overcome from NZV error
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
49 newdata1 <- preProcess(newdata, method = c("center", "scale"))
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
50 newdata11 <- predict(newdata1,newdata)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
51 ###########
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
52 library(stats)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
53 testpredict <- predict(modelFit,newdata11)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
54 names <- as.data.frame(rownames(nTrain))
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
55 colnames(names) <- "COMPOUND"
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
56 activity <- as.data.frame(testpredict)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
57 colnames(activity) <- "PREDICTED VALUE"
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
58 dw <- format(cbind(names,activity),justify="centre")
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
59 write.table(dw,file=args3,row.names=FALSE,sep="\t")
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
60 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
61 #else if(Fit$method == "svmRadial")
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
62 else if((Fit$method == "svmLinear") | (Fit$method == "svmRadial"))
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
63 {
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
64 reqcol <- colnames(Fit$trainingData)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
65 reqcol <- reqcol[1:length(reqcol)-1]
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
66 newdata <- newdata[,reqcol]
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
67 newdata <- apply(newdata,2,f)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
68 newdata <- newdata + runif(3,0,0.01) ### add little noise to overcome from NZV problem
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
69 newdata1 <- preProcess(newdata, method = c("center", "scale"))
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
70 newdata11 <- predict(newdata1,newdata)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
71 testpredict <- predict(modelFit,newdata11)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
72 names <- as.data.frame(rownames(nTrain))
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
73 colnames(names) <- "COMPOUND"
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
74 activity <- as.data.frame(testpredict)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
75 colnames(activity) <- "PREDICTED VALUE"
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
76 dw <- format(cbind(names,activity),justify="centre")
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
77 write.table(dw,file=args3,row.names=FALSE,sep="\t")
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
78
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
79 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
80 else {
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
81 dw <- "There is something wrong in data or model"
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
82 write.csv(dw,file=args3,row.names=FALSE)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
83
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
84 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
85
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
86 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
87 pre(arg1,arg2,arg3)