# view predict.R @ 0:749240e755e1 draft default tip

# Uploaded
# author deepakjadmin
# date Fri, 22 Jan 2016 14:18:58 -0500
# parents
# children
# line wrap: on
# line source

##########
# Command-line arguments (only those given after the script name):
#   arg1  input CSV file, read by pre() with read.csv()
#   arg2  .RData file that pre() passes to load() to obtain the model
#   arg3  output file written by pre()
# `trailingOnly = TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned, whereas `TRUE` is a reserved word.
args <- commandArgs(trailingOnly = TRUE)
arg1 <- args[1]
arg2 <- args[2]
arg3 <- args[3]
#source("~/galaxy-dist/tools/mpdstoolsV2/tool3/Preold.R")
#pre(arg1,arg2,arg3)
# Fixed seed so that the runif() noise drawn inside pre() is reproducible.
set.seed(1)
# Predict values for new compounds with a previously saved model.
#
# Arguments:
#   args1  Path to a CSV file with a header row; its first column supplies the
#          row names, which are reported as the compound identifiers. The file
#          must contain exactly the predictor columns used to build the model
#          (extra columns are ignored).
#   args2  Path to an .RData file that defines an object named `Fit`, the
#          model generated by the previous program.
#   args3  Path of the output file.
#
# Output:
#   On success a tab-separated table with the columns "COMPOUND" and
#   "PREDICTED VALUE" is written to args3. If the model or the data cannot be
#   used, the text "There is something wrong in data or model" is written to
#   args3 and an error describing the problem is raised.
pre <- function(args1, args2, args3) {
  # Write the fallback message to the output file, then abort with details.
  report_failure <- function(reason) {
    dw <- "There is something wrong in data or model"
    write.csv(dw, file = args3, row.names = FALSE)
    stop(reason, call. = FALSE)
  }

  # Convert a column to numeric (via character, so that factor labels rather
  # than factor codes are converted) and replace NA with the column median.
  to_numeric <- function(x) {
    x <- as.numeric(as.character(x))
    x[is.na(x)] <- median(x, na.rm = TRUE)
    x
  }

  nTrain <- read.csv(args1, row.names = 1, header = TRUE)
  newdata <- nTrain

  # Load the model file into its own environment: a plain load() would drop
  # every stored object into this function and could silently overwrite
  # locals such as nTrain, newdata or args3.
  model_env <- new.env()
  load(args2, envir = model_env)
  if (!exists("Fit", envir = model_env, inherits = FALSE)) {
    report_failure(paste0("no object named 'Fit' found in ", args2))
  }
  Fit <- get("Fit", envir = model_env, inherits = FALSE)
  if (!is.list(Fit)) {
    report_failure("'Fit' is not a list-like model object")
  }
  method <- Fit[["method"]]
  if (!is.character(method) || length(method) != 1L || is.na(method)) {
    report_failure("'Fit' has no usable 'method' entry")
  }

  # A column without a single value has no median to impute from, so fill it
  # with practically-zero noise. The three draws are recycled down the rows,
  # column after column (the pattern the data-frame arithmetic used before);
  # building the values explicitly also works when there are fewer than three
  # rows and only one such column.
  empty_cols <- unname(which(
    vapply(newdata, function(x) all(is.na(x)), logical(1))
  ))
  if (length(empty_cols) > 0) {
    n_rows <- nrow(newdata)
    tiny <- rep_len(
      runif(3, 0, 0.000000000000000001),
      n_rows * length(empty_cols)
    )
    for (k in seq_along(empty_cols)) {
      newdata[[empty_cols[k]]] <- tiny[(k - 1) * n_rows + seq_len(n_rows)]
    }
  }

  # Work out which predictor columns the model needs. For the two SVM methods
  # the names come from the stored training data instead of finalModel (the
  # original notes mention problems with S4 final models).
  if (method %in% c("svmRadial", "svmLinear")) {
    train_cols <- colnames(Fit$trainingData)
    # NOTE(review): the last training-data column is dropped, presumably
    # because it holds the outcome -- confirm against the model-building code.
    reqcol <- head(train_cols, -1)
  } else {
    reqcol <- Fit$finalModel$xNames
  }
  if (length(reqcol) == 0) {
    report_failure("could not determine the predictor columns of the model")
  }
  missing_cols <- setdiff(reqcol, colnames(newdata))
  if (length(missing_cols) > 0) {
    report_failure(paste0(
      "input data lacks column(s) required by the model: ",
      paste(missing_cols, collapse = ", ")
    ))
  }

  library(caret)

  # drop = FALSE keeps a data frame even when the model has one predictor.
  newdata <- newdata[, reqcol, drop = FALSE]
  newdata <- apply(newdata, 2, to_numeric)
  # Add a little noise to overcome the near-zero-variance (NZV) error. Three
  # draws are recycled over the whole matrix; rep_len() makes that recycling
  # explicit so it neither warns nor fails for awkward matrix sizes.
  noise <- runif(3, 0, 0.01)
  newdata <- newdata + rep_len(noise, length(newdata))

  # NOTE(review): centering and scaling are estimated from the new data
  # itself, not taken from the training data -- confirm this is intended.
  scaler <- preProcess(newdata, method = c("center", "scale"))
  newdata11 <- predict(scaler, newdata)
  testpredict <- predict(Fit, newdata11)

  compound <- as.data.frame(rownames(nTrain))
  colnames(compound) <- "COMPOUND"
  activity <- as.data.frame(testpredict)
  colnames(activity) <- "PREDICTED VALUE"
  dw <- format(cbind(compound, activity), justify = "centre")
  write.table(dw, file = args3, row.names = FALSE, sep = "\t")
}
# Run the prediction with the three command-line arguments:
# input CSV, model .RData file, output file.
pre(args1 = arg1, args2 = arg2, args3 = arg3)