Mercurial > repos > anmoljh > activity_predict

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict_activity.R	Mon Jun 12 09:18:21 2017 -0400
@@ -0,0 +1,128 @@
+##########
+# predict_activity.R
+#
+# Applies a caret model saved by the model-building tool to a CSV file of
+# descriptors and writes per-compound class probabilities and the predicted
+# class as a tab-separated table.
+#
+# Usage: Rscript predict_activity.R <descriptors.csv> <model.RData> <output>
+##########
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 3) {
+  stop("usage: predict_activity.R <descriptors.csv> <model.RData> <output>",
+       call. = FALSE)
+}
+arg1 <- args[1]
+arg2 <- args[2]
+arg3 <- args[3]
+set.seed(1)
+
+# Convert one descriptor column to numeric and impute unusable entries.
+#
+# x: a column of any type; factors and strings are parsed via as.character().
+# Returns a numeric vector of the same length in which NA, NaN and +/-Inf are
+# replaced by the median of the finite values. The median is taken over finite
+# values only so that Inf entries cannot become the fill value themselves; a
+# column without any finite value is filled with 0.
+clean_column <- function(x) {
+  x <- as.numeric(as.character(x))
+  usable <- is.finite(x)
+  fill <- if (any(usable)) median(x[usable]) else 0
+  x[!usable] <- fill
+  x
+}
+
+# Names of the descriptor columns the model was trained on.
+#
+# fit: the caret train object stored in the model file.
+# svmRadial/svmLinear models, and any other model whose finalModel is an S4
+# object (`$xNames` cannot be read from those), take the names from
+# fit$trainingData; all other models use fit$finalModel$xNames and fall back
+# to the training data when that is absent.
+model_columns <- function(fit) {
+  svm_method <- is.character(fit$method) &&
+    any(fit$method %in% c("svmRadial", "svmLinear"))
+  cols <- NULL
+  if (!svm_method && !isS4(fit$finalModel)) {
+    cols <- fit$finalModel$xNames
+  }
+  if (is.null(cols)) {
+    cols <- colnames(fit$trainingData)
+    if (".outcome" %in% cols) {
+      # caret names the response column ".outcome"; drop it by name.
+      cols <- setdiff(cols, ".outcome")
+    } else {
+      # Otherwise assume, as before, that the response is the last column.
+      cols <- cols[-length(cols)]
+    }
+  }
+  cols
+}
+
+# Predict activity for every compound in a descriptor file.
+#
+# args1: path to the descriptor CSV; the first column holds compound names
+#        and must be followed by every descriptor used to build the model.
+# args2: path to the .RData model file. It must contain `Fit` (a caret train
+#        object) and may contain `ppInfo` (a caret preProcess object).
+# args3: path of the tab-separated output table.
+# Stops with an error when the model or a required descriptor is missing.
+pre <- function(args1, args2, args3) {
+  nTrain <- read.csv(args1, row.names = 1, header = TRUE)
+
+  # Load into a private environment so the model file cannot overwrite local
+  # variables and the presence of `Fit`/`ppInfo` can be tested reliably.
+  model_env <- new.env()
+  load(args2, envir = model_env)
+  if (!exists("Fit", envir = model_env, inherits = FALSE)) {
+    stop("model file does not contain an object named 'Fit'", call. = FALSE)
+  }
+  Fit <- get("Fit", envir = model_env)
+
+  library(caret)
+
+  # Columns that are entirely missing cannot be imputed from their own median;
+  # fill them with tiny random noise (one draw per row) instead.
+  newdata <- nTrain
+  all_missing <- vapply(newdata, function(col) all(is.na(col)), logical(1))
+  for (col in names(newdata)[all_missing]) {
+    newdata[[col]] <- runif(nrow(newdata), 0, 0.00001)
+  }
+
+  reqcol <- model_columns(Fit)
+  if (length(reqcol) == 0) {
+    stop("cannot determine the descriptor columns used by the model",
+         call. = FALSE)
+  }
+  absent <- setdiff(reqcol, colnames(newdata))
+  if (length(absent) > 0) {
+    stop("input file lacks descriptor(s) required by the model: ",
+         paste(absent, collapse = ", "), call. = FALSE)
+  }
+
+  # drop = FALSE and lapply() keep a data frame even for a single descriptor
+  # or a single compound, where apply() would collapse the data to a vector.
+  newdata <- newdata[, reqcol, drop = FALSE]
+  newdata[] <- lapply(newdata, clean_column)
+
+  # Apply the pre-processing saved with the model, if any.
+  if (exists("ppInfo", envir = model_env, inherits = FALSE)) {
+    newdata <- predict(get("ppInfo", envir = model_env), newdata)
+  }
+
+  testpredict <- predict(Fit, newdata)
+  probpredict <- as.data.frame(predict(Fit, newdata, type = "prob"))
+  # Label the probability columns with the class levels (any number of them).
+  Label <- levels(testpredict)
+  if (length(Label) == ncol(probpredict)) {
+    colnames(probpredict) <- Label
+  }
+
+  compound <- data.frame(COMPOUND = rownames(nTrain))
+  activity <- as.data.frame(testpredict)
+  colnames(activity) <- "PREDICTED ACTIVITY"
+  dw <- format(cbind(compound, probpredict, activity), justify = "centre")
+  write.table(dw, file = args3, row.names = FALSE, sep = "\t")
+}
+
+pre(arg1, arg2, arg3)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict_activity.xml	Mon Jun 12 09:18:21 2017 -0400
@@ -0,0 +1,72 @@
+<tool id="predict_activity" name="Predict Activity" version="1.0">
+    <description>
+        used to predict activity based on given model
+    </description>
+    <requirements>
+        <requirement type="package" version="3.2.1">R</requirement>
+        <requirement type="package" version="1.0">carettools</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+
+    <command interpreter="Rscript">predict_activity.R '$file1' '$model' '$output1'</command> <!-- args: descriptor CSV, model .RData, output table; paths are quoted and stderr is kept so R errors reach the job log (failure is decided by the exit code rule above) -->
+
+    <inputs>
+        <param name="model" type="data" format="data" label="Select Model" help="Select built model obtained from caret tool 'Create script from the template file'." />
+        <param name="file1" type="data" format="csv"  label="Select the file containing descriptor data for activity prediction" help="csv format" />
+    </inputs>
+
+    <outputs>
+        <data format="txt" name="output1" label="Prediction on $file1.name" />
+    </outputs>
+
+    <tests>
+ 	<test>
+          	<param name="model" value="MODEL.RData"  />
+          	<param name="file1" value="Descriptors_File_of_prediction_set.csv" />
+          	<output name="output1" file="Prediction_on_Descriptors_File_of_prediction_set.txt" compare="sim_size" delta="90000" />
+ 	</test>
+    </tests>
+
+<help>
+
+.. class:: infomark
+
+The prediction file **must** contain **all** the features of the **input** "csv file" used for **model building**; it may contain **more features**.
+
+----------
+
+**Input "csv file" must be as follows**
+
+----------
+
+
+Example file:-
+
+
+
+# example.csv
+
+	 feature1,feature2,feature3,..,featureN
+
+ro1,234,2.3,34,7,..,0.9
+
+ro2,432,3.4,23.1,12,..,0.12
+
+ro3,692,23,12.2,19,..,0.14
+
+
+-----------
+
+**MODEL**
+
+Choose model file received from model building step.
+
+The model file has the "data" file format; this can be seen by
+
+clicking on the output files shown in the history.
+
+</help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Mon Jun 12 09:18:21 2017 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="R" version="3.2.1">
+	    <repository changeset_revision="77d439695328" name="package_r_3_2_1" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu/" />
+    </package>
+    <package name="carettools" version="1.0">
+	    <repository changeset_revision="5517d8d4223b" name="package_carettools_1_0" owner="planemo" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu/" />
+    </package>
+</tool_dependency>