changeset 0:749240e755e1 draft default tip

Uploaded
author deepakjadmin
date Fri, 22 Jan 2016 14:18:58 -0500
parents
children
files predict.R tool3.xml tool_dependencies.xml
diffstat 3 files changed, 161 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict.R	Fri Jan 22 14:18:58 2016 -0500
@@ -0,0 +1,87 @@
+# Usage: Rscript predict.R <descriptors.csv> <model file> <output file>
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 3) stop("expected 3 arguments: csv, model, output", call. = FALSE)
+arg1 <- args[1] # descriptor CSV
+arg2 <- args[2] # saved model
+arg3 <- args[3] # output path
+# Fixed seed: pre() adds random noise, so results stay reproducible.
+set.seed(1)
+# Predict activities for the compounds of a descriptor CSV file using a
+# previously saved caret model, and write the predictions to a text file.
+#
+# args1: path to a CSV file of descriptors; its first column is used as row
+#        names and reported as the compound identifier.
+# args2: path to an R data file that defines an object named `Fit`, used
+#        below as a caret model (Fit$method, Fit$finalModel, Fit$trainingData).
+# args3: path of the tab-separated output file with the columns COMPOUND
+#        and PREDICTED VALUE.
+# Side effect only: the result is the file written to args3.
+#
+# The input CSV must contain every column that was used to build the model;
+# otherwise the function stops with an error naming the absent columns.
+# NOTE(review): centering/scaling is estimated from the new data itself
+# rather than reused from training - confirm that this is intended.
+pre <- function(args1,args2,args3){
+  library(caret)
+
+  nTrain <- read.csv(args1, row.names = 1, header = TRUE)
+  load(args2) # defines `Fit`
+  modelFit <- Fit
+  newdata <- nTrain
+
+  # Coerce a column to numeric (factors go through their labels) and replace
+  # missing values by the column median.
+  clean_column <- function(x) {
+    x <- as.numeric(as.character(x))
+    x[is.na(x)] <- median(x, na.rm = TRUE)
+    x
+  }
+
+  # Columns without a single observed value have no median: fill them with
+  # zero plus a negligible amount of noise.
+  all_missing <- which(vapply(newdata, function(x) all(is.na(x)), logical(1)))
+  if (length(all_missing) > 0) {
+    newdata[, all_missing] <- 0
+    newdata[, all_missing] <- newdata[, all_missing] + runif(3, 0, 1e-18)
+  }
+
+  # The predictor names live in different places depending on the model type:
+  # for the SVM methods they are the training-data columns minus the last one
+  # (presumably the outcome - TODO confirm), otherwise finalModel$xNames.
+  if (Fit$method %in% c("svmLinear", "svmRadial")) {
+    train_cols <- colnames(Fit$trainingData)
+    reqcol <- train_cols[-length(train_cols)]
+  } else {
+    reqcol <- Fit$finalModel$xNames
+  }
+
+  # Report absent columns explicitly instead of "undefined columns selected".
+  missing_cols <- setdiff(reqcol, colnames(newdata))
+  if (length(missing_cols) > 0) {
+    stop("There is something wrong in data or model: missing column(s) ",
+         paste(missing_cols, collapse = ", "), call. = FALSE)
+  }
+
+  # drop = FALSE and cbind() keep a matrix even with a single predictor or a
+  # single compound, where `[` and apply() would collapse to a vector.
+  newdata <- newdata[, reqcol, drop = FALSE]
+  newdata <- do.call(cbind, lapply(newdata, clean_column))
+
+  # Small noise to avoid near-zero-variance (NZV) errors; three values are
+  # drawn and recycled over the matrix.
+  newdata <- newdata + runif(3, 0, 0.01)
+
+  # Center and scale the descriptors, then predict with the model.
+  newdata1 <- preProcess(newdata, method = c("center", "scale"))
+  newdata11 <- predict(newdata1, newdata)
+  testpredict <- predict(modelFit, newdata11)
+
+  # Assemble the two-column result table and write it tab-separated.
+  names <- as.data.frame(rownames(nTrain))
+  colnames(names) <- "COMPOUND"
+  activity <- as.data.frame(testpredict)
+  colnames(activity) <- "PREDICTED VALUE"
+  dw <- format(cbind(names, activity), justify = "centre")
+  write.table(dw, file = args3, row.names = FALSE, sep = "\t")
+}
+pre(args1 = arg1, args2 = arg2, args3 = arg3)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool3.xml	Fri Jan 22 14:18:58 2016 -0500
@@ -0,0 +1,61 @@
+<tool id="tool3AAA" name="Predict Activity using regression model">
+<description>
+ used to predict activity based on given regression model 
+</description>
+<!--command interpreter="bash">step3run.sh $file1 $model $output1  2>/dev/null </command-->
+<requirements> <!-- set_environment names must match those declared in tool_dependencies.xml -->
+    <requirement type="set_environment">CARET_REG_TOOL3_PATH</requirement>
+    <requirement type="set_environment">R_ROOT_DIR</requirement>
+    <requirement type="package" version="3.2.0">R</requirement>
+    <requirement type="package" version="1.0.0">caret-tools</requirement>
+</requirements>
+<command interpreter="Rscript">predict.R  $file1 $model $output1  2>/dev/null </command>
+
+<inputs>
+<param name="model"  format="data" type="data" label="Select Model" help="Select built model obtained from caret tool 'Create script from the template file for regression'." />
+<param name="file1" format="csv" type="data" label="Select file have descriptor data for activity prediction" help="csv format" />
+</inputs>
+<outputs>
+<data format="txt" name="output1" label="Prediction on $file1.name" />
+</outputs>
+<help>
+
+.. class:: infomark
+
+Make sure the descriptor file selected above contains **all** of the features (or **more features**) present in the **input** "csv file" used for **model building**.
+
+----------
+
+**Input "csv file" must be as follows**
+
+----------
+
+
+Example file:-
+
+
+
+# example.csv
+
+	 feature1,feature2,feature3,..,featureN
+
+ro1	234,2.3,34,7,..,0.9
+
+ro2	432,3.4,23.1,12,..,0.12
+
+ro3	692,23,12.2,19,..,0.14
+
+
+-----------
+
+**MODEL**
+
+Choose model file received from model building step.
+
+The model file has the "data" file format, which can be seen by clicking on the output files shown in the history.
+
+
+
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri Jan 22 14:18:58 2016 -0500
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<tool_dependency>
+
+    <set_environment version="1.0">
+        <environment_variable name="CARET_REG_TOOL3_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
+    </set_environment>
+    <package name="R" version="3.2.0">
+        <repository changeset_revision="7833b0ebf8d6" name="package_r_3_2_0" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="caret-tools" version="1.0.0">
+        <repository changeset_revision="e5faefaf1037" name="caret_tool_test1" owner="deepakjadmin" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>