changeset 0:b82c88293260 draft

Uploaded
author deepakjadmin
date Fri, 22 Jan 2016 14:16:12 -0500
parents
children 75f274299190
files modelBuilding.py templateLibrary.py templateLibrary.pyc tool2reg.xml tool_dependencies.xml
diffstat 5 files changed, 1760 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/modelBuilding.py	Fri Jan 22 14:16:12 2016 -0500
@@ -0,0 +1,170 @@
+def __inputArguments():
+
+	import argparse
+	parser = argparse.ArgumentParser()
+
+	parser.add_argument("--method", nargs='?',default ='pls',help="Name of the method on which model will build; \
+	Available Methods are:- pls, glm , glmboost ")
+	parser.add_argument("rdata",help="Descriptor file for model building")
+	parser.add_argument("--getdatainfoeval",nargs='?',default='TRUE',help="Validation of the data ")
+	parser.add_argument("--getdatainfoecho",nargs='?',default='FALSE',help="print on consol about Validity of the data ")
+	parser.add_argument("--getdatainforesult",nargs='?',default='hide',help="print in output file about Validity of the data ")
+	parser.add_argument("--missingfiltereval",nargs='?',default='FALSE',help="Processing step :: removal of missing value ")
+	parser.add_argument("--missingfilterecho",nargs='?',default='FALSE',help="print Processing step :: removal of missing value ")
+	parser.add_argument("--missingfilterresult",nargs='?',default='hide',help="print in output file about Processing step :: removal of missing value ")
+	parser.add_argument("--missingfilterthreshc",nargs='?',default=0.20,type=float,help="info about highly missing column data")
+	parser.add_argument("--missingfilterthreshr",nargs='?',default=0.20,type=float,help="info about highly missing row number")
+	parser.add_argument("--pcaeval",nargs='?',default='FALSE',help="PCA of data ")
+	parser.add_argument("--pcaecho",nargs='?',default='FALSE',help="PCA of data ")
+	parser.add_argument("--pcaresult",nargs='?',default='hide',help="print in file about PCA of data ")
+	parser.add_argument("--pcacomp",nargs='?',default=3,type=int,help="Number of PCA componant will be plotted ")
+	parser.add_argument("--pcaploteval",nargs='?',default='FALSE',help="PCA plot of data ")
+	parser.add_argument("--pcaplotecho",nargs='?',default='FALSE',help="print PCA plot of data ")
+	parser.add_argument("--pcaplotresult",nargs='?',default='hide',help="write in output file about PCA plot of data")
+	parser.add_argument("--pcaplotfig",nargs='?',default='TRUE',help="make figure file for integration in output file")
+	parser.add_argument("--initialdataspliteval",nargs='?',default='TRUE',help="data splitting in test and train set ")
+	parser.add_argument("--initialdatasplitecho",nargs='?',default='FALSE',help="print about data splitting in test and train set")
+	parser.add_argument("--initialdatasplitresult",nargs='?',default='hide',help="write in outputfile about data splitting in test and train set")
+	parser.add_argument("--percent",nargs='?',default=0.8,type=float,help="percent value at which data splitting is done")
+	parser.add_argument("--nzveval",nargs='?',default='FALSE',help="remove near zero values")
+	parser.add_argument("--nzvresult",nargs='?',default='hide',help="write in outputfile about removing near zero values")
+	parser.add_argument("--nzvecho",nargs='?',default='FALSE',help="print about removing near zero values")
+	parser.add_argument("--corrfiltereval",nargs='?',default='FALSE',help="remove higly correlated values")
+	parser.add_argument("--corrfilterresult",nargs='?',default='hide',help="write in outputfile about removing highly correlated values")
+	parser.add_argument("--corrfilterecho",nargs='?',default='FALSE',help="print about removing correlated values")
+	parser.add_argument("--threshholdcor",nargs='?',default=0.75,type=float,help="percent value at which correlated values ommitted ")
+	parser.add_argument("--preproceval",nargs='?',default='FALSE',help="pre proccesing")
+	parser.add_argument("--preprocecho",nargs='?',default='FALSE',help="print about pre proccesing")
+	parser.add_argument("--preprocresult",nargs='?',default='hide',help="write in output file about pre proccesing")
+	parser.add_argument("--setupworkersecho",nargs='?',default='FALSE',help="print about number of processors")
+	parser.add_argument("--setupworkersresult",nargs='?',default='tex',help="write about number of processors in output file")
+	parser.add_argument("--numworkers",nargs='?',default=1,type=int,help="defines used processors")
+	parser.add_argument("--setupresamplingecho",nargs='?',default='FALSE',help="print resampling rules")
+	parser.add_argument("--setupresamplingresult",nargs='?',default='hide',help="write resampling rules in output file")
+	parser.add_argument("--resamplenumber",nargs='?',default=10,type=int,help="set number of resampling")
+	parser.add_argument("--resamplenumberpercent",nargs='?',default=0.75,type=float,help="set PERCENT resampling")
+	parser.add_argument("--setupgridresult",nargs='?',default='hide',help="write about number of grids in output file")
+	parser.add_argument("--setupgridecho",nargs='?',default='FALSE',help="print about number of grids")
+	parser.add_argument("--setupgridsize",nargs='?',default=3,type=int,help="set number of grids")
+	parser.add_argument("--fitmodelresult",nargs='?',default='hide',help="write about model")
+	parser.add_argument("--fitmodelecho",nargs='?',default='FALSE',help="print about model")
+	parser.add_argument("--fitmodeleval",nargs='?',default='TRUE',help="start model building")
+	parser.add_argument("--modeldescrecho",nargs='?',default='FALSE',help="print model description")
+	parser.add_argument("--modeldescrresult",nargs='?',default='hide',help="write model description in outout file")
+	parser.add_argument("--resamptableecho",nargs='?',default='FALSE',help="print resample table")
+	parser.add_argument("--resamptableresult",nargs='?',default='tex',help="write resample table in output file")
+	parser.add_argument("--profileplotecho",nargs='?',default='FALSE',help="print about profile plots")
+	parser.add_argument("--profileplotfig",nargs='?',default='TRUE',help=" profile plots")
+	parser.add_argument("--stopworkersecho",nargs='?',default='FALSE',help="stop workers ie processors")
+	parser.add_argument("--stopworkersresult",nargs='?',default='hide',help="write about workers ie processors used")
+	parser.add_argument("--testpredresult",nargs='?',default='tex',help="write about statistical measure")
+	parser.add_argument("--testpredecho",nargs='?',default='FALSE',help="print about statistical measure")
+	parser.add_argument("--classprobstexresult",nargs='?',default='tex',help="paste various figure of statistical measure in outputfile")
+	parser.add_argument("--classprobstexecho",nargs='?',default='FALSE',help="print various figure of statistical measure")
+	parser.add_argument("--savedataecho",nargs='?',default='FALSE',help="information about completion of model building ")
+	parser.add_argument("--savedataresult",nargs='?',default='hide',help="write information about completion of model building in outputfile ")
+	parser.add_argument("--outputmodel", help="give name for the generated model")		
+	parser.add_argument("--outputresultpdf", help="give name for the output pdf file")		
+	
+	args = parser.parse_args()
+	return args
+
+def generateRnwScript():
+
+        import templateLibrary
+	t = templateLibrary.__template4Rnw()
+	
+	from string import Template
+	s = Template(t)
+        
+	args = __inputArguments()
+
+	templt = s.safe_substitute(METHOD=args.method,
+			RDATA=args.rdata, 
+			GETDATAINFOEVAL=args.getdatainfoeval, 
+			GETDATAINFOECHO=args.getdatainfoecho, 
+			GETDATAINFORESULT=args.getdatainforesult, 
+			MISSINGFILTEREVAL=args.missingfiltereval,
+			MISSINGFILTERECHO=args.missingfilterecho,
+			MISSINGFILTERRESULT=args.missingfilterresult,
+			MISSINGFILTERTHRESHC=args.missingfilterthreshc,
+			MISSINGFILTERTHRESHR=args.missingfilterthreshr,
+			PCAEVAL=args.pcaeval,
+			PCAECHO=args.pcaecho,
+			PCARESULT=args.pcaresult,
+                        PCACOMP=args.pcacomp,    
+			PCAPLOTEVAL=args.pcaploteval,
+			PCAPLOTECHO=args.pcaplotecho,
+			PCAPLOTRESULT=args.pcaplotresult,
+			PCAPLOTFIG=args.pcaplotfig,
+			INITIALDATASPLITEVAL=args.initialdataspliteval,
+			INITIALDATASPLITECHO=args.initialdatasplitecho,
+			INITIALDATASPLITRESULT=args.initialdatasplitresult,
+			PERCENT=args.percent,
+			NZVEVAL=args.nzveval,
+			NZVRESULT=args.nzvresult,
+			NZVECHO=args.nzvecho,
+			CORRFILTEREVAL=args.corrfiltereval,
+			CORRFILTERRESULT=args.corrfilterresult,
+			CORRFILTERECHO=args.corrfilterecho,
+			THRESHHOLDCOR=args.threshholdcor,
+			PREPROCEVAL=args.preproceval,
+			PREPROCECHO=args.preprocecho,
+			PREPROCRESULT=args.preprocresult,
+			SETUPWORKERSECHO=args.setupworkersecho,
+			SETUPWORKERSRESULT=args.setupworkersresult,
+			NUMWORKERS=args.numworkers,
+			SETUPRESAMPLINGECHO=args.setupresamplingecho,
+			SETUPRESAMPLINGRESULT=args.setupresamplingresult,
+			RESAMPLENUMBER=args.resamplenumber,
+			RESAMPLENUMBERPERCENT=args.resamplenumberpercent,
+			SETUPGRIDRESULT=args.setupgridresult,
+			SETUPGRIDECHO=args.setupgridecho,
+			SETUPGRIDSIZE=args.setupgridsize,
+			FITMODELRESULT=args.fitmodelresult,
+			FITMODELECHO=args.fitmodelecho,
+			FITMODELEVAL=args.fitmodeleval,
+			MODELDESCRECHO=args.modeldescrecho,
+			MODELDESCRRESULT=args.modeldescrresult,
+			RESAMPTABLEECHO=args.resamptableecho,
+			RESAMPTABLERESULT=args.resamptableresult,
+			PROFILEPLOTECHO=args.profileplotecho,
+			PROFILEPLOTFIG=args.profileplotfig,
+			STOPWORKERSECHO=args.stopworkersecho,
+			STOPWORKERSRESULT=args.stopworkersresult,
+			TESTPREDRESULT=args.testpredresult,
+			TESTPREDECHO=args.testpredecho,
+			CLASSPROBSTEXRESULT=args.classprobstexresult,
+			CLASSPROBSTEXECHO=args.classprobstexecho,
+			SAVEDATAECHO=args.savedataecho,
+			SAVEDATARESULT=args.savedataresult )
+
+	
+	f = open('result-doc.Rnw','w')
+	f.write(templt)
+	f.close()
+	
+def modelBuilding():
+
+	import os
+	os.system('R CMD Sweave result-doc.Rnw  > cmd.log.1 2>&1')
+	os.system('pdflatex result-doc.tex > cmd.log.2 2>&1')
+	os.system('pdflatex result-doc.tex > cmd.log.2 2>&1')
+# 	os.system('pdflatex result-doc.tex 2>&1 | tee cmd.log.2')
+	args = __inputArguments()
+
+        from string import Template
+        s1 = Template('cp $METHOD-Fit.RData $OUTPUTMODEL')
+        s2 = Template('cp result-doc.pdf $OUTPUTRESULTPDF')
+
+        cmd1 = s1.safe_substitute(METHOD=args.method, OUTPUTMODEL=args.outputmodel)
+        cmd2 = s2.safe_substitute(OUTPUTRESULTPDF=args.outputresultpdf)
+
+        os.system(cmd1)
+        os.system(cmd2)
+	
+
+if __name__ == "__main__" :
+	
+	generateRnwScript()
+	modelBuilding()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/templateLibrary.py	Fri Jan 22 14:16:12 2016 -0500
@@ -0,0 +1,1010 @@
+def __template4Rnw():
+
+	template4Rnw = r'''%% Regression Modeling Script
+%% Max Kuhn (max.kuhn@pfizer.com, mxkuhn@gmail.com)
+%% Version: 1.00
+%% Created on: 2010/10/02
+%%
+%% Lynn Group
+%% Version: 2.00
+%% Created on: 2014/11/15
+
+%% This is an Sweave template for building and describing
+%% classification models. It mixes R and LaTeX code. The document can
+%% be processing using R's Sweave function to produce a tex file.  
+%%
+%% The inputs are:
+%% - the initial data set in a data frame called 'rawData' 
+%% - a numeric column in the data set called 'outcome'. this should be the
+%%    outcome variable 
+%% - all other columns in rawData should be predictor variables
+%% - the type of model should be in a variable called 'modName'.
+%% 
+%% The script attempts to make some intelligent choices based on the
+%% model being used. For example, if modName is "pls", the script will
+%% automatically center and scale the predictor data. There are
+%% situations where these choices can (and should be) changed.   
+%%
+%% There are other options that may make sense to change. For example,
+%% the user may want to adjust the type of resampling. To find these
+%% parts of the script, search on the string 'OPTION'. These parts of
+%% the code will document the options. 
+
+\documentclass[12pt]{report}
+\usepackage{amsmath}
+\usepackage[pdftex]{graphicx}
+\usepackage{color}
+\usepackage{ctable}
+\usepackage{xspace}
+\usepackage{fancyvrb}
+\usepackage{fancyhdr}
+\usepackage{lastpage}
+\usepackage{longtable} 
+\usepackage{algorithm2e}
+\usepackage[
+         colorlinks=true,
+         linkcolor=blue,
+         citecolor=blue,
+         urlcolor=blue]
+         {hyperref}
+         \usepackage{lscape}
+
+\usepackage{Sweave}
+\SweaveOpts{keep.source = TRUE}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% define new colors for use
+\definecolor{darkgreen}{rgb}{0,0.6,0}
+\definecolor{darkred}{rgb}{0.6,0.0,0}
+\definecolor{lightbrown}{rgb}{1,0.9,0.8}
+\definecolor{brown}{rgb}{0.6,0.3,0.3}
+\definecolor{darkblue}{rgb}{0,0,0.8}
+\definecolor{darkmagenta}{rgb}{0.5,0,0.5}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\newcommand{\bld}[1]{\mbox{\boldmath $#1$}}
+\newcommand{\shell}[1]{\mbox{$#1$}}
+\renewcommand{\vec}[1]{\mbox{\bf {#1}}}
+
+\newcommand{\ReallySmallSpacing}{\renewcommand{\baselinestretch}{.6}\Large\normalsize}
+\newcommand{\SmallSpacing}{\renewcommand{\baselinestretch}{1.1}\Large\normalsize}
+
+\newcommand{\halfs}{\frac{1}{2}}
+
+\setlength{\oddsidemargin}{-.25 truein}
+\setlength{\evensidemargin}{0truein}
+\setlength{\topmargin}{-0.2truein}
+\setlength{\textwidth}{7 truein}
+\setlength{\textheight}{8.5 truein}
+\setlength{\parindent}{0.20truein}
+\setlength{\parskip}{0.10truein}
+
+\setcounter{LTchunksize}{50}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\pagestyle{fancy}
+\lhead{}
+%% OPTION Report header name
+\chead{Regression Model Script}
+\rhead{}
+\lfoot{}
+\cfoot{}
+\rfoot{\thepage\ of \pageref{LastPage}}
+\renewcommand{\headrulewidth}{1pt}
+\renewcommand{\footrulewidth}{1pt}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% OPTION Report title and modeler name
+\title{Regression Model Script using $METHOD }
+\author{"M. Kuhn and Lynn Group, SCIS, JNU, New Delhi"}
+
+\begin{document}
+
+\maketitle
+
+\thispagestyle{empty}
+	
+<<startup, eval= TRUE, results = hide, echo = FALSE>>=
+library(Hmisc)
+library(caret)
+versionTest <- compareVersion(packageDescription("caret")$Version, 
+                              "4.65")
+if(versionTest < 0) stop("caret version 4.65 or later is required")
+
+library(RColorBrewer)
+
+
+listString <- function (x, period = FALSE, verbose = FALSE) 
+{
+  if (verbose)   cat("\n      entering listString\n")
+  flush.console()
+  if (!is.character(x)) 
+    x <- as.character(x)
+  numElements <- length(x)
+  out <- if (length(x) > 0) {
+    switch(min(numElements, 3), x, paste(x, collapse = " and "), 
+           {
+             x <- paste(x, c(rep(",", numElements - 2), " and", ""), sep = "")
+             paste(x, collapse = " ")
+           })
+  }
+  else ""
+  if (period)  out <- paste(out, ".", sep = "")
+  if (verbose)  cat("      leaving  listString\n\n")
+  flush.console()
+  out
+}
+
+resampleStats <- function(x, digits = 3)
+  {
+    bestPerf <- x$bestTune
+    colnames(bestPerf) <- gsub("^\\.", "", colnames(bestPerf))
+    out <- merge(x$results, bestPerf)
+    out <- out[, colnames(out) %in% x$perfNames]
+    names(out) <- gsub("ROC", "area under the ROC curve", names(out), fixed = TRUE)
+    names(out) <- gsub("Sens", "sensitivity", names(out), fixed = TRUE)
+    names(out) <- gsub("Spec", "specificity", names(out), fixed = TRUE)
+    names(out) <- gsub("Accuracy", "overall accuracy", names(out), fixed = TRUE)
+    names(out) <- gsub("Kappa", "Kappa statistics", names(out), fixed = TRUE)
+    names(out) <- gsub("RMSE", "root mean squared error", names(out), fixed = TRUE)
+    names(out) <- gsub("Rsquared", "$R^2$", names(out), fixed = TRUE)
+    
+    out <- format(out, digits = digits)
+    listString(paste(names(out), "was", out))
+  }
+
+latticeBubble <- function(x, y, z, offset = .5, splits = 10,
+                          pal = colorRampPalette(brewer.pal(9,"YlOrRd")[-(1:2)]),
+                          ...)
+{  
+  cexValues <- rank(z)/length(z) + offset
+  splits <- unique(quantile(z, probs = seq(0, 1, length = splits)))
+  splitup <- cut(z, breaks = splits, include.lowest = TRUE)
+  cols <- pal(length(levels(splitup)))
+  colValues <- cols[as.numeric(splitup)]
+  if(is.data.frame(x))
+    {
+      out <- splom(~x, col = colValues, cex = cexValues, ...)
+
+    } else out <- xyplot(y~x, col = colValues, cex = cexValues, ...)
+  out
+
+}
+
+
+##OPTION: model name: see ?train for more values/models
+modName <- "$METHOD"
+load("$RDATA")
+rawData <- dataX
+rawData$$outcome <- dataY
+
+
+
+@ 
+
+
+\section*{Data Sets}\label{S:data}
+
+%% OPTION: provide some background on the problem, the experimental
+%% data, how the compounds were selected etc
+
+
+<<getDataInfo, eval = $GETDATAINFOEVAL, echo = $GETDATAINFOECHO, results = $GETDATAINFORESULT>>=
+if(!any(names(rawData) == "outcome")) stop("a variable called outcome should be in the data set")
+if(!is.numeric(rawData$outcome)) stop("the outcome should be a numeric vector")
+
+numSamples <- nrow(rawData)
+numPredictors <- ncol(rawData) - 1
+predictorNames <- names(rawData)[names(rawData) != "outcome"]
+
+isNum <- apply(rawData[,predictorNames, drop = FALSE], 2, is.numeric)
+if(any(!isNum)) stop("all predictors in rawData should be numeric")
+
+@ 
+
+
+<<missingFilter, eval = $MISSINGFILTEREVAL, echo = $MISSINGFILTERECHO, results = $MISSINGFILTERRESULT>>=
+
+colRate <- apply(rawData[, predictorNames, drop = FALSE],
+                 2, function(x) mean(is.na(x)))
+
+##OPTION thresholds can be changed
+colExclude <- colRate > $MISSINGFILTERTHRESHC
+
+missingText <- ""
+
+if(any(colExclude))
+  {
+    missingText <- paste(missingText,
+                         ifelse(sum(colExclude) > 1,
+                                " There were ",
+                                " There was "),
+                         sum(colExclude),
+                         ifelse(sum(colExclude) > 1,
+                                " predictors ",
+                                " predictor "),
+                         "with an excessive number of ",
+                         "missing data. ",
+                         ifelse(sum(colExclude) > 1,
+                                " These were excluded. ",
+                                " This was excluded. "))
+    predictorNames <- predictorNames[!colExclude]
+    rawData <- rawData[, names(rawData) %in% c("outcome", predictorNames), drop = FALSE]
+  }
+
+
+rowRate <- apply(rawData[, predictorNames, drop = FALSE],
+                 1, function(x) mean(is.na(x)))
+
+rowExclude <- rowRate > $MISSINGFILTERTHRESHR
+
+
+if(any(rowExclude))
+  {
+    missingText <- paste(missingText,
+                         ifelse(sum(rowExclude) > 1,
+                                " There were ",
+                                " There was "),
+                         sum(colExclude),
+                         ifelse(sum(rowExclude) > 1,
+                                " samples ",
+                                " sample "),
+                         "with an excessive number of ",
+                         "missing data. ",
+                         ifelse(sum(rowExclude) > 1,
+                                " These were excluded. ",
+                                " This was excluded. "),
+                         "After filtering, ",
+                         sum(!rowExclude),
+                         " samples remained.")
+    rawData <- rawData[!rowExclude, ]
+    hasMissing <- apply(rawData[, predictorNames, drop = FALSE],
+                        1, function(x) mean(is.na(x)))
+  } else {
+        hasMissing <- apply(rawData[, predictorNames, drop = FALSE],
+                        1, function(x) any(is.na(x)))
+        missingText <- paste(missingText,
+                             ifelse(missingText == "",
+                                "There ",
+                                "Subsequently, there "),
+                             ifelse(sum(hasMissing) == 1,
+                                    "was ",
+                                    "were "),
+                             ifelse(sum(hasMissing) > 0, 
+                                    sum(hasMissing), 
+                                    "no"),
+                             ifelse(sum(hasMissing) == 1,
+                                    "sample ",
+                                    "samples "),
+                             "with missing values.")                            
+  rawData <- rawData[complete.cases(rawData),]
+  }
+
+dataDist <- summary(rawData$outcome)
+dataSD <- sd(rawData$outcome, na.rm = TRUE)
+dataText <- paste("The average outcome value was ",
+                  dataDist["Mean"],
+                  " and a standard deviation of ",
+                  dataSD, ". The minimum and maximum values were ",
+                  dataDist["Min."], " and ", dataDist["Max."],
+                  ", respectively. Figure \\\\ref{F:dens} shows a ",
+                  " density plot (i.e. a smooth histogram) of the response.",
+                  sep = "")
+
+rawData1 <- rawData[,1:length(rawData)-1]
+rawData2 <- rawData[,length(rawData)]
+
+set.seed(222)
+nzv1 <- nearZeroVar(rawData1)
+  if(length(nzv1) > 0)
+  {
+    nzvVars1 <- names(rawData1)[nzv1]
+    rawData <- rawData1[,-nzv1]
+    rawData$outcome <- rawData2
+    nzvText1 <- paste("There were ",
+                     length(nzv1),
+                     " predictors that were removed from original data due to",
+                     " severely unbalanced distributions that",
+                     " could negatively affect the model fit",
+                     ifelse(length(nzv1) > 10,
+                            ".",
+                            paste(": ",
+                                  listString(nzvVars1),
+                                  ".",
+                                  sep = "")),
+                     sep = "")
+
+ } else {
+rawData <- rawData1
+rawData$outcome <- rawData2
+nzvText1 <- ""
+
+ }
+
+remove("rawData1")
+remove("rawData2")
+
+@ 
+
+The initial data set consisted of \Sexpr{numSamples} samples and
+\Sexpr{numPredictors} predictor variables. \Sexpr{dataText} \Sexpr{missingText}
+\Sexpr{nzvText1}
+
+\setkeys{Gin}{width = 0.8\textwidth}
+\begin{figure}[b]
+  \begin{center}
+
+<<densityplot, echo = FALSE, results = hide, fig = TRUE, width = 8, height = 4.5>>=
+trellis.par.set(caretTheme(), warn = TRUE)
+print(densityplot(~rawData$outcome, pch = "|", adjust = 1.25, xlab = ""))
+@ 
+    \caption[Data Density]{A density plot of the response. The marks
+      along the $x$--axis show the locations of the data points.}
+    \label{F:dens}         
+  \end{center}
+\end{figure}  
+
+
+<<pca, eval= $PCAEVAL, echo = $PCAECHO, results = $PCARESULT>>=
+ predictorNames <- names(rawData)[names(rawData) != "outcome"]
+ numPredictors <- length(predictorNames)
+
+predictors <- rawData[, predictorNames, drop = FALSE]
+## PCA will fail with predictors having less than 2 unique values
+isZeroVar <- apply(predictors, 2, 
+                   function(x) length(unique(x)) < 2)
+if(any(isZeroVar)) predictors <- predictors[, !isZeroVar, drop = FALSE]
+## For whatever, only the formula interface to prcomp 
+## handles missing values
+pcaForm <- as.formula(
+                      paste("~",
+                            paste(names(predictors), collapse = "+")))
+pca <- prcomp(pcaForm, 
+              data = predictors,
+              center = TRUE, 
+              scale. = TRUE,
+              na.action = na.omit)
+## OPTION: the number of components plotted/discussed can be set
+numPCAcomp <- $PCACOMP
+pctVar <- pca$sdev^2/sum(pca$sdev^2)*100
+pcaText <- paste(round(pctVar[1:numPCAcomp], 1),
+                 "\\\\%", 
+                 sep = "")
+pcaText <- listString(pcaText)
+@ 
+
+To get an initial assessment of the separability of the classes,
+principal component analysis (PCA) was used to distill the
+\Sexpr{numPredictors} predictors down into \Sexpr{numPCAcomp}
+surrogate variables (i.e. the principal components) in a manner that
+attempts to maximize the amount of information preserved from the
+original predictor set. Figure \ref{F:inititalPCA} contains plots of
+the first \Sexpr{numPCAcomp} components, which accounted for
+\Sexpr{pcaText} percent of the variability in the original predictors
+(respectively).  
+
+%% OPTION: remark on how well (or poorly) the data separated
+
+\setkeys{Gin}{width = 0.8\textwidth}
+\begin{figure}[p]
+  \begin{center}
+
+<<pcaPlot, eval = $PCAPLOTEVAL, echo = $PCAPLOTECHO, results = $PCAPLOTRESULT, fig = $PCAPLOTFIG, width = 8, height = 8>>=
+trellis.par.set(caretTheme(), warn = TRUE)
+if(numPCAcomp == 2)
+  {
+    axisRange <- extendrange(pca$x[, 1:2])
+    print(
+          latticeBubble(x = as.data.frame(pca$x)$PC1,
+                        y = as.data.frame(pca$x)$PC2, 
+                 z = rawData$outcome,
+                 type = c("p", "g"),
+                 xlab = "PC1", ylab = "PC2",
+                 xlim = axisRange,
+                 ylim = axisRange))
+    
+  } else {
+    axisRange <- extendrange(pca$x[, 1:numPCAcomp])
+    print(
+          latticeBubble(x = as.data.frame(pca$x)[,1:numPCAcomp],
+                   
+                 z = rawData$outcome,
+                 type = c("p", "g"),
+                 xlab = "PC1", ylab = "PC2",
+                 xlim = axisRange,
+                 ylim = axisRange))    
+    
+    
+      } 
+@ 
+    \caption[PCA Plot]{A plot of the first \Sexpr{numPCAcomp}
+      principal components for the original data set. Smaller, lighter
+    points indicate smaller values of the response while darker,
+    larger points correspond to larger values of the outcome}
+    \label{F:inititalPCA}         
+  \end{center}
+\end{figure}  
+
+
+<<initialDataSplit, eval = $INITIALDATASPLITEVAL, echo = $INITIALDATASPLITECHO, results = $INITIALDATASPLITRESULT>>=
+
+  ## OPTION: in small samples sizes, you may not want to set aside a
+  ## training set and focus on the resampling results.   
+ numSamples <- nrow(rawData)
+
+ predictorNames <- names(rawData)[names(rawData) != "outcome"]
+ numPredictors <- length(predictorNames)
+ 
+# pctTrain <- .15
+  pctTrain <- $PERCENT
+
+if(pctTrain < 1)
+  {
+    ## OPTION: seed number can be changed
+    set.seed(1)
+    inTrain <- createDataPartition(rawData$outcome,
+                                   p = pctTrain,
+                                   list = FALSE)
+    trainX <- rawData[ inTrain, predictorNames]
+    testX  <- rawData[-inTrain, predictorNames]
+    trainY <- rawData[ inTrain, "outcome"]
+    testY  <- rawData[-inTrain, "outcome"]
+    splitText <- paste("The original data were split into ",
+                       "a training set ($n$=",
+                       nrow(trainX),
+                       ") and a test set ($n$=",
+                       nrow(testX),
+                       ") in a manner that preserved the ",
+                       "distribution of the response.",
+                       sep = "")
+    isZeroVar <- apply(trainX, 2, 
+                       function(x) length(unique(x)) < 2)
+    if(any(isZeroVar))
+      {
+        trainX <- trainX[, !isZeroVar, drop = FALSE]  
+        testX <- testX[, !isZeroVar, drop = FALSE]
+      }
+    
+  } else {
+    trainX <- rawData[, predictorNames]
+    testX  <- NULL
+    trainY <- rawData[, "outcome"]
+    testY  <- NULL 
+    splitText <- "The entire data set was used as the training set."
+  }
+
+remove("rawData")
+
+@ 
+
+\Sexpr{splitText} 
+The data set for model building consisted of \Sexpr{numSamples} samples and
+\Sexpr{numPredictors} predictor variables.
+
+
+
+<<nzv, eval= $NZVEVAL, results = $NZVRESULT, echo = $NZVECHO>>=
+## OPTION: other pre-processing steps can be used
+ppSteps <- caret:::suggestions(modName)
+
+set.seed(2)
+if(ppSteps["nzv"])
+  {
+    nzv <- nearZeroVar(trainX)
+    if(length(nzv) > 0) 
+      {
+        nzvVars <- names(trainX)[nzv]
+        trainX <- trainX[, -nzv]
+        nzvText <- paste("There were ",
+                         length(nzv),
+                         " predictors that were removed due to",
+                         " severely unbalanced distributions that",
+                         " could negatively affect the model fit",
+                         ifelse(length(nzv) > 10, 
+                                ".",
+                                paste(": ",
+                                      listString(nzvVars),
+                                      ".",
+                                      sep = "")),
+                         sep = "") 
+         testX <- testX[, -nzv]
+      } else nzvText <- ""
+  } else nzvText <- ""
+@ 
+
+\Sexpr{nzvText}
+
+<<corrFilter, eval = $CORRFILTEREVAL, results = $CORRFILTERRESULT, echo = $CORRFILTERECHO>>=
+
+if(ppSteps["corr"])
+  {
+    ## OPTION: 
+    ##corrThresh <- .75
+    corrThresh <- $THRESHHOLDCOR
+    highCorr <- findCorrelation(cor(trainX, use = "pairwise.complete.obs"), 
+                                corrThresh)
+    if(length(highCorr) > 0) 
+      {
+        corrVars <- names(trainX)[highCorr]
+        trainX <- trainX[, -highCorr]
+        corrText <- paste("There were ",
+                         length(highCorr),
+                         " predictors that were removed due to",
+                         " large between--predictor correlations that",
+                         " could negatively affect the model fit",
+                         ifelse(length(highCorr) > 10, 
+                                ".",
+                                paste(": ",
+                                      listString(highCorr),
+                                      ".",
+                                      sep = "")),
+                          " Removing these predictors forced",
+                          " all pair--wise correlations to be",
+                          " less than ",
+                          corrThresh,
+                          ".",
+                          sep = "") 
+       testX <- testX[, -highCorr]
+      } else corrText <- ""
+  }else corrText <- ""
+@
+
+ \Sexpr{corrText}
+
+<<preProc, eval = $PREPROCEVAL, echo = $PREPROCECHO, results = $PREPROCRESULT>>=
+
+## Collect the pre--processing steps: centering and scaling follow ppSteps,
+## and knnImpute is added when hasMissing flags anything.
+##OPTION other methods, such as spatial sign, can be added to this list
+ppMethods <- c(if(ppSteps["center"]) "center",
+               if(ppSteps["scale"]) "scale",
+               if(any(hasMissing)) "knnImpute")
+
+if(length(ppMethods) == 0)
+  {
+    ppInfo <- NULL
+    ppText <- ""
+  } else {
+    ## The transformations are estimated from the training set and then
+    ## applied to the training set and, when one was held out, the test set
+    ppInfo <- preProcess(trainX, method = ppMethods)
+    trainX <- predict(ppInfo, trainX)
+    if(pctTrain < 1) testX <- predict(ppInfo, testX)
+
+    appliedTo <- if(pctTrain < 1) " and test" else ""
+    ppText <- paste0("The following pre--processing methods were",
+                     " applied to the training",
+                     appliedTo,
+                     " data: ",
+                     listString(ppMethods),
+                     ".")
+
+    ## Replace the method names by readable descriptions; the replacements
+    ## run in the order in which they are listed here
+    ppLabels <- list(center = "mean centering",
+                     scale = "scaling to unit variance",
+                     knnImpute = paste0(ppInfo$k, "--nearest neighbor imputation"),
+                     spatialSign = "the spatial sign transformation",
+                     pca = "principal component feature extraction",
+                     ica = "independent component feature extraction")
+    for(ppName in names(ppLabels))
+      ppText <- gsub(ppName, ppLabels[[ppName]], ppText)
+  }
+
+predictorNames <- names(trainX)
+
+## The predictor count is only reported when one of the earlier filtering or
+## pre--processing chunks produced some text
+varText <- if(any(c(nzvText, corrText, ppText) != ""))
+  {
+    paste("After pre--processing, ",
+          ncol(trainX),
+          "predictors remained for modeling.")
+  } else ""
+
+@
+
+\Sexpr{ppText} \Sexpr{varText}
+
+\clearpage
+\section*{Model Building}
+
+<<setupWorkers, eval = TRUE, echo = $SETUPWORKERSECHO, results = $SETUPWORKERSRESULT>>=
+
+## Number of worker processes; no cluster is created unless it exceeds 1
+##OPTION: turn up numWorkers to use MPI
+numWorkers <- $NUMWORKERS
+
+if(numWorkers > 1)
+  {
+    library(snow)
+
+    ## Compute function stored on the resampling control object in the
+    ## setupResampling chunk: the cluster arrives through the dots as 'cl'
+    ## and the work is distributed with parLapply
+    mpiCalcs <- function(X, FUN, ...) parLapply(list(...)$cl, X, FUN)
+
+    cl <- makeCluster(numWorkers, "MPI")
+  }
+@
+
+<<setupResampling, echo = $SETUPRESAMPLINGECHO, results = $SETUPRESAMPLINGRESULT>>=
+##OPTION: the resampling options can be changed. See
+##        ?trainControl for details
+resampName <- "repeatedcv"
+resampNumber <- $RESAMPLENUMBER
+numRepeat <- 3
+resampP <- $RESAMPLENUMBERPERCENT
+
+##OPTION select other performance metrics as needed
+optMetric <- "RMSE"
+
+## Tuning parameter table of the chosen model; the reporting chunks read it
+modelInfo <- modelLookup(modName)
+
+set.seed(3)
+ctlObj <- trainControl(method = resampName, number = resampNumber,
+                       repeats = numRepeat, p = resampP)
+
+## When a cluster was started in setupWorkers, attach it to the control object
+if(numWorkers > 1)
+  {
+    ctlObj[c("workers", "computeFunction", "computeArgs")] <-
+      list(numWorkers, mpiCalcs, list(cl = cl))
+  }
+@
+
+<<setupGrid, results = $SETUPGRIDRESULT, echo = $SETUPGRIDECHO>>=
+
+##OPTION expand or contract these grids as needed (or
+##       add more models
+
+## Number of candidate tuning values: the models named below have a fixed
+## size, every other model uses the size supplied to the template
+gridSize <- switch(modName,
+                   svmPoly =, svmRadial =, svmLinear =, ctree2 =, ctree = 5,
+                   earth = 7,
+                   knn =, glmboost =, rf =, nodeHarvest = 10,
+                   rpart = 15,
+                   pls =, lars2 =, lars = min(20, ncol(trainX)),
+                   $SETUPGRIDSIZE)
+
+## gbm and nnet are tuned over an explicit grid instead of gridSize
+if(modName == "gbm")
+  {
+    tGrid <- expand.grid(.interaction.depth = seq(1, 9, by = 2),
+                         .n.trees = seq(20, 200, by = 20),
+                         .shrinkage = .1)
+  }
+
+if(modName == "nnet")
+  {
+    tGrid <- expand.grid(.size = seq(1, 9, by = 2),
+                         .decay = c(0, .001, .01, .1))
+  }
+
+@
+
+
+<<fitModel, results = $FITMODELRESULT, echo = $FITMODELECHO, eval = $FITMODELEVAL>>=
+
+##OPTION alter as needed
+
+## Fit and tune the selected model. gbm and nnet use the explicit grid
+## tGrid from the setupGrid chunk; all other models are tuned over
+## gridSize candidate values.
+set.seed(4)
+modelFit <- if(modName == "gbm")
+  {
+    ## the rows are put in random order before fitting
+    mix <- sample(seq(along = trainY))
+    train(trainX[mix,], trainY[mix], modName,
+          verbose = FALSE,
+          bag.fraction = .9,
+          metric = optMetric,
+          trControl = ctlObj,
+          tuneGrid = tGrid)
+  } else if(modName == "nnet") {
+    train(trainX, trainY, modName,
+          metric = optMetric,
+          linout = TRUE,
+          trace = FALSE,
+          maxiter = 1000,
+          MaxNWts = 5000,
+          trControl = ctlObj,
+          tuneGrid = tGrid)
+  } else if(modName %in% c("svmRadial", "svmPoly", "svmLinear")) {
+    train(trainX, trainY, modName,
+          metric = optMetric,
+          scaled = TRUE,
+          trControl = ctlObj,
+          tuneLength = gridSize)
+  } else {
+    train(trainX, trainY, modName,
+          trControl = ctlObj,
+          metric = optMetric,
+          tuneLength = gridSize)
+  }
+
+@
+
+<<modelDescr, echo = $MODELDESCRECHO, results = $MODELDESCRRESULT>>=
+
+## Build summaryText, the paragraph describing the tuning parameters of
+## the model and how resampling was used to choose them. The text is
+## typeset by the Sexpr call that follows this chunk.
+
+summaryText <- ""
+
+## Readable description of the resampling scheme stored in the fit
+resampleName <- switch(tolower(modelFit$control$method),
+                       boot = paste("the bootstrap (", length(modelFit$control$index), " reps)", sep = ""),
+                       boot632 = paste("the bootstrap 632 rule (", length(modelFit$control$index), " reps)", sep = ""),
+                       cv = paste("cross-validation (", modelFit$control$number, " fold)", sep = ""),
+                       repeatedcv = paste("cross-validation (", modelFit$control$number, " fold, repeated ",
+                         modelFit$control$repeats, " times)", sep = ""),
+                       lgocv = paste("repeated train/test splits (", length(modelFit$control$index), " reps, ",
+                         round(modelFit$control$p, 2), "$\\%$)", sep = ""))
+
+tuneVars <- latexTranslate(tolower(modelInfo$label))
+tuneVars <- gsub("\\#", "the number of ", tuneVars, fixed = TRUE)
+
+## A model without tuning parameters is recognised by the placeholder
+## name "parameter" in modelInfo. The resampTable and profilePlot chunks
+## use the same test, so the text, the table and the figure always agree;
+## the test does not depend on how the columns of bestTune are named.
+if(all(modelInfo$parameter == "parameter"))
+  {
+    summaryText <- paste(summaryText,
+                         "\n\n",
+                         "There are no tuning parameters associated with this model.",
+                         "To characterize the model performance on the training set,",
+                         resampleName,
+                         "was used.",
+                         "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile}",
+                         "show summaries of the resampling results. ")
+
+  } else {
+    summaryText <- paste("There",
+                         ifelse(nrow(modelInfo) > 1, "are", "is"),
+                         nrow(modelInfo),
+                         ifelse(nrow(modelInfo) > 1, "tuning parameters", "tuning parameter"),
+                         "associated with this model:",
+                         listString(tuneVars, period = TRUE))
+
+    ## Map the column names of bestTune (leading dots removed) to the
+    ## descriptive labels from modelInfo wherever a label exists
+    paramNames <- gsub(".", "", names(modelFit$bestTune), fixed = TRUE)
+    for(i in seq(along = paramNames))
+      {
+        check <- modelInfo$parameter %in% paramNames[i]
+        if(any(check))
+          {
+            paramNames[i] <- modelInfo$label[which(check)]
+          }
+      }
+
+    paramNames <- gsub("#", "the number of ", paramNames, fixed = TRUE)
+
+    ## Describe the search over the candidate values and report the
+    ## tuning values of the final model
+    summaryText <- paste(summaryText,
+                         "To choose",
+                         ifelse(nrow(modelInfo) > 1,
+                                "appropriate values of the tuning parameters,",
+                                "an appropriate value of the tuning parameter,"),
+                         resampleName,
+                         "was used to generate a profile of performance across the",
+                         nrow(modelFit$results),
+                         ifelse(nrow(modelInfo) > 1,
+                                "combinations of the tuning parameters.",
+                                "candidate values."),
+                         "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile} show",
+                         "summaries of the resampling profile. ",
+                         "The final model fitted to the entire training set was:",
+                         listString(paste(latexTranslate(tolower(paramNames)), "=", modelFit$bestTune[1,]), period = TRUE))
+
+  }
+@
+
+\Sexpr{summaryText}
+
+<<resampTable, echo = $RESAMPTABLEECHO, results = $RESAMPTABLERESULT>>=
+
+## Typeset the resampling results of the fit as a LaTeX table. Tuning
+## parameters that took a single value are removed from the table and
+## described in the caption instead (constString).
+
+tableData <- modelFit$results
+
+if(all(modelInfo$parameter == "parameter"))
+  {
+    ## No tuning parameters: drop the placeholder column and show only
+    ## the performance means and standard deviations
+    tableData <- tableData[,-1, drop = FALSE]
+    colNums <- c(length(modelFit$perfNames), length(modelFit$perfNames))
+    colLabels <- c("Mean", "Standard Deviation")
+    constString <- ""
+    isConst <- NULL
+  } else {
+
+    ## TRUE for every tuning parameter with one unique value in the results
+    isConst <- apply(tableData[, modelInfo$parameter, drop = FALSE],
+                     2,
+                     function(x) length(unique(x)) == 1)
+
+    numParamInTable <- sum(!isConst)
+
+    if(any(isConst))
+      {
+        constParam <- modelInfo$parameter[isConst]
+        constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
+        tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
+        ## Singular and plural wording must follow the number of constant
+        ## parameters: "values of" goes with more than one parameter
+        constString <- paste("The tuning",
+                             ifelse(sum(isConst) > 1,
+                                    "parameters",
+                                    "parameter"),
+                             listString(paste("``", names(constValues), "''", sep = "")),
+                             ifelse(sum(isConst) > 1,
+                                    "were",
+                                    "was"),
+                             "held constant at",
+                             ifelse(sum(isConst) > 1,
+                                    "values of",
+                                    "a value of"),
+                             listString(constValues[1,], period = TRUE))
+
+      } else constString <- ""
+
+    ## Use the descriptive labels from modelInfo as column headings for
+    ## the tuning parameters that remain in the table
+    cn <- colnames(tableData)
+    for(i in seq(along = cn))
+      {
+        check <- modelInfo$parameter %in% cn[i]
+        if(any(check))
+          {
+            cn[i] <- modelInfo$label[which(check)]
+          }
+      }
+    colnames(tableData) <- cn
+
+    colNums <- c(numParamInTable,
+                 length(modelFit$perfNames),
+                 length(modelFit$perfNames))
+    colLabels <- c("", "Mean", "Standard Deviation")
+  }
+
+## The standard deviation columns sit under their own column group, so the
+## SD suffix is removed from their names
+colnames(tableData) <- gsub("SD$", "", colnames(tableData))
+colnames(tableData) <- latexTranslate(colnames(tableData))
+rownames(tableData) <- latexTranslate(rownames(tableData))
+
+latex(tableData,
+      rowname = NULL,
+      file = "",
+      cgroup = colLabels,
+      n.cgroup = colNums,
+      where = "h!",
+      digits = 4,
+      longtable = nrow(tableData) > 30,
+      caption = paste(resampleName, "results from the model fit.", constString),
+      label = "T:resamps")
+@
+
+\setkeys{Gin}{ width = 0.9\textwidth}
+\begin{figure}[b]
+  \begin{center}
+
+<<profilePlot, echo = $PROFILEPLOTECHO, fig = $PROFILEPLOTFIG, width = 8, height = 6>>=
+
+trellis.par.set(caretTheme(), warn = TRUE)
+
+## When there is nothing to profile (no tuning parameters, every parameter
+## held constant, or the nb model) the resampling histograms are drawn;
+## otherwise the metric is plotted against the tuning parameters
+histogramOnly <- any(c(all(modelInfo$parameter == "parameter"),
+                       all(isConst),
+                       modName == "nb"))
+if(histogramOnly)
+  {
+    resultsPlot <- resampleHist(modelFit)
+    plotCaption <- paste("Distributions of model performance from the ",
+                         "training set estimated using ",
+                         resampleName)
+  } else {
+    ## for the svm models the x values are shown on the log10 scale
+    resultsPlot <- if(modName %in% c("svmPoly", "svmRadial", "svmLinear"))
+      {
+        plot(modelFit, metric = optMetric, xTrans = function(x) log10(x))
+      } else {
+        plot(modelFit, metric = optMetric)
+      }
+    resultsPlot <- update(resultsPlot,
+                          type = c("g", "p", "l"),
+                          ylab = paste(optMetric, " (", resampleName, ")", sep = ""))
+    plotCaption <- paste("A plot of the estimates of the",
+                         optMetric,
+                         "values calculated using",
+                         resampleName)
+  }
+print(resultsPlot)
+@
+   \caption[Performance Plot]{\Sexpr{plotCaption}.}
+    \label{F:profile}         
+  \end{center}
+\end{figure}  
+
+<<stopWorkers, echo = $STOPWORKERSECHO, results = $STOPWORKERSRESULT>>=
+## Shut down the cluster created in setupWorkers, if there is one
+if(numWorkers > 1)
+  {
+    stopCluster(cl)
+  }
+@
+
+<<testPred, results = $TESTPREDRESULT, echo = $TESTPREDECHO>>=
+
+## When a test set was held out (pctTrain < 1): start the test set section,
+## summarise the test set performance in testString and write the
+## observed versus predicted plot to obsPred.pdf. Otherwise testString
+## is left empty.
+
+  if(pctTrain < 1)
+  {
+    cat("\\clearpage\n\\section*{Test Set Results}\n\n")
+
+    testPreds <- extractPrediction(list(fit = modelFit),
+                                   testX = testX, testY = testY)
+    testPreds <- subset(testPreds, dataType == "Test")
+    ## Performance on the test set, computed with the summary function
+    ## stored in the control object of the fit
+    values <- modelFit$control$summaryFunction(testPreds)
+    names(values) <- gsub("RMSE", "root mean squared error", names(values), fixed = TRUE)
+    names(values) <- gsub("Rsquared", "$R^2$", names(values), fixed = TRUE)
+    values <- format(values, digits = 3)
+
+    testString <- paste("Based on the test set of",
+                        nrow(testX),
+                        "samples,",
+                        listString(paste(names(values), "was", values), period = TRUE),
+                        " A plot of the observed and predicted outcomes for the test set ",
+                        "is given in Figure \\\\ref{F:obsPred}.")
+    testString <- paste(testString,
+                        " Using ", resampleName,
+                        ", the training set estimates were ",
+                        resampleStats(modelFit),
+                        ".",
+                        sep = "")
+
+    ## Both axes share one range so that the identity line runs diagonally
+    axisRange <- extendrange(testPreds[, c("obs", "pred")])
+    obsPred <- xyplot(obs ~ pred,
+                      data = testPreds,
+                      xlim = axisRange,
+                      ylim = axisRange,
+                      panel = function(x, y)
+                      {
+                        panel.abline(0, 1, col = "darkgrey", lty = 2)
+                        panel.xyplot(x, y, type = c("p", "g"))
+                        panel.loess(x, y, col = "darkred", lwd = 2)
+                      },
+                      ylab = "Observed Response",
+                      xlab = "Predicted Response")
+
+    pdf("obsPred.pdf", height = 8, width = 8)
+    trellis.par.set(caretTheme())
+    print(obsPred)
+    ## dev.off() returns the current device visibly; as the last value of
+    ## this block it would be printed into the document, so it is hidden
+    invisible(dev.off())
+
+  } else testString <- ""
+@
+\Sexpr{testString}
+
+
+<<classProbsTex, results = $CLASSPROBSTEXRESULT, echo = $CLASSPROBSTEXECHO>>=
+
+## When a test set was held out, write the LaTeX figure that includes the
+## obsPred graphic produced by the testPred chunk
+  if(pctTrain < 1)
+  {
+    figureTex <- c("\\begin{figure}[p]\n",
+                   "\\begin{center}\n",
+                   "\\includegraphics{obsPred}",
+                   "\\caption[Observed V Fitted Values]{",
+                   "The observed and predicted responses. ",
+                   "The grey line is the line of identity while the",
+                   "solid red line is a smoothed trend line.}\n",
+                   "\\label{F:obsPred}\n",
+                   "\\end{center}\n",
+                   "\\end{figure}")
+    ## the pieces are joined with single spaces
+    cat(paste(figureTex, collapse = " "))
+  }
+
+@
+
+\section*{Versions}
+
+<<versions, echo = FALSE, results = tex>>=
+## Print the R session information (sessionInfo) formatted as LaTeX
+toLatex(sessionInfo())
+
+@ 
+
+<<save-data, echo = $SAVEDATAECHO, results = $SAVEDATARESULT>>=
+## Store the fitted model under the fixed variable name Fit in an RData
+## file named after the method.
+## change this to the name of modName....
+Fit <- modelFit
+save(list = "Fit", file = "$METHOD-Fit.RData")
+@
+The model was built using $METHOD and is saved as $METHOD-Fit.RData for reuse. This contains the variable Fit.
+
+
+\end{document}'''
+	return template4Rnw
Binary file templateLibrary.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool2reg.xml	Fri Jan 22 14:16:12 2016 -0500
@@ -0,0 +1,567 @@
+<tool id="cghyjaqxpsssp2" name="Create script from the template file for regression ">
+<description>
+ creates a script file from user-supplied input for building a model
+</description>
+<requirements>
+        <requirement type="set_environment">CARET_REG_TOOL2_PATH</requirement>
+                 <requirement type="set_environment">R_ROOT_DIR</requirement>
+    <requirement type="package" version="3.2.0">R</requirement>
+    <requirement type="package" version="1.0.0">caret-tools</requirement>
+</requirements>
+<command interpreter="python">
+#if $OPTION11.PARAMAETERS == "Advance" 
+modelBuilding.py --method $METHOD $RDATA --getdatainfoeval $OPTION11.GETDATAINFOEVAL --getdatainfoecho $OPTION11.GETDATAINFOECHO --getdatainforesult $OPTION11.GETDATAINFORESULT --missingfiltereval $OPTION11.CON1.MISSINGFILTEREVAL --missingfilterecho $OPTION11.CON1.MISSINGFILTERECHO --missingfilterresult $OPTION11.CON1.MISSINGFILTERRESULT --missingfilterthreshc $OPTION11.CON1.MISSINGFILTERTHRESHC --missingfilterthreshr $OPTION11.CON1.MISSINGFILTERTHRESHR --pcaeval $OPTION11.PCAEVAL --pcaecho $OPTION11.PCAECHO --pcaresult $OPTION11.PCARESULT --pcacomp $OPTION11.PCACOMP --pcaploteval $OPTION11.PCAPLOTEVAL --pcaplotecho $OPTION11.PCAPLOTECHO --pcaplotresult $OPTION11.PCAPLOTRESULT --pcaplotfig $OPTION11.PCAPLOTFIG --initialdataspliteval $OPTION11.CON2.INITIALDATASPLITEVAL --initialdatasplitecho $OPTION11.CON2.INITIALDATASPLITECHO --initialdatasplitresult $OPTION11.CON2.INITIALDATASPLITRESULT --percent $OPTION11.CON2.PERCENT --nzveval $OPTION11.CON3.NZVEVAL --nzvresult $OPTION11.CON3.NZVRESULT --nzvecho $OPTION11.CON3.NZVECHO --corrfiltereval $OPTION11.CON4.CORRFILTEREVAL --corrfilterresult $OPTION11.CON4.CORRFILTERRESULT --corrfilterecho $OPTION11.CON4.CORRFILTERECHO --threshholdcor $OPTION11.CON4.THRESHHOLDCOR --preproceval $OPTION11.CON5.PREPROCEVAL  --preprocecho $OPTION11.CON5.PREPROCECHO --preprocresult $OPTION11.CON5.PREPROCRESULT --setupworkersecho $OPTION11.SETUPWORKERSECHO --setupworkersresult $OPTION11.SETUPWORKERSRESULT --numworkers $OPTION11.NUMWORKERS --setupresamplingecho $OPTION11.CON6.SETUPRESAMPLINGECHO --setupresamplingresult $OPTION11.CON6.SETUPRESAMPLINGRESULT --resamplenumber $OPTION11.CON6.RESAMPLENUMBER --resamplenumberpercent $OPTION11.CON6.RESAMPLENUMBERPERCENT --setupgridresult $OPTION11.SETUPGRIDRESULT --setupgridecho $OPTION11.SETUPGRIDECHO --setupgridsize $OPTION11.SETUPGRIDSIZE --fitmodelresult $OPTION11.FITMODELRESULT --fitmodelecho $OPTION11.FITMODELECHO --fitmodeleval $OPTION11.FITMODELEVAL --modeldescrecho $OPTION11.MODELDESCRECHO 
--modeldescrresult $OPTION11.MODELDESCRRESULT --resamptableecho $OPTION11.RESAMPTABLEECHO --resamptableresult $OPTION11.RESAMPTABLERESULT --profileplotecho $OPTION11.PROFILEPLOTECHO --profileplotfig $OPTION11.PROFILEPLOTFIG --stopworkersecho $OPTION11.STOPWORKERSECHO --stopworkersresult $OPTION11.STOPWORKERSRESULT --testpredresult $OPTION11.TESTPREDRESULT --testpredecho $OPTION11.TESTPREDECHO --classprobstexresult $OPTION11.CLASSPROBSTEXRESULT --classprobstexecho $OPTION11.CLASSPROBSTEXECHO --savedataecho $OPTION11.SAVEDATAECHO --savedataresult $OPTION11.SAVEDATARESULT --outputmodel $model --outputresultpdf $document;
+#end if 
+#if $OPTION11.PARAMAETERS == "basic" 
+modelBuilding.py --method $METHOD $RDATA --getdatainfoeval $OPTION11.GETDATAINFOEVAL --getdatainfoecho $OPTION11.GETDATAINFOECHO --getdatainforesult $OPTION11.GETDATAINFORESULT --missingfiltereval $OPTION11.MISSINGFILTEREVAL --missingfilterecho $OPTION11.MISSINGFILTERECHO --missingfilterresult $OPTION11.MISSINGFILTERRESULT --missingfilterthreshc $OPTION11.MISSINGFILTERTHRESHC --missingfilterthreshr $OPTION11.MISSINGFILTERTHRESHR --pcaeval $OPTION11.PCAEVAL --pcaecho $OPTION11.PCAECHO --pcaresult $OPTION11.PCARESULT --pcacomp $OPTION11.PCACOMP --pcaploteval $OPTION11.PCAPLOTEVAL --pcaplotecho $OPTION11.PCAPLOTECHO --pcaplotresult $OPTION11.PCAPLOTRESULT --pcaplotfig $OPTION11.PCAPLOTFIG --initialdataspliteval $OPTION11.INITIALDATASPLITEVAL --initialdatasplitecho $OPTION11.INITIALDATASPLITECHO --initialdatasplitresult $OPTION11.INITIALDATASPLITRESULT --percent $OPTION11.PERCENT --nzveval $OPTION11.NZVEVAL --nzvresult $OPTION11.NZVRESULT --nzvecho $OPTION11.NZVECHO --corrfiltereval $OPTION11.CORRFILTEREVAL --corrfilterresult $OPTION11.CORRFILTERRESULT --corrfilterecho $OPTION11.CORRFILTERECHO --threshholdcor $OPTION11.THRESHHOLDCOR --preproceval $OPTION11.PREPROCEVAL  --preprocecho $OPTION11.PREPROCECHO --preprocresult $OPTION11.PREPROCRESULT --setupworkersecho $OPTION11.SETUPWORKERSECHO --setupworkersresult $OPTION11.SETUPWORKERSRESULT --numworkers $OPTION11.NUMWORKERS --setupresamplingecho $OPTION11.SETUPRESAMPLINGECHO --setupresamplingresult $OPTION11.SETUPRESAMPLINGRESULT --resamplenumber $OPTION11.RESAMPLENUMBER --resamplenumberpercent $OPTION11.RESAMPLENUMBERPERCENT --setupgridresult $OPTION11.SETUPGRIDRESULT --setupgridecho $OPTION11.SETUPGRIDECHO --setupgridsize $OPTION11.SETUPGRIDSIZE --fitmodelresult $OPTION11.FITMODELRESULT --fitmodelecho $OPTION11.FITMODELECHO --fitmodeleval $OPTION11.FITMODELEVAL --modeldescrecho $OPTION11.MODELDESCRECHO --modeldescrresult $OPTION11.MODELDESCRRESULT --resamptableecho $OPTION11.RESAMPTABLEECHO --resamptableresult 
$OPTION11.RESAMPTABLERESULT --profileplotecho $OPTION11.PROFILEPLOTECHO --profileplotfig $OPTION11.PROFILEPLOTFIG --stopworkersecho $OPTION11.STOPWORKERSECHO --stopworkersresult $OPTION11.STOPWORKERSRESULT --testpredresult $OPTION11.TESTPREDRESULT --testpredecho $OPTION11.TESTPREDECHO --classprobstexresult $OPTION11.CLASSPROBSTEXRESULT --classprobstexecho $OPTION11.CLASSPROBSTEXECHO --savedataecho $OPTION11.SAVEDATAECHO --savedataresult $OPTION11.SAVEDATARESULT --outputmodel $model --outputresultpdf $document; 
+#end if
+ </command> 
+<inputs>
+	<param name="METHOD" type="select" label="Select Method to Train Data" >
+		<option value="gbm">Generalized Boosting</option>
+		<option value="treebag">treebag-Bagged CART</option>
+		<option value="earth">earth-MARS method</option>
+		<option value="Boruta">Boruta-Random forest with feature selection</option>
+		<option value="bstLs">Boosted Linear Model</option>
+		<option value="glm">Generalized Linear Model</option>
+		<option value="pls" selected="true">Partial Least Square</option>
+		<option value="svmRadial">SVM radial basis function</option>
+		<option value="svmLinear">SVM Linear function</option>
+		<option value="rpart">rpart-CART</option>
+		<option value="rf">Random Forest</option>
+		<option value="knn">k-Nearest Neighbors</option>
+		<!--option value="bagFDA">bag-Fourier Discriminant Analysis</option>
+		<option value="gbm">Generalized Boosting</option>
+		<option value="blackboost">black-boost- Boosting Method</option>
+		<option value="earth">Earth-MARS based method</option>
+		<option value="rf">Random Forest</option>
+		<option value="RRF">RRFglobal -Variant of Random Forest</option>
+		<option value="svmRadial">SVM-Radial</option> 
+		<option value="svmPoly">SVM-Polynomial</option>
+		<option value="ada">ada-boost</option>
+		<option value="glm">Generalised linear model </option>
+		<option value="treebag">tree based bagging method</option>
+		<option value="nb">Naive Bayes</option>
+		<option value="knn">K-nearest neighbour</option-->
+	</param>
+	<param name="RDATA" format="data" type="data" label="Select File Containing Training Data" help="RData format" />
+	
+	
+	<conditional name="OPTION11">
+		<param name="PARAMAETERS" type="select">
+            		<option value="basic" selected="TRUE" >Use optimized parameters </option>
+            		<option value="Advance"> Customized parameters </option>
+        	</param>
+       
+       		<when value="basic">
+        
+ 	 <param name="GETDATAINFOEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE" />	
+	
+	<param name="GETDATAINFOECHO" type="hidden" value="FALSE"  help="set True if wish to print. default is False"/>
+        <param name="GETDATAINFORESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/>
+		
+	<param name="MISSINGFILTEREVAL" type="hidden" value= "TRUE" help="set TRUE if wish to evaluate. default is TRUE"/>	
+	
+	<param name="MISSINGFILTERECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+		
+	<param name="MISSINGFILTERRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+		
+	<param name="MISSINGFILTERTHRESHC" type="hidden" value="0.2"  help="For column wise default is 0.2"/>
+	
+	<param name="MISSINGFILTERTHRESHR" type="hidden" value="0.2"  help="For row wise default is 0.2"/>
+	
+	<param name="PCAEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/>	
+	
+	<param name="PCAECHO" type="hidden" value="FALSE" help="set True if wish to print. default is False"/>
+		
+	<param name="PCARESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/>
+	
+	<param name="PCACOMP" type="hidden" value="3" help="set according to need. Default is 3"/>
+		
+	<param name="PCAPLOTEVAL" type="hidden" value="TRUE" help="set TRUE if wish to plot PCA. default is TRUE"/>	
+		
+	<param name="PCAPLOTECHO" type="hidden" value="FALSE" help="Set True if wish to Print .default is False"/>
+		
+	<param name="PCAPLOTRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+		
+   	<param name="PCAPLOTFIG" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/>	
+	
+	<param name="INITIALDATASPLITEVAL" type="hidden" value= "TRUE" help="set TRUE for splitting in test and train set.default is True"/>	
+	
+	<param name="INITIALDATASPLITECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+	
+
+	<param name="INITIALDATASPLITRESULT" type="hidden" value="tex"  help="Set tex if wish to write in output pdf file. default is tex"/>
+		
+  	<param name="PERCENT" type="hidden" value="0.8" help="default is 0.8"/>
+		
+   	<param name="NZVEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/>	
+		
+	<param name="NZVRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+		
+	<param name="NZVECHO" type="hidden" value="FALSE" label="Write Code in Document" help="set True if wish to print .default is False"/>
+		
+	<param name="CORRFILTEREVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/>	
+	
+	<param name="CORRFILTERRESULT" type="hidden" value="tex"  help="Set tex if wish to write in output pdf file. default is tex"/>
+		
+	<param name="CORRFILTERECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+	
+	<param name="THRESHHOLDCOR" type="hidden" value="0.75" help="set according to need .default is 0.75"/>
+		
+	<param name="PREPROCEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/>	
+		
+	<param name="PREPROCECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+		
+	<param name="PREPROCRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+		
+	<param name="SETUPWORKERSEVAL" type="hidden" value="FALSE" help="set TRUE if wish to evaluate. default is False"/>	
+		
+	<param name="SETUPWORKERSECHO" type="hidden" value="FALSE" label="Write Code in Document" help="set True if wish to print .default is False"/>
+		
+	<param name="SETUPWORKERSRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+		
+	<param name="NUMWORKERS" type="hidden" value="1" help ="default is 1"/>
+		
+   	<param name="SETUPRESAMPLINGECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+		
+	<param name="SETUPRESAMPLINGRESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/>
+		
+	<param name="RESAMPLENUMBER" type="hidden" value="10" label="Set Number of Times to Resample" help="default is 10"/>
+		
+	<param name="RESAMPLENUMBERPERCENT" type="hidden" value="0.75" help="default is 0.75"/>
+
+	<param name="SETUPGRIDRESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/>
+	
+  	<param name="SETUPGRIDECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+		
+	<param name="SETUPGRIDSIZE" type="hidden" value="3" help="default is 3 "/>
+		
+	<param name="FITMODELEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/>	
+		
+	<param name="FITMODELRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+	
+	<param name="FITMODELECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+		
+	<param name="MODELDESCRECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+	
+	<param name="MODELDESCRRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+		
+	<param name="RESAMPTABLEECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+		
+	<param name="RESAMPTABLERESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+		
+	<param name="PROFILEPLOTECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+		
+	<param name="PROFILEPLOTFIG" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/>	
+		
+	<param name="STOPWORKERSECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+		
+	<param name="STOPWORKERSRESULT" type="hidden" value= "hide" help="Set tex if wish to write in output pdf file. default is hide"/>
+
+	<param name="TESTPREDRESULT" type="hidden" value= "tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+	
+ 	<param name="TESTPREDECHO" type="hidden" value="FALSE" help="set True if wish to print. default is False"/>
+	
+	<param name="CLASSPROBSTEXRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+		
+	<param name="CLASSPROBSTEXECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"	/>
+		
+	<param name="SAVEDATAECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+		
+	<param name="SAVEDATARESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+		
+        </when>
+
+      
+        <when value="Advance">
+        <param name="GETDATAINFOEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE" />
+        <param name="GETDATAINFOECHO" type="hidden" value="FALSE"  help="set True if wish to print. default is False"/>
+        <param name="GETDATAINFORESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+	
+        <conditional name="CON1">   
+          <param name="PARAMAETERS1" type="select" label="1. Remove Missing Values from the input data">
+                        <option value="YES">YES </option>
+                        <option value="NO" selected="true">NO </option>
+                </param>
+
+                <when value="YES">
+        <param name="MISSINGFILTEREVAL" type="hidden" value= "TRUE" help="set TRUE if wish to evaluate. default is TRUE"/>
+	<param name="MISSINGFILTERECHO" type="select" label="1(i). Write Code in Document" help="set True if wish to print .default is False"	>
+		<option value="FALSE" selected="true">false</option>
+		<option value="TRUE">true</option>
+	</param>
+	<param name="MISSINGFILTERRESULT" type="select" label="1(ii). Write Result in document" help="Set tex if wish to write in output pdf file. default is tex">
+		<option value="hide">hide-result will not written in file</option>
+		<option value="tex" selected="true">tex-result will written in file</option>
+   	</param>
+	<param name="MISSINGFILTERTHRESHC" type="select" label="1(iii). Set Cutoff Value for Missing Data Values Columnwise" help="Column-wise cutoff. The default of 0.1 means that columns with more than 10% missing values will be removed">
+		<option value="0.1">0.1</option>
+		<option value="0.2">0.2</option>
+		<option value="0.25">0.25</option>
+		<option value="0.3">0.3</option>
+		<option value="0.35">0.35</option>
+		<option value="0.4">0.4</option>
+		<option value="0.45">0.45</option>
+		<option value="0.5">0.5</option>
+		<option value="0.55">0.55</option>
+		<option value="0.6">0.6</option>
+		<option value="0.65">0.65</option>
+		<option value="0.7">0.7</option>
+		<option value="0.75">0.75</option>
+		<option value="0.8">0.8</option>
+	</param>
+	<param name="MISSINGFILTERTHRESHR" type="select" label="1(iv). Set Cutoff Value for Missing Data Value Rowwise " help="For row wise default is 0.1 means row having more than 10% missing values will be removed">
+		<option value="0.1">0.1</option>
+		<option value="0.2">0.2</option>
+		<option value="0.25">0.25</option>
+		<option value="0.3">0.3</option>
+		<option value="0.35">0.35</option>
+		<option value="0.4">0.4</option>
+		<option value="0.45">0.45</option>
+		<option value="0.5">0.5</option>
+		<option value="0.55">0.55</option>
+		<option value="0.6">0.6</option>
+		<option value="0.65">0.65</option>
+		<option value="0.7">0.7</option>
+		<option value="0.75">0.75</option>
+		<option value="0.8">0.8</option>
+	</param>
+</when>
+<when value="NO">
+        <param name="MISSINGFILTEREVAL" type="hidden" value= "FALSE" help="set TRUE if wish to evaluate. default is FALSE"/>
+
+        <param name="MISSINGFILTERECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+
+        <param name="MISSINGFILTERRESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/>
+
+        <param name="MISSINGFILTERTHRESHC" type="hidden" value="0"  />
+
+        <param name="MISSINGFILTERTHRESHR" type="hidden" value="0" />
+
+</when>
+</conditional>
+
+        <param name="PCAEVAL" type="hidden" value="TRUE" help="Set TRUE if wish to evaluate. Default is TRUE"/> <!-- Step 2: PCA settings; only PCACOMP is shown to the user -->
+
+        <param name="PCAECHO" type="hidden" value="FALSE" help="Set TRUE if wish to print. Default is FALSE"/>
+
+        <param name="PCARESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. Default is hide"/>
+
+        <param name="PCACOMP" type="select" label="2. Find Number of Principal Components" help="Performs PCA and gives number of PCs. Default is 3">
+          <option value="3">3</option>
+          <option value="4">4</option>
+          <option value="5">5</option>
+          <option value="6">6</option>
+          <option value="7">7</option>
+          <option value="8">8</option>
+          <option value="9">9</option>
+          <option value="10">10</option>
+        </param>
+
+        <param name="PCAPLOTEVAL" type="hidden" value="TRUE" help="Set TRUE if wish to plot PCA. Default is TRUE"/>
+
+        <param name="PCAPLOTECHO" type="hidden" value="FALSE" help="Set TRUE if wish to print. Default is FALSE"/>
+
+        <param name="PCAPLOTRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. Default is tex"/>
+
+        <param name="PCAPLOTFIG" type="hidden" value="TRUE" help="Set TRUE if wish to evaluate. Default is TRUE"/>
+
+
+ <conditional name="CON2"> <!-- Step 3: train/test split; both branches set INITIALDATASPLITEVAL to TRUE, so the split always runs -->
+   <param name="PARAMAETERS2" type="select" label="3. Customize parameters for data splitting" help="Splits data into test and train sets.">
+     <option value="YES2">YES </option>
+     <option value="NO2" selected="true">NO </option>
+   </param>
+   <when value="YES2">
+     <param name="INITIALDATASPLITEVAL" type="hidden" value="TRUE" help="Set TRUE for splitting into test and train sets. Default is TRUE"/>
+     <!--param name="SAAMPLING" type="select"  label="3(i). Select Sampling Method" help="Default is with No sampling. you may choose downsample or upsample" >
+                <option value="garBage" selected="true">No Sampling</option>
+                <option value="downsampling">downsample</option>
+                <option value="upsampling">upsample</option>
+        </param-->
+     <param name="INITIALDATASPLITECHO" type="select" label="3(ii). Write Code in Document" help="Set TRUE if wish to print the code. Default is FALSE">
+       <option value="FALSE">false</option>
+       <option value="TRUE">true</option>
+     </param>
+     <param name="INITIALDATASPLITRESULT" type="select" label="3(iii). Write Result in Document" help="Set tex if wish to write in output pdf file. Default is tex">
+       <option value="tex" selected="true">tex-result will be written in file</option>
+       <option value="hide">hide-result will not be written in file</option>
+     </param>
+     <param name="PERCENT" type="select" label="3(iv). Set Value at Which Data Will be Split into Train and Test Sets" help="Default is 0.8">
+       <option value="0.8">0.8</option>
+       <option value="0.75">0.75</option>
+       <option value="0.6">0.6</option>
+       <option value="0.5">0.5</option>
+     </param>
+   </when>
+   <when value="NO2"> <!-- defaults: result written as tex, PERCENT fixed at 0.8 -->
+     <param name="INITIALDATASPLITEVAL" type="hidden" value="TRUE" help="Set TRUE for splitting into test and train sets. Default is TRUE"/>
+
+     <param name="INITIALDATASPLITECHO" type="hidden" value="FALSE" help="Set TRUE if wish to print the code. Default is FALSE"/>
+
+     <!--param name="SAAMPLING" type="hidden" value="garBage" help="default is with No sampling"/-->
+
+     <param name="INITIALDATASPLITRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. Default is tex"/>
+
+     <param name="PERCENT" type="hidden" value="0.8" help="Default is 0.8"/>
+
+   </when>
+ </conditional>
+
+      <conditional name="CON3"> <!-- Step 4: optional near-zero-variance filter; both branches define NZVEVAL, NZVECHO and NZVRESULT -->
+        <param name="PARAMAETERS3" type="select" label="4. Remove near zero variance" help="Removes NZV from train and test set.">
+          <option value="YES3">YES </option>
+          <option value="NO3" selected="true">NO </option>
+        </param>
+        <when value="YES3">
+
+          <param name="NZVEVAL" type="hidden" value="TRUE" help="Set TRUE if wish to evaluate. Default is TRUE"/>
+
+
+          <param name="NZVECHO" type="select" label="4(i). Write Code in Document" help="Set TRUE if wish to print the code. Default is FALSE">
+            <option value="FALSE">false</option>
+            <option value="TRUE">true</option>
+          </param>
+
+          <param name="NZVRESULT" type="select" label="4(ii). Write Result in Document" help="Set tex if wish to write result in output pdf file. Default is tex">
+            <option value="hide">hide-result will not be written in file</option>
+            <option value="tex" selected="true">tex-result will be written in file</option>
+          </param>
+        </when>
+        <when value="NO3"> <!-- filter disabled: nothing evaluated, echoed or written -->
+          <param name="NZVEVAL" type="hidden" value="FALSE" help="Set TRUE if wish to evaluate. Default is FALSE"/>
+          <param name="NZVECHO" type="hidden" value="FALSE" help="Set TRUE if wish to print the code. Default is FALSE"/>
+          <param name="NZVRESULT" type="hidden" value="hide" help="Set tex if wish to write result in output pdf file. Default is hide"/>
+        </when>
+      </conditional>
+
+	<!--param name="NZVECHO" type="select" label="Write Code in Document" help="set True if wish to print .default is False">
+		<option value="FALSE">false</option>
+		<option value="TRUE">true</option>
+	</param-->
+
+ <conditional name="CON4"> <!-- Step 5: optional correlation filter; both branches define the CORRFILTER* params and THRESHHOLDCOR -->
+   <param name="PARAMAETERS4" type="select" label="5. Remove Correlated Values" help="Removes correlated attributes from train and test set.">
+     <option value="YES4">YES </option>
+     <option value="NO4" selected="true">NO </option>
+   </param>
+   <when value="YES4">
+     <param name="CORRFILTEREVAL" type="hidden" value="TRUE" help="Set TRUE if wish to evaluate. Default is TRUE"/>
+     <param name="THRESHHOLDCOR" type="select" label="5(i). Cutoff for Correlated Values" help="Set according to need. Default is 0.75, which means attributes with 75% or more correlation are omitted from the data">
+       <option value="0.75">0.75</option>
+       <option value="0.4">0.4</option>
+       <option value="0.45">0.45</option>
+       <option value="0.5">0.5</option>
+       <option value="0.55">0.55</option>
+       <option value="0.6">0.6</option>
+       <option value="0.65">0.65</option>
+       <option value="0.7">0.7</option>
+       <option value="0.8">0.8</option>
+       <option value="0.85">0.85</option>
+       <option value="0.9">0.9</option>
+       <option value="0.95">0.95</option>
+     </param>
+     <param name="CORRFILTERECHO" type="select" label="5(ii). Write Code in Document" help="Set TRUE if wish to print the code. Default is FALSE">
+       <option value="FALSE">false</option>
+       <option value="TRUE">true</option>
+     </param>
+     <param name="CORRFILTERRESULT" type="select" label="5(iii). Write Result in Document" help="Set tex if wish to write in output pdf file. Default is tex">
+       <option value="hide">hide-result will not be written in file</option>
+       <option value="tex" selected="true">tex-result will be written in file</option>
+     </param>
+   </when>
+   <when value="NO4"> <!-- filter disabled: fixed hidden values, threshold set to 0 -->
+     <param name="CORRFILTEREVAL" type="hidden" value="FALSE"/>
+
+     <param name="CORRFILTERRESULT" type="hidden" value="hide"/>
+
+     <param name="CORRFILTERECHO" type="hidden" value="FALSE"/>
+
+     <param name="THRESHHOLDCOR" type="hidden" value="0"/>
+   </when>
+ </conditional>
+
+<conditional name="CON5"> <!-- Step 6: optional centering and scaling; both branches define PREPROCEVAL, PREPROCECHO and PREPROCRESULT -->
+  <param name="PARAMAETERS5" type="select" label="6. Perform Centering and Scaling of data" help="Centering and scaling of train and test set.">
+    <option value="YES5">YES </option>
+    <option value="NO5" selected="true">NO </option>
+  </param>
+
+  <when value="YES5">
+    <param name="PREPROCEVAL" type="hidden" value="TRUE" help="Set TRUE if wish to evaluate. Default is TRUE"/>
+    <param name="PREPROCECHO" type="select" label="6(i). Write Code in Document" help="Set TRUE if wish to write code in document. Default is FALSE">
+      <option value="FALSE">false</option>
+      <option value="TRUE">true</option>
+    </param>
+    <param name="PREPROCRESULT" type="select" label="6(ii). Write Result in Document" help="Set tex if wish to write result in output pdf file. Default is tex">
+      <option value="hide">hide-result will not be written in file</option>
+      <option value="tex" selected="true">tex-result will be written in file</option>
+    </param>
+  </when>
+  <when value="NO5"> <!-- step disabled: nothing evaluated, echoed or written -->
+    <param name="PREPROCEVAL" type="hidden" value="FALSE"/>
+
+    <param name="PREPROCECHO" type="hidden" value="FALSE"/>
+
+    <param name="PREPROCRESULT" type="hidden" value="hide"/>
+
+  </when>
+</conditional>
+
+    <param name="SETUPWORKERSEVAL" type="hidden" value="TRUE" help="Set TRUE if wish to evaluate. Default is TRUE"/> <!-- Steps 7 to 10: workers, resampling, tuning grid and model fit; labels renumbered so each step number is used once -->
+    <param name="SETUPWORKERSECHO" type="hidden" value="FALSE" help="Set TRUE if wish to print. Default is FALSE"/>
+    <param name="NUMWORKERS" type="select" label="7. Set Number of Processors" help="Default is 1">
+      <option value="1">1</option>
+      <option value="2">2</option>
+      <option value="4">4</option>
+      <option value="6">6</option>
+      <option value="8">8</option>
+      <option value="16">16</option>
+    </param>
+    <param name="SETUPWORKERSRESULT" type="select" label="7(i). Write Result" help="Set tex if wish to write in output pdf file. Default is hide">
+      <option value="tex">tex-result will be written in file</option>
+      <option value="hide" selected="true">hide-result will not be written in file</option>
+    </param>
+
+    <conditional name="CON6"> <!-- Step 8: resampling settings; the NO6 branch pins 10 resamples at a 0.75 split -->
+
+      <param name="PARAMAETERS6" type="select" label="8. Customize resampling parameters" help="Resampling for cross validation">
+        <option value="YES6">YES </option>
+        <option value="NO6" selected="true">NO </option>
+      </param>
+      <when value="YES6">
+        <param name="SETUPRESAMPLINGECHO" type="select" label="8(i). Write Code for Resampling" help="Set TRUE if wish to print the code. Default is FALSE">
+          <option value="FALSE">false</option>
+          <option value="TRUE">true</option>
+        </param>
+        <param name="SETUPRESAMPLINGRESULT" type="select" label="8(ii). Write Result in Document" help="Set tex if wish to write in output pdf file. Default is hide">
+          <option value="hide">hide-result will not be written in file</option>
+          <option value="tex">tex-result will be written in file</option>
+        </param>
+        <param name="RESAMPLENUMBER" type="select" label="8(iii). Set Number of Times to Resample" help="Default is 10">
+          <option value="10">10</option>
+          <option value="5">5</option>
+          <option value="15">15</option>
+          <option value="20">20</option>
+          <option value="25">25</option>
+        </param>
+        <param name="RESAMPLENUMBERPERCENT" type="select" label="8(iv). Set Percent Splitting of Data for Resampling" help="Default is 0.75">
+          <option value="0.75">0.75</option>
+          <option value="0.4">0.4</option>
+          <option value="0.45">0.45</option>
+          <option value="0.5">0.5</option>
+          <option value="0.55">0.55</option>
+          <option value="0.6">0.6</option>
+          <option value="0.65">0.65</option>
+          <option value="0.7">0.7</option>
+          <option value="0.8">0.8</option>
+        </param>
+      </when>
+      <when value="NO6">
+        <param name="SETUPRESAMPLINGECHO" type="hidden" value="FALSE" help="Set TRUE if wish to print the code. Default is FALSE"/>
+
+        <param name="SETUPRESAMPLINGRESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. Default is hide"/>
+
+        <param name="RESAMPLENUMBER" type="hidden" value="10" label="Set Number of Times to Resample" help="Default is 10"/>
+
+        <param name="RESAMPLENUMBERPERCENT" type="hidden" value="0.75" help="Default is 0.75"/>
+
+      </when>
+    </conditional>
+    <param name="SETUPGRIDRESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. Default is hide"/>
+
+    <param name="SETUPGRIDECHO" type="hidden" value="FALSE" help="Set TRUE if wish to print. Default is FALSE"/>
+
+    <param name="SETUPGRIDSIZE" type="select" label="9. Set Size of The Grid." help="Default is 3">
+      <option value="3">3</option>
+      <option value="4">4</option>
+      <option value="5">5</option>
+      <option value="6">6</option>
+      <option value="7">7</option>
+      <option value="8">8</option>
+      <option value="9">9</option>
+      <option value="10">10</option>
+      <option value="11">11</option>
+      <option value="12">12</option>
+      <option value="13">13</option>
+      <option value="14">14</option>
+      <option value="15">15</option>
+      <option value="16">16</option>
+      <option value="17">17</option>
+      <option value="18">18</option>
+      <option value="19">19</option>
+      <option value="20">20</option>
+    </param>
+
+    <param name="FITMODELEVAL" type="boolean" checked="true" value="true" truevalue="TRUE" falsevalue="FALSE" label="10. Build a Model and write result in document" help="Default is TRUE"/> <!-- truevalue/falsevalue match the TRUE/FALSE strings used by every other EVAL and ECHO flag; NOTE(review): confirm the command section passes this value straight through -->
+    <param name="FITMODELRESULT" type="hidden" value="tex"/>
+    <param name="FITMODELECHO" type="select" label="10(i). Write Code for Model Building in Document" help="Set TRUE if wish to write code in document. Default is FALSE">
+      <option value="FALSE">false</option>
+      <option value="TRUE">true</option>
+    </param>
+    <param name="MODELDESCRECHO" type="select" label="10(ii). Write Code for Model Description" help="Set TRUE if wish to print the code. Default is FALSE">
+      <option value="FALSE">false</option>
+      <option value="TRUE">true</option>
+    </param>
+    <param name="MODELDESCRRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. Default is tex"/>
+               
+    <param name="RESAMPTABLEECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> <!-- fixed hidden settings for the remaining sections: every ECHO is FALSE, every RESULT is tex except STOPWORKERSRESULT -->
+
+    <param name="RESAMPTABLERESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+
+    <param name="PROFILEPLOTECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+
+    <param name="PROFILEPLOTFIG" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/>
+
+    <param name="STOPWORKERSECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+
+    <param name="STOPWORKERSRESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/>
+
+    <param name="TESTPREDRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+
+    <param name="TESTPREDECHO" type="hidden" value="FALSE" help="set True if wish to print. default is False"/>
+
+    <param name="CLASSPROBSTEXRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+
+    <param name="CLASSPROBSTEXECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+
+    <!--param name="CLASSPROBSTEXRESULT1" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/>
+
+        <param name="CLASSPROBSTEXECHO1" type="hidden" value="FALSE" help="set True if wish to print .default is False" /-->
+
+    <param name="SAVEDATAECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/>
+
+    <param name="SAVEDATARESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/>
+
+	</when>
+	</conditional>
+</inputs>
+
+<outputs> <!-- two output datasets: "model" (generic data format) and "document" (pdf) -->
+  <data name="model" type="data" format="data" label="$METHOD regression Model "/>
+  <data name="document" format="pdf" label="$METHOD regression Document"/>
+</outputs>
+<help>
+
+.. class:: infomark
+
+
+
+**Instruction**
+
+----------
+
+Users may change any parameter according to their requirements. For normal use,
+
+the user only needs to provide an input CSV file with the (numeric) response variable as the last column, and to choose a method for model building.
+
+More details are given in the user manual. Please click here
+  
+
+
+</help>
+
+
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri Jan 22 14:16:12 2016 -0500
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<tool_dependency>
+
+  <set_environment version="1.0"> <!-- sets CARET_REG_TOOL2_PATH to this repository's install directory -->
+    <environment_variable name="CARET_REG_TOOL2_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
+  </set_environment>
+  <package name="R" version="3.2.0"> <!-- both packages come from the test tool shed and must be installed first -->
+    <repository name="package_r_3_2_0" owner="iuc" changeset_revision="7833b0ebf8d6" toolshed="https://testtoolshed.g2.bx.psu.edu" prior_installation_required="True"/>
+  </package>
+  <package name="caret-tools" version="1.0.0">
+    <repository name="caret_tool_test1" owner="deepakjadmin" changeset_revision="e5faefaf1037" toolshed="https://testtoolshed.g2.bx.psu.edu" prior_installation_required="True"/>
+  </package>
+</tool_dependency>