Mercurial > repos > deepakjadmin > caret_reg_tool2
changeset 0:b82c88293260 draft
Uploaded
author | deepakjadmin |
---|---|
date | Fri, 22 Jan 2016 14:16:12 -0500 |
parents | |
children | 75f274299190 |
files | modelBuilding.py templateLibrary.py templateLibrary.pyc tool2reg.xml tool_dependencies.xml |
diffstat | 5 files changed, 1760 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# modelBuilding.py -- added in changeset b82c88293260
# (diff header: --- /dev/null  +++ b/modelBuilding.py  @@ -0,0 +1,170 @@)
"""Command-line driver that renders an Sweave report and builds a caret model.

The script fills the Rnw template from ``templateLibrary`` with the values
given on the command line, runs ``R CMD Sweave`` and ``pdflatex`` on the
result, and copies the fitted model and the PDF report to the requested
output paths.
"""

import argparse
import shutil
import subprocess
import sys
from string import Template

# Options that only name output files; they are never substituted into the
# Rnw template.
_OUTPUT_ONLY_OPTIONS = ("outputmodel", "outputresultpdf")


def __inputArguments(argv=None):
    """Parse the tool's command line.

    Args:
        argv: Optional list of argument strings.  When ``None`` (the
            default, and what existing callers get) ``sys.argv[1:]`` is
            parsed.

    Returns:
        The ``argparse.Namespace`` holding every option.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--method", nargs='?', default='pls',
                        help="Name of the method on which model will build; "
                             "Available Methods are:- pls, glm , glmboost ")
    parser.add_argument("rdata", help="Descriptor file for model building")
    parser.add_argument("--getdatainfoeval", nargs='?', default='TRUE',
                        help="Validation of the data ")
    parser.add_argument("--getdatainfoecho", nargs='?', default='FALSE',
                        help="print on consol about Validity of the data ")
    parser.add_argument("--getdatainforesult", nargs='?', default='hide',
                        help="print in output file about Validity of the data ")
    parser.add_argument("--missingfiltereval", nargs='?', default='FALSE',
                        help="Processing step :: removal of missing value ")
    parser.add_argument("--missingfilterecho", nargs='?', default='FALSE',
                        help="print Processing step :: removal of missing value ")
    parser.add_argument("--missingfilterresult", nargs='?', default='hide',
                        help="print in output file about Processing step :: removal of missing value ")
    parser.add_argument("--missingfilterthreshc", nargs='?', default=0.20, type=float,
                        help="info about highly missing column data")
    parser.add_argument("--missingfilterthreshr", nargs='?', default=0.20, type=float,
                        help="info about highly missing row number")
    parser.add_argument("--pcaeval", nargs='?', default='FALSE',
                        help="PCA of data ")
    parser.add_argument("--pcaecho", nargs='?', default='FALSE',
                        help="PCA of data ")
    parser.add_argument("--pcaresult", nargs='?', default='hide',
                        help="print in file about PCA of data ")
    parser.add_argument("--pcacomp", nargs='?', default=3, type=int,
                        help="Number of PCA componant will be plotted ")
    parser.add_argument("--pcaploteval", nargs='?', default='FALSE',
                        help="PCA plot of data ")
    parser.add_argument("--pcaplotecho", nargs='?', default='FALSE',
                        help="print PCA plot of data ")
    parser.add_argument("--pcaplotresult", nargs='?', default='hide',
                        help="write in output file about PCA plot of data")
    parser.add_argument("--pcaplotfig", nargs='?', default='TRUE',
                        help="make figure file for integration in output file")
    parser.add_argument("--initialdataspliteval", nargs='?', default='TRUE',
                        help="data splitting in test and train set ")
    parser.add_argument("--initialdatasplitecho", nargs='?', default='FALSE',
                        help="print about data splitting in test and train set")
    parser.add_argument("--initialdatasplitresult", nargs='?', default='hide',
                        help="write in outputfile about data splitting in test and train set")
    parser.add_argument("--percent", nargs='?', default=0.8, type=float,
                        help="percent value at which data splitting is done")
    parser.add_argument("--nzveval", nargs='?', default='FALSE',
                        help="remove near zero values")
    parser.add_argument("--nzvresult", nargs='?', default='hide',
                        help="write in outputfile about removing near zero values")
    parser.add_argument("--nzvecho", nargs='?', default='FALSE',
                        help="print about removing near zero values")
    parser.add_argument("--corrfiltereval", nargs='?', default='FALSE',
                        help="remove higly correlated values")
    parser.add_argument("--corrfilterresult", nargs='?', default='hide',
                        help="write in outputfile about removing highly correlated values")
    parser.add_argument("--corrfilterecho", nargs='?', default='FALSE',
                        help="print about removing correlated values")
    parser.add_argument("--threshholdcor", nargs='?', default=0.75, type=float,
                        help="percent value at which correlated values ommitted ")
    parser.add_argument("--preproceval", nargs='?', default='FALSE',
                        help="pre proccesing")
    parser.add_argument("--preprocecho", nargs='?', default='FALSE',
                        help="print about pre proccesing")
    parser.add_argument("--preprocresult", nargs='?', default='hide',
                        help="write in output file about pre proccesing")
    parser.add_argument("--setupworkersecho", nargs='?', default='FALSE',
                        help="print about number of processors")
    parser.add_argument("--setupworkersresult", nargs='?', default='tex',
                        help="write about number of processors in output file")
    parser.add_argument("--numworkers", nargs='?', default=1, type=int,
                        help="defines used processors")
    parser.add_argument("--setupresamplingecho", nargs='?', default='FALSE',
                        help="print resampling rules")
    parser.add_argument("--setupresamplingresult", nargs='?', default='hide',
                        help="write resampling rules in output file")
    parser.add_argument("--resamplenumber", nargs='?', default=10, type=int,
                        help="set number of resampling")
    parser.add_argument("--resamplenumberpercent", nargs='?', default=0.75, type=float,
                        help="set PERCENT resampling")
    parser.add_argument("--setupgridresult", nargs='?', default='hide',
                        help="write about number of grids in output file")
    parser.add_argument("--setupgridecho", nargs='?', default='FALSE',
                        help="print about number of grids")
    parser.add_argument("--setupgridsize", nargs='?', default=3, type=int,
                        help="set number of grids")
    parser.add_argument("--fitmodelresult", nargs='?', default='hide',
                        help="write about model")
    parser.add_argument("--fitmodelecho", nargs='?', default='FALSE',
                        help="print about model")
    parser.add_argument("--fitmodeleval", nargs='?', default='TRUE',
                        help="start model building")
    parser.add_argument("--modeldescrecho", nargs='?', default='FALSE',
                        help="print model description")
    parser.add_argument("--modeldescrresult", nargs='?', default='hide',
                        help="write model description in outout file")
    parser.add_argument("--resamptableecho", nargs='?', default='FALSE',
                        help="print resample table")
    parser.add_argument("--resamptableresult", nargs='?', default='tex',
                        help="write resample table in output file")
    parser.add_argument("--profileplotecho", nargs='?', default='FALSE',
                        help="print about profile plots")
    parser.add_argument("--profileplotfig", nargs='?', default='TRUE',
                        help=" profile plots")
    parser.add_argument("--stopworkersecho", nargs='?', default='FALSE',
                        help="stop workers ie processors")
    parser.add_argument("--stopworkersresult", nargs='?', default='hide',
                        help="write about workers ie processors used")
    parser.add_argument("--testpredresult", nargs='?', default='tex',
                        help="write about statistical measure")
    parser.add_argument("--testpredecho", nargs='?', default='FALSE',
                        help="print about statistical measure")
    parser.add_argument("--classprobstexresult", nargs='?', default='tex',
                        help="paste various figure of statistical measure in outputfile")
    parser.add_argument("--classprobstexecho", nargs='?', default='FALSE',
                        help="print various figure of statistical measure")
    parser.add_argument("--savedataecho", nargs='?', default='FALSE',
                        help="information about completion of model building ")
    parser.add_argument("--savedataresult", nargs='?', default='hide',
                        help="write information about completion of model building in outputfile ")
    parser.add_argument("--outputmodel", help="give name for the generated model")
    parser.add_argument("--outputresultpdf", help="give name for the output pdf file")

    return parser.parse_args(argv)


def _template_values(args):
    """Map parsed options to the upper-case placeholder names of the template.

    Every option except the output-file names becomes ``NAME -> value``
    (e.g. ``args.pcacomp`` -> ``PCACOMP``), which is exactly the set of
    placeholders the script substitutes.
    """
    return dict(
        (name.upper(), value)
        for name, value in vars(args).items()
        if name not in _OUTPUT_ONLY_OPTIONS
    )


def _run_logged(command, log_path):
    """Run *command* (an argument list, no shell) with output sent to a log.

    stdout and stderr are both written to *log_path*, which is truncated
    first.  Returns the command's exit status, or 127 when the executable
    could not be started; the reason is then recorded in the log so the
    remaining steps still run.
    """
    with open(log_path, 'wb') as log:
        try:
            return subprocess.call(command, stdout=log, stderr=subprocess.STDOUT)
        except OSError as exc:
            log.write(('%s: %s\n' % (command[0], exc)).encode('utf-8'))
            return 127


def _copy_output(source, destination):
    """Copy *source* to *destination* without going through a shell.

    Returns ``True`` on success.  Nothing is copied when *destination* is
    ``None`` (option not given).  A failed copy is reported on stderr and
    yields ``False`` instead of raising.
    """
    if destination is None:
        return False
    try:
        shutil.copy(source, destination)
    except EnvironmentError as exc:
        sys.stderr.write('cannot copy %s to %s: %s\n' % (source, destination, exc))
        return False
    return True


def generateRnwScript(args=None):
    """Render the Rnw template into ``result-doc.Rnw`` in the working directory.

    Args:
        args: Parsed options as returned by ``__inputArguments``.  When
            ``None`` the command line is parsed here.
    """
    # Imported lazily: templateLibrary is only needed for rendering.
    import templateLibrary

    if args is None:
        args = __inputArguments()

    # safe_substitute leaves unknown "$name" sequences (R code such as
    # "pca$sdev") untouched instead of raising.
    rendered = Template(templateLibrary.__template4Rnw()).safe_substitute(
        _template_values(args))

    with open('result-doc.Rnw', 'w') as rnw_file:
        rnw_file.write(rendered)


def modelBuilding(args=None):
    """Run Sweave and pdflatex on the rendered report and collect the outputs.

    ``R CMD Sweave`` logs to ``cmd.log.1``; ``pdflatex`` is run twice, both
    runs logging to ``cmd.log.2``.  Afterwards ``<method>-Fit.RData`` and
    ``result-doc.pdf`` are copied to ``--outputmodel`` and
    ``--outputresultpdf``.

    Args:
        args: Parsed options as returned by ``__inputArguments``.  When
            ``None`` the command line is parsed here.
    """
    if args is None:
        args = __inputArguments()

    _run_logged(['R', 'CMD', 'Sweave', 'result-doc.Rnw'], 'cmd.log.1')
    for _ in range(2):
        _run_logged(['pdflatex', 'result-doc.tex'], 'cmd.log.2')

    _copy_output('%s-Fit.RData' % args.method, args.outputmodel)
    _copy_output('result-doc.pdf', args.outputresultpdf)


if __name__ == "__main__":
    cli_args = __inputArguments()
    generateRnwScript(cli_args)
    modelBuilding(cli_args)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templateLibrary.py Fri Jan 22 14:16:12 2016 -0500 @@ -0,0 +1,1010 @@ +def __template4Rnw(): + + template4Rnw = r'''%% Regression Modeling Script +%% Max Kuhn (max.kuhn@pfizer.com, mxkuhn@gmail.com) +%% Version: 1.00 +%% Created on: 2010/10/02 +%% +%% Lynn Group +%% Version: 2.00 +%% Created on: 2014/11/15 + +%% This is an Sweave template for building and describing +%% classification models. It mixes R and LaTeX code. The document can +%% be processing using R's Sweave function to produce a tex file. +%% +%% The inputs are: +%% - the initial data set in a data frame called 'rawData' +%% - a numeric column in the data set called 'outcome'. this should be the +%% outcome variable +%% - all other columns in rawData should be predictor variables +%% - the type of model should be in a variable called 'modName'. +%% +%% The script attempts to make some intelligent choices based on the +%% model being used. For example, if modName is "pls", the script will +%% automatically center and scale the predictor data. There are +%% situations where these choices can (and should be) changed. +%% +%% There are other options that may make sense to change. For example, +%% the user may want to adjust the type of resampling. To find these +%% parts of the script, search on the string 'OPTION'. These parts of +%% the code will document the options. 
+ +\documentclass[12pt]{report} +\usepackage{amsmath} +\usepackage[pdftex]{graphicx} +\usepackage{color} +\usepackage{ctable} +\usepackage{xspace} +\usepackage{fancyvrb} +\usepackage{fancyhdr} +\usepackage{lastpage} +\usepackage{longtable} +\usepackage{algorithm2e} +\usepackage[ + colorlinks=true, + linkcolor=blue, + citecolor=blue, + urlcolor=blue] + {hyperref} + \usepackage{lscape} + +\usepackage{Sweave} +\SweaveOpts{keep.source = TRUE} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% define new colors for use +\definecolor{darkgreen}{rgb}{0,0.6,0} +\definecolor{darkred}{rgb}{0.6,0.0,0} +\definecolor{lightbrown}{rgb}{1,0.9,0.8} +\definecolor{brown}{rgb}{0.6,0.3,0.3} +\definecolor{darkblue}{rgb}{0,0,0.8} +\definecolor{darkmagenta}{rgb}{0.5,0,0.5} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\newcommand{\bld}[1]{\mbox{\boldmath $#1$}} +\newcommand{\shell}[1]{\mbox{$#1$}} +\renewcommand{\vec}[1]{\mbox{\bf {#1}}} + +\newcommand{\ReallySmallSpacing}{\renewcommand{\baselinestretch}{.6}\Large\normalsize} +\newcommand{\SmallSpacing}{\renewcommand{\baselinestretch}{1.1}\Large\normalsize} + +\newcommand{\halfs}{\frac{1}{2}} + +\setlength{\oddsidemargin}{-.25 truein} +\setlength{\evensidemargin}{0truein} +\setlength{\topmargin}{-0.2truein} +\setlength{\textwidth}{7 truein} +\setlength{\textheight}{8.5 truein} +\setlength{\parindent}{0.20truein} +\setlength{\parskip}{0.10truein} + +\setcounter{LTchunksize}{50} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\pagestyle{fancy} +\lhead{} +%% OPTION Report header name +\chead{Regression Model Script} +\rhead{} +\lfoot{} +\cfoot{} +\rfoot{\thepage\ of \pageref{LastPage}} +\renewcommand{\headrulewidth}{1pt} +\renewcommand{\footrulewidth}{1pt} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% OPTION Report title and modeler name +\title{Regression Model Script using $METHOD } +\author{"M. 
Kuhn and Lynn Group, SCIS, JNU, New Delhi"} + +\begin{document} + +\maketitle + +\thispagestyle{empty} + +<<startup, eval= TRUE, results = hide, echo = FALSE>>= +library(Hmisc) +library(caret) +versionTest <- compareVersion(packageDescription("caret")$Version, + "4.65") +if(versionTest < 0) stop("caret version 4.65 or later is required") + +library(RColorBrewer) + + +listString <- function (x, period = FALSE, verbose = FALSE) +{ + if (verbose) cat("\n entering listString\n") + flush.console() + if (!is.character(x)) + x <- as.character(x) + numElements <- length(x) + out <- if (length(x) > 0) { + switch(min(numElements, 3), x, paste(x, collapse = " and "), + { + x <- paste(x, c(rep(",", numElements - 2), " and", ""), sep = "") + paste(x, collapse = " ") + }) + } + else "" + if (period) out <- paste(out, ".", sep = "") + if (verbose) cat(" leaving listString\n\n") + flush.console() + out +} + +resampleStats <- function(x, digits = 3) + { + bestPerf <- x$bestTune + colnames(bestPerf) <- gsub("^\\.", "", colnames(bestPerf)) + out <- merge(x$results, bestPerf) + out <- out[, colnames(out) %in% x$perfNames] + names(out) <- gsub("ROC", "area under the ROC curve", names(out), fixed = TRUE) + names(out) <- gsub("Sens", "sensitivity", names(out), fixed = TRUE) + names(out) <- gsub("Spec", "specificity", names(out), fixed = TRUE) + names(out) <- gsub("Accuracy", "overall accuracy", names(out), fixed = TRUE) + names(out) <- gsub("Kappa", "Kappa statistics", names(out), fixed = TRUE) + names(out) <- gsub("RMSE", "root mean squared error", names(out), fixed = TRUE) + names(out) <- gsub("Rsquared", "$R^2$", names(out), fixed = TRUE) + + out <- format(out, digits = digits) + listString(paste(names(out), "was", out)) + } + +latticeBubble <- function(x, y, z, offset = .5, splits = 10, + pal = colorRampPalette(brewer.pal(9,"YlOrRd")[-(1:2)]), + ...) 
+{ + cexValues <- rank(z)/length(z) + offset + splits <- unique(quantile(z, probs = seq(0, 1, length = splits))) + splitup <- cut(z, breaks = splits, include.lowest = TRUE) + cols <- pal(length(levels(splitup))) + colValues <- cols[as.numeric(splitup)] + if(is.data.frame(x)) + { + out <- splom(~x, col = colValues, cex = cexValues, ...) + + } else out <- xyplot(y~x, col = colValues, cex = cexValues, ...) + out + +} + + +##OPTION: model name: see ?train for more values/models +modName <- "$METHOD" +load("$RDATA") +rawData <- dataX +rawData$$outcome <- dataY + + + +@ + + +\section*{Data Sets}\label{S:data} + +%% OPTION: provide some background on the problem, the experimental +%% data, how the compounds were selected etc + + +<<getDataInfo, eval = $GETDATAINFOEVAL, echo = $GETDATAINFOECHO, results = $GETDATAINFORESULT>>= +if(!any(names(rawData) == "outcome")) stop("a variable called outcome should be in the data set") +if(!is.numeric(rawData$outcome)) stop("the outcome should be a numeric vector") + +numSamples <- nrow(rawData) +numPredictors <- ncol(rawData) - 1 +predictorNames <- names(rawData)[names(rawData) != "outcome"] + +isNum <- apply(rawData[,predictorNames, drop = FALSE], 2, is.numeric) +if(any(!isNum)) stop("all predictors in rawData should be numeric") + +@ + + +<<missingFilter, eval = $MISSINGFILTEREVAL, echo = $MISSINGFILTERECHO, results = $MISSINGFILTERRESULT>>= + +colRate <- apply(rawData[, predictorNames, drop = FALSE], + 2, function(x) mean(is.na(x))) + +##OPTION thresholds can be changed +colExclude <- colRate > $MISSINGFILTERTHRESHC + +missingText <- "" + +if(any(colExclude)) + { + missingText <- paste(missingText, + ifelse(sum(colExclude) > 1, + " There were ", + " There was "), + sum(colExclude), + ifelse(sum(colExclude) > 1, + " predictors ", + " predictor "), + "with an excessive number of ", + "missing data. ", + ifelse(sum(colExclude) > 1, + " These were excluded. ", + " This was excluded. 
")) + predictorNames <- predictorNames[!colExclude] + rawData <- rawData[, names(rawData) %in% c("outcome", predictorNames), drop = FALSE] + } + + +rowRate <- apply(rawData[, predictorNames, drop = FALSE], + 1, function(x) mean(is.na(x))) + +rowExclude <- rowRate > $MISSINGFILTERTHRESHR + + +if(any(rowExclude)) + { + missingText <- paste(missingText, + ifelse(sum(rowExclude) > 1, + " There were ", + " There was "), + sum(colExclude), + ifelse(sum(rowExclude) > 1, + " samples ", + " sample "), + "with an excessive number of ", + "missing data. ", + ifelse(sum(rowExclude) > 1, + " These were excluded. ", + " This was excluded. "), + "After filtering, ", + sum(!rowExclude), + " samples remained.") + rawData <- rawData[!rowExclude, ] + hasMissing <- apply(rawData[, predictorNames, drop = FALSE], + 1, function(x) mean(is.na(x))) + } else { + hasMissing <- apply(rawData[, predictorNames, drop = FALSE], + 1, function(x) any(is.na(x))) + missingText <- paste(missingText, + ifelse(missingText == "", + "There ", + "Subsequently, there "), + ifelse(sum(hasMissing) == 1, + "was ", + "were "), + ifelse(sum(hasMissing) > 0, + sum(hasMissing), + "no"), + ifelse(sum(hasMissing) == 1, + "sample ", + "samples "), + "with missing values.") + rawData <- rawData[complete.cases(rawData),] + } + +dataDist <- summary(rawData$outcome) +dataSD <- sd(rawData$outcome, na.rm = TRUE) +dataText <- paste("The average outcome value was ", + dataDist["Mean"], + " and a standard deviation of ", + dataSD, ". The minimum and maximum values were ", + dataDist["Min."], " and ", dataDist["Max."], + ", respectively. Figure \\\\ref{F:dens} shows a ", + " density plot (i.e. 
a smooth histogram) of the response.", + sep = "") + +rawData1 <- rawData[,1:length(rawData)-1] +rawData2 <- rawData[,length(rawData)] + +set.seed(222) +nzv1 <- nearZeroVar(rawData1) + if(length(nzv1) > 0) + { + nzvVars1 <- names(rawData1)[nzv1] + rawData <- rawData1[,-nzv1] + rawData$outcome <- rawData2 + nzvText1 <- paste("There were ", + length(nzv1), + " predictors that were removed from original data due to", + " severely unbalanced distributions that", + " could negatively affect the model fit", + ifelse(length(nzv1) > 10, + ".", + paste(": ", + listString(nzvVars1), + ".", + sep = "")), + sep = "") + + } else { +rawData <- rawData1 +rawData$outcome <- rawData2 +nzvText1 <- "" + + } + +remove("rawData1") +remove("rawData2") + +@ + +The initial data set consisted of \Sexpr{numSamples} samples and +\Sexpr{numPredictors} predictor variables. \Sexpr{dataText} \Sexpr{missingText} +\Sexpr{nzvText1} + +\setkeys{Gin}{width = 0.8\textwidth} +\begin{figure}[b] + \begin{center} + +<<densityplot, echo = FALSE, results = hide, fig = TRUE, width = 8, height = 4.5>>= +trellis.par.set(caretTheme(), warn = TRUE) +print(densityplot(~rawData$outcome, pch = "|", adjust = 1.25, xlab = "")) +@ + \caption[Data Density]{A density plot of the response. 
The marks + along the $x$--axis show the locations of the data points.} + \label{F:dens} + \end{center} +\end{figure} + + +<<pca, eval= $PCAEVAL, echo = $PCAECHO, results = $PCARESULT>>= + predictorNames <- names(rawData)[names(rawData) != "outcome"] + numPredictors <- length(predictorNames) + +predictors <- rawData[, predictorNames, drop = FALSE] +## PCA will fail with predictors having less than 2 unique values +isZeroVar <- apply(predictors, 2, + function(x) length(unique(x)) < 2) +if(any(isZeroVar)) predictors <- predictors[, !isZeroVar, drop = FALSE] +## For whatever, only the formula interface to prcomp +## handles missing values +pcaForm <- as.formula( + paste("~", + paste(names(predictors), collapse = "+"))) +pca <- prcomp(pcaForm, + data = predictors, + center = TRUE, + scale. = TRUE, + na.action = na.omit) +## OPTION: the number of components plotted/discussed can be set +numPCAcomp <- $PCACOMP +pctVar <- pca$sdev^2/sum(pca$sdev^2)*100 +pcaText <- paste(round(pctVar[1:numPCAcomp], 1), + "\\\\%", + sep = "") +pcaText <- listString(pcaText) +@ + +To get an initial assessment of the separability of the classes, +principal component analysis (PCA) was used to distill the +\Sexpr{numPredictors} predictors down into \Sexpr{numPCAcomp} +surrogate variables (i.e. the principal components) in a manner that +attempts to maximize the amount of information preserved from the +original predictor set. Figure \ref{F:inititalPCA} contains plots of +the first \Sexpr{numPCAcomp} components, which accounted for +\Sexpr{pcaText} percent of the variability in the original predictors +(respectively). 
+ +%% OPTION: remark on how well (or poorly) the data separated + +\setkeys{Gin}{width = 0.8\textwidth} +\begin{figure}[p] + \begin{center} + +<<pcaPlot, eval = $PCAPLOTEVAL, echo = $PCAPLOTECHO, results = $PCAPLOTRESULT, fig = $PCAPLOTFIG, width = 8, height = 8>>= +trellis.par.set(caretTheme(), warn = TRUE) +if(numPCAcomp == 2) + { + axisRange <- extendrange(pca$x[, 1:2]) + print( + latticeBubble(x = as.data.frame(pca$x)$PC1, + y = as.data.frame(pca$x)$PC2, + z = rawData$outcome, + type = c("p", "g"), + xlab = "PC1", ylab = "PC2", + xlim = axisRange, + ylim = axisRange)) + + } else { + axisRange <- extendrange(pca$x[, 1:numPCAcomp]) + print( + latticeBubble(x = as.data.frame(pca$x)[,1:numPCAcomp], + + z = rawData$outcome, + type = c("p", "g"), + xlab = "PC1", ylab = "PC2", + xlim = axisRange, + ylim = axisRange)) + + + } +@ + \caption[PCA Plot]{A plot of the first \Sexpr{numPCAcomp} + principal components for the original data set. Smaller, lighter + points indicate smaller values of the response while darker, + larger points correspond to larger values of the outcome} + \label{F:inititalPCA} + \end{center} +\end{figure} + + +<<initialDataSplit, eval = $INITIALDATASPLITEVAL, echo = $INITIALDATASPLITECHO, results = $INITIALDATASPLITRESULT>>= + + ## OPTION: in small samples sizes, you may not want to set aside a + ## training set and focus on the resampling results. 
+ numSamples <- nrow(rawData) + + predictorNames <- names(rawData)[names(rawData) != "outcome"] + numPredictors <- length(predictorNames) + +# pctTrain <- .15 + pctTrain <- $PERCENT + +if(pctTrain < 1) + { + ## OPTION: seed number can be changed + set.seed(1) + inTrain <- createDataPartition(rawData$outcome, + p = pctTrain, + list = FALSE) + trainX <- rawData[ inTrain, predictorNames] + testX <- rawData[-inTrain, predictorNames] + trainY <- rawData[ inTrain, "outcome"] + testY <- rawData[-inTrain, "outcome"] + splitText <- paste("The original data were split into ", + "a training set ($n$=", + nrow(trainX), + ") and a test set ($n$=", + nrow(testX), + ") in a manner that preserved the ", + "distribution of the response.", + sep = "") + isZeroVar <- apply(trainX, 2, + function(x) length(unique(x)) < 2) + if(any(isZeroVar)) + { + trainX <- trainX[, !isZeroVar, drop = FALSE] + testX <- testX[, !isZeroVar, drop = FALSE] + } + + } else { + trainX <- rawData[, predictorNames] + testX <- NULL + trainY <- rawData[, "outcome"] + testY <- NULL + splitText <- "The entire data set was used as the training set." + } + +remove("rawData") + +@ + +\Sexpr{splitText} +The data set for model building consisted of \Sexpr{numSamples} samples and +\Sexpr{numPredictors} predictor variables. 
+ + + +<<nzv, eval= $NZVEVAL, results = $NZVRESULT, echo = $NZVECHO>>= +## OPTION: other pre-processing steps can be used +ppSteps <- caret:::suggestions(modName) + +set.seed(2) +if(ppSteps["nzv"]) + { + nzv <- nearZeroVar(trainX) + if(length(nzv) > 0) + { + nzvVars <- names(trainX)[nzv] + trainX <- trainX[, -nzv] + nzvText <- paste("There were ", + length(nzv), + " predictors that were removed due to", + " severely unbalanced distributions that", + " could negatively affect the model fit", + ifelse(length(nzv) > 10, + ".", + paste(": ", + listString(nzvVars), + ".", + sep = "")), + sep = "") + testX <- testX[, -nzv] + } else nzvText <- "" + } else nzvText <- "" +@ + +\Sexpr{nzvText} + +<<corrFilter, eval = $CORRFILTEREVAL, results = $CORRFILTERRESULT, echo = $CORRFILTERECHO>>= + +if(ppSteps["corr"]) + { + ## OPTION: + ##corrThresh <- .75 + corrThresh <- $THRESHHOLDCOR + highCorr <- findCorrelation(cor(trainX, use = "pairwise.complete.obs"), + corrThresh) + if(length(highCorr) > 0) + { + corrVars <- names(trainX)[highCorr] + trainX <- trainX[, -highCorr] + corrText <- paste("There were ", + length(highCorr), + " predictors that were removed due to", + " large between--predictor correlations that", + " could negatively affect the model fit", + ifelse(length(highCorr) > 10, + ".", + paste(": ", + listString(highCorr), + ".", + sep = "")), + " Removing these predictors forced", + " all pair--wise correlations to be", + " less than ", + corrThresh, + ".", + sep = "") + testX <- testX[, -highCorr] + } else corrText <- "" + }else corrText <- "" +@ + + \Sexpr{corrText} + +<<preProc, eval = $PREPROCEVAL, echo = $PREPROCECHO, results = $PREPROCRESULT>>= + +ppMethods <- NULL +if(ppSteps["center"]) ppMethods <- c(ppMethods, "center") +if(ppSteps["scale"]) ppMethods <- c(ppMethods, "scale") +if(any(hasMissing) > 0) ppMethods <- c(ppMethods, "knnImpute") +##OPTION other methods, such as spatial sign, can be added to this list + +if(length(ppMethods) > 0) + { + ppInfo <- 
preProcess(trainX, method = ppMethods) + trainX <- predict(ppInfo, trainX) + if(pctTrain < 1) testX <- predict(ppInfo, testX) + ppText <- paste("The following pre--processing methods were", + " applied to the training", + ifelse(pctTrain < 1, " and test", ""), + " data: ", + listString(ppMethods), + ".", + sep = "") + ppText <- gsub("center", "mean centering", ppText) + ppText <- gsub("scale", "scaling to unit variance", ppText) + ppText <- gsub("knnImpute", + paste(ppInfo$k, "--nearest neighbor imputation", sep = ""), + ppText) + ppText <- gsub("spatialSign", "the spatial sign transformation", ppText) + ppText <- gsub("pca", "principal component feature extraction", ppText) + ppText <- gsub("ica", "independent component feature extraction", ppText) + } else { + ppInfo <- NULL + ppText <- "" + } + +predictorNames <- names(trainX) +if(nzvText != "" | corrText != "" | ppText != "") + { + varText <- paste("After pre--processing, ", + ncol(trainX), + "predictors remained for modeling.") + } else varText <- "" + +@ + +\Sexpr{ppText} \Sexpr{varText} + +\clearpage +\section*{Model Building} + +<<setupWorkers, eval = TRUE, echo = $SETUPWORKERSECHO, results = $SETUPWORKERSRESULT>>= + +numWorkers <- $NUMWORKERS +##OPTION: turn up numWorkers to use MPI +if(numWorkers > 1) + { + mpiCalcs <- function(X, FUN, ...) + { + theDots <- list(...) + parLapply(theDots$cl, X, FUN) + } + + library(snow) + cl <- makeCluster(numWorkers, "MPI") + } +@ + +<<setupResampling, echo = $SETUPRESAMPLINGECHO, results = $SETUPRESAMPLINGRESULT>>= +##<<setupResampling, echo = FALSE, results = hide>>= +##OPTION: the resampling options can be changed. 
See +## ?trainControl for details +resampName <- "repeatedcv" +resampNumber <- $RESAMPLENUMBER +numRepeat <- 3 +resampP <- $RESAMPLENUMBERPERCENT + +modelInfo <- modelLookup(modName) + +set.seed(3) +ctlObj <- trainControl(method = resampName, + number = resampNumber, + repeats = numRepeat, + p = resampP) + + +##OPTION select other performance metrics as needed +optMetric <- "RMSE" + +if(numWorkers > 1) + { + ctlObj$workers <- numWorkers + ctlObj$computeFunction <- mpiCalcs + ctlObj$computeArgs <- list(cl = cl) + } +@ + +<<setupGrid, results = $SETUPGRIDRESULT, echo = $SETUPGRIDECHO>>= + +##OPTION expand or contract these grids as needed (or +## add more models + +gridSize <- $SETUPGRIDSIZE + +if(modName %in% c("svmPoly", "svmRadial", "svmLinear", "ctree2", "ctree")) gridSize <- 5 +if(modName %in% c("earth")) gridSize <- 7 +if(modName %in% c("knn", "glmboost", "rf", "nodeHarvest")) gridSize <- 10 + +if(modName %in% c("rpart")) gridSize <- 15 +if(modName %in% c("pls", "lars2", "lars")) gridSize <- min(20, ncol(trainX)) + +if(modName == "gbm") + { + tGrid <- expand.grid(.interaction.depth = -1 + (1:5)*2 , + .n.trees = (1:10)*20, + .shrinkage = .1) + } + +if(modName == "nnet") + { + tGrid <- expand.grid(.size = -1 + (1:5)*2 , + .decay = c(0, .001, .01, .1)) + } + +@ + + +<<fitModel, results = $FITMODELRESULT, echo = $FITMODELECHO, eval = $FITMODELEVAL>>= + +##OPTION alter as needed + +set.seed(4) +modelFit <- switch(modName, + gbm = + { + mix <- sample(seq(along = trainY)) + train( + trainX[mix,], trainY[mix], modName, + verbose = FALSE, + bag.fraction = .9, + metric = optMetric, + trControl = ctlObj, + tuneGrid = tGrid) + }, + + nnet = + { + train( + trainX, trainY, modName, + metric = optMetric, + linout = TRUE, + trace = FALSE, + maxiter = 1000, + MaxNWts = 5000, + trControl = ctlObj, + tuneGrid = tGrid) + + }, + + svmRadial =, svmPoly =, svmLinear = + { + train( + trainX, trainY, modName, + metric = optMetric, + scaled = TRUE, + trControl = ctlObj, + tuneLength = 
gridSize) + }, + { + train(trainX, trainY, modName, + trControl = ctlObj, + metric = optMetric, + tuneLength = gridSize) + }) + +@ + +<<modelDescr, echo = $MODELDESCRECHO, results = $MODELDESCRRESULT>>= + + +summaryText <- "" + +resampleName <- switch(tolower(modelFit$control$method), + boot = paste("the bootstrap (", length(modelFit$control$index), " reps)", sep = ""), + boot632 = paste("the bootstrap 632 rule (", length(modelFit$control$index), " reps)", sep = ""), + cv = paste("cross-validation (", modelFit$control$number, " fold)", sep = ""), + repeatedcv = paste("cross-validation (", modelFit$control$number, " fold, repeated ", + modelFit$control$repeats, " times)", sep = ""), + lgocv = paste("repeated train/test splits (", length(modelFit$control$index), " reps, ", + round(modelFit$control$p, 2), "$\\%$)", sep = "")) + +tuneVars <- latexTranslate(tolower(modelInfo$label)) +tuneVars <- gsub("\\#", "the number of ", tuneVars, fixed = TRUE) +if(ncol(modelFit$bestTune) == 1 && colnames(modelFit$bestTune) == ".parameter") + { + summaryText <- paste(summaryText, + "\n\n", + "There are no tuning parameters associated with this model.", + "To characterize the model performance on the training set,", + resampleName, + "was used.", + "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile}", + "show summaries of the resampling results. 
") + + } else { + summaryText <- paste("There", + ifelse(nrow(modelInfo) > 1, "are", "is"), + nrow(modelInfo), + ifelse(nrow(modelInfo) > 1, "tuning parameters", "tuning parameter"), + "associated with this model:", + listString(tuneVars, period = TRUE)) + + + + paramNames <- gsub(".", "", names(modelFit$bestTune), fixed = TRUE) + ## Swap each tuning-parameter name for its modelInfo label when one matches + for(i in seq(along = paramNames)) + { + check <- modelInfo$parameter %in% paramNames[i] + if(any(check)) + { + paramNames[i] <- modelInfo$label[which(check)] + } + } + + paramNames <- gsub("#", "the number of ", paramNames, fixed = TRUE) + ## Check to see if there was only one combination fit + summaryText <- paste(summaryText, + "To choose", + ifelse(nrow(modelInfo) > 1, + "appropriate values of the tuning parameters,", + "an appropriate value of the tuning parameter,"), + resampleName, + "was used to generate a profile of performance across the", + nrow(modelFit$results), + ifelse(nrow(modelInfo) > 1, + "combinations of the tuning parameters.", + "candidate values."), + + "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile} show", + "summaries of the resampling profile. 
", "The final model fitted to the entire training set was:", + listString(paste(latexTranslate(tolower(paramNames)), "=", modelFit$bestTune[1,]), period = TRUE)) + + } +@ + +\Sexpr{summaryText} + +<<resampTable, echo = $RESAMPTABLEECHO, results = $RESAMPTABLERESULT>>= + +tableData <- modelFit$results + +if(all(modelInfo$parameter == "parameter")) + { + tableData <- tableData[,-1, drop = FALSE] + colNums <- c(length(modelFit$perfNames), length(modelFit$perfNames)) + colLabels <- c("Mean", "Standard Deviation") + constString <- "" + isConst <- NULL + } else { + + isConst <- apply(tableData[, modelInfo$parameter, drop = FALSE], + 2, + function(x) length(unique(x)) == 1) + + numParamInTable <- sum(!isConst) + + if(any(isConst)) + { + ## Parameters with a single value across all rows are dropped from the table and described in the caption instead + constParam <- modelInfo$parameter[isConst] + constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE] + tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE] + constString <- paste("The tuning", + ifelse(sum(isConst) > 1, + "parameters", + "parameter"), + listString(paste("``", names(constValues), "''", sep = "")), + ifelse(sum(isConst) > 1, + "were", + "was"), + "held constant at", + ifelse(sum(isConst) > 1, + "values of", + "a value of"), + listString(constValues[1,])) + + } else constString <- "" + + cn <- colnames(tableData) + for(i in seq(along = cn)) + { + check <- modelInfo$parameter %in% cn[i] + if(any(check)) + { + cn[i] <- modelInfo$label[which(check)] + } + } + colnames(tableData) <- cn + + colNums <- c(numParamInTable, + length(modelFit$perfNames), + length(modelFit$perfNames)) + colLabels <- c("", "Mean", "Standard Deviation") + } + +colnames(tableData) <- gsub("SD$", "", colnames(tableData)) +colnames(tableData) <- latexTranslate(colnames(tableData)) +rownames(tableData) <- latexTranslate(rownames(tableData)) + +latex(tableData, + rowname = NULL, + file = "", + cgroup = colLabels, + n.cgroup = colNums, + where = "h!", + digits = 4, + longtable = nrow(tableData) > 30, + 
caption = paste(resampleName, "results from the model fit.", constString), + label = "T:resamps") +@ + +\setkeys{Gin}{ width = 0.9\textwidth} +\begin{figure}[b] + \begin{center} + +<<profilePlot, echo = $PROFILEPLOTECHO, fig = $PROFILEPLOTFIG, width = 8, height = 6>>= + + trellis.par.set(caretTheme(), warn = TRUE) +if(all(modelInfo$parameter == "parameter") | all(isConst) | modName == "nb") + { + resultsPlot <- resampleHist(modelFit) + plotCaption <- paste("Distributions of model performance from the ", + "training set estimated using ", + resampleName) + } else { + if(modName %in% c("svmPoly", "svmRadial", "svmLinear")) + { + resultsPlot <- plot(modelFit, + metric = optMetric, + xTrans = function(x) log10(x)) + resultsPlot <- update(resultsPlot, + type = c("g", "p", "l"), + ylab = paste(optMetric, " (", resampleName, ")", sep = "")) + + } else { + resultsPlot <- plot(modelFit, + metric = optMetric) + resultsPlot <- update(resultsPlot, + type = c("g", "p", "l"), + ylab = paste(optMetric, " (", resampleName, ")", sep = "")) + } + plotCaption <- paste("A plot of the estimates of the", + optMetric, + "values calculated using", + resampleName) + } +print(resultsPlot) +@ + \caption[Performance Plot]{\Sexpr{plotCaption}.} + \label{F:profile} + \end{center} +\end{figure} + +<<stopWorkers, echo = $STOPWORKERSECHO, results = $STOPWORKERSRESULT>>= +##<<stopWorkers, echo = FALSE, results = hide>>= +if(numWorkers > 1) stopCluster(cl) +@ + +<<testPred, results = $TESTPREDRESULT, echo = $TESTPREDECHO>>= + + + if(pctTrain < 1) + { + cat("\\clearpage\n\\section*{Test Set Results}\n\n") + + testPreds <- extractPrediction(list(fit = modelFit), + testX = testX, testY = testY) + testPreds <- subset(testPreds, dataType == "Test") + values <- modelFit$control$summaryFunction(testPreds) + names(values) <- gsub("RMSE", "root mean squared error", names(values), fixed = TRUE) + names(values) <- gsub("Rsquared", "$R^2$", names(values), fixed = TRUE) + values <- format(values, digits = 3) + + 
testString <- paste("Based on the test set of", + nrow(testX), + "samples,", + listString(paste(names(values), "was", values), period = TRUE), + " A plot of the observed and predicted outcomes for the test set ", + "is given in Figure \\\\ref{F:obsPred}.") + testString <- paste(testString, + " Using ", resampleName, + ", the training set estimates were ", + resampleStats(modelFit), + ".", + sep = "") + + axisRange <- extendrange(testPreds[, c("obs", "pred")]) + obsPred <- xyplot(obs ~ pred, + data = testPreds, + xlim = axisRange, + ylim = axisRange, + panel = function(x, y) + { + panel.abline(0, 1, col = "darkgrey", lty = 2) + panel.xyplot(x, y, type = c("p", "g")) + panel.loess(x, y, col = "darkred", lwd = 2) + + + }, + ylab = "Observed Response", + xlab = "Predicted Response") + + pdf("obsPred.pdf", height = 8, width = 8) + trellis.par.set(caretTheme()) + print(obsPred) + dev.off() + + } else testString <- "" +@ +\Sexpr{testString} + + +<<classProbsTex, results = $CLASSPROBSTEXRESULT, echo = $CLASSPROBSTEXECHO>>= + + + if(pctTrain < 1) + { + cat( + paste("\\begin{figure}[p]\n", + "\\begin{center}\n", + "\\includegraphics{obsPred}", + "\\caption[Observed V Fitted Values]{", + "The observed and predicted responses. ", + "The grey line is the line of identity while the", + "solid red line is a smoothed trend line.}\n", + "\\label{F:obsPred}\n", + "\\end{center}\n", + "\\end{figure}")) + } + +@ + +\section*{Versions} + +<<versions, echo = FALSE, results = tex>>= +toLatex(sessionInfo()) + +@ + +<<save-data, echo = $SAVEDATAECHO, results = $SAVEDATARESULT>>= +## change this to the name of modName.... +Fit<-modelFit +save(Fit,file="$METHOD-Fit.RData") +@ +The model was built using $METHOD and is saved as $METHOD-Fit.RData for reuse. This contains the variable Fit. + + +\end{document}''' + return template4Rnw
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool2reg.xml Fri Jan 22 14:16:12 2016 -0500 @@ -0,0 +1,567 @@ +<tool id="cghyjaqxpsssp2" name="Create script from the template file for regression "> +<description> + used to create script file from user given input to build a model +</description> +<requirements> + <requirement type="set_environment">CARET_REG_TOOL2_PATH</requirement> + <requirement type="set_environment">R_ROOT_DIR</requirement> + <requirement type="package" version="3.2.0">R</requirement> + <requirement type="package" version="1.0.0">caret-tools</requirement> +</requirements> +<command interpreter="python"> +#if $OPTION11.PARAMAETERS == "Advance" +modelBuilding.py --method $METHOD $RDATA --getdatainfoeval $OPTION11.GETDATAINFOEVAL --getdatainfoecho $OPTION11.GETDATAINFOECHO --getdatainforesult $OPTION11.GETDATAINFORESULT --missingfiltereval $OPTION11.CON1.MISSINGFILTEREVAL --missingfilterecho $OPTION11.CON1.MISSINGFILTERECHO --missingfilterresult $OPTION11.CON1.MISSINGFILTERRESULT --missingfilterthreshc $OPTION11.CON1.MISSINGFILTERTHRESHC --missingfilterthreshr $OPTION11.CON1.MISSINGFILTERTHRESHR --pcaeval $OPTION11.PCAEVAL --pcaecho $OPTION11.PCAECHO --pcaresult $OPTION11.PCARESULT --pcacomp $OPTION11.PCACOMP --pcaploteval $OPTION11.PCAPLOTEVAL --pcaplotecho $OPTION11.PCAPLOTECHO --pcaplotresult $OPTION11.PCAPLOTRESULT --pcaplotfig $OPTION11.PCAPLOTFIG --initialdataspliteval $OPTION11.CON2.INITIALDATASPLITEVAL --initialdatasplitecho $OPTION11.CON2.INITIALDATASPLITECHO --initialdatasplitresult $OPTION11.CON2.INITIALDATASPLITRESULT --percent $OPTION11.CON2.PERCENT --nzveval $OPTION11.CON3.NZVEVAL --nzvresult $OPTION11.CON3.NZVRESULT --nzvecho $OPTION11.CON3.NZVECHO --corrfiltereval $OPTION11.CON4.CORRFILTEREVAL --corrfilterresult $OPTION11.CON4.CORRFILTERRESULT --corrfilterecho $OPTION11.CON4.CORRFILTERECHO --threshholdcor $OPTION11.CON4.THRESHHOLDCOR --preproceval $OPTION11.CON5.PREPROCEVAL --preprocecho $OPTION11.CON5.PREPROCECHO 
--preprocresult $OPTION11.CON5.PREPROCRESULT --setupworkersecho $OPTION11.SETUPWORKERSECHO --setupworkersresult $OPTION11.SETUPWORKERSRESULT --numworkers $OPTION11.NUMWORKERS --setupresamplingecho $OPTION11.CON6.SETUPRESAMPLINGECHO --setupresamplingresult $OPTION11.CON6.SETUPRESAMPLINGRESULT --resamplenumber $OPTION11.CON6.RESAMPLENUMBER --resamplenumberpercent $OPTION11.CON6.RESAMPLENUMBERPERCENT --setupgridresult $OPTION11.SETUPGRIDRESULT --setupgridecho $OPTION11.SETUPGRIDECHO --setupgridsize $OPTION11.SETUPGRIDSIZE --fitmodelresult $OPTION11.FITMODELRESULT --fitmodelecho $OPTION11.FITMODELECHO --fitmodeleval $OPTION11.FITMODELEVAL --modeldescrecho $OPTION11.MODELDESCRECHO --modeldescrresult $OPTION11.MODELDESCRRESULT --resamptableecho $OPTION11.RESAMPTABLEECHO --resamptableresult $OPTION11.RESAMPTABLERESULT --profileplotecho $OPTION11.PROFILEPLOTECHO --profileplotfig $OPTION11.PROFILEPLOTFIG --stopworkersecho $OPTION11.STOPWORKERSECHO --stopworkersresult $OPTION11.STOPWORKERSRESULT --testpredresult $OPTION11.TESTPREDRESULT --testpredecho $OPTION11.TESTPREDECHO --classprobstexresult $OPTION11.CLASSPROBSTEXRESULT --classprobstexecho $OPTION11.CLASSPROBSTEXECHO --savedataecho $OPTION11.SAVEDATAECHO --savedataresult $OPTION11.SAVEDATARESULT --outputmodel $model --outputresultpdf $document; +#end if +#if $OPTION11.PARAMAETERS == "basic" +modelBuilding.py --method $METHOD $RDATA --getdatainfoeval $OPTION11.GETDATAINFOEVAL --getdatainfoecho $OPTION11.GETDATAINFOECHO --getdatainforesult $OPTION11.GETDATAINFORESULT --missingfiltereval $OPTION11.MISSINGFILTEREVAL --missingfilterecho $OPTION11.MISSINGFILTERECHO --missingfilterresult $OPTION11.MISSINGFILTERRESULT --missingfilterthreshc $OPTION11.MISSINGFILTERTHRESHC --missingfilterthreshr $OPTION11.MISSINGFILTERTHRESHR --pcaeval $OPTION11.PCAEVAL --pcaecho $OPTION11.PCAECHO --pcaresult $OPTION11.PCARESULT --pcacomp $OPTION11.PCACOMP --pcaploteval $OPTION11.PCAPLOTEVAL --pcaplotecho $OPTION11.PCAPLOTECHO --pcaplotresult 
$OPTION11.PCAPLOTRESULT --pcaplotfig $OPTION11.PCAPLOTFIG --initialdataspliteval $OPTION11.INITIALDATASPLITEVAL --initialdatasplitecho $OPTION11.INITIALDATASPLITECHO --initialdatasplitresult $OPTION11.INITIALDATASPLITRESULT --percent $OPTION11.PERCENT --nzveval $OPTION11.NZVEVAL --nzvresult $OPTION11.NZVRESULT --nzvecho $OPTION11.NZVECHO --corrfiltereval $OPTION11.CORRFILTEREVAL --corrfilterresult $OPTION11.CORRFILTERRESULT --corrfilterecho $OPTION11.CORRFILTERECHO --threshholdcor $OPTION11.THRESHHOLDCOR --preproceval $OPTION11.PREPROCEVAL --preprocecho $OPTION11.PREPROCECHO --preprocresult $OPTION11.PREPROCRESULT --setupworkersecho $OPTION11.SETUPWORKERSECHO --setupworkersresult $OPTION11.SETUPWORKERSRESULT --numworkers $OPTION11.NUMWORKERS --setupresamplingecho $OPTION11.SETUPRESAMPLINGECHO --setupresamplingresult $OPTION11.SETUPRESAMPLINGRESULT --resamplenumber $OPTION11.RESAMPLENUMBER --resamplenumberpercent $OPTION11.RESAMPLENUMBERPERCENT --setupgridresult $OPTION11.SETUPGRIDRESULT --setupgridecho $OPTION11.SETUPGRIDECHO --setupgridsize $OPTION11.SETUPGRIDSIZE --fitmodelresult $OPTION11.FITMODELRESULT --fitmodelecho $OPTION11.FITMODELECHO --fitmodeleval $OPTION11.FITMODELEVAL --modeldescrecho $OPTION11.MODELDESCRECHO --modeldescrresult $OPTION11.MODELDESCRRESULT --resamptableecho $OPTION11.RESAMPTABLEECHO --resamptableresult $OPTION11.RESAMPTABLERESULT --profileplotecho $OPTION11.PROFILEPLOTECHO --profileplotfig $OPTION11.PROFILEPLOTFIG --stopworkersecho $OPTION11.STOPWORKERSECHO --stopworkersresult $OPTION11.STOPWORKERSRESULT --testpredresult $OPTION11.TESTPREDRESULT --testpredecho $OPTION11.TESTPREDECHO --classprobstexresult $OPTION11.CLASSPROBSTEXRESULT --classprobstexecho $OPTION11.CLASSPROBSTEXECHO --savedataecho $OPTION11.SAVEDATAECHO --savedataresult $OPTION11.SAVEDATARESULT --outputmodel $model --outputresultpdf $document; +#end if + </command> +<inputs> + <param name="METHOD" type="select" label="Select Method to Train Data" > + <option 
value="gbm">Generalized Boosting</option> + <option value="treebag">treebag-Bagged CART</option> + <option value="earth">earth-MARS method</option> + <option value="Boruta">Boruta-Random forest with feature selection</option> + <option value="bstLs">Boosted Linear Model</option> + <option value="glm">Generalized Linear Model</option> + <option value="pls" selected="true">Partial Least Square</option> + <option value="svmRadial">SVM radial basis function</option> + <option value="svmLinear">SVM Linear function</option> + <option value="rpart">rpart-CART</option> + <option value="rf">Random Forest</option> + <option value="knn">k-Nearest Neighbors</option> + <!--option value="bagFDA">bag-Fourier Discriminant Analysis</option> + <option value="gbm">Generalized Boosting</option> + <option value="blackboost">black-boost- Boosting Method</option> + <option value="earth">Earth-MARS based method</option> + <option value="rf">Random Forest</option> + <option value="RRF">RRFglobal -Variant of Random Forest</option> + <option value="svmRadial">SVM-Radial</option> + <option value="svmPoly">SVM-Polynomial</option> + <option value="ada">ada-boost</option> + <option value="glm">Generalised linear model </option> + <option value="treebag">tree based bagging method</option> + <option value="nb">Naive Bayes</option> + <option value="knn">K-nearest neighbour</option--> + </param> + <param name="RDATA" format="data" type="data" label="Select File Containing Training Data" help="RData format" /> + + + <conditional name="OPTION11"> + <param name="PARAMAETERS" type="select"> + <option value="basic" selected="TRUE" >Use optimized parameters </option> + <option value="Advance"> Customized parameters </option> + </param> + + <when value="basic"> + + <param name="GETDATAINFOEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE" /> + + <param name="GETDATAINFOECHO" type="hidden" value="FALSE" help="set True if wish to print. 
default is False"/> + <param name="GETDATAINFORESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/> + + <param name="MISSINGFILTEREVAL" type="hidden" value= "TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + + <param name="MISSINGFILTERECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="MISSINGFILTERRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="MISSINGFILTERTHRESHC" type="hidden" value="0.2" help="For column wise default is 0.2"/> + + <param name="MISSINGFILTERTHRESHR" type="hidden" value="0.2" help="For row wise default is 0.2"/> + + <param name="PCAEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + + <param name="PCAECHO" type="hidden" value="FALSE" help="set True if wish to print. default is False"/> + + <param name="PCARESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/> + + <param name="PCACOMP" type="hidden" value="3" help="set according to need. Default is 3"/> + + <param name="PCAPLOTEVAL" type="hidden" value="TRUE" help="set TRUE if wish to plot PCA. default is TRUE"/> + + <param name="PCAPLOTECHO" type="hidden" value="FALSE" help="Set True if wish to Print .default is False"/> + + <param name="PCAPLOTRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="PCAPLOTFIG" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + + <param name="INITIALDATASPLITEVAL" type="hidden" value= "TRUE" help="set TRUE for splitting in test and train set.default is True"/> + + <param name="INITIALDATASPLITECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + + <param name="INITIALDATASPLITRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. 
default is tex"/> + + <param name="PERCENT" type="hidden" value="0.8" help="default is 0.8"/> + + <param name="NZVEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + + <param name="NZVRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="NZVECHO" type="hidden" value="FALSE" label="Write Code in Document" help="set True if wish to print .default is False"/> + + <param name="CORRFILTEREVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + + <param name="CORRFILTERRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="CORRFILTERECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="THRESHHOLDCOR" type="hidden" value="0.75" help="set according to need .default is 0.75"/> + + <param name="PREPROCEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + + <param name="PREPROCECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="PREPROCRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="SETUPWORKERSEVAL" type="hidden" value="FALSE" help="set TRUE if wish to evaluate. default is False"/> + + <param name="SETUPWORKERSECHO" type="hidden" value="FALSE" label="Write Code in Document" help="set True if wish to print .default is False"/> + + <param name="SETUPWORKERSRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="NUMWORKERS" type="hidden" value="1" help ="default is 1"/> + + <param name="SETUPRESAMPLINGECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="SETUPRESAMPLINGRESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. 
default is hide"/> + + <param name="RESAMPLENUMBER" type="hidden" value="10" label="Set Number of Times to Resample" help="default is 10"/> + + <param name="RESAMPLENUMBERPERCENT" type="hidden" value="0.75" help="default is 0.75"/> + + <param name="SETUPGRIDRESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/> + + <param name="SETUPGRIDECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="SETUPGRIDSIZE" type="hidden" value="3" help="default is 3 "/> + + <param name="FITMODELEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + + <param name="FITMODELRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="FITMODELECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="MODELDESCRECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="MODELDESCRRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="RESAMPTABLEECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="RESAMPTABLERESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="PROFILEPLOTECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="PROFILEPLOTFIG" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + + <param name="STOPWORKERSECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="STOPWORKERSRESULT" type="hidden" value= "hide" help="Set tex if wish to write in output pdf file. default is hide"/> + + <param name="TESTPREDRESULT" type="hidden" value= "tex" help="Set tex if wish to write in output pdf file. 
default is tex"/> + + <param name="TESTPREDECHO" type="hidden" value="FALSE" help="set True if wish to print. default is False"/> + + <param name="CLASSPROBSTEXRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="CLASSPROBSTEXECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False" /> + + <param name="SAVEDATAECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="SAVEDATARESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + </when> + + + <when value="Advance"> + <param name="GETDATAINFOEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE" /> + <param name="GETDATAINFOECHO" type="hidden" value="FALSE" help="set True if wish to print. default is False"/> + <param name="GETDATAINFORESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <conditional name="CON1"> + <param name="PARAMAETERS1" type="select" label="1. Remove Missing Values from the input data"> + <option value="YES">YES </option> + <option value="NO" selected="true">NO </option> + </param> + + <when value="YES"> + <param name="MISSINGFILTEREVAL" type="hidden" value= "TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + <param name="MISSINGFILTERECHO" type="select" label="1(i). Write Code in Document" help="set True if wish to print .default is False" > + <option value="FALSE" selected="true">false</option> + <option value="TRUE">true</option> + </param> + <param name="MISSINGFILTERRESULT" type="select" label="1(ii). Write Result in document" help="Set tex if wish to write in output pdf file. 
default is tex"> + <option value="hide">hide-result will not written in file</option> + <option value="tex" selected="true">tex-result will written in file</option> + </param> + <param name="MISSINGFILTERTHRESHC" type="select" label="1(iii). Set Cutoff Value for Missing Data value Columwise" help="For column wise default is 0.1 means column which has missing value more than 10% will be removed "> + <option value="0.1">0.1</option> + <option value="0.2">0.2</option> + <option value="0.25">0.25</option> + <option value="0.3">0.3</option> + <option value="0.35">0.35</option> + <option value="0.4">0.4</option> + <option value="0.45">0.45</option> + <option value="0.5">0.5</option> + <option value="0.55">0.55</option> + <option value="0.6">0.6</option> + <option value="0.65">0.65</option> + <option value="0.7">0.7</option> + <option value="0.75">0.75</option> + <option value="0.8">0.8</option> + </param> + <param name="MISSINGFILTERTHRESHR" type="select" label="1(iv). Set Cutoff Value for Missing Data Value Rowwise " help="For row wise default is 0.1 means row having more than 10% missing values will be removed"> + <option value="0.1">0.1</option> + <option value="0.2">0.2</option> + <option value="0.25">0.25</option> + <option value="0.3">0.3</option> + <option value="0.35">0.35</option> + <option value="0.4">0.4</option> + <option value="0.45">0.45</option> + <option value="0.5">0.5</option> + <option value="0.55">0.55</option> + <option value="0.6">0.6</option> + <option value="0.65">0.65</option> + <option value="0.7">0.7</option> + <option value="0.75">0.75</option> + <option value="0.8">0.8</option> + </param> +</when> +<when value="NO"> + <param name="MISSINGFILTEREVAL" type="hidden" value= "FALSE" help="set TRUE if wish to evaluate. 
default is FALSE"/> + + <param name="MISSINGFILTERECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="MISSINGFILTERRESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/> + + <param name="MISSINGFILTERTHRESHC" type="hidden" value="0" /> + + <param name="MISSINGFILTERTHRESHR" type="hidden" value="0" /> + +</when> +</conditional> + + <param name="PCAEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + + <param name="PCAECHO" type="hidden" value="FALSE" help="set True if wish to print. default is False"/> + + <param name="PCARESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/> + + <param name="PCACOMP" type="select" label="2. Find number of Principle component" help="performs PCA and gives number of PC. Default is 3 "> + <option value="3">3</option> + <option value="4">4</option> + <option value="5">5</option> + <option value="6">6</option> + <option value="7">7</option> + <option value="8">8</option> + <option value="9">9</option> + <option value="10">10</option> + </param> + + <param name="PCAPLOTEVAL" type="hidden" value="TRUE" help="set TRUE if wish to plot PCA. default is TRUE"/> + + <param name="PCAPLOTECHO" type="hidden" value="FALSE" help="Set True if wish to Print .default is False"/> + + <param name="PCAPLOTRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="PCAPLOTFIG" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + + + <conditional name="CON2"> + <param name="PARAMAETERS2" type="select" label="3. 
Customize parameters for data splitting" help="splits data in test and train set."> + <option value="YES2">YES </option> + <option value="NO2" selected="true">NO </option> + </param> + <when value="YES2"> + <param name="INITIALDATASPLITEVAL" type="hidden" value= "TRUE" help="set TRUE for splitting in test and train set.default is True"/> + <!--param name="SAAMPLING" type="select" label="3(i). Select Sampling Method" help="Defult is with No sampling. you may choose downsample or upsample" > + <option value="garBage" selected="true">No Sampling</option> + <option value="downsampling">downsample</option> + <option value="upsampling">upsample</option> + </param--> + <param name="INITIALDATASPLITECHO" type="select" label="3(ii). Write Code in Document" help="set True if wish to print .default is False" > + <option value="FALSE">false</option> + <option value="TRUE">true</option> + </param> + <param name="INITIALDATASPLITRESULT" type="select" value="tex" label="3(iii). Write Result in document" help="Set tex if wish to write in output pdf file. 
default is tex"> + <option value="tex" selected="true">tex-result will written in file</option> + <option value="hide">hide-result will not written in file</option> + </param> + <param name="PERCENT" type="select" label="3(iv) .Set Value at Which Data Will be Splitted in Train and Test Set" help="default is 0.8"> + <option value="0.8">0.8</option> + <option value="0.75">0.75</option> + <option value="0.6">0.6</option> + <option value="0.5">0.5</option> + </param> +</when> + <when value="NO2"> + <param name="INITIALDATASPLITEVAL" type="hidden" value= "TRUE" help="set TRUE for splitting in test and train set.default is True"/> + + <param name="INITIALDATASPLITECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <!--param name="SAAMPLING" type="hidden" value="garBage" help="default is with No sampling"/--> + + <param name="INITIALDATASPLITRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="PERCENT" type="hidden" value="0.8" help="default is 0.8"/> + +</when> +</conditional> + + <conditional name="CON3"> + <param name="PARAMAETERS3" type="select" label="4. Remove near zero variance" help="removes NZV from train and test set."> + <option value="YES3">YES </option> + <option value="NO3" selected="true">NO </option> + </param> +<when value="YES3"> + + <param name="NZVEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + + + <param name="NZVECHO" type="select" label="4(i). Write Code in Document" help="set True if wish to print .default is False"> + <option value="FALSE">false</option> + <option value="TRUE">true</option> + </param> + + <param name="NZVRESULT" type="select" label="4(ii). Write Result in document" help="Set tex if wish to write result in output pdf file. 
default is tex"> + <option value="hide">hide-result will not written in file</option> + <option value="tex" selected="true">tex-result will written in file</option> + </param> +</when> +<when value="NO3"> + <param name="NZVEVAL" type="hidden" value="FALSE" help="set TRUE if wish to evaluate. "/> + <param name="NZVECHO" type="hidden" value="FALSE" help="set TRUE if wish to evaluate. "/> + <param name="NZVRESULT" type="hidden" value="hide" help="set TRUE if wish to evaluate."/> +</when> +</conditional> + + <!--param name="NZVECHO" type="select" label="Write Code in Document" help="set True if wish to print .default is False"> + <option value="FALSE">false</option> + <option value="TRUE">true</option> + </param--> + + <conditional name="CON4"> + <param name="PARAMAETERS4" type="select" label="5. Remove Correlated Values " help="removes correlated attributes from train and test set."> + <option value="YES4">YES </option> + <option value="NO4" selected="true">NO </option> + </param> +<when value="YES4"> + <param name="CORRFILTEREVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + <param name="THRESHHOLDCOR" type="select" label="5(i). cutoff for correlated Value " help="set according to need .default is 0.75 means attributes have 75% or more correlation are ommited from the data"> + <option value="0.75">0.75</option> + <option value="0.4">0.4</option> + <option value="0.45">0.45</option> + <option value="0.5">0.5</option> + <option value="0.55">0.55</option> + <option value="0.6">0.6</option> + <option value="0.65">0.65</option> + <option value="0.7">0.7</option> + <option value="0.8">0.8</option> + <option value="0.85">0.85</option> + <option value="0.9">0.9</option> + <option value="0.95">0.95</option> + </param> + <param name="CORRFILTERECHO" type="select" label="5(ii). 
Write Code in Document" help="set True if wish to print .default is False" > + <option value="FALSE">false</option> + <option value="TRUE">true</option> + </param> + <param name="CORRFILTERRESULT" type="select" label="5(iii). Write Result in document" help="Set tex if wish to write in output pdf file. default is tex"> + <option value="hide">hide-result will not written in file</option> + <option value="tex" selected="true">tex-result will written in file</option> + </param> +</when> +<when value="NO4"> + <param name="CORRFILTEREVAL" type="hidden" value="FALSE"/> + + <param name="CORRFILTERRESULT" type="hidden" value="hide" /> + + <param name="CORRFILTERECHO" type="hidden" value="FALSE" /> + + <param name="THRESHHOLDCOR" type="hidden" value="0" /> +</when> +</conditional> + +<conditional name="CON5"> + <param name="PARAMAETERS5" type="select" label="6. Perform Centering and Scaling of data" help="centering and scaling of train and test set."> + <option value="YES5">YES </option> + <option value="NO5" selected="true">NO </option> + </param> + +<when value="YES5"> + <param name="PREPROCEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is TRUE"/> + <param name="PREPROCECHO" type="select" label="6(i). Write Code in Document" help="set True if wish to write code in document .default is False" > + <option value="FALSE">false</option> + <option value="TRUE">true</option> + </param> + <param name="PREPROCRESULT" type="select" label="6(ii). Write Result in document " help="Set tex if wish to write result in output pdf file. 
default is tex"> + <option value="hide">hide-result will not written in file</option> + <option value="tex" selected="true">tex-result will written in file</option> + </param> +</when> +<when value="NO5"> + <param name="PREPROCEVAL" type="hidden" value="FALSE"/> + + <param name="PREPROCECHO" type="hidden" value="FALSE" /> + + <param name="PREPROCRESULT" type="hidden" value="hide" /> + +</when> +</conditional> + + <param name="SETUPWORKERSEVAL" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. default is False"/> + <param name="SETUPWORKERSECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False" /> + <param name="NUMWORKERS" type="select" label="7. Set Number of Processors" help ="default is 1"> + <option value="1">1</option> + <option value="2">2</option> + <option value="4">4</option> + <option value="6">6</option> + <option value="8">8</option> + <option value="16">16</option> + </param> + <param name="SETUPWORKERSRESULT" type="select" label="7(i). Write Result" help="Set tex if wish to write in output pdf file. default is hide"> + <option value="tex">tex-result will written in file</option> + <option value="hide" selected="true">hide-result will not written in file</option> + </param> + + <conditional name="CON6"> + + <param name="PARAMAETERS6" type="select" label="7. custumize resampling parameters" help="resampling for cross validation"> + <option value="YES6">YES </option> + <option value="NO6" selected="true">NO </option> + </param> +<when value="YES6"> + <param name="SETUPRESAMPLINGECHO" type="select" label="7(i). write code for resampling" help="set True if wish to print .default is False"> + <option value="FALSE">false</option> + <option value="TRUE">true</option> + </param> + <param name="SETUPRESAMPLINGRESULT" type="select" label="7(ii). Write Result in document " help="Set tex if wish to write in output pdf file. 
default is hide"> + <option value="hide">hide-result will not written in file</option> + <option value="tex">tex-result will written in file</option> + </param> + <param name="RESAMPLENUMBER" type="select" label="7(iii). Set Number of times Resample" help="default is 10 "> + <option value="10">10</option> + <option value="5">5</option> + <option value="15">15</option> + <option value="20">20</option> + <option value="25">25</option> + </param> + <param name="RESAMPLENUMBERPERCENT" type="select" label="7(iv). Set Percent splitting of data for resampling" help="default is 0.75"> + <option value="0.75">0.75</option> + <option value="0.4">0.4</option> + <option value="0.45">0.45</option> + <option value="0.5">0.5</option> + <option value="0.55">0.55</option> + <option value="0.6">0.6</option> + <option value="0.65">0.65</option> + <option value="0.7">0.7</option> + <option value="0.8">0.8</option> + </param> +</when> +<when value="NO6"> + <param name="SETUPRESAMPLINGECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="SETUPRESAMPLINGRESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/> + + <param name="RESAMPLENUMBER" type="hidden" value="10" label="Set Number of Times to Resample" help="default is 10"/> + + <param name="RESAMPLENUMBERPERCENT" type="hidden" value="0.75" help="default is 0.75"/> + +</when> +</conditional> + <param name="SETUPGRIDRESULT" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/> + + <param name="SETUPGRIDECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="SETUPGRIDSIZE" type="select" label="8. Set Size of The Grid." 
help="default is 3 "> + <option value="3">3</option> + <option value="4">4</option> + <option value="5">5</option> + <option value="6">6</option> + <option value="7">7</option> + <option value="8">8</option> + <option value="9">9</option> + <option value="10">10</option> + <option value="11">11</option> + <option value="12">12</option> + <option value="13">13</option> + <option value="14">14</option> + <option value="15">15</option> + <option value="16">16</option> + <option value="17">17</option> + <option value="18">18</option> + <option value="19">19</option> + <option value="20">20</option> + </param> + + <param name="FITMODELEVAL" type="boolean" checked="true" value="true" label="9. Build a Model and write result in document" help="default is TRUE"/> + <param name="FITMODELRESULT" type="hidden" value="tex" /> + <param name="FITMODELECHO" type="select" label="9(i). Write Code for model building in Document" help="set True if wish to write code in document .default is False" > + <option value="FALSE">false</option> + <option value="TRUE">true</option> + </param> + <param name="MODELDESCRECHO" type="select" label="9(ii). Write code for Model Description " help="set True if wish to print .default is False" > + <option value="FALSE">false</option> + <option value="TRUE">true</option> + </param> + <param name="MODELDESCRRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="RESAMPTABLEECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="RESAMPTABLERESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="PROFILEPLOTECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="PROFILEPLOTFIG" type="hidden" value="TRUE" help="set TRUE if wish to evaluate. 
default is TRUE"/> + + <param name="STOPWORKERSECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="STOPWORKERSRESULT" type="hidden" value= "hide" help="Set tex if wish to write in output pdf file. default is hide"/> + + <param name="TESTPREDRESULT" type="hidden" value= "tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="TESTPREDECHO" type="hidden" value="FALSE" help="set True if wish to print. default is False"/> + + <param name="CLASSPROBSTEXRESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + <param name="CLASSPROBSTEXECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False" /> + + <!--param name="CLASSPROBSTEXRESULT1" type="hidden" value="hide" help="Set tex if wish to write in output pdf file. default is hide"/> + + <param name="CLASSPROBSTEXECHO1" type="hidden" value="FALSE" help="set True if wish to print .default is False" /--> + + <param name="SAVEDATAECHO" type="hidden" value="FALSE" help="set True if wish to print .default is False"/> + + <param name="SAVEDATARESULT" type="hidden" value="tex" help="Set tex if wish to write in output pdf file. default is tex"/> + + </when> + </conditional> +</inputs> + +<outputs> + <data type="data" format="data" label="$METHOD regression Model " name="model" /> + <data format="pdf" label="$METHOD regression Document" name="document" /> +</outputs> +<help> + +.. class:: infomark + + + +**Instruction** + +---------- + +Users may change any parameter to suit their requirements. For normal use, + +the user only needs to provide an input CSV file with the (numeric) response variable as the last column, and the method for model building. + +More details are given in the user manual. Please click here + + + +</help> + + + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Jan 22 14:16:12 2016 -0500 @@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<tool_dependency> <!-- Dependency manifest for this repository: one environment variable plus two packages that are provided by other Tool Shed repositories. --> + +<set_environment version="1.0"> <!-- Sets CARET_REG_TOOL2_PATH to $REPOSITORY_INSTALL_DIR; presumably the tool wrapper uses it to locate its scripts (TODO confirm against the tool's command section). --> + <environment_variable name="CARET_REG_TOOL2_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable> + </set_environment> + <package name="R" version="3.2.0"> <!-- R 3.2.0, taken from revision 7833b0ebf8d6 of the package_r_3_2_0 repository owned by iuc; flagged prior_installation_required. --> + <repository changeset_revision="7833b0ebf8d6" name="package_r_3_2_0" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="caret-tools" version="1.0.0"> <!-- caret-tools 1.0.0, taken from revision e5faefaf1037 of the caret_tool_test1 repository owned by deepakjadmin, on the same test Tool Shed; also flagged prior_installation_required. --> + <repository changeset_revision="e5faefaf1037" name="caret_tool_test1" owner="deepakjadmin" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>