changeset 5:1118410b26dc draft

Deleted selected files
author deepakjadmin
date Mon, 04 Apr 2016 02:55:22 -0400
parents 4df942aff9bb
children f8c9f83c7abb
files templateLibrary.py
diffstat 1 files changed, 0 insertions(+), 1253 deletions(-) [+]
line wrap: on
line diff
--- a/templateLibrary.py	Fri Jan 22 11:57:21 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1253 +0,0 @@
-def __template4Rnw():
-
-	template4Rnw = r'''%% Classification Modeling Script 
-%% Max Kuhn (max.kuhn@pfizer.com, mxkuhn@gmail.com)
-%% Version: 1.00
-%% Created on: 2010/10/02
-%%
-%% This is an Sweave template for building and describing
-%% classification models. It mixes R and LaTeX code. The document can
-%% be processing using R's Sweave function to produce a tex file.  
-%%
-%% The inputs are:
-%% - the initial data set in a data frame called 'rawData' 
-%% - a factor column in the data set called 'class'. this should be the
-%%    outcome variable 
-%% - all other columns in rawData should be predictor variables
-%% - the type of model should be in a variable called 'modName'.
-%% 
-%% The script attempts to make some intelligent choices based on the
-%% model being used. For example, if modName is "pls", the script will
-%% automatically center and scale the predictor data. There are
-%% situations where these choices can (and should be) changed.   
-%%
-%% There are other options that may make sense to change. For example,
-%% the user may want to adjust the type of resampling. To find these
-%% parts of the script, search on the string 'OPTION'. These parts of
-%% the code will document the options. 
-
-\documentclass[14pt]{report}
-\usepackage{amsmath}
-\usepackage[pdftex]{graphicx}
-\usepackage{color}
-\usepackage{ctable}
-\usepackage{xspace}
-\usepackage{fancyvrb}
-\usepackage{fancyhdr}
-\usepackage{lastpage}
-\usepackage{longtable} 
-\usepackage{algorithm2e}
-\usepackage[
-         colorlinks=true,
-         linkcolor=blue,
-         citecolor=blue,
-         urlcolor=blue]
-           {hyperref}
-\usepackage{lscape}
-\usepackage{Sweave}
-\SweaveOpts{keep.source = TRUE}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-% define new colors for use
-\definecolor{darkgreen}{rgb}{0,0.6,0}
-\definecolor{darkred}{rgb}{0.6,0.0,0}
-\definecolor{lightbrown}{rgb}{1,0.9,0.8}
-\definecolor{brown}{rgb}{0.6,0.3,0.3}
-\definecolor{darkblue}{rgb}{0,0,0.8}
-\definecolor{darkmagenta}{rgb}{0.5,0,0.5}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\newcommand{\bld}[1]{\mbox{\boldmath $$#1$$}}
-\newcommand{\shell}[1]{\mbox{$$#1$$}}
-\renewcommand{\vec}[1]{\mbox{\bf {#1}}}
-
-\newcommand{\ReallySmallSpacing}{\renewcommand{\baselinestretch}{.6}\Large\normalsize}
-\newcommand{\SmallSpacing}{\renewcommand{\baselinestretch}{1.1}\Large\normalsize}
-
-\newcommand{\halfs}{\frac{1}{2}}
-
-\setlength{\oddsidemargin}{-.25 truein}
-\setlength{\evensidemargin}{0truein}
-\setlength{\topmargin}{-0.2truein}
-\setlength{\textwidth}{7 truein}
-\setlength{\textheight}{8.5 truein}
-\setlength{\parindent}{0.20truein}
-\setlength{\parskip}{0.10truein}
-
-\setcounter{LTchunksize}{50}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\pagestyle{fancy}
-\lhead{}
-%% OPTION Report header name
-\chead{Classification Model Script}
-\rhead{}
-\lfoot{}
-\cfoot{}
-\rfoot{\thepage\ of \pageref{LastPage}}
-\renewcommand{\headrulewidth}{1pt}
-\renewcommand{\footrulewidth}{1pt}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-%% OPTION Report title and modeler name
-\title{Classification Model Script using $METHOD}
-\author{"Lynn Group with M. Kuhn, SCIS, JNU, New Delhi"} 
-
-\begin{document}
-
-\maketitle
-
-\thispagestyle{empty}
-<<dummy, eval=TRUE, echo=FALSE, results=hide>>=
-# sets values for variables used later in the program to prevent the \Sexpr error on parsing with Sweave
-numSamples=''
-classDistString=''
-missingText=''
-numPredictors=''
-numPCAcomp=''
-pcaText=''
-nzvText=''
-corrText=''
-ppText=''
-varText=''
-splitText="Dummy Text"
-nirText="Dummy Text"
-# pctTrain is a variable that is initialised in Data splitting, and reused later in testPred
-pctTrain=0.8
-Smpling=''
-nzvText1=''
-classDistString1=''
-dwnsmpl=''
-upsmpl=''
-
-@	
-<<startup, eval= TRUE, results = hide, echo = FALSE>>=
-library(Hmisc)
-library(caret)
-library(pROC)
-versionTest <- compareVersion(packageDescription("caret")$$Version, 
-                              "4.65")
-if(versionTest < 0) stop("caret version 4.65 or later is required")
-
-library(RColorBrewer)
-
-
-listString <- function (x, period = FALSE, verbose = FALSE) 
-{
-  if (verbose)   cat("\n      entering listString\n")
-  flush.console()
-  if (!is.character(x)) 
-    x <- as.character(x)
-  numElements <- length(x)
-  out <- if (length(x) > 0) {
-    switch(min(numElements, 3), x, paste(x, collapse = " and "), 
-           {
-             x <- paste(x, c(rep(",", numElements - 2), " and", ""), sep = "")
-             paste(x, collapse = " ")
-           })
-  }
-  else ""
-  if (period)  out <- paste(out, ".", sep = "")
-  if (verbose)  cat("      leaving  listString\n\n")
-  flush.console()
-  out
-}
-
-resampleStats <- function(x, digits = 3)
-  {
-    bestPerf <- x$$bestTune
-    colnames(bestPerf) <- gsub("^\\.", "", colnames(bestPerf))
-    out <- merge(x$$results, bestPerf)
-    out <- out[, colnames(out) %in% x$$perfNames]
-    names(out) <- gsub("ROC", "area under the ROC curve", names(out), fixed = TRUE)
-    names(out) <- gsub("Sens", "sensitivity", names(out), fixed = TRUE)
-    names(out) <- gsub("Spec", "specificity", names(out), fixed = TRUE)
-    names(out) <- gsub("Accuracy", "overall accuracy", names(out), fixed = TRUE)
-    names(out) <- gsub("Kappa", "Kappa statistics", names(out), fixed = TRUE)
-    
-    out <- format(out, digits = digits)
-    listString(paste(names(out), "was", out))
-  }
-
-twoClassNoProbs <- function (data, lev = NULL, model = NULL) 
-{
-  out <- c(sensitivity(data[, "pred"], data[, "obs"], lev[1]), 
-           specificity(data[, "pred"], data[, "obs"], lev[2]),
-           confusionMatrix(data[, "pred"], data[, "obs"])$$overall["Kappa"])
-  
-  names(out) <- c("Sens", "Spec", "Kappa")
-  out
-}
-
-
-
-##OPTION: model name: see ?train for more values/models
-modName <- "$METHOD"
-
-
-load("$RDATA")
-rawData <- dataX
-rawData$$outcome <- dataY
-
-@ 
-
-
-\section*{Data Sets}\label{S:data}
-
-%% OPTION: provide some background on the problem, the experimental
-%% data, how the compounds were selected etc
-
-<<getDataInfo, eval = $GETDATAINFOEVAL, echo = $GETDATAINFOECHO, results = $GETDATAINFORESULT>>=
-if(!any(names(rawData) == "outcome")) stop("a variable called outcome should be in the data set")
-if(!is.factor(rawData$$outcome)) stop("the outcome should be a factor vector")
-
-## OPTION: when there are only two classes, the first level of the 
-##         factor is used as the "positive" or "event" for calculating
-##         sensitivity and specificity. Adjust the outcome factor accordingly.
-numClasses <- length(levels(rawData$$outcome))
-numSamples <- nrow(rawData)
-numPredictors <- ncol(rawData) - 1
-predictorNames <- names(rawData)[names(rawData) != "outcome"]
-
-isNum <- apply(rawData[,predictorNames, drop = FALSE], 2, is.numeric)
-if(any(!isNum)) stop("all predictors in rawData should be numeric")
-
-classTextCheck <- all.equal(levels(rawData$$outcome), make.names(levels(rawData$$outcome)))
-if(!classTextCheck) warning("the class levels are not valid R variable names; this may cause errors")
-
-## Get the class distribution
-classDist <- table(rawData$$outcome)
-classDistString <- paste("``",
-                         names(classDist),
-                         "'' ($$n$$=",
-                         classDist,
-                         ")",
-                         sep = "")
-classDistString <- listString(classDistString)
-@ 
-
-<<missingFilter, eval = $MISSINGFILTEREVAL, echo = $MISSINGFILTERECHO, results = $MISSINGFILTERRESULT>>=
-colRate <- apply(rawData[, predictorNames, drop = FALSE],
-                 2, function(x) mean(is.na(x)))
-
-##OPTION thresholds can be changed
-colExclude <- colRate > $MISSINGFILTERTHRESHC
-
-missingText <- ""
-
-if(any(colExclude))
-  {
-    missingText <- paste(missingText,
-                         ifelse(sum(colExclude) > 1,
-                                " There were ",
-                                " There was "),
-                         sum(colExclude),
-                         ifelse(sum(colExclude) > 1,
-                                " predictors ",
-                                " predictor "),
-                         "with an excessive number of ",
-                         "missing data. ",
-                         ifelse(sum(colExclude) > 1,
-                                " These were excluded. ",
-                                " This was excluded. "))
-    predictorNames <- predictorNames[!colExclude]
-    rawData <- rawData[, names(rawData) %in% c("outcome", predictorNames), drop = FALSE]
-  }
-
-
-rowRate <- apply(rawData[, predictorNames, drop = FALSE],
-                 1, function(x) mean(is.na(x)))
-
-rowExclude <- rowRate > $MISSINGFILTERTHRESHR
-
-
-if(any(rowExclude)) {
-    missingText <- paste(missingText,
-                         ifelse(sum(rowExclude) > 1,
-                                " There were ",
-                                " There was "),
-                         sum(colExclude),
-                         ifelse(sum(rowExclude) > 1,
-                                " samples ",
-                                " sample "),
-                         "with an excessive number of ",
-                         "missing data. ",
-                         ifelse(sum(rowExclude) > 1,
-                                " These were excluded. ",
-                                " This was excluded. "),
-                         "After filtering, ",
-                         sum(!rowExclude),
-                         " samples remained.")
-    rawData <- rawData[!rowExclude, ]
-    hasMissing <- apply(rawData[, predictorNames, drop = FALSE],
-                        1, function(x) mean(is.na(x)))
-  } else {
-        hasMissing <- apply(rawData[, predictorNames, drop = FALSE],
-                        1, function(x) any(is.na(x)))
-        missingText <- paste(missingText,
-                             ifelse(missingText == "",
-                                "There ",
-                                "Subsequently, there "),
-                             ifelse(sum(hasMissing) == 1,
-                                    "was ",
-                                    "were "),
-                             ifelse(sum(hasMissing) > 0, 
-                                    sum(hasMissing), 
-                                    "no"),
-                             ifelse(sum(hasMissing) == 1,
-                                    "sample ",
-                                    "samples "),
-                             "with missing values.")                            
-    
-  rawData <- rawData[complete.cases(rawData),]
-
-  }
-
-rawData1 <- rawData[,1:length(rawData)-1]
-rawData2 <- rawData[,length(rawData)]
-
-set.seed(222)
-nzv1 <- nearZeroVar(rawData1)
-  if(length(nzv1) > 0)
-  {
-    nzvVars1 <- names(rawData1)[nzv1]
-    rawData <- rawData1[, -nzv1]
-    rawData$outcome <- rawData2
-    nzvText1 <- paste("There were ",
-                     length(nzv1),
-                     " predictors that were removed from original data due to",
-                     " severely unbalanced distributions that",
-                     " could negatively affect the model fit",
-                     ifelse(length(nzv1) > 10,
-                            ".",
-                            paste(": ",
-                                  listString(nzvVars1),
-                                  ".",
-                                  sep = "")),
-                     sep = "")
-
- } else {
-rawData <- rawData1
-rawData$outcome <- rawData2
-nzvText1 <- ""
-
-}
-
-remove("rawData1")
-remove("rawData2")
-
-@ 
-
-  The initial data set consisted of \Sexpr{numSamples} samples and
-\Sexpr{numPredictors} predictor variables. The breakdown of the
-outcome data classes were: \Sexpr{classDistString}. 
-
- \Sexpr{missingText}
- 
- \Sexpr{nzvText1}
-
-<<pca, eval= $PCAEVAL, echo = $PCAECHO, results = $PCARESULT>>=
-
-predictorNames <- names(rawData)[names(rawData) != "outcome"]
-numPredictors <- length(predictorNames)
-predictors <- rawData[, predictorNames, drop = FALSE]
-## PCA will fail with predictors having less than 2 unique values
-isZeroVar <- apply(predictors, 2, 
-                   function(x) length(unique(x)) < 2)
-if(any(isZeroVar)) predictors <- predictors[, !isZeroVar, drop = FALSE]
-## For whatever, only the formula interface to prcomp 
-## handles missing values
-pcaForm <- as.formula(
-                      paste("~",
-                            paste(names(predictors), collapse = "+")))
-pca <- prcomp(pcaForm, 
-              data = predictors,
-              center = TRUE, 
-              scale. = TRUE,
-              na.action = na.omit)
-## OPTION: the number of components plotted/discussed can be set
-numPCAcomp <- $PCACOMP
-pctVar <- pca$$sdev^2/sum(pca$$sdev^2)*100
-pcaText <- paste(round(pctVar[1:numPCAcomp], 1),
-                 "\\\\%", 
-                 sep = "")
-pcaText <- listString(pcaText)
-@
-
- To get an initial assessment of the separability of the classes,
- principal component analysis (PCA) was used to distill the
- \Sexpr{numPredictors} predictors down into \Sexpr{numPCAcomp}
- surrogate variables (i.e. the principal components) in a manner that
- attempts to maximize the amount of information preserved from the
- original predictor set. Figure \ref{F:inititalPCA} contains plots of
- the first \Sexpr{numPCAcomp} components, which accounted for
- \Sexpr{pcaText} percent of the variability in the original predictors
- (respectively).  
-
-
-%% OPTION: remark on how well (or poorly) the data separated
-
- \setkeys{Gin}{width = 0.8\textwidth}
- \begin{figure}[p]
- \begin{center}
-
-<<pcaPlot, eval = $PCAPLOTEVAL, echo = $PCAPLOTECHO, results = $PCAPLOTRESULT, fig = $PCAPLOTFIG, width = 8, height = 8>>=
-trellis.par.set(caretTheme(), warn = TRUE)
-if(numPCAcomp == 2)
-  {
-    axisRange <- extendrange(pca$$x[, 1:2])
-    print(
-          xyplot(PC1 ~ PC2, 
-                 data = as.data.frame(pca$$x),
-                 type = c("p", "g"),
-                 groups = rawData$$outcome,
-                 auto.key = list(columns = 2),
-                 xlim = axisRange,
-                 ylim = axisRange))
-  } else {
-    axisRange <- extendrange(pca$$x[, 1:numPCAcomp])
-    print(
-          splom(~as.data.frame(pca$$x)[, 1:numPCAcomp],
-                type = c("p", "g"),
-                groups = rawData$$outcome,
-                auto.key = list(columns = 2),
-                as.table = TRUE,
-                prepanel.limits = function(x) axisRange
-                ))      
-    
-      } 
-
-@
- 
-   \caption[PCA Plot]{A plot of the first \Sexpr{numPCAcomp}
-   principal components for the original data set.}
-   \label{F:inititalPCA}         
- \end{center}
- \end{figure}  
-
-
-
-<<initialDataSplit, eval = $INITIALDATASPLITEVAL, echo = $INITIALDATASPLITECHO, results = $INITIALDATASPLITRESULT>>=
-
-  ## OPTION: in small samples sizes, you may not want to set aside a
-  ## training set and focus on the resampling results.   
-
-set.seed(1234)
-dataX <- rawData[,1:length(rawData)-1]
-dataY <- rawData[,length(rawData)]
-
- Smpling <- "$SAAMPLING"
-
-if(Smpling=="downsampling")
-{
-dwnsmpl <- downSample(dataX,dataY)
-rawData <- dwnsmpl[,1:length(dwnsmpl)-1]
-rawData$outcome <- dwnsmpl[,length(dwnsmpl)]
-remove("dwnsmpl")
-remove("dataX")
-remove("dataY")
-}else if(Smpling=="upsampling"){
-upsmpl <- upSample(dataX,dataY)
-rawData <- upsmpl[,1:length(upsmpl)-1]
-rawData$outcome <- upsmpl[,length(upsmpl)]
-remove("upsmpl")
-remove("dataX")
-remove("dataY")
-}else{remove("dataX")
-remove("dataY")
-}
-
-
-
-numSamples <- nrow(rawData)
-
-predictorNames <- names(rawData)[names(rawData) != "outcome"]
-numPredictors <- length(predictorNames)
-
-
-classDist1 <- table(rawData$outcome)
-classDistString1 <- paste("``",
-                         names(classDist1),
-                         "'' ($n$=",
-                         classDist1,
-                         ")",
-                         sep = "")
-classDistString1 <- listString(classDistString1)
-
-  pctTrain <- $PERCENT
-
-if(pctTrain < 1)
-  {
-    ## OPTION: seed number can be changed
-    set.seed(1)
-    inTrain <- createDataPartition(rawData$$outcome,
-                                   p = pctTrain,
-                                   list = FALSE)
-    trainX <- rawData[ inTrain, predictorNames]
-    testX  <- rawData[-inTrain, predictorNames]
-    trainY <- rawData[ inTrain, "outcome"]
-    testY  <- rawData[-inTrain, "outcome"]
-    splitText <- paste("The original data were split into ",
-                       "a training set ($$n$$=",
-                       nrow(trainX),
-                       ") and a test set ($$n$$=",
-                       nrow(testX),
-                       ") in a manner that preserved the ",
-                       "distribution of the classes.",
-                       sep = "")
-    isZeroVar <- apply(trainX, 2, 
-                       function(x) length(unique(x)) < 2)
-    if(any(isZeroVar))
-      {
-        trainX <- trainX[, !isZeroVar, drop = FALSE]  
-        testX <- testX[, !isZeroVar, drop = FALSE]
-      }
-    
-  } else {
-    trainX <- rawData[, predictorNames]
-    testX  <- NULL
-    trainY <- rawData[, "outcome"]
-    testY  <- NULL 
-    splitText <- "The entire data set was used as the training set."
-  }
-trainDist <- table(trainY)
-nir <- max(trainDist)/length(trainY)*100
-niClass <- names(trainDist)[which.max(trainDist)]
-nirText <- paste("The non--information rate is the accuracy that can be ",
-                 "achieved by predicting all samples using the most ",
-                 "dominant class. For these data, the rate is ",
-                 round(nir, 2), "\\\\% using the ``",
-                 niClass,
-                 "'' class.",
-                 sep = "")
-
-remove("rawData")
-
-if((!is.null(testX)) && (!is.null(testY))){
-save(trainX,trainY,testX,testY,file="datasets.RData")
-} else {
-save(trainX,trainY,file="datasets.RData")
-}
-
-@ 
-
- \Sexpr{splitText} 
-
- \Sexpr{nirText}
-
-The data set for model building consisted of \Sexpr{numSamples} samples and
-\Sexpr{numPredictors} predictor variables. The breakdown of the
-outcome data classes were: \Sexpr{classDistString1}.
-
-<<nzv, eval= $NZVEVAL, results = $NZVRESULT, echo = $NZVECHO>>=
-## OPTION: other pre-processing steps can be used
-ppSteps <- caret:::suggestions(modName)
-
-set.seed(2)
-if(ppSteps["nzv"])
-  {
-    nzv <- nearZeroVar(trainX)
-    if(length(nzv) > 0) 
-      {
-        nzvVars <- names(trainX)[nzv]
-        trainX <- trainX[, -nzv]
-        nzvText <- paste("There were ",
-                         length(nzv),
-                         " predictors that were removed from train set due to",
-                         " severely unbalanced distributions that",
-                         " could negatively affect the model",
-                         ifelse(length(nzv) > 10, 
-                                ".",
-                                paste(": ",
-                                      listString(nzvVars),
-                                      ".",
-                                      sep = "")),
-                         sep = "") 
-        testX <- testX[, -nzv]
-      } else nzvText <- ""
-  } else nzvText <- ""
-@ 
-
-\Sexpr{nzvText}
-
-
-<<corrFilter, eval = $CORRFILTEREVAL, results = $CORRFILTERRESULT, echo = $CORRFILTERECHO>>=
-if(ppSteps["corr"])
-  {
-    ## OPTION: 
-    corrThresh <- $THRESHHOLDCOR
-    highCorr <- findCorrelation(cor(trainX, use = "pairwise.complete.obs"), 
-                                corrThresh)
-    if(length(highCorr) > 0) 
-      {
-        corrVars <- names(trainX)[highCorr]
-        trainX <- trainX[, -highCorr]
-        corrText <- paste("There were ",
-                         length(highCorr),
-                         " predictors that were removed due to",
-                         " large between--predictor correlations that",
-                         " could negatively affect the model fit",
-                         ifelse(length(highCorr) > 10, 
-                                ".",
-                                paste(": ",
-                                      listString(highCorr),
-                                      ".",
-                                      sep = "")),
-                          " Removing these predictors forced",
-                          " all pair--wise correlations to be",
-                          " less than ",
-                          corrThresh,
-                          ".",
-                          sep = "") 
-        testX <- testX[, -highCorr]
-      } else corrText <- "No correlation among data on given threshold"
-  }else corrText <- ""
-@
-
- \Sexpr{corrText}
-
-<<preProc, eval = $PREPROCEVAL, echo = $PREPROCECHO, results = $PREPROCRESULT>>=
-ppMethods <- NULL
-if(ppSteps["center"]) ppMethods <- c(ppMethods, "center")
-if(ppSteps["scale"]) ppMethods <- c(ppMethods, "scale")
-if(any(hasMissing) > 0) ppMethods <- c(ppMethods, "knnImpute")
-##OPTION other methods, such as spatial sign, can be added to this list
-
-if(length(ppMethods) > 0)
-  {
-    ppInfo <- preProcess(trainX, method = ppMethods)
-    trainX <- predict(ppInfo, trainX)
-    if(pctTrain < 1) testX <- predict(ppInfo, testX)   
-    ppText <- paste("The following pre--processing methods were",
-                    " applied to the training",
-                    ifelse(pctTrain < 1, " and test", ""),
-                    " data: ",
-                    listString(ppMethods),
-                    ".",
-                    sep = "")
-    ppText <- gsub("center", "mean centering", ppText)
-    ppText <- gsub("scale", "scaling to unit variance", ppText)
-    ppText <- gsub("knnImpute", 
-                   paste(ppInfo$$k, "--nearest neighbor imputation", sep = ""), 
-                   ppText)
-    ppText <- gsub("spatialSign", "the spatial sign transformation", ppText)
-    ppText <- gsub("pca", "principal component feature extraction", ppText)
-    ppText <- gsub("ica", "independent component feature extraction", ppText)
-    } else {
-      ppInfo <- NULL
-      ppText <- ""
-    }
-
-predictorNames <- names(trainX)
-if(nzvText != "" | corrText != "" | ppText != "")
-  {
-    varText <- paste("After pre--processing, ",
-                     ncol(trainX),
-                     "predictors remained for modeling.")
-  } else varText <- ""
-  
-@ 
-
- \Sexpr{ppText} 
- \Sexpr{varText}
-
-\clearpage
-
-\section*{Model Building}
-
-<<setupWorkers, eval = TRUE, echo = $SETUPWORKERSECHO, results = $SETUPWORKERSRESULT>>=
-numWorkers <- $NUMWORKERS
-##OPTION: turn up numWorkers to use MPI
-if(numWorkers > 1)
-  {
-    mpiCalcs <- function(X, FUN, ...)
-      {
-        theDots <- list(...)
-        parLapply(theDots$$cl, X, FUN)
-      }
-
-    library(snow)
-    cl <- makeCluster(numWorkers, "MPI")
-  }
-@ 
-
-<<setupResampling, echo = $SETUPRESAMPLINGECHO, results = $SETUPRESAMPLINGRESULT>>=
-##OPTION: the resampling options can be changed. See
-##        ?trainControl for details
-
-resampName <- "$RESAMPNAME" 
-resampNumber <- $RESAMPLENUMBER
-numRepeat <- $NUMREPEAT
-resampP <- $RESAMPLENUMBERPERCENT
-
-modelInfo <- modelLookup(modName)
-
-if(numClasses == 2)
-  {
-    foo <- if(any(modelInfo$$probModel)) twoClassSummary else twoClassNoProbs
-  } else foo <- defaultSummary
-   
-set.seed(3)
-ctlObj <- trainControl(method = resampName,
-                       number = resampNumber,
-                       repeats = numRepeat,
-                       p = resampP,
-                       classProbs = any(modelInfo$$probModel),
-                       summaryFunction = foo)
-
-
-##OPTION select other performance metrics as needed
-optMetric <- if(numClasses == 2 & any(modelInfo$$probModel)) "ROC" else "Kappa"
-
-if(numWorkers > 1)
-  {
-    ctlObj$$workers <- numWorkers
-    ctlObj$$computeFunction <- mpiCalcs
-    ctlObj$$computeArgs <- list(cl = cl)
-  }
-@ 
-
-<<setupGrid, results = $SETUPGRIDRESULT, echo = $SETUPGRIDECHO>>=
-##OPTION expand or contract these grids as needed (or
-##       add more models
-
-gridSize <- $SETUPGRIDSIZE
-
-if(modName %in% c("svmPoly", "svmRadial", "svmLinear", "lvq", "ctree2", "ctree")) gridSize <- 5
-if(modName %in% c("earth", "fda")) gridSize <- 7
-if(modName %in% c("knn", "rocc", "glmboost", "rf", "nodeHarvest")) gridSize <- 10
-
-if(modName %in% c("nb")) gridSize <- 2
-if(modName %in% c("pam", "rpart")) gridSize <- 15
-if(modName %in% c("pls")) gridSize <- min(20, ncol(trainX))
-
-if(modName == "gbm")
-  {
-    tGrid <- expand.grid(.interaction.depth = -1 + (1:5)*2 ,
-                         .n.trees = (1:10)*20,
-                         .shrinkage = .1)
-  }
-
-if(modName == "nnet")
-  {
-    tGrid <- expand.grid(.size = -1 + (1:5)*2 ,
-                         .decay = c(0, .001, .01, .1))
-  }
-
-if(modName == "ada")
- {
-  tGrid <- expand.grid(.maxdepth = 1, .iter = c(100,200,300,400), .nu = 1 )
-
- }
-
-
-@ 
-
-<<fitModel, results = $FITMODELRESULT, echo = $FITMODELECHO, eval = $FITMODELEVAL>>=
-##OPTION alter as needed
-
-set.seed(4)
-modelFit <- switch(modName,                  
-                   gbm = 
-                   {
-                     mix <- sample(seq(along = trainY))  
-                     train(
-                           trainX[mix,], trainY[mix], modName, 
-                           verbose = FALSE,
-                           bag.fraction = .9, 
-                           metric = optMetric,
-                           trControl = ctlObj, 
-                           tuneGrid = tGrid)
-                   },
-                   
-                   multinom =
-                   {
-                     train(
-                           trainX, trainY, modName, 
-                           trace = FALSE, 
-                           metric = optMetric,
-                           maxiter = 1000, 
-                           MaxNWts = 5000,
-                           trControl = ctlObj, 
-                           tuneLength = gridSize)   
-                   },
-                   
-                   nnet =
-                   {
-                     train(
-                           trainX, trainY, modName, 
-                           metric = optMetric,
-                           linout = FALSE,
-                           trace = FALSE, 
-                           maxiter = 1000, 
-                           MaxNWts = 5000,
-                           trControl = ctlObj, 
-                           tuneGrid = tGrid)  
-                     
-                   }, 
-                   
-                   svmRadial =, svmPoly =, svmLinear = 
-                   {
-                     train(
-                           trainX, trainY, modName,
-                           metric = optMetric,
-                           scaled = TRUE,
-                           trControl = ctlObj, 
-                           tuneLength = gridSize)    
-                   },
-                   {
-                     train(trainX, trainY, modName, 
-                           trControl = ctlObj, 
-                           metric = optMetric,
-                           tuneLength = gridSize)
-                   })
-
-@ 
-
-<<modelDescr, echo = $MODELDESCRECHO, results = $MODELDESCRRESULT>>=
-summaryText <- ""
-
-resampleName <- switch(tolower(modelFit$$control$$method),
-                       boot = paste("the bootstrap (", length(modelFit$$control$$index), " reps)", sep = ""),
-                       boot632 = paste("the bootstrap 632 rule (", length(modelFit$$control$$index), " reps)", sep = ""),
-                       cv = paste("cross-validation (", modelFit$$control$$number, " fold)", sep = ""),
-                       repeatedcv = paste("cross-validation (", modelFit$$control$$number, " fold, repeated ",
-                         modelFit$$control$$repeats, " times)", sep = ""),
-                       lgocv = paste("repeated train/test splits (", length(modelFit$$control$$index), " reps, ",
-                         round(modelFit$$control$$p, 2), "$$\\%$$)", sep = ""))
-
-tuneVars <- latexTranslate(tolower(modelInfo$$label))
-tuneVars <- gsub("\\#", "the number of ", tuneVars, fixed = TRUE)
-if(ncol(modelFit$$bestTune) == 1 && colnames(modelFit$$bestTune) == ".parameter")
-  {
-    summaryText <- paste(summaryText,
-                         "\n\n",
-                         "There are no tuning parameters associated with this model.",
-                         "To characterize the model performance on the training set,",
-                         resampleName,
-                         "was used.",
-                         "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile}",
-                         "show summaries of the resampling results. ")
-
-  } else {
-    summaryText <- paste("There",
-                         ifelse(nrow(modelInfo) > 1, "are", "is"),
-                         nrow(modelInfo),
-                         ifelse(nrow(modelInfo) > 1, "tuning parameters", "tuning parameter"),
-                         "associated with this model:",
-                         listString(tuneVars, period = TRUE))
-
-
-
-    paramNames <- gsub(".", "", names(modelFit$$bestTune), fixed = TRUE)
-    for(i in seq(along = paramNames))
-      {
-        check <- modelInfo$$parameter %in% paramNames[i]
-        if(any(check))
-          {
-            paramNames[i] <- modelInfo$$label[which(check)]          
-          }
-      }
-
-    paramNames <- gsub("#", "the number of ", paramNames, fixed = TRUE)
-    ## Check to see if there was only one combination fit
-    summaryText <- paste(summaryText,
-                         "To choose",
-                         ifelse(nrow(modelInfo) > 1,
-                                "appropriate values of the tuning parameters,",
-                                "an appropriate value of the tuning parameter,"),
-                         resampleName,
-                         "was used to generated a profile of performance across the",
-                         nrow(modelFit$$results),
-                         ifelse(nrow(modelInfo) > 1,
-                                "combinations of the tuning parameters.",
-                                "candidate values."),
-                         
-                         "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile} show",
-                         "summaries of the resampling profile. ",                                                                                         "The final model fitted to the entire training set was:",
-                         listString(paste(latexTranslate(tolower(paramNames)), "=", modelFit$$bestTune[1,]), period = TRUE))
-
-  }
-@ 
-
-\Sexpr{summaryText}
-
-<<resampTable, echo = $RESAMPTABLEECHO, results = $RESAMPTABLERESULT>>=
-tableData <- modelFit$$results
-
-if(all(modelInfo$$parameter == "parameter") && resampName == "boot632")
-    {
-    tableData <- tableData[,-1, drop = FALSE]
-    colNums <- c( length(modelFit$$perfNames), length(modelFit$$perfNames), length(modelFit$$perfNames))
-    colLabels <- c("Mean", "Standard Deviation","Apparant")
-    constString <- ""
-    isConst <- NULL
-    } else if (all(modelInfo$$parameter == "parameter") && (resampName == "boot" | resampName == "cv" | resampName == "repeatedcv" )){
-    tableData <- tableData[,-1, drop = FALSE]
-    colNums <- c(length(modelFit$$perfNames), length(modelFit$$perfNames))
-    colLabels <- c("Mean", "Standard Deviation")
-    constString <- ""
-    isConst <- NULL
-    } else if (all(modelInfo$$parameter == "parameter") && resampName == "LOOCV" ){
-   tableData <- tableData[,-1, drop = FALSE]
-   colNums <- length(modelFit$$perfNames)
-   colLabels <- c("Measures")
-   constString <- ""
-   isConst <- NULL
-} else  {
- if (all(modelInfo$$parameter != "parameter") && resampName == "boot632" ){
- isConst <- apply(tableData[, modelInfo$$parameter, drop = FALSE],
-                     2, 
-                     function(x) length(unique(x)) == 1)
-
-    numParamInTable <- sum(!isConst)
-
-    if(any(isConst))
-      {
-        constParam <- modelInfo$$parameter[isConst]
-        constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
-        tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
-        constString <- paste("The tuning",
-                             ifelse(sum(isConst) > 1,
-                                    "parmeters",
-                                    "parameter"),
-                             listString(paste("``", names(constValues), "''", sep = "")),
-                             ifelse(sum(isConst) > 1,
-                                    "were",
-                                    "was"),
-                             "held constant at",
-                             ifelse(sum(isConst) > 1,
-                                    "a value of",
-                                    "values of"),
-                             listString(constValues[1,]))
-        
-      } else constString <- ""
-
-    cn <- colnames(tableData)
-    for(i in seq(along = cn))
-      {
-        check <- modelInfo$$parameter %in% cn[i]
-        if(any(check))
-          {
-            cn[i] <- modelInfo$$label[which(check)]          
-          }
-      }
-    colnames(tableData) <- cn
-
-    colNums <- c(numParamInTable, 
-                 length(modelFit$$perfNames),
-                 length(modelFit$$perfNames),
-                 length(modelFit$$perfNames))
-    colLabels <- c("", "Mean", "Standard Deviation", "Apparant")
-
-}else if (all(modelInfo$$parameter != "parameter") && (resampName == "boot" | resampName == "repeatedcv" | resampName == "cv") ){
- isConst <- apply(tableData[, modelInfo$$parameter, drop = FALSE],
-                     2,
-                     function(x) length(unique(x)) == 1)
-
-    numParamInTable <- sum(!isConst)
-
-    if(any(isConst))
-      {
-        constParam <- modelInfo$$parameter[isConst]
-        constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
-        tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
-        constString <- paste("The tuning",
-                             ifelse(sum(isConst) > 1,
-                                    "parmeters",
-                                    "parameter"),
-                             listString(paste("``", names(constValues), "''", sep = "")),
-                             ifelse(sum(isConst) > 1,
-                                    "were",
-                                    "was"),
-                             "held constant at",
-                             ifelse(sum(isConst) > 1,
-                                    "a value of",
-                                    "values of"),
-                             listString(constValues[1,]))
-
-      } else constString <- ""
-
-    cn <- colnames(tableData)
-    for(i in seq(along = cn))
-      {
-        check <- modelInfo$$parameter %in% cn[i]
-        if(any(check))
-          {
-            cn[i] <- modelInfo$$label[which(check)]
-          }
-      }
-    colnames(tableData) <- cn
-
-    colNums <- c(numParamInTable,
-                 length(modelFit$$perfNames),
-                  length(modelFit$$perfNames))
-    colLabels <- c("", "Mean", "Standard Deviation")
-
-}
-else if (all(modelInfo$$parameter != "parameter") && resampName == "LOOCV"){
- isConst <- apply(tableData[, modelInfo$$parameter, drop = FALSE],
-                     2,
-                     function(x) length(unique(x)) == 1)
-
-    numParamInTable <- sum(!isConst)
-
-    if(any(isConst))
-      {
-        constParam <- modelInfo$$parameter[isConst]
-        constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
-        tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
-        constString <- paste("The tuning",
-                             ifelse(sum(isConst) > 1,
-                                    "parmeters",
-                                    "parameter"),
-                             listString(paste("``", names(constValues), "''", sep = "")),
-                             ifelse(sum(isConst) > 1,
-                                    "were",
-                                    "was"),
-                             "held constant at",
-                             ifelse(sum(isConst) > 1,
-                                    "a value of",
-                                    "values of"),
-                             listString(constValues[1,]))
-
-      } else constString <- ""
-
-    cn <- colnames(tableData)
-    for(i in seq(along = cn))
-      {
-        check <- modelInfo$$parameter %in% cn[i]
-        if(any(check))
-          {
-            cn[i] <- modelInfo$$label[which(check)]
-          }
-      }
-    colnames(tableData) <- cn
-
-    colNums <- c(numParamInTable,
-                  length(modelFit$$perfNames))
-    colLabels <- c("", "Measures")
-
-}
-
-}
-
-
-
-colnames(tableData) <- gsub("SD$$", "", colnames(tableData))
-colnames(tableData) <- gsub("Apparent$$", "", colnames(tableData))
-colnames(tableData) <- latexTranslate(colnames(tableData))
-rownames(tableData) <- latexTranslate(rownames(tableData))
-
-latex(tableData,
-      rowname = NULL,
-      file = "",
-      cgroup = colLabels,
-      n.cgroup = colNums,
-      where = "h!",
-      digits = 4,
-      longtable = nrow(tableData) > 30,
-      caption = paste(resampleName, "results from the model fit.", constString),
-      label = "T:resamps")
-@ 
-
-  \setkeys{Gin}{ width = 0.9\textwidth}
-  \begin{figure}[b]
-  \begin{center}
-
-<<profilePlot, echo = $PROFILEPLOTECHO, fig = $PROFILEPLOTFIG, width = 8, height = 6>>=
-  trellis.par.set(caretTheme(), warn = TRUE)
-if(all(modelInfo$$parameter == "parameter") | all(isConst) | modName == "nb")
-  {
-    resultsPlot <- resampleHist(modelFit)
-    plotCaption <- paste("Distributions of model performance from the ",
-                         "training set estimated using ",
-                         resampleName)
-  } else {
-    if(modName %in% c("svmPoly", "svmRadial", "svmLinear"))
-      {
-        resultsPlot <- plot(modelFit, 
-                            metric = optMetric,                          
-                            xTrans = function(x) log10(x))
-        resultsPlot <- update(resultsPlot,
-                              type = c("g", "p", "l"),
-                              ylab = paste(optMetric, " (", resampleName, ")", sep = ""))
-
-      } else {
-        resultsPlot <- plot(modelFit,                         
-                            metric = optMetric) 
-        resultsPlot <- update(resultsPlot,
-                              type = c("g", "p", "l"),
-                              ylab = paste(optMetric, " (", resampleName, ")", sep = ""))     
-      }  
-   plotCaption <- paste("A plot of the estimates of the",
-                        optMetric,
-                        "values calculated using",
-                        resampleName)
-  }
-print(resultsPlot)
-@ 
-   \caption[Performance Plot]{\Sexpr{plotCaption}.}
-    \label{F:profile}         
-  \end{center}
- \end{figure}  
-
-
-<<stopWorkers, echo = $STOPWORKERSECHO, results = $STOPWORKERSRESULT>>=
-if(numWorkers > 1) stopCluster(cl)
-@ 
-
-<<testPred, results = $TESTPREDRESULT, echo = $TESTPREDECHO>>=
-  if(pctTrain < 1) 
-  {
-    cat("\\clearpage\n\\section*{Test Set Results}\n\n")
-    classPred <- predict(modelFit, testX)
-    cm <- confusionMatrix(classPred, testY)
-    values <- cm$$overall[c("Accuracy", "Kappa", "AccuracyPValue", "McnemarPValue")]
-    
-    values <- values[!is.na(values) & !is.nan(values)]
-    values <- c(format(values[1:2], digits = 3),
-                format.pval(values[-(1:2)], digits = 5))
-    nms <- c("the overall accuracy", "the Kappa statistic", 
-                       "the $$p$$--value that accuracy is greater than the no--information rate",
-                       "the $$p$$--value of concordance from McNemar's test")
-    nms <- nms[seq(along = values)]
-    names(values) <- nms
-    
-    if(any(modelInfo$$probModel))
-      {
-        classProbs <- extractProb(list(fit = modelFit), 
-                                  testX = testX,
-                                  testY = testY)
-        classProbs <- subset(classProbs, dataType == "Test")  
-        if(numClasses == 2)
-          {
-            tmp <- twoClassSummary(classProbs, lev = levels(classProbs$$obs))
-            tmp <- c(format(tmp, digits = 3))
-            names(tmp) <- c("the sensitivity", "the specificity",
-                            "the area under the ROC curve")
-            values <- c(values, tmp)
-            
-          }
-        probPlot <- plotClassProbs(classProbs)
-      }
-    testString <- paste("Based on the test set of",
-                        nrow(testX),
-                        "samples,",
-                        listString(paste(names(values), "was", values), period = TRUE),
-                        "The confusion matrix for the test set is shown in Table",
-                        "\\\\ref{T:cm}.")
-    testString <- paste(testString,
-                        " Using ", resampleName,
-                        ", the training set estimates were ",
-                        resampleStats(modelFit),
-                        ".", 
-                        sep = "")
-    
-    if(any(modelInfo$$probModel)) testString <- paste(testString,
-                                                     "Histograms of the class probabilities",
-                                                     "for the test set samples are shown in",
-                                                     "Figure \\\\ref{F:probs}",
-                                                     ifelse(numClasses == 2,
-                                                            " and the test set ROC curve is in Figure \\\\ref{F:roc}.",
-                                                            "."))
-    
-    
-    
-    latex(cm$$table,
-          title = "",
-          file = "",
-          where = "h",
-          cgroup = "Observed Values",
-          n.cgroup = numClasses,
-          caption = "The confusion matrix for the test set",
-          label = "T:cm")
-    
-  } else testString <- ""
-@ 
-\Sexpr{testString}
-
-
-<<classProbsTex, results = $CLASSPROBSTEXRESULT, echo = $CLASSPROBSTEXECHO>>=
- if(any(modelInfo$probModel) && pctTrain < 1 )  {
-    cat(
-        paste("\\begin{figure}[p]\n",
-              "\\begin{center}\n",
-              "\\includegraphics{classProbs}",
-              "\\caption[PCA Plot]{Class probabilities",
-              "for the test set. Each panel contains ",
-            "separate classes}\n",
-              "\\label{F:probs}\n",
-              "\\end{center}\n",
-              "\\end{figure}"))
-  }
-  if(any(modelInfo$$probModel) & numClasses == 2  & pctTrain < 1 )
-  {
-    cat(
-        paste("\\begin{figure}[p]\n",
-              "\\begin{center}\n",
-              "\\includegraphics[clip, width = .8\\textwidth]{roc}",
-              "\\caption[ROC Plot]{ROC Curve",
-              "for the test set.}\n",
-              "\\label{F:roc}\n",
-              "\\end{center}\n",
-              "\\end{figure}"))
-  } else {
-cat (paste(""))
-}
-
-@ 
-<<classProbsTex, results = $CLASSPROBSTEXRESULT1, echo = $CLASSPROBSTEXECHO1 >>=
- if(any(modelInfo$probModel) && pctTrain < 1)  {
-    pdf("classProbs.pdf", height = 7, width = 7)
-    trellis.par.set(caretTheme(), warn = FALSE)
-    print(probPlot)
-    dev.off()
-  }
- if(any(modelInfo$probModel) & numClasses == 2 & pctTrain < 1) { 
-    resPonse<-testY
-    preDictor<-classProbs[, levels(trainY)[1]]
-    pdf("roc.pdf", height = 8, width = 8)
-# from pROC example at http://web.expasy.org/pROC/screenshots.htm   
-    plot.roc(resPonse, preDictor, # data
-         percent=TRUE, # show all values in percent
-         partial.auc=c(100, 90), partial.auc.correct=TRUE, # define a partial AUC (pAUC)
-         print.auc=TRUE, #display pAUC value on the plot with following options:
-         print.auc.pattern="Corrected pAUC (100-90%% SP):\n%.1f%%", print.auc.col="#1c61b6",
-         auc.polygon=TRUE, auc.polygon.col="#1c61b6", # show pAUC as a polygon
-         max.auc.polygon=TRUE,     max.auc.polygon.col="#1c61b622", # also show the 100% polygon
-         main="Partial AUC (pAUC)")
-    plot.roc(resPonse, preDictor,
-         percent=TRUE, add=TRUE, type="n", # add to plot, but don't re-add the ROC itself (useless)
-         partial.auc=c(100, 90), partial.auc.correct=TRUE,
-         partial.auc.focus="se", # focus pAUC on the sensitivity
-         print.auc=TRUE, print.auc.pattern="Corrected pAUC (100-90%% SE):\n%.1f%%", print.auc.col="#008600",
-         print.auc.y=40, # do not print auc over the previous one
-         auc.polygon=TRUE, auc.polygon.col="#008600",
-         max.auc.polygon=TRUE, max.auc.polygon.col="#00860022")
-    dev.off()
-  } else {
-cat("")
-  }
-
-@ 
-
-\section*{Versions}
-
-<<versions, echo = FALSE, results = tex>>=
-toLatex(sessionInfo())
-
-@ 
-
-<<save-data, echo = $SAVEDATAECHO, results = $SAVEDATARESULT>>=
-## change this to the name of modName....
-Fit<-modelFit
-save(Fit,file="$METHOD-Fit.RData")
-@
-The model was built using $METHOD and is saved as $METHOD Model for reuse. This contains the variable Fit.
-
-\end{document}'''
-
-	return template4Rnw