annotate caret_future/tool2/TEST.Rnw @ 0:68300206e90d draft default tip

Uploaded
author deepakjadmin
date Thu, 05 Nov 2015 02:41:30 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1 %% Classification Modeling Script
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
2 %% Max Kuhn (max.kuhn@pfizer.com, mxkuhn@gmail.com)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
3 %% Version: 1.00
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
4 %% Created on: 2010/10/02
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
5 %%
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
6 %% This is an Sweave template for building and describing
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
7 %% classification models. It mixes R and LaTeX code. The document can
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
8 %% be processed using R's Sweave function to produce a tex file.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
9 %%
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
10 %% The inputs are:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
11 %% - the initial data set in a data frame called 'rawData'
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
12 %% - a factor column in the data set called 'class'. this should be the
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
13 %% outcome variable
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
14 %% - all other columns in rawData should be predictor variables
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
15 %% - the type of model should be in a variable called 'modName'.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
16 %%
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
17 %% The script attempts to make some intelligent choices based on the
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
18 %% model being used. For example, if modName is "pls", the script will
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
19 %% automatically center and scale the predictor data. There are
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
20 %% situations where these choices can (and should be) changed.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
21 %%
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
22 %% There are other options that may make sense to change. For example,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
23 %% the user may want to adjust the type of resampling. To find these
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
24 %% parts of the script, search on the string 'OPTION'. These parts of
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
25 %% the code will document the options.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
26
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
27 \documentclass[14pt]{report}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
28 \usepackage{amsmath}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
29 \usepackage[pdftex]{graphicx}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
30 \usepackage{color}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
31 \usepackage{ctable}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
32 \usepackage{xspace}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
33 \usepackage{fancyvrb}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
34 \usepackage{fancyhdr}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
35 \usepackage{lastpage}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
36 \usepackage{longtable}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
37 \usepackage{algorithm2e}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
38 \usepackage[
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
39 colorlinks=true,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
40 linkcolor=blue,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
41 citecolor=blue,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
42 urlcolor=blue]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
43 {hyperref}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
44 \usepackage{lscape}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
45 \usepackage{Sweave}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
46 \SweaveOpts{keep.source = TRUE}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
47
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
48 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
49
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
50 % define new colors for use
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
51 \definecolor{darkgreen}{rgb}{0,0.6,0}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
52 \definecolor{darkred}{rgb}{0.6,0.0,0}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
53 \definecolor{lightbrown}{rgb}{1,0.9,0.8}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
54 \definecolor{brown}{rgb}{0.6,0.3,0.3}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
55 \definecolor{darkblue}{rgb}{0,0,0.8}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
56 \definecolor{darkmagenta}{rgb}{0.5,0,0.5}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
57
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
58 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
59
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
60 \newcommand{\bld}[1]{\mbox{\boldmath $#1$}}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
61 \newcommand{\shell}[1]{\mbox{$#1$}}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
62 \renewcommand{\vec}[1]{\mbox{\bf {#1}}}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
63
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
64 \newcommand{\ReallySmallSpacing}{\renewcommand{\baselinestretch}{.6}\Large\normalsize}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
65 \newcommand{\SmallSpacing}{\renewcommand{\baselinestretch}{1.1}\Large\normalsize}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
66
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
67 \newcommand{\halfs}{\frac{1}{2}}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
68
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
69 \setlength{\oddsidemargin}{-.25 truein}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
70 \setlength{\evensidemargin}{0truein}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
71 \setlength{\topmargin}{-0.2truein}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
72 \setlength{\textwidth}{7 truein}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
73 \setlength{\textheight}{8.5 truein}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
74 \setlength{\parindent}{0.20truein}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
75 \setlength{\parskip}{0.10truein}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
76
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
77 \setcounter{LTchunksize}{50}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
78
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
79 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
80 \pagestyle{fancy}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
81 \lhead{}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
82 %% OPTION Report header name
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
83 \chead{Classification Model Script}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
84 \rhead{}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
85 \lfoot{}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
86 \cfoot{}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
87 \rfoot{\thepage\ of \pageref{LastPage}}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
88 \renewcommand{\headrulewidth}{1pt}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
89 \renewcommand{\footrulewidth}{1pt}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
90 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
91
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
92 %% OPTION Report title and modeler name
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
93 \title{Classification Model Script using rf}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
94 \author{"Lynn Group with M. Kuhn, SCIS, JNU, New Delhi"}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
95
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
96 \begin{document}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
97
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
98 \maketitle
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
99
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
100 \thispagestyle{empty}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
101 <<dummy, eval=TRUE, echo=FALSE, results=hide>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
102 # sets values for variables used later in the program to prevent the \Sexpr error on parsing with Sweave
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
103 numSamples=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
104 classDistString=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
105 missingText=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
106 numPredictors=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
107 numPCAcomp=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
108 pcaText=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
109 nzvText=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
110 corrText=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
111 ppText=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
112 varText=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
113 splitText="Dummy Text"
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
114 nirText="Dummy Text"
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
115 # pctTrain is a variable that is initialised in Data splitting, and reused later in testPred
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
116 pctTrain=0.8
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
117 Smpling=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
118 nzvText1=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
119 classDistString1=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
120 dwnsmpl=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
121 upsmpl=''
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
122
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
123 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
124 <<startup, eval= TRUE, results = hide, echo = FALSE>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
125 library(Hmisc)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
126 library(caret)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
127 library(pROC)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
128 versionTest <- compareVersion(packageDescription("caret")$Version,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
129 "4.65")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
130 if(versionTest < 0) stop("caret version 4.65 or later is required")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
131
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
132 library(RColorBrewer)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
133
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
134
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
## Collapse a vector into an English-style list for use in report text.
##
## x:       vector of items; coerced to character if it is not already.
## period:  if TRUE, a "." is appended to the result.
## verbose: if TRUE, entry/exit trace messages are written to the console.
##
## Returns a single string: "" for empty input, "a" for one item,
## "a and b" for two items, and "a, b and c" for three or more.
listString <- function (x, period = FALSE, verbose = FALSE)
{
  if (verbose) {
    cat("\n      entering listString\n")
    flush.console()
  }
  if (!is.character(x)) x <- as.character(x)
  numElements <- length(x)
  if (numElements == 0) {
    out <- ""
  } else if (numElements == 1) {
    out <- x
  } else if (numElements == 2) {
    out <- paste(x, collapse = " and ")
  } else {
    ## every item but the last two gets a trailing comma; the
    ## second-to-last gets " and"; the last item is left untouched
    suffixes <- c(rep(",", numElements - 2), " and", "")
    out <- paste(paste0(x, suffixes), collapse = " ")
  }
  if (period) out <- paste0(out, ".")
  if (verbose) {
    cat("      leaving  listString\n\n")
    flush.console()
  }
  out
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
155
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
## Describe, in words, the resampled performance at the selected tuning
## parameters.
##
## x:      an object with $bestTune, $results and $perfNames components
##         (these are the only fields read here).
## digits: number of significant digits used when formatting the values.
##
## Returns one string such as "overall accuracy was 0.91 and Kappa
## statistics was 0.80", assembled with listString().
resampleStats <- function(x, digits = 3)
{
  bestPerf <- x$bestTune
  ## strip a leading "." from the tuning parameter names so they match
  ## the column names in x$results
  colnames(bestPerf) <- gsub("^\\.", "", colnames(bestPerf))
  out <- merge(x$results, bestPerf)
  ## drop = FALSE keeps a data frame (and its column names) even when
  ## there is only a single performance metric
  out <- out[, colnames(out) %in% x$perfNames, drop = FALSE]

  ## replace metric abbreviations with readable labels, in this order
  readable <- c(ROC      = "area under the ROC curve",
                Sens     = "sensitivity",
                Spec     = "specificity",
                Accuracy = "overall accuracy",
                Kappa    = "Kappa statistics")
  for (abbrev in names(readable)) {
    names(out) <- gsub(abbrev, readable[[abbrev]], names(out), fixed = TRUE)
  }

  out <- format(out, digits = digits)
  ## take the (first) formatted value of each metric column explicitly
  ## rather than relying on how paste() coerces a data frame
  values <- vapply(out, function(column) as.character(column)[1], character(1))
  listString(paste(names(out), "was", values))
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
171
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
## Summary function for two-class models that do not produce class
## probabilities.
##
## data:  data frame with columns "pred" (predicted class) and "obs"
##        (observed class).
## lev:   class levels; lev[1] is used as the positive class for
##        sensitivity and lev[2] as the negative class for specificity.
##        When NULL, the levels of data[, "obs"] are used.
## model: unused here; kept so the signature is unchanged.
##
## Returns a named numeric vector with elements "Sens", "Spec" and "Kappa".
twoClassNoProbs <- function (data, lev = NULL, model = NULL)
{
  predicted <- data[, "pred"]
  observed <- data[, "obs"]
  ## without this fallback the default lev = NULL would pass NULL as the
  ## positive/negative class below
  if (is.null(lev)) lev <- levels(observed)

  out <- c(sensitivity(predicted, observed, positive = lev[1]),
           specificity(predicted, observed, negative = lev[2]),
           confusionMatrix(predicted, observed)$overall["Kappa"])

  names(out) <- c("Sens", "Spec", "Kappa")
  out
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
181
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
182
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
183
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
184 ##OPTION: model name: see ?train for more values/models
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
185 modName <- "svmRadial"
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
186
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
187
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
188 load("/home/galaxy/galaxy-dist/database/files/000/dataset_521.dat")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
189 rawData <- dataX
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
190 rawData$outcome <- dataY
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
191
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
192 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
193
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
194
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
195 \section*{Data Sets}\label{S:data}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
196
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
197 %% OPTION: provide some background on the problem, the experimental
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
198 %% data, how the compounds were selected etc
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
199
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
<<getDataInfo, eval = TRUE, echo = FALSE, results = hide>>=
## Validate rawData and collect the basic descriptive quantities
## (sample/predictor/class counts and the class distribution string)
## that are quoted in the report text.
if(!any(names(rawData) == "outcome")) stop("a variable called outcome should be in the data set")
if(!is.factor(rawData$outcome)) stop("the outcome should be a factor vector")

## OPTION: when there are only two classes, the first level of the
## factor is used as the "positive" or "event" for calculating
## sensitivity and specificity. Adjust the outcome factor accordingly.
numClasses <- length(levels(rawData$outcome))
numSamples <- nrow(rawData)
numPredictors <- ncol(rawData) - 1
predictorNames <- names(rawData)[names(rawData) != "outcome"]

## Check each predictor column on its own; apply() would first coerce
## the whole data frame to a matrix and test that instead.
isNum <- vapply(rawData[, predictorNames, drop = FALSE], is.numeric, logical(1))
if(any(!isNum)) stop("all predictors in rawData should be numeric")

## identical() always yields a single TRUE/FALSE. all.equal() returns a
## character description when the values differ, which cannot be negated.
classTextCheck <- identical(levels(rawData$outcome), make.names(levels(rawData$outcome)))
if(!classTextCheck) warning("the class levels are not valid R variable names; this may cause errors")

## Get the class distribution
classDist <- table(rawData$outcome)
classDistString <- paste0("``",
                          names(classDist),
                          "'' ($n$=",
                          classDist,
                          ")")
classDistString <- listString(classDistString)
@
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
228
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
<<missingFilter, eval = TRUE, echo = FALSE, results = tex>>=
## Filter rawData in three stages and record what was done:
##   1. drop predictors with too many missing values,
##   2. drop samples with too many missing values (or, if none exceed
##      the threshold, drop every incomplete sample),
##   3. drop near-zero-variance predictors.
## The narrative is accumulated in missingText and nzvText1.

## fraction of missing values in each predictor
colRate <- colMeans(is.na(rawData[, predictorNames, drop = FALSE]))

##OPTION thresholds can be changed
colExclude <- colRate > 0.2

missingText <- ""

if(any(colExclude))
  {
    numColExcluded <- sum(colExclude)
    severalCols <- numColExcluded > 1
    missingText <- paste(missingText,
                         if(severalCols) " There were " else " There was ",
                         numColExcluded,
                         if(severalCols) " predictors " else " predictor ",
                         "with an excessive number of ",
                         "missing data. ",
                         if(severalCols) " These were excluded. " else " This was excluded. ")
    predictorNames <- predictorNames[!colExclude]
    rawData <- rawData[, names(rawData) %in% c("outcome", predictorNames), drop = FALSE]
  }

## fraction of missing values in each sample
rowRate <- rowMeans(is.na(rawData[, predictorNames, drop = FALSE]))

rowExclude <- rowRate > 0.2

if(any(rowExclude)) {
  ## report the number of excluded samples (rows), not excluded predictors
  numRowExcluded <- sum(rowExclude)
  severalRows <- numRowExcluded > 1
  missingText <- paste(missingText,
                       if(severalRows) " There were " else " There was ",
                       numRowExcluded,
                       if(severalRows) " samples " else " sample ",
                       "with an excessive number of ",
                       "missing data. ",
                       if(severalRows) " These were excluded. " else " This was excluded. ",
                       "After filtering, ",
                       sum(!rowExclude),
                       " samples remained.")
  rawData <- rawData[!rowExclude, , drop = FALSE]
  ## NOTE(review): in this branch hasMissing holds the per-sample fraction
  ## of missing values, whereas the else branch stores a logical flag, and
  ## samples below the threshold that still contain NAs are kept here.
  ## Left as in the original - confirm against code that uses hasMissing.
  hasMissing <- rowMeans(is.na(rawData[, predictorNames, drop = FALSE]))
} else {
  ## TRUE for every sample that has at least one missing predictor value
  hasMissing <- rowSums(is.na(rawData[, predictorNames, drop = FALSE])) > 0
  numIncomplete <- sum(hasMissing)
  missingText <- paste(missingText,
                       if(missingText == "") "There " else "Subsequently, there ",
                       if(numIncomplete == 1) "was " else "were ",
                       if(numIncomplete > 0) numIncomplete else "no",
                       if(numIncomplete == 1) "sample " else "samples ",
                       "with missing values.")

  rawData <- rawData[complete.cases(rawData), , drop = FALSE]

}

## Separate predictors from the outcome by name, so the split does not
## depend on the outcome being the last column; drop = FALSE keeps a data
## frame even when only one predictor is left.
predictorData <- rawData[, names(rawData) != "outcome", drop = FALSE]
outcomeData <- rawData$outcome

set.seed(222)
nzv1 <- nearZeroVar(predictorData)
if(length(nzv1) > 0)
  {
    nzvVars1 <- names(predictorData)[nzv1]
    rawData <- predictorData[, -nzv1, drop = FALSE]
    rawData$outcome <- outcomeData
    ## list the removed predictors by name only when there are at most ten
    nzvDetail <- if(length(nzv1) > 10) "." else paste0(": ", listString(nzvVars1), ".")
    nzvText1 <- paste0("There were ",
                       length(nzv1),
                       " predictors that were removed from original data due to",
                       " severely unbalanced distributions that",
                       " could negatively affect the model fit",
                       nzvDetail)

  } else {
    rawData <- predictorData
    rawData$outcome <- outcomeData
    nzvText1 <- ""

  }

rm(predictorData, outcomeData)

@
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
340
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
341 The initial data set consisted of \Sexpr{numSamples} samples and
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
342 \Sexpr{numPredictors} predictor variables. The breakdown of the
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
343 outcome data classes was: \Sexpr{classDistString}.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
344
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
345 \Sexpr{missingText}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
346
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
347 \Sexpr{nzvText1}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
348
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
349 <<pca, eval= TRUE, echo = FALSE, results = hide>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
350
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
351 predictorNames <- names(rawData)[names(rawData) != "outcome"]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
352 numPredictors <- length(predictorNames)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
353 predictors <- rawData[, predictorNames, drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
354 ## PCA will fail with predictors having fewer than 2 unique values
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
355 isZeroVar <- apply(predictors, 2,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
356 function(x) length(unique(x)) < 2)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
357 if(any(isZeroVar)) predictors <- predictors[, !isZeroVar, drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
358 ## For whatever reason, only the formula interface to prcomp
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
359 ## handles missing values
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
360 pcaForm <- as.formula(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
361 paste("~",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
362 paste(names(predictors), collapse = "+")))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
363 pca <- prcomp(pcaForm,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
364 data = predictors,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
365 center = TRUE,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
366 scale. = TRUE,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
367 na.action = na.omit)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
368 ## OPTION: the number of components plotted/discussed can be set
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
369 numPCAcomp <- 3
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
370 pctVar <- pca$sdev^2/sum(pca$sdev^2)*100
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
371 pcaText <- paste(round(pctVar[1:numPCAcomp], 1),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
372 "\\\\%",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
373 sep = "")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
374 pcaText <- listString(pcaText)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
375 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
376
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
377 To get an initial assessment of the separability of the classes,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
378 principal component analysis (PCA) was used to distill the
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
379 \Sexpr{numPredictors} predictors down into \Sexpr{numPCAcomp}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
380 surrogate variables (i.e. the principal components) in a manner that
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
381 attempts to maximize the amount of information preserved from the
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
382 original predictor set. Figure \ref{F:inititalPCA} contains plots of
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
383 the first \Sexpr{numPCAcomp} components, which accounted for
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
384 \Sexpr{pcaText} percent of the variability in the original predictors
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
385 (respectively).
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
386
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
387
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
388 %% OPTION: remark on how well (or poorly) the data separated
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
389
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
390 \setkeys{Gin}{width = 0.8\textwidth}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
391 \begin{figure}[p]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
392 \begin{center}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
393
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
394 <<pcaPlot, eval = TRUE, echo = FALSE, results = tex, fig = TRUE, width = 8, height = 8>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
395 trellis.par.set(caretTheme(), warn = TRUE)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
396 if(numPCAcomp == 2)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
397 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
398 axisRange <- extendrange(pca$x[, 1:2])
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
399 print(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
400 xyplot(PC1 ~ PC2,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
401 data = as.data.frame(pca$x),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
402 type = c("p", "g"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
403 groups = rawData$outcome,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
404 auto.key = list(columns = 2),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
405 xlim = axisRange,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
406 ylim = axisRange))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
407 } else {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
408 axisRange <- extendrange(pca$x[, 1:numPCAcomp])
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
409 print(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
410 splom(~as.data.frame(pca$x)[, 1:numPCAcomp],
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
411 type = c("p", "g"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
412 groups = rawData$outcome,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
413 auto.key = list(columns = 2),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
414 as.table = TRUE,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
415 prepanel.limits = function(x) axisRange
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
416 ))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
417
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
418 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
419
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
420 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
421
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
422 \caption[PCA Plot]{A plot of the first \Sexpr{numPCAcomp}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
423 principal components for the original data set.}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
424 \label{F:inititalPCA}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
425 \end{center}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
426 \end{figure}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
427
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
428
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
429
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
430 <<initialDataSplit, eval = TRUE, echo = FALSE, results = tex>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
431
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
432 ## OPTION: in small samples sizes, you may not want to set aside a
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
433 ## test set and instead focus on the resampling results.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
434
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
435 set.seed(1234)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
436 dataX <- rawData[,1:length(rawData)-1]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
437 dataY <- rawData[,length(rawData)]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
438
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
439 Smpling <- "garBage"
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
440
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
441 if(Smpling=="downsampling")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
442 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
443 dwnsmpl <- downSample(dataX,dataY)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
444 rawData <- dwnsmpl[,1:length(dwnsmpl)-1]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
445 rawData$outcome <- dwnsmpl[,length(dwnsmpl)]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
446 remove("dwnsmpl")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
447 remove("dataX")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
448 remove("dataY")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
449 }else if(Smpling=="upsampling"){
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
450 upsmpl <- upSample(dataX,dataY)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
451 rawData <- upsmpl[,1:length(upsmpl)-1]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
452 rawData$outcome <- upsmpl[,length(upsmpl)]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
453 remove("upsmpl")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
454 remove("dataX")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
455 remove("dataY")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
456 }else{remove("dataX")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
457 remove("dataY")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
458 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
459
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
460
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
461
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
462 numSamples <- nrow(rawData)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
463
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
464 predictorNames <- names(rawData)[names(rawData) != "outcome"]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
465 numPredictors <- length(predictorNames)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
466
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
467
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
468 classDist1 <- table(rawData$outcome)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
469 classDistString1 <- paste("``",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
470 names(classDist1),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
471 "'' ($n$=",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
472 classDist1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
473 ")",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
474 sep = "")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
475 classDistString1 <- listString(classDistString1)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
476
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
477 pctTrain <- 0.8
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
478
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
479 if(pctTrain < 1)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
480 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
481 ## OPTION: seed number can be changed
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
482 set.seed(1)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
483 inTrain <- createDataPartition(rawData$outcome,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
484 p = pctTrain,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
485 list = FALSE)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
486 trainX <- rawData[ inTrain, predictorNames]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
487 testX <- rawData[-inTrain, predictorNames]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
488 trainY <- rawData[ inTrain, "outcome"]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
489 testY <- rawData[-inTrain, "outcome"]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
490 splitText <- paste("The original data were split into ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
491 "a training set ($n$=",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
492 nrow(trainX),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
493 ") and a test set ($n$=",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
494 nrow(testX),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
495 ") in a manner that preserved the ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
496 "distribution of the classes.",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
497 sep = "")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
498 isZeroVar <- apply(trainX, 2,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
499 function(x) length(unique(x)) < 2)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
500 if(any(isZeroVar))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
501 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
502 trainX <- trainX[, !isZeroVar, drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
503 testX <- testX[, !isZeroVar, drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
504 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
505
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
506 } else {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
507 trainX <- rawData[, predictorNames]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
508 testX <- NULL
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
509 trainY <- rawData[, "outcome"]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
510 testY <- NULL
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
511 splitText <- "The entire data set was used as the training set."
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
512 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
513 trainDist <- table(trainY)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
514 nir <- max(trainDist)/length(trainY)*100
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
515 niClass <- names(trainDist)[which.max(trainDist)]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
516 nirText <- paste("The non--information rate is the accuracy that can be ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
517 "achieved by predicting all samples using the most ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
518 "dominant class. For these data, the rate is ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
519 round(nir, 2), "\\\\% using the ``",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
520 niClass,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
521 "'' class.",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
522 sep = "")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
523
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
524 remove("rawData")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
525
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
526 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
527
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
528 \Sexpr{splitText}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
529
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
530 \Sexpr{nirText}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
531
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
532 The data set for model building consisted of \Sexpr{numSamples} samples and
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
533 \Sexpr{numPredictors} predictor variables. The breakdown of the
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
534 outcome data classes was: \Sexpr{classDistString1}.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
535
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
536 <<nzv, eval= TRUE, results = tex, echo = FALSE>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
537 ## OPTION: other pre-processing steps can be used
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
538 ppSteps <- caret:::suggestions(modName)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
539
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
540 set.seed(2)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
541 if(ppSteps["nzv"])
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
542 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
543 nzv <- nearZeroVar(trainX)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
544 if(length(nzv) > 0)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
545 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
546 nzvVars <- names(trainX)[nzv]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
547 trainX <- trainX[, -nzv, drop = FALSE]  # keep a data frame even if one predictor remains
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
548 nzvText <- paste("There were ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
549 length(nzv),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
550 " predictors that were removed from train set due to",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
551 " severely unbalanced distributions that",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
552 " could negatively affect the model",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
553 ifelse(length(nzv) > 10,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
554 ".",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
555 paste(": ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
556 listString(nzvVars),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
557 ".",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
558 sep = "")),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
559 sep = "")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
560 testX <- testX[, -nzv, drop = FALSE]  # keep a data frame even if one predictor remains
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
561 } else nzvText <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
562 } else nzvText <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
563 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
564
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
565 \Sexpr{nzvText}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
566
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
567
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
568 <<corrFilter, eval = TRUE, results = tex, echo = FALSE>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
569 if(ppSteps["corr"])
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
570 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
571 ## OPTION:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
572 corrThresh <- 0.75
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
573 highCorr <- findCorrelation(cor(trainX, use = "pairwise.complete.obs"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
574 corrThresh)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
575 if(length(highCorr) > 0)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
576 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
577 corrVars <- names(trainX)[highCorr]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
578 trainX <- trainX[, -highCorr, drop = FALSE]  # keep a data frame even if one predictor remains
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
579 corrText <- paste("There were ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
580 length(highCorr),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
581 " predictors that were removed due to",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
582 " large between--predictor correlations that",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
583 " could negatively affect the model fit",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
584 ifelse(length(highCorr) > 10,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
585 ".",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
586 paste(": ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
587 listString(corrVars),  # report predictor names, not the column indices in highCorr
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
588 ".",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
589 sep = "")),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
590 " Removing these predictors forced",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
591 " all pair--wise correlations to be",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
592 " less than ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
593 corrThresh,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
594 ".",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
595 sep = "")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
596 testX <- testX[, -highCorr, drop = FALSE]  # keep a data frame even if one predictor remains
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
597 } else corrText <- "No correlation among data on given threshold"
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
598 }else corrText <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
599 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
600
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
601 \Sexpr{corrText}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
602
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
603 <<preProc, eval = TRUE, echo = FALSE, results = tex>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
604 ppMethods <- NULL
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
605 if(ppSteps["center"]) ppMethods <- c(ppMethods, "center")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
606 if(ppSteps["scale"]) ppMethods <- c(ppMethods, "scale")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
607 if(any(hasMissing) > 0) ppMethods <- c(ppMethods, "knnImpute")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
608 ##OPTION other methods, such as spatial sign, can be added to this list
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
609
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
610 if(length(ppMethods) > 0)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
611 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
612 ppInfo <- preProcess(trainX, method = ppMethods)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
613 trainX <- predict(ppInfo, trainX)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
614 if(pctTrain < 1) testX <- predict(ppInfo, testX)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
615 ppText <- paste("The following pre--processing methods were",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
616 " applied to the training",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
617 ifelse(pctTrain < 1, " and test", ""),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
618 " data: ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
619 listString(ppMethods),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
620 ".",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
621 sep = "")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
622 ppText <- gsub("center", "mean centering", ppText)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
623 ppText <- gsub("scale", "scaling to unit variance", ppText)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
624 ppText <- gsub("knnImpute",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
625 paste(ppInfo$k, "--nearest neighbor imputation", sep = ""),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
626 ppText)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
627 ppText <- gsub("spatialSign", "the spatial sign transformation", ppText)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
628 ppText <- gsub("pca", "principal component feature extraction", ppText)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
629 ppText <- gsub("ica", "independent component feature extraction", ppText)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
630 } else {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
631 ppInfo <- NULL
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
632 ppText <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
633 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
634
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
635 predictorNames <- names(trainX)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
636 if(nzvText != "" | corrText != "" | ppText != "")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
637 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
638 varText <- paste("After pre--processing, ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
639 ncol(trainX),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
640 "predictors remained for modeling.")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
641 } else varText <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
642
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
643 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
644
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
645 \Sexpr{ppText}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
646 \Sexpr{varText}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
647
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
648 \clearpage
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
649
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
650 \section*{Model Building}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
651
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
652 <<setupWorkers, eval = TRUE, echo = FALSE, results = tex>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
653 numWorkers <- 1
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
654 ##OPTION: turn up numWorkers to use MPI
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
655 if(numWorkers > 1)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
656 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
657 mpiCalcs <- function(X, FUN, ...)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
658 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
659 theDots <- list(...)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
660 parLapply(theDots$cl, X, FUN)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
661 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
662
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
663 library(snow)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
664 cl <- makeCluster(numWorkers, "MPI")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
665 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
666 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
667
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
668 <<setupResampling, echo = FALSE, results = hide>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
669 ##OPTION: the resampling options can be changed. See
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
670 ## ?trainControl for details
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
671
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
672 resampName <- "boot"
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
673 resampNumber <- 3
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
674 numRepeat <- 1
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
675 resampP <- 0.75
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
676
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
677 modelInfo <- modelLookup(modName)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
678
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
679 if(numClasses == 2)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
680 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
681 foo <- if(any(modelInfo$probModel)) twoClassSummary else twoClassNoProbs
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
682 } else foo <- defaultSummary
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
683
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
684 set.seed(3)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
685 ctlObj <- trainControl(method = resampName,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
686 number = resampNumber,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
687 repeats = numRepeat,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
688 p = resampP,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
689 classProbs = any(modelInfo$probModel),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
690 summaryFunction = foo)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
691
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
692
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
693 ##OPTION select other performance metrics as needed
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
694 optMetric <- if(numClasses == 2 & any(modelInfo$probModel)) "ROC" else "Kappa"
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
695
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
696 if(numWorkers > 1)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
697 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
698 ctlObj$workers <- numWorkers
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
699 ctlObj$computeFunction <- mpiCalcs
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
700 ctlObj$computeArgs <- list(cl = cl)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
701 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
702 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
703
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
704 <<setupGrid, results = hide, echo = FALSE>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
705 ##OPTION expand or contract these grids as needed (or
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
706 ## add more models)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
707
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
708 gridSize <- 3
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
709
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
710 if(modName %in% c("svmPoly", "svmRadial", "svmLinear", "lvq", "ctree2", "ctree")) gridSize <- 5
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
711 if(modName %in% c("earth", "fda")) gridSize <- 7
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
712 if(modName %in% c("knn", "rocc", "glmboost", "rf", "nodeHarvest")) gridSize <- 10
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
713
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
714 if(modName %in% c("nb")) gridSize <- 2
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
715 if(modName %in% c("pam", "rpart")) gridSize <- 15
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
716 if(modName %in% c("pls")) gridSize <- min(20, ncol(trainX))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
717
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
718 if(modName == "gbm")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
719 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
720 tGrid <- expand.grid(.interaction.depth = -1 + (1:5)*2 ,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
721 .n.trees = (1:10)*20,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
722 .shrinkage = .1)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
723 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
724
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
725 if(modName == "nnet")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
726 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
727 tGrid <- expand.grid(.size = -1 + (1:5)*2 ,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
728 .decay = c(0, .001, .01, .1))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
729 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
730
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
731 if(modName == "ada")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
732 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
733 tGrid <- expand.grid(.maxdepth = 1, .iter = c(100,200,300,400), .nu = 1 )
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
734
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
735 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
736
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
737
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
738 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
739
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
740 <<fitModel, results = tex, echo = FALSE, eval = TRUE>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
741 ##OPTION alter as needed
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
742
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
743 set.seed(4)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
744 modelFit <- switch(modName,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
745 gbm =
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
746 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
747 mix <- sample(seq(along = trainY))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
748 train(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
749 trainX[mix,], trainY[mix], modName,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
750 verbose = FALSE,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
751 bag.fraction = .9,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
752 metric = optMetric,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
753 trControl = ctlObj,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
754 tuneGrid = tGrid)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
755 },
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
756
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
757 multinom =
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
758 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
759 train(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
760 trainX, trainY, modName,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
761 trace = FALSE,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
762 metric = optMetric,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
763 maxiter = 1000,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
764 MaxNWts = 5000,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
765 trControl = ctlObj,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
766 tuneLength = gridSize)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
767 },
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
768
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
769 nnet =
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
770 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
771 train(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
772 trainX, trainY, modName,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
773 metric = optMetric,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
774 linout = FALSE,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
775 trace = FALSE,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
776 maxiter = 1000,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
777 MaxNWts = 5000,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
778 trControl = ctlObj,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
779 tuneGrid = tGrid)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
780
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
781 },
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
782
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
783 svmRadial =, svmPoly =, svmLinear =
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
784 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
785 train(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
786 trainX, trainY, modName,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
787 metric = optMetric,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
788 scaled = TRUE,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
789 trControl = ctlObj,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
790 tuneLength = gridSize)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
791 },
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
792 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
793 train(trainX, trainY, modName,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
794 trControl = ctlObj,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
795 metric = optMetric,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
796 tuneLength = gridSize)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
797 })
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
798
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
799 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
800
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
801 <<modelDescr, echo = FALSE, results = tex>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
802 summaryText <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
803
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
804 resampleName <- switch(tolower(modelFit$control$method),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
805 boot = paste("the bootstrap (", length(modelFit$control$index), " reps)", sep = ""),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
806 boot632 = paste("the bootstrap 632 rule (", length(modelFit$control$index), " reps)", sep = ""),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
807 cv = paste("cross-validation (", modelFit$control$number, " fold)", sep = ""),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
808 repeatedcv = paste("cross-validation (", modelFit$control$number, " fold, repeated ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
809 modelFit$control$repeats, " times)", sep = ""),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
810 lgocv = paste("repeated train/test splits (", length(modelFit$control$index), " reps, ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
811 round(modelFit$control$p, 2), "$\\%$)", sep = ""))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
812
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
813 tuneVars <- latexTranslate(tolower(modelInfo$label))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
814 tuneVars <- gsub("\\#", "the number of ", tuneVars, fixed = TRUE)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
815 if(ncol(modelFit$bestTune) == 1 && colnames(modelFit$bestTune) == ".parameter")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
816 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
817 summaryText <- paste(summaryText,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
818 "\n\n",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
819 "There are no tuning parameters associated with this model.",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
820 "To characterize the model performance on the training set,",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
821 resampleName,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
822 "was used.",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
823 "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile}",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
824 "show summaries of the resampling results. ")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
825
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
826 } else {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
827 summaryText <- paste("There",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
828 ifelse(nrow(modelInfo) > 1, "are", "is"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
829 nrow(modelInfo),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
830 ifelse(nrow(modelInfo) > 1, "tuning parameters", "tuning parameter"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
831 "associated with this model:",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
832 listString(tuneVars, period = TRUE))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
833
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
834
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
835
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
836 paramNames <- gsub(".", "", names(modelFit$bestTune), fixed = TRUE)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
837 for(i in seq(along = paramNames))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
838 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
839 check <- modelInfo$parameter %in% paramNames[i]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
840 if(any(check))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
841 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
842 paramNames[i] <- modelInfo$label[which(check)]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
843 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
844 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
845
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
846 paramNames <- gsub("#", "the number of ", paramNames, fixed = TRUE)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
847 ## Check to see if there was only one combination fit
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
848 summaryText <- paste(summaryText,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
849 "To choose",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
850 ifelse(nrow(modelInfo) > 1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
851 "appropriate values of the tuning parameters,",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
852 "an appropriate value of the tuning parameter,"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
853 resampleName,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
854 "was used to generate a profile of performance across the",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
855 nrow(modelFit$results),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
856 ifelse(nrow(modelInfo) > 1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
857 "combinations of the tuning parameters.",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
858 "candidate values."),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
859
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
860 "Table \\\\ref{T:resamps} and Figure \\\\ref{F:profile} show",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
861 "summaries of the resampling profile. ", "The final model fitted to the entire training set was:",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
862 listString(paste(latexTranslate(tolower(paramNames)), "=", modelFit$bestTune[1,]), period = TRUE))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
863
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
864 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
865 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
866
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
867 \Sexpr{summaryText}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
868
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
869 <<resampTable, echo = FALSE, results = tex>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
870 tableData <- modelFit$results
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
871
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
872 if(all(modelInfo$parameter == "parameter") && resampName == "boot632")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
873 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
874 tableData <- tableData[,-1, drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
875 colNums <- c( length(modelFit$perfNames), length(modelFit$perfNames), length(modelFit$perfNames))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
876 colLabels <- c("Mean", "Standard Deviation","Apparent")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
877 constString <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
878 isConst <- NULL
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
879 } else if (all(modelInfo$parameter == "parameter") && (resampName == "boot" | resampName == "cv" | resampName == "repeatedcv" )){
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
880 tableData <- tableData[,-1, drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
881 colNums <- c(length(modelFit$perfNames), length(modelFit$perfNames))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
882 colLabels <- c("Mean", "Standard Deviation")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
883 constString <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
884 isConst <- NULL
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
885 } else if (all(modelInfo$parameter == "parameter") && resampName == "LOOCV" ){
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
886 tableData <- tableData[,-1, drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
887 colNums <- length(modelFit$perfNames)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
888 colLabels <- c("Measures")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
889 constString <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
890 isConst <- NULL
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
891 } else {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
892 if (all(modelInfo$parameter != "parameter") && resampName == "boot632" ){
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
893 isConst <- apply(tableData[, modelInfo$parameter, drop = FALSE],
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
894 2,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
895 function(x) length(unique(x)) == 1)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
896
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
897 numParamInTable <- sum(!isConst)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
898
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
899 if(any(isConst))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
900 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
901 constParam <- modelInfo$parameter[isConst]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
902 constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
903 tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
904 constString <- paste("The tuning",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
905 ifelse(sum(isConst) > 1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
906 "parameters",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
907 "parameter"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
908 listString(paste("``", names(constValues), "''", sep = "")),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
909 ifelse(sum(isConst) > 1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
910 "were",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
911 "was"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
912 "held constant at",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
913 ifelse(sum(isConst) > 1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
914 "values of",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
915 "a value of"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
916 listString(constValues[1,]))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
917
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
918 } else constString <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
919
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
920 cn <- colnames(tableData)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
921 for(i in seq(along = cn))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
922 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
923 check <- modelInfo$parameter %in% cn[i]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
924 if(any(check))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
925 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
926 cn[i] <- modelInfo$label[which(check)]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
927 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
928 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
929 colnames(tableData) <- cn
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
930
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
931 colNums <- c(numParamInTable,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
932 length(modelFit$perfNames),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
933 length(modelFit$perfNames),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
934 length(modelFit$perfNames))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
935 colLabels <- c("", "Mean", "Standard Deviation", "Apparent")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
936
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
937 }else if (all(modelInfo$parameter != "parameter") && (resampName == "boot" | resampName == "repeatedcv" | resampName == "cv") ){
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
938 isConst <- apply(tableData[, modelInfo$parameter, drop = FALSE],
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
939 2,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
940 function(x) length(unique(x)) == 1)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
941
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
942 numParamInTable <- sum(!isConst)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
943
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
944 if(any(isConst))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
945 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
946 constParam <- modelInfo$parameter[isConst]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
947 constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
948 tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
949 constString <- paste("The tuning",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
950 ifelse(sum(isConst) > 1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
951 "parameters",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
952 "parameter"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
953 listString(paste("``", names(constValues), "''", sep = "")),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
954 ifelse(sum(isConst) > 1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
955 "were",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
956 "was"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
957 "held constant at",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
958 ifelse(sum(isConst) > 1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
959 "values of",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
960 "a value of"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
961 listString(constValues[1,]))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
962
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
963 } else constString <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
964
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
965 cn <- colnames(tableData)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
966 for(i in seq(along = cn))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
967 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
968 check <- modelInfo$parameter %in% cn[i]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
969 if(any(check))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
970 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
971 cn[i] <- modelInfo$label[which(check)]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
972 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
973 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
974 colnames(tableData) <- cn
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
975
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
976 colNums <- c(numParamInTable,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
977 length(modelFit$perfNames),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
978 length(modelFit$perfNames))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
979 colLabels <- c("", "Mean", "Standard Deviation")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
980
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
981 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
982 else if (all(modelInfo$parameter != "parameter") && resampName == "LOOCV"){
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
983 isConst <- apply(tableData[, modelInfo$parameter, drop = FALSE],
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
984 2,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
985 function(x) length(unique(x)) == 1)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
986
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
987 numParamInTable <- sum(!isConst)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
988
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
989 if(any(isConst))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
990 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
991 constParam <- modelInfo$parameter[isConst]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
992 constValues <- format(tableData[, constParam, drop = FALSE], digits = 4)[1,,drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
993 tableData <- tableData[, !(names(tableData) %in% constParam), drop = FALSE]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
994 constString <- paste("The tuning",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
995 ifelse(sum(isConst) > 1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
996 "parameters",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
997 "parameter"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
998 listString(paste("``", names(constValues), "''", sep = "")),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
999 ifelse(sum(isConst) > 1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1000 "were",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1001 "was"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1002 "held constant at",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1003 ifelse(sum(isConst) > 1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1004 "values of",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1005 "a value of"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1006 listString(constValues[1,]))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1007
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1008 } else constString <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1009
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1010 cn <- colnames(tableData)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1011 for(i in seq(along = cn))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1012 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1013 check <- modelInfo$parameter %in% cn[i]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1014 if(any(check))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1015 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1016 cn[i] <- modelInfo$label[which(check)]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1017 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1018 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1019 colnames(tableData) <- cn
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1020
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1021 colNums <- c(numParamInTable,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1022 length(modelFit$perfNames))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1023 colLabels <- c("", "Measures")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1024
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1025 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1026
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1027 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1028
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1029 colnames(tableData) <- gsub("SD$", "", colnames(tableData))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1030 colnames(tableData) <- gsub("Apparent$", "", colnames(tableData))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1031 colnames(tableData) <- latexTranslate(colnames(tableData))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1032 rownames(tableData) <- latexTranslate(rownames(tableData))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1033
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1034 latex(tableData,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1035 rowname = NULL,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1036 file = "",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1037 cgroup = colLabels,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1038 n.cgroup = colNums,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1039 where = "h!",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1040 digits = 4,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1041 longtable = nrow(tableData) > 30,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1042 caption = paste(resampleName, "results from the model fit.", constString),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1043 label = "T:resamps")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1044 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1045
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1046 \setkeys{Gin}{ width = 0.9\textwidth}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1047 \begin{figure}[b]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1048 \begin{center}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1049
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1050 <<profilePlot, echo = FALSE, fig = TRUE, width = 8, height = 6>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1051 trellis.par.set(caretTheme(), warn = TRUE)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1052 if(all(modelInfo$parameter == "parameter") | all(isConst) | modName == "nb")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1053 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1054 resultsPlot <- resampleHist(modelFit)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1055 plotCaption <- paste("Distributions of model performance from the ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1056 "training set estimated using ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1057 resampleName)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1058 } else {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1059 if(modName %in% c("svmPoly", "svmRadial", "svmLinear"))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1060 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1061 resultsPlot <- plot(modelFit,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1062 metric = optMetric,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1063 xTrans = function(x) log10(x))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1064 resultsPlot <- update(resultsPlot,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1065 type = c("g", "p", "l"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1066 ylab = paste(optMetric, " (", resampleName, ")", sep = ""))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1067
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1068 } else {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1069 resultsPlot <- plot(modelFit,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1070 metric = optMetric)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1071 resultsPlot <- update(resultsPlot,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1072 type = c("g", "p", "l"),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1073 ylab = paste(optMetric, " (", resampleName, ")", sep = ""))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1074 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1075 plotCaption <- paste("A plot of the estimates of the",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1076 optMetric,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1077 "values calculated using",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1078 resampleName)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1079 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1080 print(resultsPlot)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1081 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1082 \caption[Performance Plot]{\Sexpr{plotCaption}.}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1083 \label{F:profile}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1084 \end{center}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1085 \end{figure}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1086
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1087
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1088 <<stopWorkers, echo = FALSE, results = hide>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1089 if(numWorkers > 1) { stopCluster(cl) } # release the worker cluster `cl`; nothing is stopped when only one worker was used
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1090 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1091
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1092 <<testPred, results = tex, echo = FALSE>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1093 if(pctTrain < 1)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1094 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1095 cat("\\clearpage\n\\section*{Test Set Results}\n\n")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1096 classPred <- predict(modelFit, testX)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1097 cm <- confusionMatrix(classPred, testY)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1098 values <- cm$overall[c("Accuracy", "Kappa", "AccuracyPValue", "McnemarPValue")]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1099 ## drop statistics that could not be computed (is.na() is also TRUE for NaN)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1100 values <- values[!is.na(values)]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1101 ## format and label by statistic name, not by position: a dropped statistic must not shift the remaining ones
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1102 statNames <- names(values); isPval <- statNames %in% c("AccuracyPValue", "McnemarPValue") # names saved now because format.pval() discards them
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1103 values <- c(format(values[!isPval], digits = 3), format.pval(values[isPval], digits = 5))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1104 nms <- c(Accuracy = "the overall accuracy", Kappa = "the Kappa statistic",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1105 AccuracyPValue = "the $p$--value that accuracy is greater than the no--information rate",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1106 McnemarPValue = "the $p$--value of concordance from McNemar's test")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1107 names(values) <- unname(nms[statNames]) # non-p-value statistics precede the p-values in statNames, matching the order of c() above
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1108
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1109 if(any(modelInfo$probModel))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1110 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1111 classProbs <- extractProb(list(fit = modelFit),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1112 testX = testX,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1113 testY = testY)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1114 classProbs <- subset(classProbs, dataType == "Test")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1115 if(numClasses == 2)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1116 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1117 tmp <- twoClassSummary(classProbs, lev = levels(classProbs$obs))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1118 tmp <- c(format(tmp, digits = 3))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1119 names(tmp) <- c("the sensitivity", "the specificity",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1120 "the area under the ROC curve")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1121 values <- c(values, tmp)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1122
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1123 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1124 probPlot <- plotClassProbs(classProbs)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1125 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1126 testString <- paste("Based on the test set of",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1127 nrow(testX),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1128 "samples,",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1129 listString(paste(names(values), "was", values), period = TRUE),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1130 "The confusion matrix for the test set is shown in Table",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1131 "\\\\ref{T:cm}.")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1132 testString <- paste(testString,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1133 " Using ", resampleName,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1134 ", the training set estimates were ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1135 resampleStats(modelFit),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1136 ".",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1137 sep = "")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1138
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1139 if(any(modelInfo$probModel)) testString <- paste(testString,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1140 "Histograms of the class probabilities",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1141 "for the test set samples are shown in",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1142 "Figure \\\\ref{F:probs}",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1143 ifelse(numClasses == 2,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1144 " and the test set ROC curve is in Figure \\\\ref{F:roc}.",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1145 "."))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1146
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1147
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1148
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1149 latex(cm$table,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1150 title = "",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1151 file = "",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1152 where = "h",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1153 cgroup = "Observed Values",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1154 n.cgroup = numClasses,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1155 caption = "The confusion matrix for the test set",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1156 label = "T:cm")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1157
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1158 } else testString <- ""
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1159 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1160 \Sexpr{testString}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1161
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1162
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1163 <<classProbsTex, results = tex, echo = FALSE>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1164 if(pctTrain < 1 && any(modelInfo$probModel)) # the class probability plot is built from test-set predictions, so skip the figure when no test set was held out
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1165 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1166 cat(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1167 paste("\\begin{figure}[p]\n",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1168 "\\begin{center}\n",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1169 "\\includegraphics{classProbs}",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1170 "\\caption[Class Probabilities]{Class probabilities", # short (list-of-figures) title previously read "PCA Plot"
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1171 "for the test set. Each panel contains ",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1172 "separate classes}\n",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1173 "\\label{F:probs}\n",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1174 "\\end{center}\n",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1175 "\\end{figure}"))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1176 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1177 if(pctTrain < 1 && any(modelInfo$probModel) && numClasses == 2) # ROC figure needs test-set probabilities and a two-class outcome; && because every condition is a scalar
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1178 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1179 cat(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1180 paste("\\begin{figure}[p]\n",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1181 "\\begin{center}\n",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1182 "\\includegraphics[clip, width = .8\\textwidth]{roc}",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1183 "\\caption[ROC Plot]{ROC Curve",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1184 "for the test set.}\n",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1185 "\\label{F:roc}\n",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1186 "\\end{center}\n",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1187 "\\end{figure}"))
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1188 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1189 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1190 <<classProbsPlots, results = hide, echo = FALSE>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1191 if(pctTrain < 1 && any(modelInfo$probModel)) # probPlot is only created when a test set was held out (pctTrain < 1)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1192 {
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1193 pdf("classProbs.pdf", height = 7, width = 7)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1194 trellis.par.set(caretTheme(), warn = FALSE)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1195 print(probPlot)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1196 dev.off()
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1197 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1198
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1199 if(pctTrain < 1 && any(modelInfo$probModel) && numClasses == 2) # testY and classProbs only exist when a test set was held out; && because every condition is a scalar
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1200 { resPonse<-testY
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1201 preDictor<-classProbs[, levels(trainY)[1]]
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1202 pdf("roc.pdf", height = 8, width = 8)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1203 # from pROC example at http://web.expasy.org/pROC/screenshots.htm
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1204 plot.roc(resPonse, preDictor, # data
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1205 percent=TRUE, # show all values in percent
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1206 partial.auc=c(100, 90), partial.auc.correct=TRUE, # define a partial AUC (pAUC)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1207 print.auc=TRUE, #display pAUC value on the plot with following options:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1208 print.auc.pattern="Corrected pAUC (100-90%% SP):\n%.1f%%", print.auc.col="#1c61b6",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1209 auc.polygon=TRUE, auc.polygon.col="#1c61b6", # show pAUC as a polygon
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1210 max.auc.polygon=TRUE, max.auc.polygon.col="#1c61b622", # also show the 100% polygon
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1211 main="Partial AUC (pAUC)")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1212 plot.roc(resPonse, preDictor,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1213 percent=TRUE, add=TRUE, type="n", # add to plot, but don't re-add the ROC itself (useless)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1214 partial.auc=c(100, 90), partial.auc.correct=TRUE,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1215 partial.auc.focus="se", # focus pAUC on the sensitivity
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1216 print.auc=TRUE, print.auc.pattern="Corrected pAUC (100-90%% SE):\n%.1f%%", print.auc.col="#008600",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1217 print.auc.y=40, # do not print auc over the previous one
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1218 auc.polygon=TRUE, auc.polygon.col="#008600",
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1219 max.auc.polygon=TRUE, max.auc.polygon.col="#00860022")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1220 dev.off()
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1221 }
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1222
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1223
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1224 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1225
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1226 \section*{Versions}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1227
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1228 <<versions, echo = FALSE, results = tex>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1229 toLatex(sessionInfo()) # auto-printed into the report by the results = tex chunk option
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1230
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1231 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1232
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1233 <<save-data, echo = FALSE, results = tex>>=
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1234 ## Store the fitted model under the name Fit; change the hard-coded "pls" file prefix to the name in modName.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1235 Fit <- modelFit
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1236 save(list = "Fit", file = "pls-Fit.RData")
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1237 @
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1238 The model was built using pls and saved to pls-Fit.RData for reuse. The file contains the fitted model in the variable Fit.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1239
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1240 \end{document}