14
|
1 <tool id="rg_nri" name="NRI" version="0.04">
|
3
|
2 <description>and other model improvement measures</description>
|
|
3 <requirements>
|
14
|
4 <requirement type="package" version="3.1.2">R</requirement>
|
3
|
5 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
|
|
6 <requirement type="package" version="9.10">ghostscript</requirement>
|
|
7 <requirement type="package" version="2.14">glmnet_lars_2_14</requirement>
|
|
8 </requirements>
|
|
9 <command interpreter="python">
|
|
10 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "rg_NRI"
|
|
11 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
|
|
12 </command>
|
|
13 <inputs>
|
|
14 <param name="title" type="text" value="NRI test" size="80" label="Plot Title" help="Will appear as the title for the comparison plot"/>
|
|
15 <param name="input1" type="data" format="tabular" label="Select a tabular file from the baseline model with predicted and observed outcome column for each subject"
|
|
16 multiple='False' help="Observed and predicted status columns must be selected from this file below - NOTE both models must be in same order with exact matches in all observed outcomes" optional="False"/>
|
|
17 <param name="input1_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input1" numerical="True"
|
|
18 multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
|
|
19 <param name="input1_predicted" label="Select column containing predicted event probabilies from baseline model" type="data_column" data_ref="input1" numerical="True"
|
|
20 multiple="False" use_header_names="True" optional="False" help="Must be in range 0-1"/>
|
|
21 <param name="input1_id" label="Select column containing subject ID from baseline model" type="data_column" data_ref="input1" numerical="True"
|
|
22 multiple="False" use_header_names="True" optional="False" help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/>
|
|
23 <param name="input2" type="data" format="tabular" label="Select a tabular file from the new model with predicted and observed outcome columns for each subject"
|
|
24 multiple='False' help="Observed and predicted status columns must be selected from this file below" />
|
|
25 <param name="input2_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input2" numerical="True"
|
|
26 multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
|
|
27 <param name="input2_predicted" label="Select column containing predicted event probabilities from the new model" type="data_column" data_ref="input2" numerical="True"
|
|
28 multiple="False" use_header_names="True" optional="False" help="Must be in range 0-1"/>
|
|
29 <param name="input2_id" label="Select column containing subject ID from the new model" type="data_column" data_ref="input2" numerical="True"
|
|
30 multiple="False" use_header_names="True" optional="False" help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/>
|
|
31 <conditional name="CImeth">
|
|
32 <param name="cis" type="select" label="CI calculation method"
|
|
33 help="Bootstrap will take time - a long time for thousands - asymptotic is quick and informative">
|
|
34 <option value="asymptotic" selected="true">Asymptotic estimate</option>
|
|
35 <option value="boot">Bootstrap for empirical CIs</option>
|
|
36 </param>
|
|
37 <when value="boot">
|
|
38 <param name="nboot" type="integer" value="1000" label="Number of bootstrap replicates"/>
|
|
39 </when>
|
|
40 <when value="asymptotic">
|
|
41 <param name="nboot" type="hidden" value="1000"/>
|
|
42 </when>
|
|
43 </conditional>
|
|
44 </inputs>
|
|
45 <outputs>
|
|
46 <data format="html" name="html_file" label="${title}.html"/>
|
|
47 <data format="tabular" name="nri_file" label="${title}_nrires.xls"/>
|
|
48 </outputs>
|
|
49 <tests>
|
|
50 <test>
|
13
|
51 <param name='title' value='nri_test1' />
|
3
|
52 <param name='input1' value='nri_test1.xls' ftype='tabular' />
|
13
|
53 <param name='input2' value='nri_test1.xls' ftype='tabular' />
|
|
54 <param name='input1_id' value="1" />
|
|
55 <param name='input1_observed' value="2" />
|
|
56 <param name='input1_predicted' value="3" />
|
|
57 <param name='input2_observed' value="2" />
|
|
58 <param name='input2_predicted' value="4" />
|
3
|
59 <output name='html_file' file='nri_test1_out.html' compare='diff' lines_diff='10' />
|
|
60 <output name='nri_file' file='nri_test1_out.xls' />
|
|
61 </test>
|
|
62 </tests>
|
|
63 <help>
|
|
64
|
|
65 **Before you start**
|
|
66
|
|
67 This is a simple tool to calculate various measures of improvement in prediction between two models described in pickering_paper_
|
|
68 It is based on an R script pickering_code_ written by Dr John W Pickering and Dr David Cairns from sunny Otago University which
|
|
69 has been debugged and slightly adjusted to fit a Galaxy tool wrapper.
|
|
70
|
|
71
|
|
72 **What it does**
|
|
73
|
|
74 Copied from the documentation in pickering_code_ ::
|
|
75
|
|
76
|
|
77 Functions to create risk assessment plots and associated summary statistics
|
|
78
|
|
79
|
|
80 (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns
|
|
81 Last modified August 2014
|
|
82
|
|
83 Redistribution and use in source and binary forms, with or without
|
|
84 modification, are permitted provided that the following conditions are met:
|
|
85 * Redistributions of source code must retain the above copyright
|
|
86 notice, this list of conditions and the following disclaimer.
|
|
87 * Redistributions in binary form must reproduce the above copyright
|
|
88 notice, this list of conditions and the following disclaimer in
|
|
89 the documentation and/or other materials provided with the distribution
|
|
90
|
|
91 FUNCTIONS
|
|
92 raplot
|
|
93 Produces a Risk Assessment Plot and outputs the coordinates of the four curves
|
|
94 Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of
|
|
95 Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355â1364. doi:10.2215/CJN.09590911
|
|
96
|
|
97 statistics.raplot
|
|
98 Produces the NRIs, IDIs, IS, IP, AUCs.
|
|
99 Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011). Extensions of net reclassification improvement calculations to
|
|
100 measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11â21. doi:10.1002/sim.4085
|
|
101 Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the
|
|
102 ROC curve to reclassification and beyond.
|
|
103 Statistics in Medicine, 27(2), 157â172. doi:10.1002/sim.2929
|
|
104 DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach.
|
|
105 Biometrics, 44(3), 837â845.
|
|
106
|
|
107 summary.raplot
|
|
108 Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot"))
|
|
109
|
|
110
|
|
111 Required arguments for all functions:
|
|
112 x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object
|
|
113 x2 is calculated risk (eg from a glm) for the alternative model
|
|
114 y is the case-control indicator (0 for controls, 1 for cases)
|
|
115 Optional argument
|
|
116 t are the boundaries of the risks for each group (ie 0, 1 and the thresholds beteween. eg c(0,0,3,0,7,1)). If missing, defaults to c(0, the incidence, 1)
|
|
117
|
|
118
|
|
119 **Input**
|
|
120
|
|
121 The observed and predicted outcomes from two models to be compared.
|
|
122
|
|
123 **Output**
|
|
124
|
|
125 Lots'o'measures (TM) see pickering_paper_ for details
|
|
126
|
|
127 **Attributions**
|
|
128
|
|
129 pickering_paper_ is the paper the caclulations performed by this tool is based on
|
|
130
|
|
131 pickering_code_ is the R function from John Pickering exposed by this Galaxy tool with minor modifications and hacks by Ross Lazarus.
|
|
132
|
|
133 Galaxy_ (that's what you are using right now!) for gluing everything together
|
|
134
|
|
135 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
|
|
136 licensed to you under the LGPL_ like other rgenetics artefacts
|
|
137
|
|
138 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
|
|
139 .. _pickering_code: http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI
|
|
140 .. _pickering_paper: http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full
|
|
141 .. _Galaxy: http://getgalaxy.org
|
|
142
|
|
143
|
|
144 </help>
|
|
145
|
|
146 <configfiles>
|
|
147 <configfile name="runme">
|
|
148
|
|
149 <![CDATA[
|
|
150
|
|
151 ### http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI code
|
|
152 ### http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full is the reference
|
|
153 ### lots of little tweaks and but fixes. Using t as a variable name seems fraught to me.
|
|
154 ### Ross Lazarus october 2014 for a Galaxy tool wrapper using the toolfactory infrastucture
|
|
155
|
|
156 #############################################################################
|
|
157 ###Functions to create risk assessment plots and associated summary statistics
|
|
158 #############################################################################
|
|
159 ###
|
|
160 ### (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns
|
|
161 ### Last modified August 2014
|
|
162 ###
|
|
163 ### Redistribution and use in source and binary forms, with or without
|
|
164 ### modification, are permitted provided that the following conditions are met:
|
|
165 ### * Redistributions of source code must retain the above copyright
|
|
166 ### notice, this list of conditions and the following disclaimer.
|
|
167 ### * Redistributions in binary form must reproduce the above copyright
|
|
168 ### notice, this list of conditions and the following disclaimer in
|
|
169 ### the documentation and/or other materials provided with the distribution
|
|
170
|
|
171 ### FUNCTIONS
|
|
172 ### raplot
|
|
173 ### Produces a Risk Assessment Plot and outputs the coordinates of the four curves
|
|
174 ### Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of
|
|
175 ### Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355â1364. doi:10.2215/CJN.09590911
|
|
176 ###
|
|
177 ### statistics.raplot
|
|
178 ### Produces the NRIs, IDIs, IS, IP, AUCs.
|
|
179 ### Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011).
|
|
180 ### Extensions of net reclassification improvement calculations to measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11â21. doi:10.1002/sim.4085
|
|
181 ### Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the ROC curve to reclassification and beyond.
|
|
182 ### Statistics in Medicine, 27(2), 157â172. doi:10.1002/sim.2929
|
|
183 ### DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach.
|
|
184 ### Biometrics, 44(3), 837â845.
|
|
185 ###
|
|
186 ### summary.raplot
|
|
187 ### Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot"))
|
|
188
|
|
189
|
|
190 ### Required arguments for all functions:
|
|
191 ### x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object
|
|
192 ### x2 is calculated risk (eg from a glm) for the alternative model
|
|
193 ### y is the case-control indicator (0 for controls, 1 for cases)
|
|
194 ### Optional argument
|
|
195 ### t are the boundaries of the risks for each group (ie 0, 1 and the thresholds beteween. eg c(0,0,3,0,7,1)). If missing, defaults to c(0, the incidence, 1)
|
|
196
|
|
197
|
|
198 ### risk assessment plot
|
|
199
|
|
200 library('e1071')
|
|
201 library('caret')
|
|
202 library('pROC')
|
|
203 library('Hmisc')
|
|
204 library('pracma')
|
|
205
|
|
206 raplot = function(x1, x2, y, outplot, title) {
|
|
207
|
|
208 roc.model1 = roc(y, x1)
|
|
209 roc.model2 = roc(y, x2)
|
|
210 sens.model1 = roc.model1\$sensitivities
|
|
211 spec.model1 = 1 - roc.model1\$specificities
|
|
212 n.model1 = length(sens.model1)
|
|
213 thresh.model1 = roc.model1\$thresholds
|
|
214 thresh.model1 = thresh.model1[c(-1,-n.model1)]
|
|
215 sens.model1 = sens.model1[c(-1,-n.model1)]
|
|
216 spec.model1 = spec.model1[c(-1,-n.model1)]
|
|
217 sens.model2 = roc.model2\$sensitivities
|
|
218 spec.model2 = 1 - roc.model2\$specificities
|
|
219 n.model2 = length(sens.model2)
|
|
220 thresh.model2 = roc.model2\$thresholds
|
|
221 thresh.model2[1]=0
|
|
222 thresh.model2[length(thresh.model2)]=1
|
|
223 thresh.model2 = thresh.model2[c(-1,-n.model2)]
|
|
224 sens.model2 = sens.model2[c(-1,-n.model2)]
|
|
225 spec.model2 = spec.model2[c(-1,-n.model2)]
|
|
226
|
|
227 n.model1 = length(sens.model1)
|
|
228 n.model2 = length(sens.model2)
|
|
229
|
|
230 ### actual plotting
|
|
231 pdf(outplot)
|
|
232 plot(thresh.model1, sens.model1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
|
|
233 lty = 2, lwd = 2, xlab = "Risk of Event", ylab = "", col = "black", main=title)
|
|
234 grid()
|
|
235
|
|
236 polygon(x = c(thresh.model1, thresh.model2[n.model2:1]),
|
|
237 y = c(sens.model1, sens.model2[n.model2:1]), border = NA, col = gray(0.8))
|
|
238 polygon(x = c(thresh.model1, thresh.model2[n.model2:1]),
|
|
239 y = c(spec.model1, spec.model2[n.model2:1]), border = NA, col = gray(0.8))
|
|
240
|
|
241 lines(thresh.model1, sens.model1, type = "l", lty = 2, lwd = 2, col = "black")
|
|
242 lines(thresh.model2, sens.model2, type = "l", lty = 1, lwd = 2, col = "black")
|
|
243
|
|
244 lines(thresh.model1, spec.model1, type = "l", lty = 2, lwd = 2, col = "red")
|
|
245 lines(thresh.model2, spec.model2, type = "l", lty = 1, lwd = 2, col = "red")
|
|
246
|
|
247 text(x = -0.15, y = 0.4, labels = "Sensitivity, ", col = "black", xpd = TRUE, srt = 90)
|
|
248 text(x = -0.15, y = 0.4 + 0.175, labels = "1-Specificity", col = "red", xpd = TRUE, srt = 90)
|
|
249 legend("topleft", c("Event: New model", "Event: Baseline model",
|
|
250 "No Event: New model", "No Event: Baseline model"),
|
|
251 col = c("black", "black", "red", "red"),
|
|
252 lty = c(1,2, 1, 2), lwd = 2, bty = "n")
|
|
253 dev.off()
|
|
254 return(data.frame("Null.p.sens"=thresh.model1,
|
|
255 "Null.sens"=sens.model1,
|
|
256 "Null.p.1spec"=thresh.model1,
|
|
257 "Null.1spec"=sens.model1,
|
|
258 "Alt.p.sens"=thresh.model2,
|
|
259 "Alt.sens"=sens.model2,
|
|
260 "Alt.p.1spec"=thresh.model2,
|
|
261 "Alt.1spec"=sens.model2))
|
|
262
|
|
263 }
|
|
264
|
|
265
|
|
266
|
|
267 ### statistics from a raplot (is an adaptation of improveProb() from Hmisc)
|
|
268
|
|
269 statistics.raplot = function(x1, x2, y, threshvec)
|
|
270 {
|
|
271
|
|
272 s = is.na(x1 + x2 + y) ###Remove rows with missing data
|
|
273 if (any(s)) {
|
|
274 smiss = sum(s)
|
|
275 s = !s
|
|
276 x1 = x1[s]
|
|
277 x2 = x2[s]
|
|
278 y = y[s]
|
|
279 print.noquote(paste('Warning: removed',smiss,'cases with missing values'))
|
|
280 }
|
|
281 n = length(y)
|
|
282 y = as.numeric(y)
|
|
283 u = sort(unique(y))
|
|
284 if (length(u) != 2 || u[1] != 0 || u[2] != 1) {
|
|
285 print.noquote("INPUT ERROR: y must have only two values: 0 and 1")
|
|
286 sink()
|
|
287 quit(save="no",status=2)
|
|
288 }
|
|
289 r = range(x1, x2)
|
|
290 if (r[1] < 0 || r[2] > 1) {
|
|
291 print.noquote("INPUT ERROR: x1 and x2 must be in [0,1]")
|
|
292 sink()
|
|
293 quit(save="no",status=3)
|
|
294 }
|
|
295 incidence=sum(y)/n
|
|
296 if (missing(threshvec)) {
|
|
297 threshvec=c(0, incidence,1)
|
|
298 print(paste('threshvec missing. using',paste(threshvec,collapse=',')))
|
|
299 }
|
|
300 a = (y == 1)
|
|
301 b = (y == 0)
|
|
302 na = sum(a)
|
|
303 nb = sum(b)
|
|
304 d = x2 - x1
|
|
305 ### NRI
|
|
306 n.thresh=length(threshvec)-1
|
|
307 risk.class.x1.ev=cut2(x1[a],threshvec)
|
|
308 risk.class.x2.ev=cut2(x2[a],threshvec)
|
|
309 thresh=c()
|
|
310 lt = length(threshvec)
|
|
311 for (i in 1:(lt-1)) {
|
|
312 thresh[i] = paste("[",toString(threshvec[i]),",",toString(threshvec[i+1]),"]")
|
|
313 }
|
|
314 levels(risk.class.x1.ev)=thresh
|
|
315 levels(risk.class.x2.ev)=thresh
|
|
316 cM.ev=confusionMatrix(risk.class.x2.ev,risk.class.x1.ev)
|
|
317 pup.ev=0
|
|
318 pdown.ev=0
|
|
319 for (i in 1:(n.thresh-1)) { pup.ev = pup.ev + sum(cM.ev\$table[(i+1):n.thresh,i])}
|
|
320 for (i in 2:n.thresh) { pdown.ev = pdown.ev + sum(cM.ev\$table[1:(i-1),i])}
|
|
321 pup.ev=pup.ev/na
|
|
322 pdown.ev=pdown.ev/na
|
|
323 risk.class.x1.ne=cut2(x1[b],threshvec)
|
|
324 risk.class.x2.ne=cut2(x2[b],threshvec)
|
|
325 levels(risk.class.x1.ne)=thresh
|
|
326 levels(risk.class.x2.ne)=thresh
|
|
327 cM.ne=confusionMatrix(risk.class.x2.ne,risk.class.x1.ne)
|
|
328 pup.ne=0
|
|
329 pdown.ne=0
|
|
330 for (i in 1:(n.thresh-1)){pup.ne=pup.ev+sum(cM.ne\$table[(i+1):n.thresh,i])}
|
|
331 for (i in 2:n.thresh){pdown.ne=pdown.ne+sum(cM.ne\$table[1:(i-1),i])}
|
|
332 pdown.ne=pdown.ne/nb
|
|
333 pup.ne=pup.ne/nb
|
|
334 nri = pup.ev - pdown.ev - (pup.ne - pdown.ne)
|
|
335 se.nri = sqrt((pup.ev + pdown.ev)/na + (pup.ne + pdown.ne)/nb)
|
|
336 z.nri = nri/se.nri
|
|
337 nri.ev = pup.ev - pdown.ev
|
|
338 se.nri.ev = sqrt((pup.ev + pdown.ev)/na)
|
|
339 z.nri.ev = nri.ev/se.nri.ev
|
|
340 nri.ne = pdown.ne - pup.ne
|
|
341 se.nri.ne = sqrt((pdown.ne + pup.ne)/nb)
|
|
342 z.nri.ne = nri.ne/se.nri.ne
|
|
343 ### Category Free NRI calculations
|
|
344 cfpup.ev = mean(d[a] > 0)
|
|
345 cfpup.ne = mean(d[b] > 0)
|
|
346 cfpdown.ev = mean(d[a] < 0)
|
|
347 cfpdown.ne = mean(d[b] < 0)
|
|
348 cfnri = cfpup.ev - cfpdown.ev - (cfpup.ne - cfpdown.ne)
|
|
349 se.cfnri = sqrt((cfpup.ev + cfpdown.ev)/na + (cfpup.ne + cfpdown.ne)/nb)
|
|
350 z.cfnri = cfnri/se.cfnri
|
|
351 cfnri.ev = cfpup.ev - cfpdown.ev
|
|
352 se.cfnri.ev = sqrt((cfpup.ev + cfpdown.ev)/na)
|
|
353 z.cfnri.ev = cfnri.ev/se.cfnri.ev
|
|
354 cfnri.ne = cfpdown.ne - cfpup.ne
|
|
355 se.cfnri.ne = sqrt((cfpdown.ne + cfpup.ne)/nb)
|
|
356 z.cfnri.ne = cfnri.ne/se.cfnri.ne
|
|
357 ### IDI calculations
|
|
358 improveSens = sum(d[a])/na
|
|
359 improveSpec = -sum(d[b])/nb
|
|
360 idi.ev = mean(improveSens)
|
|
361 idi.ne = mean(improveSpec)
|
|
362 idi = idi.ev - idi.ne
|
|
363 var.ev = var(d[a])/na
|
|
364 se.idi.ev = sqrt(var.ev)
|
|
365 z.idi.ev = idi.ev/se.idi.ev
|
|
366 var.ne = var(d[b])/nb
|
|
367 se.idi.ne = sqrt(var.ne)
|
|
368 z.idi.ne = idi.ne/se.idi.ne
|
|
369 se.idi = sqrt(var.ev + var.ne)
|
|
370 z.idi = idi/se.idi
|
|
371 ### AUC calculations
|
|
372 roc.x1 = roc(y, x1)
|
|
373 auc.x1 = auc(roc.x1)
|
|
374 ci.auc.x1 = ci.auc(roc.x1)
|
|
375 se.auc.x1 = (ci.auc.x1[3] - auc.x1)/qnorm(0.975)
|
|
376 roc.x2 = roc(y, x2)
|
|
377 auc.x2 = auc(roc.x2)
|
|
378 ci.auc.x2 = ci.auc(roc.x2)
|
|
379 se.auc.x2 = (ci.auc.x2[3] - auc.x2)/qnorm(0.975)
|
|
380 roc.test.x1.x2 = roc.test(roc.x1, roc.x2) ###Uses the default Delong method
|
|
381 sens.x1 = roc.x1\$sensitivities
|
|
382 spec.x1 = 1 - roc.x1\$specificities
|
|
383 n.x1 = length(sens.x1)
|
|
384 x1 = roc.x1\$thresholds
|
|
385 x1 = x1[c(-1,-n.x1)]
|
|
386 sens.x1 = sens.x1[c(-1,-n.x1)]
|
|
387 spec.x1 = spec.x1[c(-1,-n.x1)]
|
|
388 sens.x2 = roc.x2\$sensitivities
|
|
389 spec.x2 = 1 - roc.x2\$specificities
|
|
390 n.x2 = length(sens.x2)
|
|
391 x2 = roc.x2\$thresholds
|
|
392 x2 = x2[c(-1,-n.x2)]
|
|
393 sens.x2 = sens.x2[c(-1,-n.x2)]
|
|
394 spec.x2 = spec.x2[c(-1,-n.x2)]
|
|
395 ### Integrated sensitivity and 1-specificity calculations
|
|
396 is.x1 = trapz(x = x1, y = sens.x1) ### area under curves (relates to integrated sens, 1-spec)
|
|
397 is.x2 = trapz(x = x2, y = sens.x2)
|
|
398 ip.x1 = trapz(x = x1, y = spec.x1)
|
|
399 ip.x2 = trapz(x = x2, y = spec.x2)
|
|
400
|
|
401 ### Output
|
|
402 output = c(n, na, nb, pup.ev, pup.ne, pdown.ev, pdown.ne, nri, se.nri, z.nri,
|
|
403 nri.ev, se.nri.ev, z.nri.ev, nri.ne, se.nri.ne, z.nri.ne,
|
|
404 cfpup.ev, cfpup.ne, cfpdown.ev, cfpdown.ne, cfnri, se.cfnri, z.cfnri,
|
|
405 cfnri.ev, se.cfnri.ev, z.cfnri.ev, cfnri.ne, se.cfnri.ne, z.cfnri.ne,
|
|
406 improveSens, improveSpec, idi.ev, se.idi.ev, z.idi.ev, idi.ne,
|
|
407 se.idi.ne, z.idi.ne, idi, se.idi, z.idi, is.x1, NA, is.x2, NA,
|
|
408 ip.x1, NA, ip.x2, NA, auc.x1, se.auc.x1, auc.x2, se.auc.x2,
|
|
409 roc.test.x1.x2\$p.value,incidence)
|
|
410 names(output) = c("n", "na", "nb", "pup.ev", "pup.ne", "pdown.ev", "pdown.ne",
|
|
411 "nri", "se.nri", "z.nri", "nri.ev", "se.nri.ev", "z.nri.ev",
|
|
412 "nri.ne", "se.nri.ne", "z.nri.ne",
|
|
413 "cfpup.ev", "cfpup.ne", "cfpdown.ev", "cfpdown.ne",
|
|
414 "cfnri", "se.cfnri", "z.cfnri", "cfnri.ev", "se.cfnri.ev", "z.cfnri.ev",
|
|
415 "cfnri.ne", "se.cfnri.ne", "z.cfnri.ne", "improveSens", "improveSpec",
|
|
416 "idi.ev", "se.idi.ev", "z.idi.ev", "idi.ne", "se.idi.ne",
|
|
417 "z.idi.ne", "idi", "se.idi", "z.idi", "is.x1", "se.is.x1",
|
|
418 "is.x2", "se.is.x2", "ip.x1", "se.ip.x1", "ip.x2", "se.ip.x2",
|
|
419 "auc.x1", "se.auc.x1", "auc.x2", "se.auc.x2",
|
|
420 "roc.test.x1.x2.pvalue","incidence")
|
|
421 resdf = data.frame(N=n, Na=na, Nb=nb, pup.ev=pup.ev, pup.ne=pup.ne, pdown.ev=pdown.ev, pdown.ne=pdown.ne, NRI=nri, NRI.se=se.nri, NRI.z=z.nri,
|
|
422 NRI.ev=nri.ev, NRI.ev.se=se.nri.ev, NRI.ev.z=z.nri.ev, NRI.ne=nri.ne, NRI.ne.se=se.nri.ne, NRI.ne.z=z.nri.ne,
|
|
423 cfpup.ev=cfpup.ev, cfpup.ne=cfpup.ne, cfpdown.ev=cfpdown.ev, cfpdown.ne=cfpdown.ne, CFNRI=cfnri, CFNRI.se=se.cfnri, CFNRI.z=z.cfnri,
|
|
424 CFNRI.ev=cfnri.ev, CFNRI.ev.se=se.cfnri.ev, CFNRI.ev.z=z.cfnri.ev, CFNRI.ne=cfnri.ne, CFNRI.ne.se=se.cfnri.ne, CFNRI.ne.z=z.cfnri.ne,
|
|
425 improvSens=improveSens, improvSpec=improveSpec, IDI.ev=idi.ev, IDI.ev.se=se.idi.ev, IDI.ev.z=z.idi.ev, IDI.ne=idi.ne,
|
|
426 IDI.ne.se=se.idi.ne, IDI.ne.z=z.idi.ne, IDI=idi, IDI.se=se.idi, IDI.z=z.idi, isx1=is.x1, isx2=is.x2,
|
|
427 ipxi=ip.x1, ipx2=ip.x2, AUC.x1=auc.x1, AUC.x1.se=se.auc.x1, AUC.x2=auc.x2, AUC.x2.se=se.auc.x2,
|
|
428 roctestpval=roc.test.x1.x2\$p.value,incidence=incidence)
|
|
429 tr = t(resdf)
|
|
430 tresdf = data.frame(measure=colnames(resdf),value=tr[,1])
|
|
431 return(list(resdf=tresdf,output=output))
|
|
432 }
|
|
433
|
|
434
|
|
435 ### More comprehensive summary statistics from a raplot
|
|
436 ### Choice of confidence intervals determined through asymptotics or bootstrapping (n.boot = ### of bootstrap resamples)
|
|
437 ### dp is number of decimal places for results table
|
|
438
|
|
439 summary.raplot = function(x1, x2, y, threshvec, cis = "boot", conf.level = 0.95, n.boot = 2000, dp = 4, stat_ra=NA)
|
|
440 {
|
|
441 results = stat_ra
|
|
442 if (cis == "boot") {
|
|
443 print.noquote("Bootstrap estimates for SE")
|
|
444 results.boot = matrix(NA, n.boot, length(names(results)))
|
|
445
|
|
446 colnames(results.boot) = names(results)
|
|
447
|
|
448 for (i in 1:n.boot) {
|
|
449 ###boot.index = sample(length(cc.status), replace = TRUE)
|
|
450 ###risk.model1.boot = risk.model1[boot.index]
|
|
451 ###risk.model2.boot = risk.model2[boot.index]
|
|
452 ###cc.status.boot = cc.status[boot.index]
|
|
453 boot.index = sample(length(y), replace = TRUE)
|
|
454 risk.model1.boot = x1[boot.index]
|
|
455 risk.model2.boot = x2[boot.index]
|
|
456 cc.status.boot = y[boot.index]
|
|
457 r = statistics.raplot(x1 = risk.model1.boot, x2 = risk.model2.boot, y = cc.status.boot)
|
|
458 results.boot[i, ] = r\$output
|
|
459 }
|
|
460
|
|
461 results.se.boot = apply(results.boot, 2, sd)
|
|
462 print(paste(results.se.boot,collapse=','))
|
|
463
|
|
464
|
|
465 results[grep("se", names(results))] = results.se.boot[grep("se", names(results)) - 1]
|
|
466
|
|
467 }
|
|
468
|
|
469
|
|
470
|
|
471 ### calculate cis and return
|
|
472
|
|
473 z = abs(qnorm((1 - conf.level)/2))
|
|
474
|
|
475 results.matrix = matrix(NA, 24, 2)
|
|
476
|
|
477 results.matrix[1, ] = c("Total (n)", results["n"])
|
|
478 results.matrix[2, ] = c("Events (n)", results["na"])
|
|
479 results.matrix[3, ] = c("Non-events (n)", results["nb"])
|
|
480 results.matrix[4, ] = c("Category free NRI and summary statistics","-------------------------")
|
|
481 results.matrix[5, ] = c("cfNRI events (%)",
|
|
482 paste(round(100*results["cfnri.ev"], dp-2), " (",
|
|
483 round(100*results["cfnri.ev"] - z * 100*results["se.cfnri.ev"], dp-2),
|
|
484 " to ", round(100*results["cfnri.ev"] +
|
|
485 z * 100*results["se.cfnri.ev"], dp-2), ")", sep = ""))
|
|
486 results.matrix[6, ] = c("cfNRI non-events (%)",
|
|
487 paste(round(100*results["cfnri.ne"], dp-2), " (",
|
|
488 round(100*results["cfnri.ne"] - z * 100*results["se.cfnri.ne"], dp)-2,
|
|
489 " to ", round(100*results["cfnri.ne"] + z * 100*results["se.cfnri.ne"],
|
|
490 dp-2), ")", sep = ""))
|
|
491 results.matrix[7, ] = c("cfNRI (%)",
|
|
492 paste(round(100*results["cfnri"], dp-2), " (",
|
|
493 round(100*results["cfnri"] - z * 100*results["se.cfnri"], dp-2),
|
|
494 " to ", round(100*results["cfnri"] + z * 100*results["se.cfnri"],
|
|
495 dp-2), ")", sep = ""))
|
|
496 results.matrix[8, ] = c("NRI and summary statistics","-------------------------")
|
|
497 results.matrix[9, ] = c("NRI events (%)",
|
|
498 paste(round(100*results["nri.ev"], dp-2), " (",
|
|
499 round(100*results["nri.ev"] - z * 100*results["se.nri.ev"], dp-2),
|
|
500 " to ", round(100*results["nri.ev"] +
|
|
501 z * 100*results["se.nri.ev"], dp-2), ")", sep = ""))
|
|
502 results.matrix[10, ] = c("NRI non-events (%)",
|
|
503 paste(round(100*results["nri.ne"], dp-2), " (",
|
|
504 round(100*results["nri.ne"] - z * 100*results["se.nri.ne"], dp-2),
|
|
505 " to ", round(100*results["nri.ne"] + z * 100*results["se.nri.ne"],
|
|
506 dp-2), ")", sep = ""))
|
|
507 results.matrix[11, ] = c("NRI (%)",
|
|
508 paste(round(100*results["nri"], dp-2), " (",
|
|
509 round(100*results["nri"] - z * 100*results["se.nri"], dp-2),
|
|
510 " to ", round(100*results["nri"] + z * 100*results["se.nri"],
|
|
511 dp-2), ")", sep = ""))
|
|
512 results.matrix[12, ] = c("IDI and summary statistics","-------------------------")
|
|
513 results.matrix[13, ] = c("IDI events",
|
|
514 paste(round(results["idi.ev"], dp), " (",
|
|
515 round(results["idi.ev"] - z * results["se.idi.ev"], dp),
|
|
516 " to ", round(results["idi.ev"] + z * results["se.idi.ev"],
|
|
517 dp), ")", sep = ""))
|
|
518 results.matrix[14, ] = c("IDI non-events",
|
|
519 paste(round(results["idi.ne"], dp), " (",
|
|
520 round(results["idi.ne"] - z * results["se.idi.ne"], dp),
|
|
521 " to ", round(results["idi.ne"] + z * results["se.idi.ne"],
|
|
522 dp), ")", sep = ""))
|
|
523 results.matrix[15, ] = c("IDI",
|
|
524 paste(round(results["idi"], dp), " (",
|
|
525 round(results["idi"] - z * results["se.idi"], dp),
|
|
526 " to ", round(results["idi"] + z * results["se.idi"],
|
|
527 dp), ")", sep = ""))
|
|
528 results.matrix[16, ] = c("IS (null model)",
|
|
529 paste(round(results["is.x1"], dp), " (",
|
|
530 round(results["is.x1"] - z * results["se.is.x1"], dp),
|
|
531 " to ", round(results["is.x1"] + z * results["se.is.x1"],
|
|
532 dp), ")", sep = ""))
|
|
533 results.matrix[17, ] = c("IS (alt model)",
|
|
534 paste(round(results["is.x2"], dp), " (",
|
|
535 round(results["is.x2"] - z * results["se.is.x2"], dp),
|
|
536 " to ", round(results["is.x2"] + z * results["se.is.x2"],
|
|
537 dp), ")", sep = ""))
|
|
538 results.matrix[18, ] = c("IP (null model)",
|
|
539 paste(round(results["ip.x1"], dp), " (",
|
|
540 round(results["ip.x1"] - z * results["se.ip.x1"], dp),
|
|
541 " to ", round(results["ip.x1"] + z * results["se.ip.x1"],
|
|
542 dp), ")", sep = ""))
|
|
543 results.matrix[19, ] = c("IP (alt model)",
|
|
544 paste(round(results["ip.x2"], dp), " (",
|
|
545 round(results["ip.x2"] - z * results["se.ip.x2"], dp),
|
|
546 " to ", round(results["ip.x2"] + z * results["se.ip.x2"],
|
|
547 dp), ")", sep = ""))
|
|
548 results.matrix[20, ] = c("AUC","-------------------------")
|
|
549 results.matrix[21, ] = c("AUC (null model)",
|
|
550 paste(round(results["auc.x1"], dp), " (",
|
|
551 round(results["auc.x1"] - z * results["se.auc.x1"], dp),
|
|
552 " to ", round(results["auc.x1"] + z * results["se.auc.x1"],
|
|
553 dp), ")", sep = ""))
|
|
554 results.matrix[22, ] = c("AUC (alt model)",
|
|
555 paste(round(results["auc.x2"], dp), " (",
|
|
556 round(results["auc.x2"] - z * results["se.auc.x2"], dp),
|
|
557 " to ", round(results["auc.x2"] + z * results["se.auc.x2"],
|
|
558 dp), ")", sep = ""))
|
|
559 results.matrix[23, ] = c("difference (P)", round(results["roc.test.x1.x2.pvalue"], dp))
|
|
560 results.matrix[24, ] = c("Incidence", round(results["incidence"], dp))
|
|
561
|
|
562 return(results.matrix)
|
|
563 }
|
|
564
|
|
565
|
|
566
|
|
567 ]]>
|
|
568
|
|
569 options(width=120)
|
|
570 options(digits=5)
|
|
571 logf = file("rgNRI.log", open = "a")
|
|
572 sink(logf,type = c("output", "message"))
|
|
573 Out_Dir = "$html_file.files_path"
|
|
574 Input1 = "$input1"
|
|
575 Input2 = "$input2"
|
|
576 myTitle = "$title"
|
|
577 outtab = "$nri_file"
|
|
578 input1_obs = $input1_observed
|
|
579 input1_pred = $input1_predicted
|
|
580 input1_id = $input1_id
|
|
581 input2_obs = $input2_observed
|
|
582 input2_pred = $input2_predicted
|
|
583 input2_id = $input2_id
|
|
584 in1 = read.table(Input1,head=T,sep='\t')
|
|
585 in2 = read.table(Input2,head=T,sep='\t')
|
|
586 id1 = in1[,input1_id]
|
|
587 id2 = in2[,input2_id]
|
|
588 useme1 = in1[which(id1 %in% id2),]
|
|
589 useme2 = in2[which(id2 %in% id1),]
|
|
590 id1 = useme1[,input1_id]
|
|
591 id2 = useme2[,input2_id]
|
|
592 useme1 = useme1[order(id1),]
|
|
593 useme2 = useme2[order(id2),]
|
|
594 x1 = useme1[,input1_pred]
|
|
595 x2 = useme2[,input2_pred]
|
|
596 y1 = useme1[,input1_obs]
|
|
597 y2 = useme2[,input2_obs]
|
|
598 n.boot = $CImeth.nboot
|
|
599 conf.level = 0.95
|
|
600 cis = "$CImeth.cis"
|
|
601 digits = 4
|
|
602 nydiff = sum(y1 != y2)
|
|
603 if (nydiff > 0) {
|
|
604 print.noquote(paste('Input error: observed status column has',nydiff,'differences - cannot reliably proceed'))
|
|
605 quit(save="no",status=1)
|
|
606 }
|
|
607 y = y2
|
|
608 outplot = 'rgNRI_EventRisk.pdf'
|
|
609 res = raplot(x1=x1, x2=x2, y=y, outplot=outplot,title=myTitle)
|
|
610
|
|
611 stats = statistics.raplot(x1=x1, x2=x2, y=y)
|
|
612 res1 = stats\$resdf
|
|
613 out1 = stats\$output
|
|
614 print.noquote('Results:')
|
|
615 print.noquote(res1,digits=4)
|
|
616 res2 = summary.raplot(x1=x1, x2=x2, y=y, cis = cis, conf.level = conf.level, n.boot = n.boot, dp = digits, stat_ra=out1)
|
|
617 print.noquote('Summary:')
|
|
618 print.noquote(res2,digits=4)
|
|
619 write.table(format(res1,digits=4),outtab,quote=F, col.names=F,sep="\t",row.names=F)
|
|
620 print.noquote('SessionInfo for this R session:')
|
|
621 sessionInfo()
|
|
622 print.noquote('warnings for this R session:')
|
|
623 warnings()
|
|
624 sink()
|
|
625 </configfile>
|
|
626 </configfiles>
|
|
627 <citations>
|
|
628 <citation type="doi">doi: 10.2215/âCJN.09590911</citation>
|
|
629 </citations>
|
|
630 </tool>
|
|
631
|
|
632
|
|
633
|