comparison rg_nri.xml @ 17:0e87f636bdd8 draft

Uploaded
author iuc
date Tue, 28 Apr 2015 22:56:48 -0400
parents
children bb725f6d6d38
comparison
equal deleted inserted replaced
16:fa6d1e1a84c9 17:0e87f636bdd8
1 <tool id="rg_nri" name="NRI" version="0.03">
2 <description>and other model improvement measures</description>
3 <requirements>
4 <requirement type="package" version="3.1.1">R_3_1_1</requirement>
5 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
6 <requirement type="package" version="9.10">ghostscript</requirement>
7 <requirement type="package" version="2.14">glmnet_lars_2_14</requirement>
8 </requirements>
9 <command interpreter="python">
10 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "rg_NRI"
11 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
12 </command>
13 <inputs>
14 <param name="title" type="text" value="NRI test" size="80" label="Plot Title" help="Will appear as the title for the comparison plot"/>
15 <param name="input1" type="data" format="tabular" label="Select a tabular file from the baseline model with predicted and observed outcome column for each subject"
16 multiple='False' help="Observed and predicted status columns must be selected from this file below - NOTE both models must be in same order with exact matches in all observed outcomes" optional="False"/>
17 <param name="input1_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input1" numerical="True"
18 multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
19 <param name="input1_predicted" label="Select column containing predicted event probabilies from baseline model" type="data_column" data_ref="input1" numerical="True"
20 multiple="False" use_header_names="True" optional="False" help="Must be in range 0-1"/>
21 <param name="input1_id" label="Select column containing subject ID from baseline model" type="data_column" data_ref="input1" numerical="True"
22 multiple="False" use_header_names="True" optional="False" help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/>
23 <param name="input2" type="data" format="tabular" label="Select a tabular file from the new model with predicted and observed outcome columns for each subject"
24 multiple='False' help="Observed and predicted status columns must be selected from this file below" />
25 <param name="input2_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input2" numerical="True"
26 multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
27 <param name="input2_predicted" label="Select column containing predicted event probabilities from the new model" type="data_column" data_ref="input2" numerical="True"
28 multiple="False" use_header_names="True" optional="False" help="Must be in range 0-1"/>
29 <param name="input2_id" label="Select column containing subject ID from the new model" type="data_column" data_ref="input2" numerical="True"
30 multiple="False" use_header_names="True" optional="False" help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/>
31 <conditional name="CImeth">
32 <param name="cis" type="select" label="CI calculation method"
33 help="Bootstrap will take time - a long time for thousands - asymptotic is quick and informative">
34 <option value="asymptotic" selected="true">Asymptotic estimate</option>
35 <option value="boot">Bootstrap for empirical CIs</option>
36 </param>
37 <when value="boot">
38 <param name="nboot" type="integer" value="1000" label="Number of bootstrap replicates"/>
39 </when>
40 <when value="asymptotic">
41 <param name="nboot" type="hidden" value="1000"/>
42 </when>
43 </conditional>
44 </inputs>
45 <outputs>
46 <data format="html" name="html_file" label="${title}.html"/>
47 <data format="tabular" name="nri_file" label="${title}_nrires.xls"/>
48 </outputs>
49 <tests>
50 <test>
51 <param name='title' value='nri_test1' />
52 <param name='input1' value='nri_test1.xls' ftype='tabular' />
53 <param name='input2' value='nri_test1.xls' ftype='tabular' />
54 <param name='input1_id' value="1" />
55 <param name='input1_observed' value="2" />
56 <param name='input1_predicted' value="3" />
57 <param name='input2_observed' value="2" />
58 <param name='input2_predicted' value="4" />
59 <output name='html_file' file='nri_test1_out.html' compare='diff' lines_diff='10' />
60 <output name='nri_file' file='nri_test1_out.xls' />
61 </test>
62 </tests>
63 <help>
64
65 **Before you start**
66
67 This is a simple tool to calculate various measures of improvement in prediction between two models described in pickering_paper_
68 It is based on an R script pickering_code_ written by Dr John W Pickering and Dr David Cairns from sunny Otago University which
69 has been debugged and slightly adjusted to fit a Galaxy tool wrapper.
70
71
72 **What it does**
73
74 Copied from the documentation in pickering_code_ ::
75
76
77 Functions to create risk assessment plots and associated summary statistics
78
79
80 (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns
81 Last modified August 2014
82
83 Redistribution and use in source and binary forms, with or without
84 modification, are permitted provided that the following conditions are met:
85 * Redistributions of source code must retain the above copyright
86 notice, this list of conditions and the following disclaimer.
87 * Redistributions in binary form must reproduce the above copyright
88 notice, this list of conditions and the following disclaimer in
89 the documentation and/or other materials provided with the distribution
90
91 FUNCTIONS
92 raplot
93 Produces a Risk Assessment Plot and outputs the coordinates of the four curves
94 Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of
95 Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355–1364. doi:10.2215/CJN.09590911
96
97 statistics.raplot
98 Produces the NRIs, IDIs, IS, IP, AUCs.
99 Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011). Extensions of net reclassification improvement calculations to
100 measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11–21. doi:10.1002/sim.4085
101 Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the
102 ROC curve to reclassification and beyond.
103 Statistics in Medicine, 27(2), 157–172. doi:10.1002/sim.2929
104 DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach.
105 Biometrics, 44(3), 837–845.
106
107 summary.raplot
108 Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot"))
109
110
111 Required arguments for all functions:
112 x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object
113 x2 is calculated risk (eg from a glm) for the alternative model
114 y is the case-control indicator (0 for controls, 1 for cases)
115 Optional argument
116 t are the boundaries of the risks for each group (ie 0, 1 and the thresholds beteween. eg c(0,0,3,0,7,1)). If missing, defaults to c(0, the incidence, 1)
117
118
119 **Input**
120
121 The observed and predicted outcomes from two models to be compared.
122
123 **Output**
124
125 Lots'o'measures (TM) see pickering_paper_ for details
126
127 **Attributions**
128
129 pickering_paper_ is the paper the caclulations performed by this tool is based on
130
131 pickering_code_ is the R function from John Pickering exposed by this Galaxy tool with minor modifications and hacks by Ross Lazarus.
132
133 Galaxy_ (that's what you are using right now!) for gluing everything together
134
135 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
136 licensed to you under the LGPL_ like other rgenetics artefacts
137
138 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
139 .. _pickering_code: http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI
140 .. _pickering_paper: http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full
141 .. _Galaxy: http://getgalaxy.org
142
143
144 </help>
145
146 <configfiles>
147 <configfile name="runme">
148
149 <![CDATA[
150
151 ### http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI code
152 ### http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full is the reference
153 ### lots of little tweaks and but fixes. Using t as a variable name seems fraught to me.
154 ### Ross Lazarus october 2014 for a Galaxy tool wrapper using the toolfactory infrastucture
155
156 #############################################################################
157 ###Functions to create risk assessment plots and associated summary statistics
158 #############################################################################
159 ###
160 ### (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns
161 ### Last modified August 2014
162 ###
163 ### Redistribution and use in source and binary forms, with or without
164 ### modification, are permitted provided that the following conditions are met:
165 ### * Redistributions of source code must retain the above copyright
166 ### notice, this list of conditions and the following disclaimer.
167 ### * Redistributions in binary form must reproduce the above copyright
168 ### notice, this list of conditions and the following disclaimer in
169 ### the documentation and/or other materials provided with the distribution
170
171 ### FUNCTIONS
172 ### raplot
173 ### Produces a Risk Assessment Plot and outputs the coordinates of the four curves
174 ### Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of
175 ### Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355–1364. doi:10.2215/CJN.09590911
176 ###
177 ### statistics.raplot
178 ### Produces the NRIs, IDIs, IS, IP, AUCs.
179 ### Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011).
180 ### Extensions of net reclassification improvement calculations to measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11–21. doi:10.1002/sim.4085
181 ### Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the ROC curve to reclassification and beyond.
182 ### Statistics in Medicine, 27(2), 157–172. doi:10.1002/sim.2929
183 ### DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach.
184 ### Biometrics, 44(3), 837–845.
185 ###
186 ### summary.raplot
187 ### Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot"))
188
189
190 ### Required arguments for all functions:
191 ### x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object
192 ### x2 is calculated risk (eg from a glm) for the alternative model
193 ### y is the case-control indicator (0 for controls, 1 for cases)
194 ### Optional argument
195 ### t are the boundaries of the risks for each group (ie 0, 1 and the thresholds beteween. eg c(0,0,3,0,7,1)). If missing, defaults to c(0, the incidence, 1)
196
197
198 ### risk assessment plot
199
200 library('e1071')
201 library('caret')
202 library('pROC')
203 library('Hmisc')
204 library('pracma')
205
206 raplot = function(x1, x2, y, outplot, title) {
207
208 roc.model1 = roc(y, x1)
209 roc.model2 = roc(y, x2)
210 sens.model1 = roc.model1\$sensitivities
211 spec.model1 = 1 - roc.model1\$specificities
212 n.model1 = length(sens.model1)
213 thresh.model1 = roc.model1\$thresholds
214 thresh.model1 = thresh.model1[c(-1,-n.model1)]
215 sens.model1 = sens.model1[c(-1,-n.model1)]
216 spec.model1 = spec.model1[c(-1,-n.model1)]
217 sens.model2 = roc.model2\$sensitivities
218 spec.model2 = 1 - roc.model2\$specificities
219 n.model2 = length(sens.model2)
220 thresh.model2 = roc.model2\$thresholds
221 thresh.model2[1]=0
222 thresh.model2[length(thresh.model2)]=1
223 thresh.model2 = thresh.model2[c(-1,-n.model2)]
224 sens.model2 = sens.model2[c(-1,-n.model2)]
225 spec.model2 = spec.model2[c(-1,-n.model2)]
226
227 n.model1 = length(sens.model1)
228 n.model2 = length(sens.model2)
229
230 ### actual plotting
231 pdf(outplot)
232 plot(thresh.model1, sens.model1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
233 lty = 2, lwd = 2, xlab = "Risk of Event", ylab = "", col = "black", main=title)
234 grid()
235
236 polygon(x = c(thresh.model1, thresh.model2[n.model2:1]),
237 y = c(sens.model1, sens.model2[n.model2:1]), border = NA, col = gray(0.8))
238 polygon(x = c(thresh.model1, thresh.model2[n.model2:1]),
239 y = c(spec.model1, spec.model2[n.model2:1]), border = NA, col = gray(0.8))
240
241 lines(thresh.model1, sens.model1, type = "l", lty = 2, lwd = 2, col = "black")
242 lines(thresh.model2, sens.model2, type = "l", lty = 1, lwd = 2, col = "black")
243
244 lines(thresh.model1, spec.model1, type = "l", lty = 2, lwd = 2, col = "red")
245 lines(thresh.model2, spec.model2, type = "l", lty = 1, lwd = 2, col = "red")
246
247 text(x = -0.15, y = 0.4, labels = "Sensitivity, ", col = "black", xpd = TRUE, srt = 90)
248 text(x = -0.15, y = 0.4 + 0.175, labels = "1-Specificity", col = "red", xpd = TRUE, srt = 90)
249 legend("topleft", c("Event: New model", "Event: Baseline model",
250 "No Event: New model", "No Event: Baseline model"),
251 col = c("black", "black", "red", "red"),
252 lty = c(1,2, 1, 2), lwd = 2, bty = "n")
253 dev.off()
254 return(data.frame("Null.p.sens"=thresh.model1,
255 "Null.sens"=sens.model1,
256 "Null.p.1spec"=thresh.model1,
257 "Null.1spec"=sens.model1,
258 "Alt.p.sens"=thresh.model2,
259 "Alt.sens"=sens.model2,
260 "Alt.p.1spec"=thresh.model2,
261 "Alt.1spec"=sens.model2))
262
263 }
264
265
266
267 ### statistics from a raplot (is an adaptation of improveProb() from Hmisc)
268
269 statistics.raplot = function(x1, x2, y, threshvec)
270 {
271
272 s = is.na(x1 + x2 + y) ###Remove rows with missing data
273 if (any(s)) {
274 smiss = sum(s)
275 s = !s
276 x1 = x1[s]
277 x2 = x2[s]
278 y = y[s]
279 print.noquote(paste('Warning: removed',smiss,'cases with missing values'))
280 }
281 n = length(y)
282 y = as.numeric(y)
283 u = sort(unique(y))
284 if (length(u) != 2 || u[1] != 0 || u[2] != 1) {
285 print.noquote("INPUT ERROR: y must have only two values: 0 and 1")
286 sink()
287 quit(save="no",status=2)
288 }
289 r = range(x1, x2)
290 if (r[1] < 0 || r[2] > 1) {
291 print.noquote("INPUT ERROR: x1 and x2 must be in [0,1]")
292 sink()
293 quit(save="no",status=3)
294 }
295 incidence=sum(y)/n
296 if (missing(threshvec)) {
297 threshvec=c(0, incidence,1)
298 print(paste('threshvec missing. using',paste(threshvec,collapse=',')))
299 }
300 a = (y == 1)
301 b = (y == 0)
302 na = sum(a)
303 nb = sum(b)
304 d = x2 - x1
305 ### NRI
306 n.thresh=length(threshvec)-1
307 risk.class.x1.ev=cut2(x1[a],threshvec)
308 risk.class.x2.ev=cut2(x2[a],threshvec)
309 thresh=c()
310 lt = length(threshvec)
311 for (i in 1:(lt-1)) {
312 thresh[i] = paste("[",toString(threshvec[i]),",",toString(threshvec[i+1]),"]")
313 }
314 levels(risk.class.x1.ev)=thresh
315 levels(risk.class.x2.ev)=thresh
316 cM.ev=confusionMatrix(risk.class.x2.ev,risk.class.x1.ev)
317 pup.ev=0
318 pdown.ev=0
319 for (i in 1:(n.thresh-1)) { pup.ev = pup.ev + sum(cM.ev\$table[(i+1):n.thresh,i])}
320 for (i in 2:n.thresh) { pdown.ev = pdown.ev + sum(cM.ev\$table[1:(i-1),i])}
321 pup.ev=pup.ev/na
322 pdown.ev=pdown.ev/na
323 risk.class.x1.ne=cut2(x1[b],threshvec)
324 risk.class.x2.ne=cut2(x2[b],threshvec)
325 levels(risk.class.x1.ne)=thresh
326 levels(risk.class.x2.ne)=thresh
327 cM.ne=confusionMatrix(risk.class.x2.ne,risk.class.x1.ne)
328 pup.ne=0
329 pdown.ne=0
330 for (i in 1:(n.thresh-1)){pup.ne=pup.ev+sum(cM.ne\$table[(i+1):n.thresh,i])}
331 for (i in 2:n.thresh){pdown.ne=pdown.ne+sum(cM.ne\$table[1:(i-1),i])}
332 pdown.ne=pdown.ne/nb
333 pup.ne=pup.ne/nb
334 nri = pup.ev - pdown.ev - (pup.ne - pdown.ne)
335 se.nri = sqrt((pup.ev + pdown.ev)/na + (pup.ne + pdown.ne)/nb)
336 z.nri = nri/se.nri
337 nri.ev = pup.ev - pdown.ev
338 se.nri.ev = sqrt((pup.ev + pdown.ev)/na)
339 z.nri.ev = nri.ev/se.nri.ev
340 nri.ne = pdown.ne - pup.ne
341 se.nri.ne = sqrt((pdown.ne + pup.ne)/nb)
342 z.nri.ne = nri.ne/se.nri.ne
343 ### Category Free NRI calculations
344 cfpup.ev = mean(d[a] > 0)
345 cfpup.ne = mean(d[b] > 0)
346 cfpdown.ev = mean(d[a] < 0)
347 cfpdown.ne = mean(d[b] < 0)
348 cfnri = cfpup.ev - cfpdown.ev - (cfpup.ne - cfpdown.ne)
349 se.cfnri = sqrt((cfpup.ev + cfpdown.ev)/na + (cfpup.ne + cfpdown.ne)/nb)
350 z.cfnri = cfnri/se.cfnri
351 cfnri.ev = cfpup.ev - cfpdown.ev
352 se.cfnri.ev = sqrt((cfpup.ev + cfpdown.ev)/na)
353 z.cfnri.ev = cfnri.ev/se.cfnri.ev
354 cfnri.ne = cfpdown.ne - cfpup.ne
355 se.cfnri.ne = sqrt((cfpdown.ne + cfpup.ne)/nb)
356 z.cfnri.ne = cfnri.ne/se.cfnri.ne
357 ### IDI calculations
358 improveSens = sum(d[a])/na
359 improveSpec = -sum(d[b])/nb
360 idi.ev = mean(improveSens)
361 idi.ne = mean(improveSpec)
362 idi = idi.ev - idi.ne
363 var.ev = var(d[a])/na
364 se.idi.ev = sqrt(var.ev)
365 z.idi.ev = idi.ev/se.idi.ev
366 var.ne = var(d[b])/nb
367 se.idi.ne = sqrt(var.ne)
368 z.idi.ne = idi.ne/se.idi.ne
369 se.idi = sqrt(var.ev + var.ne)
370 z.idi = idi/se.idi
371 ### AUC calculations
372 roc.x1 = roc(y, x1)
373 auc.x1 = auc(roc.x1)
374 ci.auc.x1 = ci.auc(roc.x1)
375 se.auc.x1 = (ci.auc.x1[3] - auc.x1)/qnorm(0.975)
376 roc.x2 = roc(y, x2)
377 auc.x2 = auc(roc.x2)
378 ci.auc.x2 = ci.auc(roc.x2)
379 se.auc.x2 = (ci.auc.x2[3] - auc.x2)/qnorm(0.975)
380 roc.test.x1.x2 = roc.test(roc.x1, roc.x2) ###Uses the default Delong method
381 sens.x1 = roc.x1\$sensitivities
382 spec.x1 = 1 - roc.x1\$specificities
383 n.x1 = length(sens.x1)
384 x1 = roc.x1\$thresholds
385 x1 = x1[c(-1,-n.x1)]
386 sens.x1 = sens.x1[c(-1,-n.x1)]
387 spec.x1 = spec.x1[c(-1,-n.x1)]
388 sens.x2 = roc.x2\$sensitivities
389 spec.x2 = 1 - roc.x2\$specificities
390 n.x2 = length(sens.x2)
391 x2 = roc.x2\$thresholds
392 x2 = x2[c(-1,-n.x2)]
393 sens.x2 = sens.x2[c(-1,-n.x2)]
394 spec.x2 = spec.x2[c(-1,-n.x2)]
395 ### Integrated sensitivity and 1-specificity calculations
396 is.x1 = trapz(x = x1, y = sens.x1) ### area under curves (relates to integrated sens, 1-spec)
397 is.x2 = trapz(x = x2, y = sens.x2)
398 ip.x1 = trapz(x = x1, y = spec.x1)
399 ip.x2 = trapz(x = x2, y = spec.x2)
400
401 ### Output
402 output = c(n, na, nb, pup.ev, pup.ne, pdown.ev, pdown.ne, nri, se.nri, z.nri,
403 nri.ev, se.nri.ev, z.nri.ev, nri.ne, se.nri.ne, z.nri.ne,
404 cfpup.ev, cfpup.ne, cfpdown.ev, cfpdown.ne, cfnri, se.cfnri, z.cfnri,
405 cfnri.ev, se.cfnri.ev, z.cfnri.ev, cfnri.ne, se.cfnri.ne, z.cfnri.ne,
406 improveSens, improveSpec, idi.ev, se.idi.ev, z.idi.ev, idi.ne,
407 se.idi.ne, z.idi.ne, idi, se.idi, z.idi, is.x1, NA, is.x2, NA,
408 ip.x1, NA, ip.x2, NA, auc.x1, se.auc.x1, auc.x2, se.auc.x2,
409 roc.test.x1.x2\$p.value,incidence)
410 names(output) = c("n", "na", "nb", "pup.ev", "pup.ne", "pdown.ev", "pdown.ne",
411 "nri", "se.nri", "z.nri", "nri.ev", "se.nri.ev", "z.nri.ev",
412 "nri.ne", "se.nri.ne", "z.nri.ne",
413 "cfpup.ev", "cfpup.ne", "cfpdown.ev", "cfpdown.ne",
414 "cfnri", "se.cfnri", "z.cfnri", "cfnri.ev", "se.cfnri.ev", "z.cfnri.ev",
415 "cfnri.ne", "se.cfnri.ne", "z.cfnri.ne", "improveSens", "improveSpec",
416 "idi.ev", "se.idi.ev", "z.idi.ev", "idi.ne", "se.idi.ne",
417 "z.idi.ne", "idi", "se.idi", "z.idi", "is.x1", "se.is.x1",
418 "is.x2", "se.is.x2", "ip.x1", "se.ip.x1", "ip.x2", "se.ip.x2",
419 "auc.x1", "se.auc.x1", "auc.x2", "se.auc.x2",
420 "roc.test.x1.x2.pvalue","incidence")
421 resdf = data.frame(N=n, Na=na, Nb=nb, pup.ev=pup.ev, pup.ne=pup.ne, pdown.ev=pdown.ev, pdown.ne=pdown.ne, NRI=nri, NRI.se=se.nri, NRI.z=z.nri,
422 NRI.ev=nri.ev, NRI.ev.se=se.nri.ev, NRI.ev.z=z.nri.ev, NRI.ne=nri.ne, NRI.ne.se=se.nri.ne, NRI.ne.z=z.nri.ne,
423 cfpup.ev=cfpup.ev, cfpup.ne=cfpup.ne, cfpdown.ev=cfpdown.ev, cfpdown.ne=cfpdown.ne, CFNRI=cfnri, CFNRI.se=se.cfnri, CFNRI.z=z.cfnri,
424 CFNRI.ev=cfnri.ev, CFNRI.ev.se=se.cfnri.ev, CFNRI.ev.z=z.cfnri.ev, CFNRI.ne=cfnri.ne, CFNRI.ne.se=se.cfnri.ne, CFNRI.ne.z=z.cfnri.ne,
425 improvSens=improveSens, improvSpec=improveSpec, IDI.ev=idi.ev, IDI.ev.se=se.idi.ev, IDI.ev.z=z.idi.ev, IDI.ne=idi.ne,
426 IDI.ne.se=se.idi.ne, IDI.ne.z=z.idi.ne, IDI=idi, IDI.se=se.idi, IDI.z=z.idi, isx1=is.x1, isx2=is.x2,
427 ipxi=ip.x1, ipx2=ip.x2, AUC.x1=auc.x1, AUC.x1.se=se.auc.x1, AUC.x2=auc.x2, AUC.x2.se=se.auc.x2,
428 roctestpval=roc.test.x1.x2\$p.value,incidence=incidence)
429 tr = t(resdf)
430 tresdf = data.frame(measure=colnames(resdf),value=tr[,1])
431 return(list(resdf=tresdf,output=output))
432 }
433
434
435 ### More comprehensive summary statistics from a raplot
436 ### Choice of confidence intervals determined through asymptotics or bootstrapping (n.boot = ### of bootstrap resamples)
437 ### dp is number of decimal places for results table
438
439 summary.raplot = function(x1, x2, y, threshvec, cis = "boot", conf.level = 0.95, n.boot = 2000, dp = 4, stat_ra=NA)
440 {
441 results = stat_ra
442 if (cis == "boot") {
443 print.noquote("Bootstrap estimates for SE")
444 results.boot = matrix(NA, n.boot, length(names(results)))
445
446 colnames(results.boot) = names(results)
447
448 for (i in 1:n.boot) {
449 ###boot.index = sample(length(cc.status), replace = TRUE)
450 ###risk.model1.boot = risk.model1[boot.index]
451 ###risk.model2.boot = risk.model2[boot.index]
452 ###cc.status.boot = cc.status[boot.index]
453 boot.index = sample(length(y), replace = TRUE)
454 risk.model1.boot = x1[boot.index]
455 risk.model2.boot = x2[boot.index]
456 cc.status.boot = y[boot.index]
457 r = statistics.raplot(x1 = risk.model1.boot, x2 = risk.model2.boot, y = cc.status.boot)
458 results.boot[i, ] = r\$output
459 }
460
461 results.se.boot = apply(results.boot, 2, sd)
462 print(paste(results.se.boot,collapse=','))
463
464
465 results[grep("se", names(results))] = results.se.boot[grep("se", names(results)) - 1]
466
467 }
468
469
470
471 ### calculate cis and return
472
473 z = abs(qnorm((1 - conf.level)/2))
474
475 results.matrix = matrix(NA, 24, 2)
476
477 results.matrix[1, ] = c("Total (n)", results["n"])
478 results.matrix[2, ] = c("Events (n)", results["na"])
479 results.matrix[3, ] = c("Non-events (n)", results["nb"])
480 results.matrix[4, ] = c("Category free NRI and summary statistics","-------------------------")
481 results.matrix[5, ] = c("cfNRI events (%)",
482 paste(round(100*results["cfnri.ev"], dp-2), " (",
483 round(100*results["cfnri.ev"] - z * 100*results["se.cfnri.ev"], dp-2),
484 " to ", round(100*results["cfnri.ev"] +
485 z * 100*results["se.cfnri.ev"], dp-2), ")", sep = ""))
486 results.matrix[6, ] = c("cfNRI non-events (%)",
487 paste(round(100*results["cfnri.ne"], dp-2), " (",
488 round(100*results["cfnri.ne"] - z * 100*results["se.cfnri.ne"], dp)-2,
489 " to ", round(100*results["cfnri.ne"] + z * 100*results["se.cfnri.ne"],
490 dp-2), ")", sep = ""))
491 results.matrix[7, ] = c("cfNRI (%)",
492 paste(round(100*results["cfnri"], dp-2), " (",
493 round(100*results["cfnri"] - z * 100*results["se.cfnri"], dp-2),
494 " to ", round(100*results["cfnri"] + z * 100*results["se.cfnri"],
495 dp-2), ")", sep = ""))
496 results.matrix[8, ] = c("NRI and summary statistics","-------------------------")
497 results.matrix[9, ] = c("NRI events (%)",
498 paste(round(100*results["nri.ev"], dp-2), " (",
499 round(100*results["nri.ev"] - z * 100*results["se.nri.ev"], dp-2),
500 " to ", round(100*results["nri.ev"] +
501 z * 100*results["se.nri.ev"], dp-2), ")", sep = ""))
502 results.matrix[10, ] = c("NRI non-events (%)",
503 paste(round(100*results["nri.ne"], dp-2), " (",
504 round(100*results["nri.ne"] - z * 100*results["se.nri.ne"], dp-2),
505 " to ", round(100*results["nri.ne"] + z * 100*results["se.nri.ne"],
506 dp-2), ")", sep = ""))
507 results.matrix[11, ] = c("NRI (%)",
508 paste(round(100*results["nri"], dp-2), " (",
509 round(100*results["nri"] - z * 100*results["se.nri"], dp-2),
510 " to ", round(100*results["nri"] + z * 100*results["se.nri"],
511 dp-2), ")", sep = ""))
512 results.matrix[12, ] = c("IDI and summary statistics","-------------------------")
513 results.matrix[13, ] = c("IDI events",
514 paste(round(results["idi.ev"], dp), " (",
515 round(results["idi.ev"] - z * results["se.idi.ev"], dp),
516 " to ", round(results["idi.ev"] + z * results["se.idi.ev"],
517 dp), ")", sep = ""))
518 results.matrix[14, ] = c("IDI non-events",
519 paste(round(results["idi.ne"], dp), " (",
520 round(results["idi.ne"] - z * results["se.idi.ne"], dp),
521 " to ", round(results["idi.ne"] + z * results["se.idi.ne"],
522 dp), ")", sep = ""))
523 results.matrix[15, ] = c("IDI",
524 paste(round(results["idi"], dp), " (",
525 round(results["idi"] - z * results["se.idi"], dp),
526 " to ", round(results["idi"] + z * results["se.idi"],
527 dp), ")", sep = ""))
528 results.matrix[16, ] = c("IS (null model)",
529 paste(round(results["is.x1"], dp), " (",
530 round(results["is.x1"] - z * results["se.is.x1"], dp),
531 " to ", round(results["is.x1"] + z * results["se.is.x1"],
532 dp), ")", sep = ""))
533 results.matrix[17, ] = c("IS (alt model)",
534 paste(round(results["is.x2"], dp), " (",
535 round(results["is.x2"] - z * results["se.is.x2"], dp),
536 " to ", round(results["is.x2"] + z * results["se.is.x2"],
537 dp), ")", sep = ""))
538 results.matrix[18, ] = c("IP (null model)",
539 paste(round(results["ip.x1"], dp), " (",
540 round(results["ip.x1"] - z * results["se.ip.x1"], dp),
541 " to ", round(results["ip.x1"] + z * results["se.ip.x1"],
542 dp), ")", sep = ""))
543 results.matrix[19, ] = c("IP (alt model)",
544 paste(round(results["ip.x2"], dp), " (",
545 round(results["ip.x2"] - z * results["se.ip.x2"], dp),
546 " to ", round(results["ip.x2"] + z * results["se.ip.x2"],
547 dp), ")", sep = ""))
548 results.matrix[20, ] = c("AUC","-------------------------")
549 results.matrix[21, ] = c("AUC (null model)",
550 paste(round(results["auc.x1"], dp), " (",
551 round(results["auc.x1"] - z * results["se.auc.x1"], dp),
552 " to ", round(results["auc.x1"] + z * results["se.auc.x1"],
553 dp), ")", sep = ""))
554 results.matrix[22, ] = c("AUC (alt model)",
555 paste(round(results["auc.x2"], dp), " (",
556 round(results["auc.x2"] - z * results["se.auc.x2"], dp),
557 " to ", round(results["auc.x2"] + z * results["se.auc.x2"],
558 dp), ")", sep = ""))
559 results.matrix[23, ] = c("difference (P)", round(results["roc.test.x1.x2.pvalue"], dp))
560 results.matrix[24, ] = c("Incidence", round(results["incidence"], dp))
561
562 return(results.matrix)
563 }
564
565
566
567 ]]>
568
569 options(width=120)
570 options(digits=5)
571 logf = file("rgNRI.log", open = "a")
572 sink(logf,type = c("output", "message"))
573 Out_Dir = "$html_file.files_path"
574 Input1 = "$input1"
575 Input2 = "$input2"
576 myTitle = "$title"
577 outtab = "$nri_file"
578 input1_obs = $input1_observed
579 input1_pred = $input1_predicted
580 input1_id = $input1_id
581 input2_obs = $input2_observed
582 input2_pred = $input2_predicted
583 input2_id = $input2_id
584 in1 = read.table(Input1,head=T,sep='\t')
585 in2 = read.table(Input2,head=T,sep='\t')
586 id1 = in1[,input1_id]
587 id2 = in2[,input2_id]
588 useme1 = in1[which(id1 %in% id2),]
589 useme2 = in2[which(id2 %in% id1),]
590 id1 = useme1[,input1_id]
591 id2 = useme2[,input2_id]
592 useme1 = useme1[order(id1),]
593 useme2 = useme2[order(id2),]
594 x1 = useme1[,input1_pred]
595 x2 = useme2[,input2_pred]
596 y1 = useme1[,input1_obs]
597 y2 = useme2[,input2_obs]
598 n.boot = $CImeth.nboot
599 conf.level = 0.95
600 cis = "$CImeth.cis"
601 digits = 4
602 nydiff = sum(y1 != y2)
603 if (nydiff &gt; 0) {
604 print.noquote(paste('Input error: observed status column has',nydiff,'differences - cannot reliably proceed'))
605 quit(save="no",status=1)
606 }
607 y = y2
608 outplot = 'rgNRI_EventRisk.pdf'
609 res = raplot(x1=x1, x2=x2, y=y, outplot=outplot,title=myTitle)
610
611 stats = statistics.raplot(x1=x1, x2=x2, y=y)
612 res1 = stats\$resdf
613 out1 = stats\$output
614 print.noquote('Results:')
615 print.noquote(res1,digits=4)
616 res2 = summary.raplot(x1=x1, x2=x2, y=y, cis = cis, conf.level = conf.level, n.boot = n.boot, dp = digits, stat_ra=out1)
617 print.noquote('Summary:')
618 print.noquote(res2,digits=4)
619 write.table(format(res1,digits=4),outtab,quote=F, col.names=F,sep="\t",row.names=F)
620 print.noquote('SessionInfo for this R session:')
621 sessionInfo()
622 print.noquote('warnings for this R session:')
623 warnings()
624 sink()
625 </configfile>
626 </configfiles>
627 <citations>
628 <citation type="doi">doi: 10.2215/​CJN.09590911</citation>
629 </citations>
630 </tool>
631
632
633