comparison rg_nri.xml @ 5:e9e354d64f8a draft

Uploaded
author fubar
date Fri, 19 Dec 2014 05:58:15 -0500
parents e0e11c2cae3f
children 445cb45d1bc0
comparison
equal deleted inserted replaced
4:5f2db639f8eb 5:e9e354d64f8a
1 <tool id="rg_nri" name="NRI" version="0.03">
2 <description>and other model improvement measures</description>
3 <requirements>
4 <requirement type="package" version="3.1.1">R_3_1_1</requirement>
5 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
6 <requirement type="package" version="9.10">ghostscript</requirement>
7 <requirement type="package" version="2.14">glmnet_lars_2_14</requirement>
8 </requirements>
9 <command interpreter="python">
10 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "rg_NRI"
11 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
12 </command>
13 <inputs>
14 <param name="title" type="text" value="NRI test" size="80" label="Plot Title" help="Will appear as the title for the comparison plot"/>
15 <param name="input1" type="data" format="tabular" label="Select a tabular file from the baseline model with predicted and observed outcome column for each subject"
16 multiple='False' help="Observed and predicted status columns must be selected from this file below - NOTE both models must be in same order with exact matches in all observed outcomes" optional="False"/>
17 <param name="input1_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input1" numerical="True"
18 multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
19 <param name="input1_predicted" label="Select column containing predicted event probabilies from baseline model" type="data_column" data_ref="input1" numerical="True"
20 multiple="False" use_header_names="True" optional="False" help="Must be in range 0-1"/>
21 <param name="input1_id" label="Select column containing subject ID from baseline model" type="data_column" data_ref="input1" numerical="True"
22 multiple="False" use_header_names="True" optional="False" help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/>
23 <param name="input2" type="data" format="tabular" label="Select a tabular file from the new model with predicted and observed outcome columns for each subject"
24 multiple='False' help="Observed and predicted status columns must be selected from this file below" />
25 <param name="input2_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input2" numerical="True"
26 multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
27 <param name="input2_predicted" label="Select column containing predicted event probabilities from the new model" type="data_column" data_ref="input2" numerical="True"
28 multiple="False" use_header_names="True" optional="False" help="Must be in range 0-1"/>
29 <param name="input2_id" label="Select column containing subject ID from the new model" type="data_column" data_ref="input2" numerical="True"
30 multiple="False" use_header_names="True" optional="False" help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/>
31 <conditional name="CImeth">
32 <param name="cis" type="select" label="CI calculation method"
33 help="Bootstrap will take time - a long time for thousands - asymptotic is quick and informative">
34 <option value="asymptotic" selected="true">Asymptotic estimate</option>
35 <option value="boot">Bootstrap for empirical CIs</option>
36 </param>
37 <when value="boot">
38 <param name="nboot" type="integer" value="1000" label="Number of bootstrap replicates"/>
39 </when>
40 <when value="asymptotic">
41 <param name="nboot" type="hidden" value="1000"/>
42 </when>
43 </conditional>
44 </inputs>
45 <outputs>
46 <data format="html" name="html_file" label="${title}.html"/>
47 <data format="tabular" name="nri_file" label="${title}_nrires.xls"/>
48 </outputs>
49 <tests>
50 <test>
51 <param name='input1' value='nri_test1.xls' ftype='tabular' />
52 <param name='input2' value='nri_test2.xls' ftype='tabular' />
53 <output name='html_file' file='nri_test1_out.html' compare='diff' lines_diff='10' />
54 <output name='nri_file' file='nri_test1_out.xls' />
55 </test>
56 </tests>
57 <help>
58
59 **Before you start**
60
61 This is a simple tool to calculate various measures of improvement in prediction between two models described in pickering_paper_
62 It is based on an R script pickering_code_ written by Dr John W Pickering and Dr David Cairns from sunny Otago University which
63 has been debugged and slightly adjusted to fit a Galaxy tool wrapper.
64
65
66 **What it does**
67
68 Copied from the documentation in pickering_code_ ::
69
70
71 Functions to create risk assessment plots and associated summary statistics
72
73
74 (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns
75 Last modified August 2014
76
77 Redistribution and use in source and binary forms, with or without
78 modification, are permitted provided that the following conditions are met:
79 * Redistributions of source code must retain the above copyright
80 notice, this list of conditions and the following disclaimer.
81 * Redistributions in binary form must reproduce the above copyright
82 notice, this list of conditions and the following disclaimer in
83 the documentation and/or other materials provided with the distribution
84
85 FUNCTIONS
86 raplot
87 Produces a Risk Assessment Plot and outputs the coordinates of the four curves
88 Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of
89 Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355–1364. doi:10.2215/CJN.09590911
90
91 statistics.raplot
92 Produces the NRIs, IDIs, IS, IP, AUCs.
93 Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011). Extensions of net reclassification improvement calculations to
94 measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11–21. doi:10.1002/sim.4085
95 Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the
96 ROC curve to reclassification and beyond.
97 Statistics in Medicine, 27(2), 157–172. doi:10.1002/sim.2929
98 DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach.
99 Biometrics, 44(3), 837–845.
100
101 summary.raplot
102 Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot"))
103
104
105 Required arguments for all functions:
106 x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object
107 x2 is calculated risk (eg from a glm) for the alternative model
108 y is the case-control indicator (0 for controls, 1 for cases)
109 Optional argument
110 t are the boundaries of the risks for each group (ie 0, 1 and the thresholds beteween. eg c(0,0,3,0,7,1)). If missing, defaults to c(0, the incidence, 1)
111
112
113 **Input**
114
115 The observed and predicted outcomes from two models to be compared.
116
117 **Output**
118
119 Lots'o'measures (TM) see pickering_paper_ for details
120
121 **Attributions**
122
123 pickering_paper_ is the paper the caclulations performed by this tool is based on
124
125 pickering_code_ is the R function from John Pickering exposed by this Galaxy tool with minor modifications and hacks by Ross Lazarus.
126
127 Galaxy_ (that's what you are using right now!) for gluing everything together
128
129 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
130 licensed to you under the LGPL_ like other rgenetics artefacts
131
132 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
133 .. _pickering_code: http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI
134 .. _pickering_paper: http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full
135 .. _Galaxy: http://getgalaxy.org
136
137
138 </help>
139
140 <configfiles>
141 <configfile name="runme">
142
143 <![CDATA[
144
145 ### http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI code
146 ### http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full is the reference
147 ### lots of little tweaks and but fixes. Using t as a variable name seems fraught to me.
148 ### Ross Lazarus october 2014 for a Galaxy tool wrapper using the toolfactory infrastucture
149
150 #############################################################################
151 ###Functions to create risk assessment plots and associated summary statistics
152 #############################################################################
153 ###
154 ### (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns
155 ### Last modified August 2014
156 ###
157 ### Redistribution and use in source and binary forms, with or without
158 ### modification, are permitted provided that the following conditions are met:
159 ### * Redistributions of source code must retain the above copyright
160 ### notice, this list of conditions and the following disclaimer.
161 ### * Redistributions in binary form must reproduce the above copyright
162 ### notice, this list of conditions and the following disclaimer in
163 ### the documentation and/or other materials provided with the distribution
164
165 ### FUNCTIONS
166 ### raplot
167 ### Produces a Risk Assessment Plot and outputs the coordinates of the four curves
168 ### Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of
169 ### Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355–1364. doi:10.2215/CJN.09590911
170 ###
171 ### statistics.raplot
172 ### Produces the NRIs, IDIs, IS, IP, AUCs.
173 ### Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011).
174 ### Extensions of net reclassification improvement calculations to measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11–21. doi:10.1002/sim.4085
175 ### Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the ROC curve to reclassification and beyond.
176 ### Statistics in Medicine, 27(2), 157–172. doi:10.1002/sim.2929
177 ### DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach.
178 ### Biometrics, 44(3), 837–845.
179 ###
180 ### summary.raplot
181 ### Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot"))
182
183
184 ### Required arguments for all functions:
185 ### x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object
186 ### x2 is calculated risk (eg from a glm) for the alternative model
187 ### y is the case-control indicator (0 for controls, 1 for cases)
188 ### Optional argument
189 ### t are the boundaries of the risks for each group (ie 0, 1 and the thresholds beteween. eg c(0,0,3,0,7,1)). If missing, defaults to c(0, the incidence, 1)
190
191
192 ### risk assessment plot
193
194 library('e1071')
195 library('caret')
196 library('pROC')
197 library('Hmisc')
198 library('pracma')
199
200 raplot = function(x1, x2, y, outplot, title) {
201
202 roc.model1 = roc(y, x1)
203 roc.model2 = roc(y, x2)
204 sens.model1 = roc.model1\$sensitivities
205 spec.model1 = 1 - roc.model1\$specificities
206 n.model1 = length(sens.model1)
207 thresh.model1 = roc.model1\$thresholds
208 thresh.model1 = thresh.model1[c(-1,-n.model1)]
209 sens.model1 = sens.model1[c(-1,-n.model1)]
210 spec.model1 = spec.model1[c(-1,-n.model1)]
211 sens.model2 = roc.model2\$sensitivities
212 spec.model2 = 1 - roc.model2\$specificities
213 n.model2 = length(sens.model2)
214 thresh.model2 = roc.model2\$thresholds
215 thresh.model2[1]=0
216 thresh.model2[length(thresh.model2)]=1
217 thresh.model2 = thresh.model2[c(-1,-n.model2)]
218 sens.model2 = sens.model2[c(-1,-n.model2)]
219 spec.model2 = spec.model2[c(-1,-n.model2)]
220
221 n.model1 = length(sens.model1)
222 n.model2 = length(sens.model2)
223
224 ### actual plotting
225 pdf(outplot)
226 plot(thresh.model1, sens.model1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
227 lty = 2, lwd = 2, xlab = "Risk of Event", ylab = "", col = "black", main=title)
228 grid()
229
230 polygon(x = c(thresh.model1, thresh.model2[n.model2:1]),
231 y = c(sens.model1, sens.model2[n.model2:1]), border = NA, col = gray(0.8))
232 polygon(x = c(thresh.model1, thresh.model2[n.model2:1]),
233 y = c(spec.model1, spec.model2[n.model2:1]), border = NA, col = gray(0.8))
234
235 lines(thresh.model1, sens.model1, type = "l", lty = 2, lwd = 2, col = "black")
236 lines(thresh.model2, sens.model2, type = "l", lty = 1, lwd = 2, col = "black")
237
238 lines(thresh.model1, spec.model1, type = "l", lty = 2, lwd = 2, col = "red")
239 lines(thresh.model2, spec.model2, type = "l", lty = 1, lwd = 2, col = "red")
240
241 text(x = -0.15, y = 0.4, labels = "Sensitivity, ", col = "black", xpd = TRUE, srt = 90)
242 text(x = -0.15, y = 0.4 + 0.175, labels = "1-Specificity", col = "red", xpd = TRUE, srt = 90)
243 legend("topleft", c("Event: New model", "Event: Baseline model",
244 "No Event: New model", "No Event: Baseline model"),
245 col = c("black", "black", "red", "red"),
246 lty = c(1,2, 1, 2), lwd = 2, bty = "n")
247 dev.off()
248 return(data.frame("Null.p.sens"=thresh.model1,
249 "Null.sens"=sens.model1,
250 "Null.p.1spec"=thresh.model1,
251 "Null.1spec"=sens.model1,
252 "Alt.p.sens"=thresh.model2,
253 "Alt.sens"=sens.model2,
254 "Alt.p.1spec"=thresh.model2,
255 "Alt.1spec"=sens.model2))
256
257 }
258
259
260
261 ### statistics from a raplot (is an adaptation of improveProb() from Hmisc)
262
263 statistics.raplot = function(x1, x2, y, threshvec)
264 {
265
266 s = is.na(x1 + x2 + y) ###Remove rows with missing data
267 if (any(s)) {
268 smiss = sum(s)
269 s = !s
270 x1 = x1[s]
271 x2 = x2[s]
272 y = y[s]
273 print.noquote(paste('Warning: removed',smiss,'cases with missing values'))
274 }
275 n = length(y)
276 y = as.numeric(y)
277 u = sort(unique(y))
278 if (length(u) != 2 || u[1] != 0 || u[2] != 1) {
279 print.noquote("INPUT ERROR: y must have only two values: 0 and 1")
280 sink()
281 quit(save="no",status=2)
282 }
283 r = range(x1, x2)
284 if (r[1] < 0 || r[2] > 1) {
285 print.noquote("INPUT ERROR: x1 and x2 must be in [0,1]")
286 sink()
287 quit(save="no",status=3)
288 }
289 incidence=sum(y)/n
290 if (missing(threshvec)) {
291 threshvec=c(0, incidence,1)
292 print(paste('threshvec missing. using',paste(threshvec,collapse=',')))
293 }
294 a = (y == 1)
295 b = (y == 0)
296 na = sum(a)
297 nb = sum(b)
298 d = x2 - x1
299 ### NRI
300 n.thresh=length(threshvec)-1
301 risk.class.x1.ev=cut2(x1[a],threshvec)
302 risk.class.x2.ev=cut2(x2[a],threshvec)
303 thresh=c()
304 lt = length(threshvec)
305 for (i in 1:(lt-1)) {
306 thresh[i] = paste("[",toString(threshvec[i]),",",toString(threshvec[i+1]),"]")
307 }
308 levels(risk.class.x1.ev)=thresh
309 levels(risk.class.x2.ev)=thresh
310 cM.ev=confusionMatrix(risk.class.x2.ev,risk.class.x1.ev)
311 pup.ev=0
312 pdown.ev=0
313 for (i in 1:(n.thresh-1)) { pup.ev = pup.ev + sum(cM.ev\$table[(i+1):n.thresh,i])}
314 for (i in 2:n.thresh) { pdown.ev = pdown.ev + sum(cM.ev\$table[1:(i-1),i])}
315 pup.ev=pup.ev/na
316 pdown.ev=pdown.ev/na
317 risk.class.x1.ne=cut2(x1[b],threshvec)
318 risk.class.x2.ne=cut2(x2[b],threshvec)
319 levels(risk.class.x1.ne)=thresh
320 levels(risk.class.x2.ne)=thresh
321 cM.ne=confusionMatrix(risk.class.x2.ne,risk.class.x1.ne)
322 pup.ne=0
323 pdown.ne=0
324 for (i in 1:(n.thresh-1)){pup.ne=pup.ev+sum(cM.ne\$table[(i+1):n.thresh,i])}
325 for (i in 2:n.thresh){pdown.ne=pdown.ne+sum(cM.ne\$table[1:(i-1),i])}
326 pdown.ne=pdown.ne/nb
327 pup.ne=pup.ne/nb
328 nri = pup.ev - pdown.ev - (pup.ne - pdown.ne)
329 se.nri = sqrt((pup.ev + pdown.ev)/na + (pup.ne + pdown.ne)/nb)
330 z.nri = nri/se.nri
331 nri.ev = pup.ev - pdown.ev
332 se.nri.ev = sqrt((pup.ev + pdown.ev)/na)
333 z.nri.ev = nri.ev/se.nri.ev
334 nri.ne = pdown.ne - pup.ne
335 se.nri.ne = sqrt((pdown.ne + pup.ne)/nb)
336 z.nri.ne = nri.ne/se.nri.ne
337 ### Category Free NRI calculations
338 cfpup.ev = mean(d[a] > 0)
339 cfpup.ne = mean(d[b] > 0)
340 cfpdown.ev = mean(d[a] < 0)
341 cfpdown.ne = mean(d[b] < 0)
342 cfnri = cfpup.ev - cfpdown.ev - (cfpup.ne - cfpdown.ne)
343 se.cfnri = sqrt((cfpup.ev + cfpdown.ev)/na + (cfpup.ne + cfpdown.ne)/nb)
344 z.cfnri = cfnri/se.cfnri
345 cfnri.ev = cfpup.ev - cfpdown.ev
346 se.cfnri.ev = sqrt((cfpup.ev + cfpdown.ev)/na)
347 z.cfnri.ev = cfnri.ev/se.cfnri.ev
348 cfnri.ne = cfpdown.ne - cfpup.ne
349 se.cfnri.ne = sqrt((cfpdown.ne + cfpup.ne)/nb)
350 z.cfnri.ne = cfnri.ne/se.cfnri.ne
351 ### IDI calculations
352 improveSens = sum(d[a])/na
353 improveSpec = -sum(d[b])/nb
354 idi.ev = mean(improveSens)
355 idi.ne = mean(improveSpec)
356 idi = idi.ev - idi.ne
357 var.ev = var(d[a])/na
358 se.idi.ev = sqrt(var.ev)
359 z.idi.ev = idi.ev/se.idi.ev
360 var.ne = var(d[b])/nb
361 se.idi.ne = sqrt(var.ne)
362 z.idi.ne = idi.ne/se.idi.ne
363 se.idi = sqrt(var.ev + var.ne)
364 z.idi = idi/se.idi
365 ### AUC calculations
366 roc.x1 = roc(y, x1)
367 auc.x1 = auc(roc.x1)
368 ci.auc.x1 = ci.auc(roc.x1)
369 se.auc.x1 = (ci.auc.x1[3] - auc.x1)/qnorm(0.975)
370 roc.x2 = roc(y, x2)
371 auc.x2 = auc(roc.x2)
372 ci.auc.x2 = ci.auc(roc.x2)
373 se.auc.x2 = (ci.auc.x2[3] - auc.x2)/qnorm(0.975)
374 roc.test.x1.x2 = roc.test(roc.x1, roc.x2) ###Uses the default Delong method
375 sens.x1 = roc.x1\$sensitivities
376 spec.x1 = 1 - roc.x1\$specificities
377 n.x1 = length(sens.x1)
378 x1 = roc.x1\$thresholds
379 x1 = x1[c(-1,-n.x1)]
380 sens.x1 = sens.x1[c(-1,-n.x1)]
381 spec.x1 = spec.x1[c(-1,-n.x1)]
382 sens.x2 = roc.x2\$sensitivities
383 spec.x2 = 1 - roc.x2\$specificities
384 n.x2 = length(sens.x2)
385 x2 = roc.x2\$thresholds
386 x2 = x2[c(-1,-n.x2)]
387 sens.x2 = sens.x2[c(-1,-n.x2)]
388 spec.x2 = spec.x2[c(-1,-n.x2)]
389 ### Integrated sensitivity and 1-specificity calculations
390 is.x1 = trapz(x = x1, y = sens.x1) ### area under curves (relates to integrated sens, 1-spec)
391 is.x2 = trapz(x = x2, y = sens.x2)
392 ip.x1 = trapz(x = x1, y = spec.x1)
393 ip.x2 = trapz(x = x2, y = spec.x2)
394
395 ### Output
396 output = c(n, na, nb, pup.ev, pup.ne, pdown.ev, pdown.ne, nri, se.nri, z.nri,
397 nri.ev, se.nri.ev, z.nri.ev, nri.ne, se.nri.ne, z.nri.ne,
398 cfpup.ev, cfpup.ne, cfpdown.ev, cfpdown.ne, cfnri, se.cfnri, z.cfnri,
399 cfnri.ev, se.cfnri.ev, z.cfnri.ev, cfnri.ne, se.cfnri.ne, z.cfnri.ne,
400 improveSens, improveSpec, idi.ev, se.idi.ev, z.idi.ev, idi.ne,
401 se.idi.ne, z.idi.ne, idi, se.idi, z.idi, is.x1, NA, is.x2, NA,
402 ip.x1, NA, ip.x2, NA, auc.x1, se.auc.x1, auc.x2, se.auc.x2,
403 roc.test.x1.x2\$p.value,incidence)
404 names(output) = c("n", "na", "nb", "pup.ev", "pup.ne", "pdown.ev", "pdown.ne",
405 "nri", "se.nri", "z.nri", "nri.ev", "se.nri.ev", "z.nri.ev",
406 "nri.ne", "se.nri.ne", "z.nri.ne",
407 "cfpup.ev", "cfpup.ne", "cfpdown.ev", "cfpdown.ne",
408 "cfnri", "se.cfnri", "z.cfnri", "cfnri.ev", "se.cfnri.ev", "z.cfnri.ev",
409 "cfnri.ne", "se.cfnri.ne", "z.cfnri.ne", "improveSens", "improveSpec",
410 "idi.ev", "se.idi.ev", "z.idi.ev", "idi.ne", "se.idi.ne",
411 "z.idi.ne", "idi", "se.idi", "z.idi", "is.x1", "se.is.x1",
412 "is.x2", "se.is.x2", "ip.x1", "se.ip.x1", "ip.x2", "se.ip.x2",
413 "auc.x1", "se.auc.x1", "auc.x2", "se.auc.x2",
414 "roc.test.x1.x2.pvalue","incidence")
415 resdf = data.frame(N=n, Na=na, Nb=nb, pup.ev=pup.ev, pup.ne=pup.ne, pdown.ev=pdown.ev, pdown.ne=pdown.ne, NRI=nri, NRI.se=se.nri, NRI.z=z.nri,
416 NRI.ev=nri.ev, NRI.ev.se=se.nri.ev, NRI.ev.z=z.nri.ev, NRI.ne=nri.ne, NRI.ne.se=se.nri.ne, NRI.ne.z=z.nri.ne,
417 cfpup.ev=cfpup.ev, cfpup.ne=cfpup.ne, cfpdown.ev=cfpdown.ev, cfpdown.ne=cfpdown.ne, CFNRI=cfnri, CFNRI.se=se.cfnri, CFNRI.z=z.cfnri,
418 CFNRI.ev=cfnri.ev, CFNRI.ev.se=se.cfnri.ev, CFNRI.ev.z=z.cfnri.ev, CFNRI.ne=cfnri.ne, CFNRI.ne.se=se.cfnri.ne, CFNRI.ne.z=z.cfnri.ne,
419 improvSens=improveSens, improvSpec=improveSpec, IDI.ev=idi.ev, IDI.ev.se=se.idi.ev, IDI.ev.z=z.idi.ev, IDI.ne=idi.ne,
420 IDI.ne.se=se.idi.ne, IDI.ne.z=z.idi.ne, IDI=idi, IDI.se=se.idi, IDI.z=z.idi, isx1=is.x1, isx2=is.x2,
421 ipxi=ip.x1, ipx2=ip.x2, AUC.x1=auc.x1, AUC.x1.se=se.auc.x1, AUC.x2=auc.x2, AUC.x2.se=se.auc.x2,
422 roctestpval=roc.test.x1.x2\$p.value,incidence=incidence)
423 tr = t(resdf)
424 tresdf = data.frame(measure=colnames(resdf),value=tr[,1])
425 return(list(resdf=tresdf,output=output))
426 }
427
428
429 ### More comprehensive summary statistics from a raplot
430 ### Choice of confidence intervals determined through asymptotics or bootstrapping (n.boot = ### of bootstrap resamples)
431 ### dp is number of decimal places for results table
432
433 summary.raplot = function(x1, x2, y, threshvec, cis = "boot", conf.level = 0.95, n.boot = 2000, dp = 4, stat_ra=NA)
434 {
435 results = stat_ra
436 if (cis == "boot") {
437 print.noquote("Bootstrap estimates for SE")
438 results.boot = matrix(NA, n.boot, length(names(results)))
439
440 colnames(results.boot) = names(results)
441
442 for (i in 1:n.boot) {
443 ###boot.index = sample(length(cc.status), replace = TRUE)
444 ###risk.model1.boot = risk.model1[boot.index]
445 ###risk.model2.boot = risk.model2[boot.index]
446 ###cc.status.boot = cc.status[boot.index]
447 boot.index = sample(length(y), replace = TRUE)
448 risk.model1.boot = x1[boot.index]
449 risk.model2.boot = x2[boot.index]
450 cc.status.boot = y[boot.index]
451 r = statistics.raplot(x1 = risk.model1.boot, x2 = risk.model2.boot, y = cc.status.boot)
452 results.boot[i, ] = r\$output
453 }
454
455 results.se.boot = apply(results.boot, 2, sd)
456 print(paste(results.se.boot,collapse=','))
457
458
459 results[grep("se", names(results))] = results.se.boot[grep("se", names(results)) - 1]
460
461 }
462
463
464
465 ### calculate cis and return
466
467 z = abs(qnorm((1 - conf.level)/2))
468
469 results.matrix = matrix(NA, 24, 2)
470
471 results.matrix[1, ] = c("Total (n)", results["n"])
472 results.matrix[2, ] = c("Events (n)", results["na"])
473 results.matrix[3, ] = c("Non-events (n)", results["nb"])
474 results.matrix[4, ] = c("Category free NRI and summary statistics","-------------------------")
475 results.matrix[5, ] = c("cfNRI events (%)",
476 paste(round(100*results["cfnri.ev"], dp-2), " (",
477 round(100*results["cfnri.ev"] - z * 100*results["se.cfnri.ev"], dp-2),
478 " to ", round(100*results["cfnri.ev"] +
479 z * 100*results["se.cfnri.ev"], dp-2), ")", sep = ""))
480 results.matrix[6, ] = c("cfNRI non-events (%)",
481 paste(round(100*results["cfnri.ne"], dp-2), " (",
482 round(100*results["cfnri.ne"] - z * 100*results["se.cfnri.ne"], dp)-2,
483 " to ", round(100*results["cfnri.ne"] + z * 100*results["se.cfnri.ne"],
484 dp-2), ")", sep = ""))
485 results.matrix[7, ] = c("cfNRI (%)",
486 paste(round(100*results["cfnri"], dp-2), " (",
487 round(100*results["cfnri"] - z * 100*results["se.cfnri"], dp-2),
488 " to ", round(100*results["cfnri"] + z * 100*results["se.cfnri"],
489 dp-2), ")", sep = ""))
490 results.matrix[8, ] = c("NRI and summary statistics","-------------------------")
491 results.matrix[9, ] = c("NRI events (%)",
492 paste(round(100*results["nri.ev"], dp-2), " (",
493 round(100*results["nri.ev"] - z * 100*results["se.nri.ev"], dp-2),
494 " to ", round(100*results["nri.ev"] +
495 z * 100*results["se.nri.ev"], dp-2), ")", sep = ""))
496 results.matrix[10, ] = c("NRI non-events (%)",
497 paste(round(100*results["nri.ne"], dp-2), " (",
498 round(100*results["nri.ne"] - z * 100*results["se.nri.ne"], dp-2),
499 " to ", round(100*results["nri.ne"] + z * 100*results["se.nri.ne"],
500 dp-2), ")", sep = ""))
501 results.matrix[11, ] = c("NRI (%)",
502 paste(round(100*results["nri"], dp-2), " (",
503 round(100*results["nri"] - z * 100*results["se.nri"], dp-2),
504 " to ", round(100*results["nri"] + z * 100*results["se.nri"],
505 dp-2), ")", sep = ""))
506 results.matrix[12, ] = c("IDI and summary statistics","-------------------------")
507 results.matrix[13, ] = c("IDI events",
508 paste(round(results["idi.ev"], dp), " (",
509 round(results["idi.ev"] - z * results["se.idi.ev"], dp),
510 " to ", round(results["idi.ev"] + z * results["se.idi.ev"],
511 dp), ")", sep = ""))
512 results.matrix[14, ] = c("IDI non-events",
513 paste(round(results["idi.ne"], dp), " (",
514 round(results["idi.ne"] - z * results["se.idi.ne"], dp),
515 " to ", round(results["idi.ne"] + z * results["se.idi.ne"],
516 dp), ")", sep = ""))
517 results.matrix[15, ] = c("IDI",
518 paste(round(results["idi"], dp), " (",
519 round(results["idi"] - z * results["se.idi"], dp),
520 " to ", round(results["idi"] + z * results["se.idi"],
521 dp), ")", sep = ""))
522 results.matrix[16, ] = c("IS (null model)",
523 paste(round(results["is.x1"], dp), " (",
524 round(results["is.x1"] - z * results["se.is.x1"], dp),
525 " to ", round(results["is.x1"] + z * results["se.is.x1"],
526 dp), ")", sep = ""))
527 results.matrix[17, ] = c("IS (alt model)",
528 paste(round(results["is.x2"], dp), " (",
529 round(results["is.x2"] - z * results["se.is.x2"], dp),
530 " to ", round(results["is.x2"] + z * results["se.is.x2"],
531 dp), ")", sep = ""))
532 results.matrix[18, ] = c("IP (null model)",
533 paste(round(results["ip.x1"], dp), " (",
534 round(results["ip.x1"] - z * results["se.ip.x1"], dp),
535 " to ", round(results["ip.x1"] + z * results["se.ip.x1"],
536 dp), ")", sep = ""))
537 results.matrix[19, ] = c("IP (alt model)",
538 paste(round(results["ip.x2"], dp), " (",
539 round(results["ip.x2"] - z * results["se.ip.x2"], dp),
540 " to ", round(results["ip.x2"] + z * results["se.ip.x2"],
541 dp), ")", sep = ""))
542 results.matrix[20, ] = c("AUC","-------------------------")
543 results.matrix[21, ] = c("AUC (null model)",
544 paste(round(results["auc.x1"], dp), " (",
545 round(results["auc.x1"] - z * results["se.auc.x1"], dp),
546 " to ", round(results["auc.x1"] + z * results["se.auc.x1"],
547 dp), ")", sep = ""))
548 results.matrix[22, ] = c("AUC (alt model)",
549 paste(round(results["auc.x2"], dp), " (",
550 round(results["auc.x2"] - z * results["se.auc.x2"], dp),
551 " to ", round(results["auc.x2"] + z * results["se.auc.x2"],
552 dp), ")", sep = ""))
553 results.matrix[23, ] = c("difference (P)", round(results["roc.test.x1.x2.pvalue"], dp))
554 results.matrix[24, ] = c("Incidence", round(results["incidence"], dp))
555
556 return(results.matrix)
557 }
558
559
560
561 ]]>
562
563 options(width=120)
564 options(digits=5)
565 logf = file("rgNRI.log", open = "a")
566 sink(logf,type = c("output", "message"))
567 Out_Dir = "$html_file.files_path"
568 Input1 = "$input1"
569 Input2 = "$input2"
570 myTitle = "$title"
571 outtab = "$nri_file"
572 input1_obs = $input1_observed
573 input1_pred = $input1_predicted
574 input1_id = $input1_id
575 input2_obs = $input2_observed
576 input2_pred = $input2_predicted
577 input2_id = $input2_id
578 in1 = read.table(Input1,head=T,sep='\t')
579 in2 = read.table(Input2,head=T,sep='\t')
580 id1 = in1[,input1_id]
581 id2 = in2[,input2_id]
582 useme1 = in1[which(id1 %in% id2),]
583 useme2 = in2[which(id2 %in% id1),]
584 id1 = useme1[,input1_id]
585 id2 = useme2[,input2_id]
586 useme1 = useme1[order(id1),]
587 useme2 = useme2[order(id2),]
588 x1 = useme1[,input1_pred]
589 x2 = useme2[,input2_pred]
590 y1 = useme1[,input1_obs]
591 y2 = useme2[,input2_obs]
592 n.boot = $CImeth.nboot
593 conf.level = 0.95
594 cis = "$CImeth.cis"
595 digits = 4
596 nydiff = sum(y1 != y2)
597 if (nydiff &gt; 0) {
598 print.noquote(paste('Input error: observed status column has',nydiff,'differences - cannot reliably proceed'))
599 quit(save="no",status=1)
600 }
601 y = y2
602 outplot = 'rgNRI_EventRisk.pdf'
603 res = raplot(x1=x1, x2=x2, y=y, outplot=outplot,title=myTitle)
604
605 stats = statistics.raplot(x1=x1, x2=x2, y=y)
606 res1 = stats\$resdf
607 out1 = stats\$output
608 print.noquote('Results:')
609 print.noquote(res1,digits=4)
610 res2 = summary.raplot(x1=x1, x2=x2, y=y, cis = cis, conf.level = conf.level, n.boot = n.boot, dp = digits, stat_ra=out1)
611 print.noquote('Summary:')
612 print.noquote(res2,digits=4)
613 write.table(format(res1,digits=4),outtab,quote=F, col.names=F,sep="\t",row.names=F)
614 print.noquote('SessionInfo for this R session:')
615 sessionInfo()
616 print.noquote('warnings for this R session:')
617 warnings()
618 sink()
619 </configfile>
620 </configfiles>
621 <citations>
622 <citation type="doi">doi: 10.2215/​CJN.09590911</citation>
623 </citations>
624 </tool>
625
626
627