Mercurial > repos > fubar > rglasso_1_9_8
comparison rg_nri.xml @ 5:e9e354d64f8a draft
Uploaded
author | fubar |
---|---|
date | Fri, 19 Dec 2014 05:58:15 -0500 |
parents | e0e11c2cae3f |
children | 445cb45d1bc0 |
comparison
equal
deleted
inserted
replaced
4:5f2db639f8eb | 5:e9e354d64f8a |
---|---|
1 <tool id="rg_nri" name="NRI" version="0.03"> | |
2 <description>and other model improvement measures</description> | |
3 <requirements> | |
4 <requirement type="package" version="3.1.1">R_3_1_1</requirement> | |
5 <requirement type="package" version="1.3.18">graphicsmagick</requirement> | |
6 <requirement type="package" version="9.10">ghostscript</requirement> | |
7 <requirement type="package" version="2.14">glmnet_lars_2_14</requirement> | |
8 </requirements> | |
9 <command interpreter="python"> | |
10 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "rg_NRI" | |
11 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" | |
12 </command> | |
13 <inputs> | |
14 <param name="title" type="text" value="NRI test" size="80" label="Plot Title" help="Will appear as the title for the comparison plot"/> | |
15 <param name="input1" type="data" format="tabular" label="Select a tabular file from the baseline model with predicted and observed outcome column for each subject" | |
16 multiple='False' help="Observed and predicted status columns must be selected from this file below - NOTE both models must be in same order with exact matches in all observed outcomes" optional="False"/> | |
17 <param name="input1_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input1" numerical="True" | |
18 multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/> | |
19 <param name="input1_predicted" label="Select column containing predicted event probabilies from baseline model" type="data_column" data_ref="input1" numerical="True" | |
20 multiple="False" use_header_names="True" optional="False" help="Must be in range 0-1"/> | |
21 <param name="input1_id" label="Select column containing subject ID from baseline model" type="data_column" data_ref="input1" numerical="True" | |
22 multiple="False" use_header_names="True" optional="False" help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/> | |
23 <param name="input2" type="data" format="tabular" label="Select a tabular file from the new model with predicted and observed outcome columns for each subject" | |
24 multiple='False' help="Observed and predicted status columns must be selected from this file below" /> | |
25 <param name="input2_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input2" numerical="True" | |
26 multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/> | |
27 <param name="input2_predicted" label="Select column containing predicted event probabilities from the new model" type="data_column" data_ref="input2" numerical="True" | |
28 multiple="False" use_header_names="True" optional="False" help="Must be in range 0-1"/> | |
29 <param name="input2_id" label="Select column containing subject ID from the new model" type="data_column" data_ref="input2" numerical="True" | |
30 multiple="False" use_header_names="True" optional="False" help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/> | |
31 <conditional name="CImeth"> | |
32 <param name="cis" type="select" label="CI calculation method" | |
33 help="Bootstrap will take time - a long time for thousands - asymptotic is quick and informative"> | |
34 <option value="asymptotic" selected="true">Asymptotic estimate</option> | |
35 <option value="boot">Bootstrap for empirical CIs</option> | |
36 </param> | |
37 <when value="boot"> | |
38 <param name="nboot" type="integer" value="1000" label="Number of bootstrap replicates"/> | |
39 </when> | |
40 <when value="asymptotic"> | |
41 <param name="nboot" type="hidden" value="1000"/> | |
42 </when> | |
43 </conditional> | |
44 </inputs> | |
45 <outputs> | |
46 <data format="html" name="html_file" label="${title}.html"/> | |
47 <data format="tabular" name="nri_file" label="${title}_nrires.xls"/> | |
48 </outputs> | |
49 <tests> | |
50 <test> | |
51 <param name='input1' value='nri_test1.xls' ftype='tabular' /> | |
52 <param name='input2' value='nri_test2.xls' ftype='tabular' /> | |
53 <output name='html_file' file='nri_test1_out.html' compare='diff' lines_diff='10' /> | |
54 <output name='nri_file' file='nri_test1_out.xls' /> | |
55 </test> | |
56 </tests> | |
57 <help> | |
58 | |
59 **Before you start** | |
60 | |
61 This is a simple tool to calculate various measures of improvement in prediction between two models described in pickering_paper_ | |
62 It is based on an R script pickering_code_ written by Dr John W Pickering and Dr David Cairns from sunny Otago University which | |
63 has been debugged and slightly adjusted to fit a Galaxy tool wrapper. | |
64 | |
65 | |
66 **What it does** | |
67 | |
68 Copied from the documentation in pickering_code_ :: | |
69 | |
70 | |
71 Functions to create risk assessment plots and associated summary statistics | |
72 | |
73 | |
74 (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns | |
75 Last modified August 2014 | |
76 | |
77 Redistribution and use in source and binary forms, with or without | |
78 modification, are permitted provided that the following conditions are met: | |
79 * Redistributions of source code must retain the above copyright | |
80 notice, this list of conditions and the following disclaimer. | |
81 * Redistributions in binary form must reproduce the above copyright | |
82 notice, this list of conditions and the following disclaimer in | |
83 the documentation and/or other materials provided with the distribution | |
84 | |
85 FUNCTIONS | |
86 raplot | |
87 Produces a Risk Assessment Plot and outputs the coordinates of the four curves | |
88 Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of | |
89 Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355–1364. doi:10.2215/CJN.09590911 | |
90 | |
91 statistics.raplot | |
92 Produces the NRIs, IDIs, IS, IP, AUCs. | |
93 Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011). Extensions of net reclassification improvement calculations to | |
94 measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11–21. doi:10.1002/sim.4085 | |
95 Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the | |
96 ROC curve to reclassification and beyond. | |
97 Statistics in Medicine, 27(2), 157–172. doi:10.1002/sim.2929 | |
98 DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach. | |
99 Biometrics, 44(3), 837–845. | |
100 | |
101 summary.raplot | |
102 Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot")) | |
103 | |
104 | |
105 Required arguments for all functions: | |
106 x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object | |
107 x2 is calculated risk (eg from a glm) for the alternative model | |
108 y is the case-control indicator (0 for controls, 1 for cases) | |
109 Optional argument | |
110 t are the boundaries of the risks for each group (ie 0, 1 and the thresholds beteween. eg c(0,0,3,0,7,1)). If missing, defaults to c(0, the incidence, 1) | |
111 | |
112 | |
113 **Input** | |
114 | |
115 The observed and predicted outcomes from two models to be compared. | |
116 | |
117 **Output** | |
118 | |
119 Lots'o'measures (TM) see pickering_paper_ for details | |
120 | |
121 **Attributions** | |
122 | |
123 pickering_paper_ is the paper the caclulations performed by this tool is based on | |
124 | |
125 pickering_code_ is the R function from John Pickering exposed by this Galaxy tool with minor modifications and hacks by Ross Lazarus. | |
126 | |
127 Galaxy_ (that's what you are using right now!) for gluing everything together | |
128 | |
129 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is | |
130 licensed to you under the LGPL_ like other rgenetics artefacts | |
131 | |
132 .. _LGPL: http://www.gnu.org/copyleft/lesser.html | |
133 .. _pickering_code: http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI | |
134 .. _pickering_paper: http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full | |
135 .. _Galaxy: http://getgalaxy.org | |
136 | |
137 | |
138 </help> | |
139 | |
140 <configfiles> | |
141 <configfile name="runme"> | |
142 | |
143 <![CDATA[ | |
144 | |
145 ### http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI code | |
146 ### http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full is the reference | |
147 ### lots of little tweaks and but fixes. Using t as a variable name seems fraught to me. | |
148 ### Ross Lazarus october 2014 for a Galaxy tool wrapper using the toolfactory infrastucture | |
149 | |
150 ############################################################################# | |
151 ###Functions to create risk assessment plots and associated summary statistics | |
152 ############################################################################# | |
153 ### | |
154 ### (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns | |
155 ### Last modified August 2014 | |
156 ### | |
157 ### Redistribution and use in source and binary forms, with or without | |
158 ### modification, are permitted provided that the following conditions are met: | |
159 ### * Redistributions of source code must retain the above copyright | |
160 ### notice, this list of conditions and the following disclaimer. | |
161 ### * Redistributions in binary form must reproduce the above copyright | |
162 ### notice, this list of conditions and the following disclaimer in | |
163 ### the documentation and/or other materials provided with the distribution | |
164 | |
165 ### FUNCTIONS | |
166 ### raplot | |
167 ### Produces a Risk Assessment Plot and outputs the coordinates of the four curves | |
168 ### Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of | |
169 ### Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355–1364. doi:10.2215/CJN.09590911 | |
170 ### | |
171 ### statistics.raplot | |
172 ### Produces the NRIs, IDIs, IS, IP, AUCs. | |
173 ### Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011). | |
174 ### Extensions of net reclassification improvement calculations to measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11–21. doi:10.1002/sim.4085 | |
175 ### Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the ROC curve to reclassification and beyond. | |
176 ### Statistics in Medicine, 27(2), 157–172. doi:10.1002/sim.2929 | |
177 ### DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach. | |
178 ### Biometrics, 44(3), 837–845. | |
179 ### | |
180 ### summary.raplot | |
181 ### Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot")) | |
182 | |
183 | |
184 ### Required arguments for all functions: | |
185 ### x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object | |
186 ### x2 is calculated risk (eg from a glm) for the alternative model | |
187 ### y is the case-control indicator (0 for controls, 1 for cases) | |
188 ### Optional argument | |
189 ### t are the boundaries of the risks for each group (ie 0, 1 and the thresholds beteween. eg c(0,0,3,0,7,1)). If missing, defaults to c(0, the incidence, 1) | |
190 | |
191 | |
192 ### risk assessment plot | |
193 | |
194 library('e1071') | |
195 library('caret') | |
196 library('pROC') | |
197 library('Hmisc') | |
198 library('pracma') | |
199 | |
200 raplot = function(x1, x2, y, outplot, title) { | |
201 | |
202 roc.model1 = roc(y, x1) | |
203 roc.model2 = roc(y, x2) | |
204 sens.model1 = roc.model1\$sensitivities | |
205 spec.model1 = 1 - roc.model1\$specificities | |
206 n.model1 = length(sens.model1) | |
207 thresh.model1 = roc.model1\$thresholds | |
208 thresh.model1 = thresh.model1[c(-1,-n.model1)] | |
209 sens.model1 = sens.model1[c(-1,-n.model1)] | |
210 spec.model1 = spec.model1[c(-1,-n.model1)] | |
211 sens.model2 = roc.model2\$sensitivities | |
212 spec.model2 = 1 - roc.model2\$specificities | |
213 n.model2 = length(sens.model2) | |
214 thresh.model2 = roc.model2\$thresholds | |
215 thresh.model2[1]=0 | |
216 thresh.model2[length(thresh.model2)]=1 | |
217 thresh.model2 = thresh.model2[c(-1,-n.model2)] | |
218 sens.model2 = sens.model2[c(-1,-n.model2)] | |
219 spec.model2 = spec.model2[c(-1,-n.model2)] | |
220 | |
221 n.model1 = length(sens.model1) | |
222 n.model2 = length(sens.model2) | |
223 | |
224 ### actual plotting | |
225 pdf(outplot) | |
226 plot(thresh.model1, sens.model1, xlim = c(0, 1), ylim = c(0, 1), type = "n", | |
227 lty = 2, lwd = 2, xlab = "Risk of Event", ylab = "", col = "black", main=title) | |
228 grid() | |
229 | |
230 polygon(x = c(thresh.model1, thresh.model2[n.model2:1]), | |
231 y = c(sens.model1, sens.model2[n.model2:1]), border = NA, col = gray(0.8)) | |
232 polygon(x = c(thresh.model1, thresh.model2[n.model2:1]), | |
233 y = c(spec.model1, spec.model2[n.model2:1]), border = NA, col = gray(0.8)) | |
234 | |
235 lines(thresh.model1, sens.model1, type = "l", lty = 2, lwd = 2, col = "black") | |
236 lines(thresh.model2, sens.model2, type = "l", lty = 1, lwd = 2, col = "black") | |
237 | |
238 lines(thresh.model1, spec.model1, type = "l", lty = 2, lwd = 2, col = "red") | |
239 lines(thresh.model2, spec.model2, type = "l", lty = 1, lwd = 2, col = "red") | |
240 | |
241 text(x = -0.15, y = 0.4, labels = "Sensitivity, ", col = "black", xpd = TRUE, srt = 90) | |
242 text(x = -0.15, y = 0.4 + 0.175, labels = "1-Specificity", col = "red", xpd = TRUE, srt = 90) | |
243 legend("topleft", c("Event: New model", "Event: Baseline model", | |
244 "No Event: New model", "No Event: Baseline model"), | |
245 col = c("black", "black", "red", "red"), | |
246 lty = c(1,2, 1, 2), lwd = 2, bty = "n") | |
247 dev.off() | |
248 return(data.frame("Null.p.sens"=thresh.model1, | |
249 "Null.sens"=sens.model1, | |
250 "Null.p.1spec"=thresh.model1, | |
251 "Null.1spec"=sens.model1, | |
252 "Alt.p.sens"=thresh.model2, | |
253 "Alt.sens"=sens.model2, | |
254 "Alt.p.1spec"=thresh.model2, | |
255 "Alt.1spec"=sens.model2)) | |
256 | |
257 } | |
258 | |
259 | |
260 | |
261 ### statistics from a raplot (is an adaptation of improveProb() from Hmisc) | |
262 | |
263 statistics.raplot = function(x1, x2, y, threshvec) | |
264 { | |
265 | |
266 s = is.na(x1 + x2 + y) ###Remove rows with missing data | |
267 if (any(s)) { | |
268 smiss = sum(s) | |
269 s = !s | |
270 x1 = x1[s] | |
271 x2 = x2[s] | |
272 y = y[s] | |
273 print.noquote(paste('Warning: removed',smiss,'cases with missing values')) | |
274 } | |
275 n = length(y) | |
276 y = as.numeric(y) | |
277 u = sort(unique(y)) | |
278 if (length(u) != 2 || u[1] != 0 || u[2] != 1) { | |
279 print.noquote("INPUT ERROR: y must have only two values: 0 and 1") | |
280 sink() | |
281 quit(save="no",status=2) | |
282 } | |
283 r = range(x1, x2) | |
284 if (r[1] < 0 || r[2] > 1) { | |
285 print.noquote("INPUT ERROR: x1 and x2 must be in [0,1]") | |
286 sink() | |
287 quit(save="no",status=3) | |
288 } | |
289 incidence=sum(y)/n | |
290 if (missing(threshvec)) { | |
291 threshvec=c(0, incidence,1) | |
292 print(paste('threshvec missing. using',paste(threshvec,collapse=','))) | |
293 } | |
294 a = (y == 1) | |
295 b = (y == 0) | |
296 na = sum(a) | |
297 nb = sum(b) | |
298 d = x2 - x1 | |
299 ### NRI | |
300 n.thresh=length(threshvec)-1 | |
301 risk.class.x1.ev=cut2(x1[a],threshvec) | |
302 risk.class.x2.ev=cut2(x2[a],threshvec) | |
303 thresh=c() | |
304 lt = length(threshvec) | |
305 for (i in 1:(lt-1)) { | |
306 thresh[i] = paste("[",toString(threshvec[i]),",",toString(threshvec[i+1]),"]") | |
307 } | |
308 levels(risk.class.x1.ev)=thresh | |
309 levels(risk.class.x2.ev)=thresh | |
310 cM.ev=confusionMatrix(risk.class.x2.ev,risk.class.x1.ev) | |
311 pup.ev=0 | |
312 pdown.ev=0 | |
313 for (i in 1:(n.thresh-1)) { pup.ev = pup.ev + sum(cM.ev\$table[(i+1):n.thresh,i])} | |
314 for (i in 2:n.thresh) { pdown.ev = pdown.ev + sum(cM.ev\$table[1:(i-1),i])} | |
315 pup.ev=pup.ev/na | |
316 pdown.ev=pdown.ev/na | |
317 risk.class.x1.ne=cut2(x1[b],threshvec) | |
318 risk.class.x2.ne=cut2(x2[b],threshvec) | |
319 levels(risk.class.x1.ne)=thresh | |
320 levels(risk.class.x2.ne)=thresh | |
321 cM.ne=confusionMatrix(risk.class.x2.ne,risk.class.x1.ne) | |
322 pup.ne=0 | |
323 pdown.ne=0 | |
324 for (i in 1:(n.thresh-1)){pup.ne=pup.ev+sum(cM.ne\$table[(i+1):n.thresh,i])} | |
325 for (i in 2:n.thresh){pdown.ne=pdown.ne+sum(cM.ne\$table[1:(i-1),i])} | |
326 pdown.ne=pdown.ne/nb | |
327 pup.ne=pup.ne/nb | |
328 nri = pup.ev - pdown.ev - (pup.ne - pdown.ne) | |
329 se.nri = sqrt((pup.ev + pdown.ev)/na + (pup.ne + pdown.ne)/nb) | |
330 z.nri = nri/se.nri | |
331 nri.ev = pup.ev - pdown.ev | |
332 se.nri.ev = sqrt((pup.ev + pdown.ev)/na) | |
333 z.nri.ev = nri.ev/se.nri.ev | |
334 nri.ne = pdown.ne - pup.ne | |
335 se.nri.ne = sqrt((pdown.ne + pup.ne)/nb) | |
336 z.nri.ne = nri.ne/se.nri.ne | |
337 ### Category Free NRI calculations | |
338 cfpup.ev = mean(d[a] > 0) | |
339 cfpup.ne = mean(d[b] > 0) | |
340 cfpdown.ev = mean(d[a] < 0) | |
341 cfpdown.ne = mean(d[b] < 0) | |
342 cfnri = cfpup.ev - cfpdown.ev - (cfpup.ne - cfpdown.ne) | |
343 se.cfnri = sqrt((cfpup.ev + cfpdown.ev)/na + (cfpup.ne + cfpdown.ne)/nb) | |
344 z.cfnri = cfnri/se.cfnri | |
345 cfnri.ev = cfpup.ev - cfpdown.ev | |
346 se.cfnri.ev = sqrt((cfpup.ev + cfpdown.ev)/na) | |
347 z.cfnri.ev = cfnri.ev/se.cfnri.ev | |
348 cfnri.ne = cfpdown.ne - cfpup.ne | |
349 se.cfnri.ne = sqrt((cfpdown.ne + cfpup.ne)/nb) | |
350 z.cfnri.ne = cfnri.ne/se.cfnri.ne | |
351 ### IDI calculations | |
352 improveSens = sum(d[a])/na | |
353 improveSpec = -sum(d[b])/nb | |
354 idi.ev = mean(improveSens) | |
355 idi.ne = mean(improveSpec) | |
356 idi = idi.ev - idi.ne | |
357 var.ev = var(d[a])/na | |
358 se.idi.ev = sqrt(var.ev) | |
359 z.idi.ev = idi.ev/se.idi.ev | |
360 var.ne = var(d[b])/nb | |
361 se.idi.ne = sqrt(var.ne) | |
362 z.idi.ne = idi.ne/se.idi.ne | |
363 se.idi = sqrt(var.ev + var.ne) | |
364 z.idi = idi/se.idi | |
365 ### AUC calculations | |
366 roc.x1 = roc(y, x1) | |
367 auc.x1 = auc(roc.x1) | |
368 ci.auc.x1 = ci.auc(roc.x1) | |
369 se.auc.x1 = (ci.auc.x1[3] - auc.x1)/qnorm(0.975) | |
370 roc.x2 = roc(y, x2) | |
371 auc.x2 = auc(roc.x2) | |
372 ci.auc.x2 = ci.auc(roc.x2) | |
373 se.auc.x2 = (ci.auc.x2[3] - auc.x2)/qnorm(0.975) | |
374 roc.test.x1.x2 = roc.test(roc.x1, roc.x2) ###Uses the default Delong method | |
375 sens.x1 = roc.x1\$sensitivities | |
376 spec.x1 = 1 - roc.x1\$specificities | |
377 n.x1 = length(sens.x1) | |
378 x1 = roc.x1\$thresholds | |
379 x1 = x1[c(-1,-n.x1)] | |
380 sens.x1 = sens.x1[c(-1,-n.x1)] | |
381 spec.x1 = spec.x1[c(-1,-n.x1)] | |
382 sens.x2 = roc.x2\$sensitivities | |
383 spec.x2 = 1 - roc.x2\$specificities | |
384 n.x2 = length(sens.x2) | |
385 x2 = roc.x2\$thresholds | |
386 x2 = x2[c(-1,-n.x2)] | |
387 sens.x2 = sens.x2[c(-1,-n.x2)] | |
388 spec.x2 = spec.x2[c(-1,-n.x2)] | |
389 ### Integrated sensitivity and 1-specificity calculations | |
390 is.x1 = trapz(x = x1, y = sens.x1) ### area under curves (relates to integrated sens, 1-spec) | |
391 is.x2 = trapz(x = x2, y = sens.x2) | |
392 ip.x1 = trapz(x = x1, y = spec.x1) | |
393 ip.x2 = trapz(x = x2, y = spec.x2) | |
394 | |
395 ### Output | |
396 output = c(n, na, nb, pup.ev, pup.ne, pdown.ev, pdown.ne, nri, se.nri, z.nri, | |
397 nri.ev, se.nri.ev, z.nri.ev, nri.ne, se.nri.ne, z.nri.ne, | |
398 cfpup.ev, cfpup.ne, cfpdown.ev, cfpdown.ne, cfnri, se.cfnri, z.cfnri, | |
399 cfnri.ev, se.cfnri.ev, z.cfnri.ev, cfnri.ne, se.cfnri.ne, z.cfnri.ne, | |
400 improveSens, improveSpec, idi.ev, se.idi.ev, z.idi.ev, idi.ne, | |
401 se.idi.ne, z.idi.ne, idi, se.idi, z.idi, is.x1, NA, is.x2, NA, | |
402 ip.x1, NA, ip.x2, NA, auc.x1, se.auc.x1, auc.x2, se.auc.x2, | |
403 roc.test.x1.x2\$p.value,incidence) | |
404 names(output) = c("n", "na", "nb", "pup.ev", "pup.ne", "pdown.ev", "pdown.ne", | |
405 "nri", "se.nri", "z.nri", "nri.ev", "se.nri.ev", "z.nri.ev", | |
406 "nri.ne", "se.nri.ne", "z.nri.ne", | |
407 "cfpup.ev", "cfpup.ne", "cfpdown.ev", "cfpdown.ne", | |
408 "cfnri", "se.cfnri", "z.cfnri", "cfnri.ev", "se.cfnri.ev", "z.cfnri.ev", | |
409 "cfnri.ne", "se.cfnri.ne", "z.cfnri.ne", "improveSens", "improveSpec", | |
410 "idi.ev", "se.idi.ev", "z.idi.ev", "idi.ne", "se.idi.ne", | |
411 "z.idi.ne", "idi", "se.idi", "z.idi", "is.x1", "se.is.x1", | |
412 "is.x2", "se.is.x2", "ip.x1", "se.ip.x1", "ip.x2", "se.ip.x2", | |
413 "auc.x1", "se.auc.x1", "auc.x2", "se.auc.x2", | |
414 "roc.test.x1.x2.pvalue","incidence") | |
415 resdf = data.frame(N=n, Na=na, Nb=nb, pup.ev=pup.ev, pup.ne=pup.ne, pdown.ev=pdown.ev, pdown.ne=pdown.ne, NRI=nri, NRI.se=se.nri, NRI.z=z.nri, | |
416 NRI.ev=nri.ev, NRI.ev.se=se.nri.ev, NRI.ev.z=z.nri.ev, NRI.ne=nri.ne, NRI.ne.se=se.nri.ne, NRI.ne.z=z.nri.ne, | |
417 cfpup.ev=cfpup.ev, cfpup.ne=cfpup.ne, cfpdown.ev=cfpdown.ev, cfpdown.ne=cfpdown.ne, CFNRI=cfnri, CFNRI.se=se.cfnri, CFNRI.z=z.cfnri, | |
418 CFNRI.ev=cfnri.ev, CFNRI.ev.se=se.cfnri.ev, CFNRI.ev.z=z.cfnri.ev, CFNRI.ne=cfnri.ne, CFNRI.ne.se=se.cfnri.ne, CFNRI.ne.z=z.cfnri.ne, | |
419 improvSens=improveSens, improvSpec=improveSpec, IDI.ev=idi.ev, IDI.ev.se=se.idi.ev, IDI.ev.z=z.idi.ev, IDI.ne=idi.ne, | |
420 IDI.ne.se=se.idi.ne, IDI.ne.z=z.idi.ne, IDI=idi, IDI.se=se.idi, IDI.z=z.idi, isx1=is.x1, isx2=is.x2, | |
421 ipxi=ip.x1, ipx2=ip.x2, AUC.x1=auc.x1, AUC.x1.se=se.auc.x1, AUC.x2=auc.x2, AUC.x2.se=se.auc.x2, | |
422 roctestpval=roc.test.x1.x2\$p.value,incidence=incidence) | |
423 tr = t(resdf) | |
424 tresdf = data.frame(measure=colnames(resdf),value=tr[,1]) | |
425 return(list(resdf=tresdf,output=output)) | |
426 } | |
427 | |
428 | |
429 ### More comprehensive summary statistics from a raplot | |
430 ### Choice of confidence intervals determined through asymptotics or bootstrapping (n.boot = ### of bootstrap resamples) | |
431 ### dp is number of decimal places for results table | |
432 | |
433 summary.raplot = function(x1, x2, y, threshvec, cis = "boot", conf.level = 0.95, n.boot = 2000, dp = 4, stat_ra=NA) | |
434 { | |
435 results = stat_ra | |
436 if (cis == "boot") { | |
437 print.noquote("Bootstrap estimates for SE") | |
438 results.boot = matrix(NA, n.boot, length(names(results))) | |
439 | |
440 colnames(results.boot) = names(results) | |
441 | |
442 for (i in 1:n.boot) { | |
443 ###boot.index = sample(length(cc.status), replace = TRUE) | |
444 ###risk.model1.boot = risk.model1[boot.index] | |
445 ###risk.model2.boot = risk.model2[boot.index] | |
446 ###cc.status.boot = cc.status[boot.index] | |
447 boot.index = sample(length(y), replace = TRUE) | |
448 risk.model1.boot = x1[boot.index] | |
449 risk.model2.boot = x2[boot.index] | |
450 cc.status.boot = y[boot.index] | |
451 r = statistics.raplot(x1 = risk.model1.boot, x2 = risk.model2.boot, y = cc.status.boot) | |
452 results.boot[i, ] = r\$output | |
453 } | |
454 | |
455 results.se.boot = apply(results.boot, 2, sd) | |
456 print(paste(results.se.boot,collapse=',')) | |
457 | |
458 | |
459 results[grep("se", names(results))] = results.se.boot[grep("se", names(results)) - 1] | |
460 | |
461 } | |
462 | |
463 | |
464 | |
465 ### calculate cis and return | |
466 | |
467 z = abs(qnorm((1 - conf.level)/2)) | |
468 | |
469 results.matrix = matrix(NA, 24, 2) | |
470 | |
471 results.matrix[1, ] = c("Total (n)", results["n"]) | |
472 results.matrix[2, ] = c("Events (n)", results["na"]) | |
473 results.matrix[3, ] = c("Non-events (n)", results["nb"]) | |
474 results.matrix[4, ] = c("Category free NRI and summary statistics","-------------------------") | |
475 results.matrix[5, ] = c("cfNRI events (%)", | |
476 paste(round(100*results["cfnri.ev"], dp-2), " (", | |
477 round(100*results["cfnri.ev"] - z * 100*results["se.cfnri.ev"], dp-2), | |
478 " to ", round(100*results["cfnri.ev"] + | |
479 z * 100*results["se.cfnri.ev"], dp-2), ")", sep = "")) | |
480 results.matrix[6, ] = c("cfNRI non-events (%)", | |
481 paste(round(100*results["cfnri.ne"], dp-2), " (", | |
482 round(100*results["cfnri.ne"] - z * 100*results["se.cfnri.ne"], dp)-2, | |
483 " to ", round(100*results["cfnri.ne"] + z * 100*results["se.cfnri.ne"], | |
484 dp-2), ")", sep = "")) | |
485 results.matrix[7, ] = c("cfNRI (%)", | |
486 paste(round(100*results["cfnri"], dp-2), " (", | |
487 round(100*results["cfnri"] - z * 100*results["se.cfnri"], dp-2), | |
488 " to ", round(100*results["cfnri"] + z * 100*results["se.cfnri"], | |
489 dp-2), ")", sep = "")) | |
490 results.matrix[8, ] = c("NRI and summary statistics","-------------------------") | |
491 results.matrix[9, ] = c("NRI events (%)", | |
492 paste(round(100*results["nri.ev"], dp-2), " (", | |
493 round(100*results["nri.ev"] - z * 100*results["se.nri.ev"], dp-2), | |
494 " to ", round(100*results["nri.ev"] + | |
495 z * 100*results["se.nri.ev"], dp-2), ")", sep = "")) | |
496 results.matrix[10, ] = c("NRI non-events (%)", | |
497 paste(round(100*results["nri.ne"], dp-2), " (", | |
498 round(100*results["nri.ne"] - z * 100*results["se.nri.ne"], dp-2), | |
499 " to ", round(100*results["nri.ne"] + z * 100*results["se.nri.ne"], | |
500 dp-2), ")", sep = "")) | |
501 results.matrix[11, ] = c("NRI (%)", | |
502 paste(round(100*results["nri"], dp-2), " (", | |
503 round(100*results["nri"] - z * 100*results["se.nri"], dp-2), | |
504 " to ", round(100*results["nri"] + z * 100*results["se.nri"], | |
505 dp-2), ")", sep = "")) | |
506 results.matrix[12, ] = c("IDI and summary statistics","-------------------------") | |
507 results.matrix[13, ] = c("IDI events", | |
508 paste(round(results["idi.ev"], dp), " (", | |
509 round(results["idi.ev"] - z * results["se.idi.ev"], dp), | |
510 " to ", round(results["idi.ev"] + z * results["se.idi.ev"], | |
511 dp), ")", sep = "")) | |
512 results.matrix[14, ] = c("IDI non-events", | |
513 paste(round(results["idi.ne"], dp), " (", | |
514 round(results["idi.ne"] - z * results["se.idi.ne"], dp), | |
515 " to ", round(results["idi.ne"] + z * results["se.idi.ne"], | |
516 dp), ")", sep = "")) | |
517 results.matrix[15, ] = c("IDI", | |
518 paste(round(results["idi"], dp), " (", | |
519 round(results["idi"] - z * results["se.idi"], dp), | |
520 " to ", round(results["idi"] + z * results["se.idi"], | |
521 dp), ")", sep = "")) | |
522 results.matrix[16, ] = c("IS (null model)", | |
523 paste(round(results["is.x1"], dp), " (", | |
524 round(results["is.x1"] - z * results["se.is.x1"], dp), | |
525 " to ", round(results["is.x1"] + z * results["se.is.x1"], | |
526 dp), ")", sep = "")) | |
527 results.matrix[17, ] = c("IS (alt model)", | |
528 paste(round(results["is.x2"], dp), " (", | |
529 round(results["is.x2"] - z * results["se.is.x2"], dp), | |
530 " to ", round(results["is.x2"] + z * results["se.is.x2"], | |
531 dp), ")", sep = "")) | |
532 results.matrix[18, ] = c("IP (null model)", | |
533 paste(round(results["ip.x1"], dp), " (", | |
534 round(results["ip.x1"] - z * results["se.ip.x1"], dp), | |
535 " to ", round(results["ip.x1"] + z * results["se.ip.x1"], | |
536 dp), ")", sep = "")) | |
537 results.matrix[19, ] = c("IP (alt model)", | |
538 paste(round(results["ip.x2"], dp), " (", | |
539 round(results["ip.x2"] - z * results["se.ip.x2"], dp), | |
540 " to ", round(results["ip.x2"] + z * results["se.ip.x2"], | |
541 dp), ")", sep = "")) | |
542 results.matrix[20, ] = c("AUC","-------------------------") | |
543 results.matrix[21, ] = c("AUC (null model)", | |
544 paste(round(results["auc.x1"], dp), " (", | |
545 round(results["auc.x1"] - z * results["se.auc.x1"], dp), | |
546 " to ", round(results["auc.x1"] + z * results["se.auc.x1"], | |
547 dp), ")", sep = "")) | |
548 results.matrix[22, ] = c("AUC (alt model)", | |
549 paste(round(results["auc.x2"], dp), " (", | |
550 round(results["auc.x2"] - z * results["se.auc.x2"], dp), | |
551 " to ", round(results["auc.x2"] + z * results["se.auc.x2"], | |
552 dp), ")", sep = "")) | |
553 results.matrix[23, ] = c("difference (P)", round(results["roc.test.x1.x2.pvalue"], dp)) | |
554 results.matrix[24, ] = c("Incidence", round(results["incidence"], dp)) | |
555 | |
556 return(results.matrix) | |
557 } | |
558 | |
559 | |
560 | |
561 ]]> | |
562 | |
563 options(width=120) | |
564 options(digits=5) | |
565 logf = file("rgNRI.log", open = "a") | |
566 sink(logf,type = c("output", "message")) | |
567 Out_Dir = "$html_file.files_path" | |
568 Input1 = "$input1" | |
569 Input2 = "$input2" | |
570 myTitle = "$title" | |
571 outtab = "$nri_file" | |
572 input1_obs = $input1_observed | |
573 input1_pred = $input1_predicted | |
574 input1_id = $input1_id | |
575 input2_obs = $input2_observed | |
576 input2_pred = $input2_predicted | |
577 input2_id = $input2_id | |
578 in1 = read.table(Input1,head=T,sep='\t') | |
579 in2 = read.table(Input2,head=T,sep='\t') | |
580 id1 = in1[,input1_id] | |
581 id2 = in2[,input2_id] | |
582 useme1 = in1[which(id1 %in% id2),] | |
583 useme2 = in2[which(id2 %in% id1),] | |
584 id1 = useme1[,input1_id] | |
585 id2 = useme2[,input2_id] | |
586 useme1 = useme1[order(id1),] | |
587 useme2 = useme2[order(id2),] | |
588 x1 = useme1[,input1_pred] | |
589 x2 = useme2[,input2_pred] | |
590 y1 = useme1[,input1_obs] | |
591 y2 = useme2[,input2_obs] | |
592 n.boot = $CImeth.nboot | |
593 conf.level = 0.95 | |
594 cis = "$CImeth.cis" | |
595 digits = 4 | |
596 nydiff = sum(y1 != y2) | |
597 if (nydiff > 0) { | |
598 print.noquote(paste('Input error: observed status column has',nydiff,'differences - cannot reliably proceed')) | |
599 quit(save="no",status=1) | |
600 } | |
601 y = y2 | |
602 outplot = 'rgNRI_EventRisk.pdf' | |
603 res = raplot(x1=x1, x2=x2, y=y, outplot=outplot,title=myTitle) | |
604 | |
605 stats = statistics.raplot(x1=x1, x2=x2, y=y) | |
606 res1 = stats\$resdf | |
607 out1 = stats\$output | |
608 print.noquote('Results:') | |
609 print.noquote(res1,digits=4) | |
610 res2 = summary.raplot(x1=x1, x2=x2, y=y, cis = cis, conf.level = conf.level, n.boot = n.boot, dp = digits, stat_ra=out1) | |
611 print.noquote('Summary:') | |
612 print.noquote(res2,digits=4) | |
613 write.table(format(res1,digits=4),outtab,quote=F, col.names=F,sep="\t",row.names=F) | |
614 print.noquote('SessionInfo for this R session:') | |
615 sessionInfo() | |
616 print.noquote('warnings for this R session:') | |
617 warnings() | |
618 sink() | |
619 </configfile> | |
620 </configfiles> | |
621 <citations> | |
622 <citation type="doi">doi: 10.2215/CJN.09590911</citation> | |
623 </citations> | |
624 </tool> | |
625 | |
626 | |
627 |