changeset 5:e9e354d64f8a draft

Uploaded
author fubar
date Fri, 19 Dec 2014 05:58:15 -0500
parents 5f2db639f8eb
children 42637d352d09
files readme.rst rg_nri.xml
diffstat 2 files changed, 666 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Fri Dec 19 05:58:15 2014 -0500
@@ -0,0 +1,39 @@
+glmnet wrappers
+===============
+
+This is a self installing Galaxy tool exposing the glmnet_ R package which has excellent documentation at
+glmnet_ Minimal details are provided in this wrapper - please RTM to get the best out of it.
+
+The tool exposes the entire range of penalised maximum likelihood
+GLM models ranging from pure lasso (set alpha to 1) to pure ridge-regression (set alpha to 0). 
+
+These models can be k-fold internally cross validated to help select an "optimal" predictive or classification
+algorithm. Predictive coefficients for each included independent variable are output for each model. 
+
+Predictors can be forced into models to adjust for known confounders or explanatory factors.
+
+The glmnet_ implementation of the coordinate descent algorithm is fast and efficient even on relatively large problems
+with tens of thousands of predictors and thousands of samples - such as normalised microarray intensities and anthropometry
+on a very large sample of obese patients. 
+
+The user supplies a tabular file with rows as samples and columns containing observations, then chooses 
+as many predictors as required. A separate model will be output for each of potentially multiple dependent
+variables. Models are reported as the coefficients for terms in an 'optimal' model.
+These optimal predictors are selected by repeatedly setting
+aside a random subsample, building a model in the remainder and estimating AUC or deviance 
+using  k (default 10) fold internal cross validation. For each of these steps, a random 1/k 
+of the samples are set aside and used to estiamte performance of an optimal model estimated 
+from the remaining samples. Plots are provided showing the range of these (eg 10) internal validation 
+estimates and mean model AUC (binomial) or residual deviance plots at each penalty increment step.
+
+A full range of link functions are available including Gaussian, Poisson, Binomial and
+Cox proportional hazard time to failure for censored data in this wrapper.
+
+Note that multinomial and multiresponse gaussian models are NOT yet implemented since I have not yet
+had use for them - send code!
+
+.. _glmnet: http://web.stanford.edu/~hastie/glmnet/glmnet_alpha.html
+
+Wrapper author: Ross Lazarus
+19 october 2014
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rg_nri.xml	Fri Dec 19 05:58:15 2014 -0500
@@ -0,0 +1,627 @@
+<tool id="rg_nri" name="NRI" version="0.03">
+  <description>and other model improvement measures</description>
+  <requirements>
+      <requirement type="package" version="3.1.1">R_3_1_1</requirement>
+      <requirement type="package" version="1.3.18">graphicsmagick</requirement>
+      <requirement type="package" version="9.10">ghostscript</requirement>
+      <requirement type="package" version="2.14">glmnet_lars_2_14</requirement>
+  </requirements>
+  <command interpreter="python">
+     rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "rg_NRI" 
+    --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
+  </command>
+  <inputs>
+     <param name="title" type="text" value="NRI test" size="80" label="Plot Title" help="Will appear as the title for the comparison plot"/>
+    <param name="input1"  type="data" format="tabular" label="Select a tabular file from the baseline model with predicted and observed outcome column for each subject"
+    multiple='False' help="Observed and predicted status columns must be selected from this file below - NOTE both models must be in same order with exact matches in all observed outcomes" optional="False"/>
+    <param name="input1_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input1" numerical="True" 
+         multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
+    <param name="input1_predicted" label="Select column containing predicted event probabilies from baseline model" type="data_column" data_ref="input1" numerical="True" 
+         multiple="False" use_header_names="True" optional="False"  help="Must be in range 0-1"/>
+    <param name="input1_id" label="Select column containing subject ID from baseline model" type="data_column" data_ref="input1" numerical="True" 
+         multiple="False" use_header_names="True" optional="False" help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/>
+    <param name="input2"  type="data" format="tabular" label="Select a tabular file from the new model with predicted and observed outcome columns for each subject"
+    multiple='False' help="Observed and predicted status columns must be selected from this file below" />
+    <param name="input2_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input2" numerical="True" 
+         multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
+    <param name="input2_predicted" label="Select column containing predicted event probabilities from the new model" type="data_column" data_ref="input2" numerical="True" 
+         multiple="False" use_header_names="True" optional="False" help="Must be in range 0-1"/>
+    <param name="input2_id" label="Select column containing subject ID from the new model" type="data_column" data_ref="input2" numerical="True" 
+         multiple="False" use_header_names="True" optional="False"  help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/>
+    <conditional name="CImeth">
+        <param name="cis" type="select" label="CI calculation method" 
+             help="Bootstrap will take time - a long time for thousands - asymptotic is quick and informative">
+                <option value="asymptotic" selected="true">Asymptotic estimate</option>
+                <option value="boot">Bootstrap for empirical CIs</option>
+        </param>
+        <when value="boot">
+            <param name="nboot" type="integer" value="1000" label="Number of bootstrap replicates"/>
+        </when>
+        <when value="asymptotic">
+            <param name="nboot" type="hidden" value="1000"/>
+        </when>
+    </conditional>
+  </inputs>    
+  <outputs>
+    <data format="html" name="html_file" label="${title}.html"/>
+    <data format="tabular" name="nri_file" label="${title}_nrires.xls"/>
+  </outputs>
+ <tests>
+    <test>
+     <param name='input1' value='nri_test1.xls' ftype='tabular' />
+     <param name='input2' value='nri_test2.xls' ftype='tabular' />
+     <output name='html_file' file='nri_test1_out.html'  compare='diff' lines_diff='10' />
+     <output name='nri_file' file='nri_test1_out.xls' />
+    </test>
+</tests>
+<help>
+
+**Before you start**
+
+This is a simple tool to calculate various measures of improvement in prediction between two models described in pickering_paper_
+It is based on an R script pickering_code_ written by Dr John W Pickering and Dr David Cairns from sunny Otago University which
+has been debugged and slightly adjusted to fit a Galaxy tool wrapper.
+
+
+**What it does**
+
+Copied from the documentation in pickering_code_ ::
+
+    
+    Functions to create risk assessment plots and associated summary statistics
+    
+    
+      (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns
+       Last modified August 2014
+     
+      Redistribution and use in source and binary forms, with or without 
+       modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright 
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright 
+         notice, this list of conditions and the following disclaimer in 
+         the documentation and/or other materials provided with the distribution
+
+     FUNCTIONS
+     raplot
+           Produces a Risk Assessment Plot and outputs the coordinates of the four curves
+           Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of
+           Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355–1364. doi:10.2215/CJN.09590911
+    
+     statistics.raplot 
+           Produces the NRIs, IDIs, IS, IP, AUCs.
+           Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011). Extensions of net reclassification improvement calculations to
+           measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11–21. doi:10.1002/sim.4085
+           Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the
+           ROC curve to reclassification and beyond.
+           Statistics in Medicine, 27(2), 157–172. doi:10.1002/sim.2929
+           DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach.
+           Biometrics, 44(3), 837–845.
+    
+     summary.raplot 
+           Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot"))
+
+
+     Required arguments for all functions:
+       x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object
+       x2 is calculated risk (eg from a glm) for the alternative model
+       y is the case-control indicator (0 for controls, 1 for cases)
+     Optional argument
+       t are the boundaries of the risks for each group (ie 0, 1 and the thresholds beteween.  eg c(0,0,3,0,7,1)). If missing, defaults to c(0, the incidence, 1)
+
+
+**Input**
+
+The observed and predicted outcomes from two models to be compared.
+
+**Output**
+
+Lots'o'measures (TM) see pickering_paper_ for details
+
+**Attributions**
+
+pickering_paper_ is the paper the caclulations performed by this tool is based on
+
+pickering_code_ is the R function from John Pickering exposed by this Galaxy tool with minor modifications and hacks by Ross Lazarus.
+
+Galaxy_ (that's what you are using right now!) for gluing everything together 
+
+Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is 
+licensed to you under the LGPL_ like other rgenetics artefacts
+
+.. _LGPL: http://www.gnu.org/copyleft/lesser.html
+.. _pickering_code: http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI 
+.. _pickering_paper: http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full 
+.. _Galaxy: http://getgalaxy.org
+
+
+</help>
+
+<configfiles>
+<configfile name="runme">
+    
+<![CDATA[
+
+### http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI code
+### http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full is the reference
+### lots of little tweaks and but fixes. Using t as a variable name seems fraught to me.
+### Ross Lazarus october 2014 for a Galaxy tool wrapper using the toolfactory infrastucture
+
+#############################################################################
+###Functions to create risk assessment plots and associated summary statistics
+#############################################################################
+###
+###  (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns
+###   Last modified August 2014
+### 
+###  Redistribution and use in source and binary forms, with or without 
+###   modification, are permitted provided that the following conditions are met:
+###   * Redistributions of source code must retain the above copyright 
+###     notice, this list of conditions and the following disclaimer.
+###   * Redistributions in binary form must reproduce the above copyright 
+###     notice, this list of conditions and the following disclaimer in 
+###     the documentation and/or other materials provided with the distribution
+
+### FUNCTIONS
+### raplot
+###       Produces a Risk Assessment Plot and outputs the coordinates of the four curves
+###       Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of
+###       Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355–1364. doi:10.2215/CJN.09590911
+###
+### statistics.raplot 
+###       Produces the NRIs, IDIs, IS, IP, AUCs.
+###       Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011).
+### Extensions of net reclassification improvement calculations to measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11–21. doi:10.1002/sim.4085
+###       Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the ROC curve to reclassification and beyond.
+### Statistics in Medicine, 27(2), 157–172. doi:10.1002/sim.2929
+###       DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach.
+### Biometrics, 44(3), 837–845.
+###
+### summary.raplot 
+###       Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot"))
+
+
+### Required arguments for all functions:
+###   x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object
+###   x2 is calculated risk (eg from a glm) for the alternative model
+###   y is the case-control indicator (0 for controls, 1 for cases)
+### Optional argument
+###   t are the boundaries of the risks for each group (ie 0, 1 and the thresholds beteween.  eg c(0,0,3,0,7,1)). If missing, defaults to c(0, the incidence, 1)
+
+
+### risk assessment plot
+
+library('e1071')
+library('caret')
+library('pROC')
+library('Hmisc')
+library('pracma')
+
+raplot = function(x1, x2, y, outplot, title) {
+
+    roc.model1 = roc(y, x1)
+    roc.model2 = roc(y, x2)
+    sens.model1 = roc.model1\$sensitivities
+    spec.model1 = 1 - roc.model1\$specificities
+    n.model1 = length(sens.model1)
+    thresh.model1 = roc.model1\$thresholds
+    thresh.model1 = thresh.model1[c(-1,-n.model1)]
+    sens.model1 = sens.model1[c(-1,-n.model1)]
+    spec.model1 = spec.model1[c(-1,-n.model1)]
+    sens.model2 = roc.model2\$sensitivities
+    spec.model2 = 1 - roc.model2\$specificities
+    n.model2 = length(sens.model2)
+    thresh.model2 = roc.model2\$thresholds
+    thresh.model2[1]=0
+    thresh.model2[length(thresh.model2)]=1
+    thresh.model2 = thresh.model2[c(-1,-n.model2)]
+    sens.model2 = sens.model2[c(-1,-n.model2)]
+    spec.model2 = spec.model2[c(-1,-n.model2)]
+
+    n.model1 = length(sens.model1)
+    n.model2 = length(sens.model2)
+
+    ### actual plotting
+    pdf(outplot)
+    plot(thresh.model1, sens.model1, xlim = c(0, 1), ylim = c(0, 1), type = "n", 
+         lty = 2, lwd = 2, xlab = "Risk of Event", ylab = "", col = "black", main=title)
+    grid()
+
+    polygon(x = c(thresh.model1, thresh.model2[n.model2:1]), 
+            y = c(sens.model1, sens.model2[n.model2:1]), border = NA, col = gray(0.8))
+    polygon(x = c(thresh.model1, thresh.model2[n.model2:1]), 
+            y = c(spec.model1, spec.model2[n.model2:1]), border = NA, col = gray(0.8))
+
+    lines(thresh.model1, sens.model1, type = "l", lty = 2, lwd = 2, col = "black")
+    lines(thresh.model2, sens.model2, type = "l", lty = 1, lwd = 2, col = "black")
+
+    lines(thresh.model1, spec.model1, type = "l", lty = 2, lwd = 2, col = "red")
+    lines(thresh.model2, spec.model2, type = "l", lty = 1, lwd = 2, col = "red")
+
+    text(x = -0.15, y = 0.4, labels = "Sensitivity, ", col = "black", xpd = TRUE, srt = 90)
+    text(x = -0.15, y = 0.4 + 0.175, labels = "1-Specificity", col = "red", xpd = TRUE, srt = 90)
+    legend("topleft", c("Event: New model", "Event: Baseline model", 
+                         "No Event: New model", "No Event: Baseline model"), 
+                          col = c("black", "black", "red", "red"), 
+                          lty = c(1,2, 1, 2), lwd = 2, bty = "n")
+    dev.off()
+    return(data.frame("Null.p.sens"=thresh.model1,
+                "Null.sens"=sens.model1,
+                "Null.p.1spec"=thresh.model1,
+                "Null.1spec"=sens.model1,
+                "Alt.p.sens"=thresh.model2,
+                "Alt.sens"=sens.model2,
+                "Alt.p.1spec"=thresh.model2,
+                "Alt.1spec"=sens.model2))
+
+}
+
+
+
+### statistics from a raplot (is an adaptation of improveProb() from Hmisc)
+
+statistics.raplot = function(x1, x2, y, threshvec)
+{   
+ 
+    s = is.na(x1 + x2 + y)  ###Remove rows with missing data
+    if (any(s)) {
+        smiss = sum(s)
+        s = !s
+        x1 = x1[s]
+        x2 = x2[s]
+        y = y[s]
+        print.noquote(paste('Warning: removed',smiss,'cases with missing values'))
+    }
+    n = length(y)
+    y = as.numeric(y)
+    u = sort(unique(y))
+    if (length(u) != 2 || u[1] != 0 || u[2] != 1) {
+        print.noquote("INPUT ERROR: y must have only two values: 0 and 1")
+        sink()
+        quit(save="no",status=2)
+        }
+    r = range(x1, x2)
+    if (r[1] < 0 || r[2] > 1) {
+        print.noquote("INPUT ERROR: x1 and x2 must be in [0,1]")
+        sink()
+        quit(save="no",status=3)
+        }
+    incidence=sum(y)/n
+    if (missing(threshvec)) {
+    threshvec=c(0, incidence,1)
+    print(paste('threshvec missing. using',paste(threshvec,collapse=',')))
+    }
+    a = (y == 1)
+    b = (y == 0)
+    na = sum(a)
+    nb = sum(b)
+    d = x2 - x1
+    ### NRI 
+    n.thresh=length(threshvec)-1
+    risk.class.x1.ev=cut2(x1[a],threshvec)
+    risk.class.x2.ev=cut2(x2[a],threshvec)
+    thresh=c()
+    lt = length(threshvec)
+    for (i in 1:(lt-1)) {
+     thresh[i] = paste("[",toString(threshvec[i]),",",toString(threshvec[i+1]),"]")
+    }
+    levels(risk.class.x1.ev)=thresh
+    levels(risk.class.x2.ev)=thresh
+    cM.ev=confusionMatrix(risk.class.x2.ev,risk.class.x1.ev)
+    pup.ev=0
+    pdown.ev=0
+    for (i in 1:(n.thresh-1)) { pup.ev = pup.ev + sum(cM.ev\$table[(i+1):n.thresh,i])}
+    for (i in 2:n.thresh) { pdown.ev = pdown.ev + sum(cM.ev\$table[1:(i-1),i])} 
+    pup.ev=pup.ev/na 
+    pdown.ev=pdown.ev/na
+    risk.class.x1.ne=cut2(x1[b],threshvec)
+    risk.class.x2.ne=cut2(x2[b],threshvec)  
+    levels(risk.class.x1.ne)=thresh
+    levels(risk.class.x2.ne)=thresh
+    cM.ne=confusionMatrix(risk.class.x2.ne,risk.class.x1.ne)
+    pup.ne=0
+    pdown.ne=0
+    for (i in 1:(n.thresh-1)){pup.ne=pup.ev+sum(cM.ne\$table[(i+1):n.thresh,i])}
+    for (i in 2:n.thresh){pdown.ne=pdown.ne+sum(cM.ne\$table[1:(i-1),i])} 
+    pdown.ne=pdown.ne/nb
+    pup.ne=pup.ne/nb
+    nri = pup.ev - pdown.ev - (pup.ne - pdown.ne)
+    se.nri = sqrt((pup.ev + pdown.ev)/na + (pup.ne + pdown.ne)/nb)
+    z.nri = nri/se.nri
+    nri.ev = pup.ev - pdown.ev
+    se.nri.ev = sqrt((pup.ev + pdown.ev)/na)
+    z.nri.ev = nri.ev/se.nri.ev
+    nri.ne = pdown.ne - pup.ne
+    se.nri.ne = sqrt((pdown.ne + pup.ne)/nb)
+    z.nri.ne = nri.ne/se.nri.ne 
+    ### Category Free NRI calculations
+    cfpup.ev = mean(d[a] > 0)
+    cfpup.ne = mean(d[b] > 0)
+    cfpdown.ev = mean(d[a] < 0)
+    cfpdown.ne = mean(d[b] < 0)
+    cfnri = cfpup.ev - cfpdown.ev - (cfpup.ne - cfpdown.ne)
+    se.cfnri = sqrt((cfpup.ev + cfpdown.ev)/na + (cfpup.ne + cfpdown.ne)/nb)
+    z.cfnri = cfnri/se.cfnri
+    cfnri.ev = cfpup.ev - cfpdown.ev
+    se.cfnri.ev = sqrt((cfpup.ev + cfpdown.ev)/na)
+    z.cfnri.ev = cfnri.ev/se.cfnri.ev
+    cfnri.ne = cfpdown.ne - cfpup.ne
+    se.cfnri.ne = sqrt((cfpdown.ne + cfpup.ne)/nb)
+    z.cfnri.ne = cfnri.ne/se.cfnri.ne
+    ### IDI calculations
+    improveSens = sum(d[a])/na
+    improveSpec = -sum(d[b])/nb
+    idi.ev = mean(improveSens)
+    idi.ne = mean(improveSpec)
+    idi = idi.ev - idi.ne
+    var.ev = var(d[a])/na
+    se.idi.ev = sqrt(var.ev)
+    z.idi.ev = idi.ev/se.idi.ev
+    var.ne = var(d[b])/nb
+    se.idi.ne = sqrt(var.ne)
+    z.idi.ne = idi.ne/se.idi.ne
+    se.idi = sqrt(var.ev + var.ne)
+    z.idi = idi/se.idi
+    ### AUC calculations
+    roc.x1 = roc(y, x1)
+    auc.x1 = auc(roc.x1)
+    ci.auc.x1 = ci.auc(roc.x1)
+    se.auc.x1 = (ci.auc.x1[3] - auc.x1)/qnorm(0.975)
+    roc.x2 = roc(y, x2)
+    auc.x2 = auc(roc.x2)
+    ci.auc.x2 = ci.auc(roc.x2)
+    se.auc.x2 = (ci.auc.x2[3] - auc.x2)/qnorm(0.975)
+    roc.test.x1.x2 = roc.test(roc.x1, roc.x2)  ###Uses the default Delong method
+    sens.x1 = roc.x1\$sensitivities
+    spec.x1 = 1 - roc.x1\$specificities
+    n.x1 = length(sens.x1)
+    x1 = roc.x1\$thresholds
+    x1 = x1[c(-1,-n.x1)]
+    sens.x1 = sens.x1[c(-1,-n.x1)]
+    spec.x1 = spec.x1[c(-1,-n.x1)]
+    sens.x2 = roc.x2\$sensitivities
+    spec.x2 = 1 - roc.x2\$specificities
+    n.x2 = length(sens.x2)
+    x2 = roc.x2\$thresholds
+    x2 = x2[c(-1,-n.x2)]
+    sens.x2 = sens.x2[c(-1,-n.x2)]
+    spec.x2 = spec.x2[c(-1,-n.x2)]
+    ### Integrated sensitivity and 1-specificity calculations
+    is.x1 = trapz(x = x1, y = sens.x1)  ### area under curves (relates to integrated sens, 1-spec)
+    is.x2 = trapz(x = x2, y = sens.x2)
+    ip.x1 = trapz(x = x1, y = spec.x1)
+    ip.x2 = trapz(x = x2, y = spec.x2)
+
+    ### Output
+    output = c(n, na, nb, pup.ev, pup.ne, pdown.ev, pdown.ne, nri, se.nri, z.nri,
+                nri.ev, se.nri.ev, z.nri.ev, nri.ne, se.nri.ne, z.nri.ne, 
+                cfpup.ev, cfpup.ne, cfpdown.ev, cfpdown.ne, cfnri, se.cfnri, z.cfnri,
+                cfnri.ev, se.cfnri.ev, z.cfnri.ev, cfnri.ne, se.cfnri.ne, z.cfnri.ne, 
+                improveSens, improveSpec, idi.ev, se.idi.ev, z.idi.ev, idi.ne, 
+                se.idi.ne, z.idi.ne, idi, se.idi, z.idi, is.x1, NA, is.x2, NA, 
+                ip.x1, NA, ip.x2, NA, auc.x1, se.auc.x1, auc.x2, se.auc.x2, 
+                roc.test.x1.x2\$p.value,incidence)
+    names(output) = c("n", "na", "nb", "pup.ev", "pup.ne", "pdown.ev", "pdown.ne", 
+                       "nri", "se.nri", "z.nri", "nri.ev", "se.nri.ev", "z.nri.ev",
+                       "nri.ne", "se.nri.ne", "z.nri.ne",
+                       "cfpup.ev", "cfpup.ne", "cfpdown.ev", "cfpdown.ne", 
+                       "cfnri", "se.cfnri", "z.cfnri", "cfnri.ev", "se.cfnri.ev", "z.cfnri.ev",
+                       "cfnri.ne", "se.cfnri.ne", "z.cfnri.ne", "improveSens", "improveSpec",
+                       "idi.ev", "se.idi.ev", "z.idi.ev", "idi.ne", "se.idi.ne", 
+                       "z.idi.ne", "idi", "se.idi", "z.idi", "is.x1", "se.is.x1", 
+                       "is.x2", "se.is.x2", "ip.x1", "se.ip.x1", "ip.x2", "se.ip.x2", 
+                       "auc.x1", "se.auc.x1", "auc.x2", "se.auc.x2", 
+                       "roc.test.x1.x2.pvalue","incidence")
+    resdf = data.frame(N=n, Na=na, Nb=nb, pup.ev=pup.ev, pup.ne=pup.ne, pdown.ev=pdown.ev, pdown.ne=pdown.ne, NRI=nri, NRI.se=se.nri, NRI.z=z.nri,
+                NRI.ev=nri.ev, NRI.ev.se=se.nri.ev, NRI.ev.z=z.nri.ev, NRI.ne=nri.ne, NRI.ne.se=se.nri.ne, NRI.ne.z=z.nri.ne, 
+                cfpup.ev=cfpup.ev, cfpup.ne=cfpup.ne, cfpdown.ev=cfpdown.ev, cfpdown.ne=cfpdown.ne, CFNRI=cfnri, CFNRI.se=se.cfnri, CFNRI.z=z.cfnri,
+                CFNRI.ev=cfnri.ev, CFNRI.ev.se=se.cfnri.ev, CFNRI.ev.z=z.cfnri.ev, CFNRI.ne=cfnri.ne, CFNRI.ne.se=se.cfnri.ne, CFNRI.ne.z=z.cfnri.ne, 
+                improvSens=improveSens, improvSpec=improveSpec, IDI.ev=idi.ev, IDI.ev.se=se.idi.ev, IDI.ev.z=z.idi.ev, IDI.ne=idi.ne, 
+                IDI.ne.se=se.idi.ne, IDI.ne.z=z.idi.ne, IDI=idi, IDI.se=se.idi, IDI.z=z.idi, isx1=is.x1, isx2=is.x2, 
+                ipxi=ip.x1, ipx2=ip.x2, AUC.x1=auc.x1, AUC.x1.se=se.auc.x1, AUC.x2=auc.x2, AUC.x2.se=se.auc.x2, 
+                roctestpval=roc.test.x1.x2\$p.value,incidence=incidence)
+    tr = t(resdf)
+    tresdf = data.frame(measure=colnames(resdf),value=tr[,1])
+    return(list(resdf=tresdf,output=output))
+}
+
+
+### More comprehensive summary statistics from a raplot
+### Choice of confidence intervals determined through asymptotics or bootstrapping (n.boot = ### of bootstrap resamples)
+### dp is number of decimal places for results table
+
+summary.raplot = function(x1, x2, y, threshvec, cis = "boot", conf.level = 0.95, n.boot = 2000, dp = 4, stat_ra=NA)
+{
+        results = stat_ra
+        if (cis == "boot") {
+            print.noquote("Bootstrap estimates for SE")
+            results.boot = matrix(NA, n.boot, length(names(results)))
+
+            colnames(results.boot) = names(results)
+
+            for (i in 1:n.boot) {
+                ###boot.index = sample(length(cc.status), replace = TRUE)
+                ###risk.model1.boot = risk.model1[boot.index]
+                ###risk.model2.boot = risk.model2[boot.index]
+                ###cc.status.boot = cc.status[boot.index]
+                boot.index = sample(length(y), replace = TRUE)
+                risk.model1.boot = x1[boot.index]
+                risk.model2.boot = x2[boot.index]
+                cc.status.boot = y[boot.index]           
+                r = statistics.raplot(x1 = risk.model1.boot, x2 = risk.model2.boot, y = cc.status.boot)
+                results.boot[i, ] = r\$output
+            }
+
+            results.se.boot = apply(results.boot, 2, sd)
+            print(paste(results.se.boot,collapse=','))
+
+
+            results[grep("se", names(results))] = results.se.boot[grep("se", names(results)) - 1]
+
+            }
+
+
+
+    ### calculate cis and return
+
+    z = abs(qnorm((1 - conf.level)/2))
+
+    results.matrix = matrix(NA, 24, 2)
+
+    results.matrix[1, ] = c("Total (n)", results["n"])
+    results.matrix[2, ] = c("Events (n)", results["na"])
+    results.matrix[3, ] = c("Non-events (n)", results["nb"])
+    results.matrix[4, ] = c("Category free NRI and summary statistics","-------------------------")
+    results.matrix[5, ] = c("cfNRI events (%)", 
+                             paste(round(100*results["cfnri.ev"], dp-2), " (", 
+                                   round(100*results["cfnri.ev"] - z * 100*results["se.cfnri.ev"], dp-2),
+                                    " to ", round(100*results["cfnri.ev"] + 
+                                      z * 100*results["se.cfnri.ev"], dp-2), ")", sep = ""))
+    results.matrix[6, ] = c("cfNRI non-events (%)", 
+                             paste(round(100*results["cfnri.ne"], dp-2), " (",
+                             round(100*results["cfnri.ne"] - z * 100*results["se.cfnri.ne"], dp)-2,
+                             " to ", round(100*results["cfnri.ne"] +  z * 100*results["se.cfnri.ne"], 
+                                           dp-2), ")", sep = "")) 
+    results.matrix[7, ] = c("cfNRI (%)", 
+                             paste(round(100*results["cfnri"], dp-2), " (", 
+                             round(100*results["cfnri"] - z * 100*results["se.cfnri"], dp-2), 
+                             " to ", round(100*results["cfnri"] + z * 100*results["se.cfnri"], 
+                                           dp-2), ")", sep = ""))
+    results.matrix[8, ] = c("NRI and summary statistics","-------------------------")
+    results.matrix[9, ] = c("NRI events (%)", 
+                             paste(round(100*results["nri.ev"], dp-2), " (", 
+                                   round(100*results["nri.ev"] - z * 100*results["se.nri.ev"], dp-2),
+                                   " to ", round(100*results["nri.ev"] + 
+                                                   z * 100*results["se.nri.ev"], dp-2), ")", sep = ""))
+    results.matrix[10, ] = c("NRI non-events (%)", 
+                             paste(round(100*results["nri.ne"], dp-2), " (",
+                                   round(100*results["nri.ne"] - z * 100*results["se.nri.ne"], dp-2),
+                                   " to ", round(100*results["nri.ne"] +  z * 100*results["se.nri.ne"], 
+                                                 dp-2), ")", sep = "")) 
+    results.matrix[11, ] = c("NRI (%)", 
+                             paste(round(100*results["nri"], dp-2), " (", 
+                                   round(100*results["nri"] - z * 100*results["se.nri"], dp-2), 
+                                   " to ", round(100*results["nri"] + z * 100*results["se.nri"], 
+                                                 dp-2), ")", sep = ""))
+    results.matrix[12, ] = c("IDI and summary statistics","-------------------------")
+    results.matrix[13, ] = c("IDI events", 
+                             paste(round(results["idi.ev"], dp), " (", 
+                              round(results["idi.ev"] - z * results["se.idi.ev"], dp), 
+                              " to ", round(results["idi.ev"] + z * results["se.idi.ev"], 
+                                            dp), ")", sep = ""))
+    results.matrix[14, ] = c("IDI non-events", 
+                              paste(round(results["idi.ne"], dp), " (", 
+                              round(results["idi.ne"] - z * results["se.idi.ne"], dp), 
+                              " to ", round(results["idi.ne"] + z * results["se.idi.ne"], 
+                                            dp), ")", sep = ""))
+    results.matrix[15, ] = c("IDI", 
+                              paste(round(results["idi"], dp), " (", 
+                              round(results["idi"] - z * results["se.idi"], dp), 
+                              " to ", round(results["idi"] + z * results["se.idi"], 
+                              dp), ")", sep = ""))
+    results.matrix[16, ] = c("IS (null model)", 
+                              paste(round(results["is.x1"], dp), " (", 
+                              round(results["is.x1"] - z * results["se.is.x1"], dp), 
+                              " to ", round(results["is.x1"] + z * results["se.is.x1"], 
+                                            dp), ")", sep = ""))
+    results.matrix[17, ] = c("IS (alt model)", 
+                              paste(round(results["is.x2"], dp), " (", 
+                              round(results["is.x2"] - z * results["se.is.x2"], dp), 
+                              " to ", round(results["is.x2"] + z * results["se.is.x2"], 
+                                            dp), ")", sep = ""))
+    results.matrix[18, ] = c("IP (null model)", 
+                              paste(round(results["ip.x1"], dp), " (", 
+                              round(results["ip.x1"] - z * results["se.ip.x1"], dp), 
+                              " to ", round(results["ip.x1"] + z *  results["se.ip.x1"], 
+                                            dp), ")", sep = ""))
+    results.matrix[19, ] = c("IP (alt model)", 
+                              paste(round(results["ip.x2"], dp), " (", 
+                              round(results["ip.x2"] - z * results["se.ip.x2"], dp), 
+                              " to ", round(results["ip.x2"] + z * results["se.ip.x2"], 
+                                            dp), ")", sep = ""))
+    results.matrix[20, ] = c("AUC","-------------------------")
+    results.matrix[21, ] = c("AUC (null model)", 
+                              paste(round(results["auc.x1"], dp), " (", 
+                              round(results["auc.x1"] - z * results["se.auc.x1"], dp), 
+                              " to ", round(results["auc.x1"] + z * results["se.auc.x1"], 
+                                            dp), ")", sep = ""))
+    results.matrix[22, ] = c("AUC (alt model)", 
+                              paste(round(results["auc.x2"], dp), " (", 
+                              round(results["auc.x2"] - z * results["se.auc.x2"], dp), 
+                              " to ", round(results["auc.x2"] +  z * results["se.auc.x2"], 
+                                            dp), ")", sep = ""))
+    results.matrix[23, ] = c("difference (P)", round(results["roc.test.x1.x2.pvalue"], dp))
+    results.matrix[24, ] = c("Incidence", round(results["incidence"], dp))
+    
+    return(results.matrix)
+}
+
+
+
+]]>
+
+options(width=120)
+options(digits=5)
+logf = file("rgNRI.log", open = "a")
+sink(logf,type = c("output", "message"))
+Out_Dir = "$html_file.files_path"
+Input1 =  "$input1"
+Input2 =  "$input2"
+myTitle = "$title"
+outtab = "$nri_file"
+input1_obs = $input1_observed
+input1_pred = $input1_predicted
+input1_id = $input1_id
+input2_obs = $input2_observed
+input2_pred = $input2_predicted
+input2_id = $input2_id
+in1 = read.table(Input1,head=T,sep='\t')
+in2 = read.table(Input2,head=T,sep='\t')
+id1 = in1[,input1_id]
+id2 = in2[,input2_id]
+useme1 = in1[which(id1 %in% id2),]
+useme2 = in2[which(id2 %in% id1),]
+id1 = useme1[,input1_id]
+id2 = useme2[,input2_id]
+useme1 = useme1[order(id1),]
+useme2 = useme2[order(id2),]
+x1 = useme1[,input1_pred]
+x2 = useme2[,input2_pred]
+y1 = useme1[,input1_obs]
+y2 = useme2[,input2_obs]
+n.boot = $CImeth.nboot
+conf.level = 0.95
+cis = "$CImeth.cis"
+digits = 4
+nydiff = sum(y1 != y2)
+if (nydiff &gt; 0) {
+  print.noquote(paste('Input error: observed status column has',nydiff,'differences - cannot reliably proceed'))
+  quit(save="no",status=1)
+  }
+y = y2
+outplot = 'rgNRI_EventRisk.pdf'
+res = raplot(x1=x1, x2=x2, y=y, outplot=outplot,title=myTitle)
+
+stats = statistics.raplot(x1=x1, x2=x2, y=y)
+res1 = stats\$resdf
+out1 = stats\$output
+print.noquote('Results:')
+print.noquote(res1,digits=4)
+res2 = summary.raplot(x1=x1, x2=x2, y=y, cis = cis, conf.level = conf.level, n.boot = n.boot, dp = digits, stat_ra=out1)
+print.noquote('Summary:')
+print.noquote(res2,digits=4)
+write.table(format(res1,digits=4),outtab,quote=F, col.names=F,sep="\t",row.names=F)
+print.noquote('SessionInfo for this R session:')
+sessionInfo()
+print.noquote('warnings for this R session:')
+warnings()
+sink()
+</configfile>
+</configfiles>
+<citations>
+    <citation type="doi">doi: 10.2215/​CJN.09590911</citation>
+</citations>
+</tool>
+
+
+