changeset 20:bb725f6d6d38 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rglasso commit 344140b8df53b8b7024618bb04594607a045c03a
author iuc
date Mon, 04 May 2015 22:47:29 -0400
parents 8c31e2aac682
children
files rg_nri.xml rglasso_cox.xml
diffstat 2 files changed, 494 insertions(+), 506 deletions(-) [+]
line wrap: on
line diff
--- a/rg_nri.xml	Wed Apr 29 12:07:11 2015 -0400
+++ b/rg_nri.xml	Mon May 04 22:47:29 2015 -0400
@@ -7,145 +7,12 @@
       <requirement type="package" version="2.14">glmnet_lars_2_14</requirement>
   </requirements>
   <command interpreter="python">
-     rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "rg_NRI" 
+     rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "rg_NRI"
     --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
   </command>
-  <inputs>
-     <param name="title" type="text" value="NRI test" size="80" label="Plot Title" help="Will appear as the title for the comparison plot"/>
-    <param name="input1"  type="data" format="tabular" label="Select a tabular file from the baseline model with predicted and observed outcome column for each subject"
-    multiple='False' help="Observed and predicted status columns must be selected from this file below - NOTE both models must be in same order with exact matches in all observed outcomes" optional="False"/>
-    <param name="input1_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input1" numerical="True" 
-         multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
-    <param name="input1_predicted" label="Select column containing predicted event probabilies from baseline model" type="data_column" data_ref="input1" numerical="True" 
-         multiple="False" use_header_names="True" optional="False"  help="Must be in range 0-1"/>
-    <param name="input1_id" label="Select column containing subject ID from baseline model" type="data_column" data_ref="input1" numerical="True" 
-         multiple="False" use_header_names="True" optional="False" help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/>
-    <param name="input2"  type="data" format="tabular" label="Select a tabular file from the new model with predicted and observed outcome columns for each subject"
-    multiple='False' help="Observed and predicted status columns must be selected from this file below" />
-    <param name="input2_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input2" numerical="True" 
-         multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
-    <param name="input2_predicted" label="Select column containing predicted event probabilities from the new model" type="data_column" data_ref="input2" numerical="True" 
-         multiple="False" use_header_names="True" optional="False" help="Must be in range 0-1"/>
-    <param name="input2_id" label="Select column containing subject ID from the new model" type="data_column" data_ref="input2" numerical="True" 
-         multiple="False" use_header_names="True" optional="False"  help="Subect IDs are needed to match subjects to compare predictions in the two inputs"/>
-    <conditional name="CImeth">
-        <param name="cis" type="select" label="CI calculation method" 
-             help="Bootstrap will take time - a long time for thousands - asymptotic is quick and informative">
-                <option value="asymptotic" selected="true">Asymptotic estimate</option>
-                <option value="boot">Bootstrap for empirical CIs</option>
-        </param>
-        <when value="boot">
-            <param name="nboot" type="integer" value="1000" label="Number of bootstrap replicates"/>
-        </when>
-        <when value="asymptotic">
-            <param name="nboot" type="hidden" value="1000"/>
-        </when>
-    </conditional>
-  </inputs>    
-  <outputs>
-    <data format="html" name="html_file" label="${title}.html"/>
-    <data format="tabular" name="nri_file" label="${title}_nrires.xls"/>
-  </outputs>
- <tests>
-    <test>
-     <param name='title' value='nri_test1' />
-     <param name='input1' value='nri_test1.xls' ftype='tabular' />
-     <param name='input2' value='nri_test1.xls' ftype='tabular' />
-     <param name='input1_id' value="1" />
-     <param name='input1_observed' value="2" />
-     <param name='input1_predicted' value="3" />
-     <param name='input2_observed' value="2" />
-     <param name='input2_predicted' value="4" />
-     <output name='html_file' file='nri_test1_out.html'  compare='diff' lines_diff='10' />
-     <output name='nri_file' file='nri_test1_out.xls' />
-    </test>
-</tests>
-<help>
-
-**Before you start**
-
-This is a simple tool to calculate various measures of improvement in prediction between two models described in pickering_paper_
-It is based on an R script pickering_code_ written by Dr John W Pickering and Dr David Cairns from sunny Otago University which
-has been debugged and slightly adjusted to fit a Galaxy tool wrapper.
-
-
-**What it does**
-
-Copied from the documentation in pickering_code_ ::
-
-    
-    Functions to create risk assessment plots and associated summary statistics
-    
-    
-      (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns
-       Last modified August 2014
-     
-      Redistribution and use in source and binary forms, with or without 
-       modification, are permitted provided that the following conditions are met:
-       * Redistributions of source code must retain the above copyright 
-         notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright 
-         notice, this list of conditions and the following disclaimer in 
-         the documentation and/or other materials provided with the distribution
-
-     FUNCTIONS
-     raplot
-           Produces a Risk Assessment Plot and outputs the coordinates of the four curves
-           Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of
-           Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355–1364. doi:10.2215/CJN.09590911
-    
-     statistics.raplot 
-           Produces the NRIs, IDIs, IS, IP, AUCs.
-           Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011). Extensions of net reclassification improvement calculations to
-           measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11–21. doi:10.1002/sim.4085
-           Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the
-           ROC curve to reclassification and beyond.
-           Statistics in Medicine, 27(2), 157–172. doi:10.1002/sim.2929
-           DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach.
-           Biometrics, 44(3), 837–845.
-    
-     summary.raplot 
-           Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot"))
-
-
-     Required arguments for all functions:
-       x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object
-       x2 is calculated risk (eg from a glm) for the alternative model
-       y is the case-control indicator (0 for controls, 1 for cases)
-     Optional argument
-       t are the boundaries of the risks for each group (ie 0, 1 and the thresholds beteween.  eg c(0,0,3,0,7,1)). If missing, defaults to c(0, the incidence, 1)
-
-
-**Input**
-
-The observed and predicted outcomes from two models to be compared.
-
-**Output**
-
-Lots'o'measures (TM) see pickering_paper_ for details
-
-**Attributions**
-
-pickering_paper_ is the paper the caclulations performed by this tool is based on
-
-pickering_code_ is the R function from John Pickering exposed by this Galaxy tool with minor modifications and hacks by Ross Lazarus.
-
-Galaxy_ (that's what you are using right now!) for gluing everything together 
-
-Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is 
-licensed to you under the LGPL_ like other rgenetics artefacts
-
-.. _LGPL: http://www.gnu.org/copyleft/lesser.html
-.. _pickering_code: http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI 
-.. _pickering_paper: http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full 
-.. _Galaxy: http://getgalaxy.org
-
-
-</help>
-
 <configfiles>
 <configfile name="runme">
-    
+
 <![CDATA[
 
 ### http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI code
@@ -159,13 +26,13 @@
 ###
 ###  (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns
 ###   Last modified August 2014
-### 
-###  Redistribution and use in source and binary forms, with or without 
+###
+###  Redistribution and use in source and binary forms, with or without
 ###   modification, are permitted provided that the following conditions are met:
-###   * Redistributions of source code must retain the above copyright 
+###   * Redistributions of source code must retain the above copyright
 ###     notice, this list of conditions and the following disclaimer.
-###   * Redistributions in binary form must reproduce the above copyright 
-###     notice, this list of conditions and the following disclaimer in 
+###   * Redistributions in binary form must reproduce the above copyright
+###     notice, this list of conditions and the following disclaimer in
 ###     the documentation and/or other materials provided with the distribution
 
 ### FUNCTIONS
@@ -174,7 +41,7 @@
 ###       Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of
 ###       Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355–1364. doi:10.2215/CJN.09590911
 ###
-### statistics.raplot 
+### statistics.raplot
 ###       Produces the NRIs, IDIs, IS, IP, AUCs.
 ###       Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011).
 ### Extensions of net reclassification improvement calculations to measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11–21. doi:10.1002/sim.4085
@@ -183,7 +50,7 @@
 ###       DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach.
 ### Biometrics, 44(3), 837–845.
 ###
-### summary.raplot 
+### summary.raplot
 ###       Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosed by cis=c("boot"))
 
 
@@ -229,13 +96,13 @@
 
     ### actual plotting
     pdf(outplot)
-    plot(thresh.model1, sens.model1, xlim = c(0, 1), ylim = c(0, 1), type = "n", 
+    plot(thresh.model1, sens.model1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
          lty = 2, lwd = 2, xlab = "Risk of Event", ylab = "", col = "black", main=title)
     grid()
 
-    polygon(x = c(thresh.model1, thresh.model2[n.model2:1]), 
+    polygon(x = c(thresh.model1, thresh.model2[n.model2:1]),
             y = c(sens.model1, sens.model2[n.model2:1]), border = NA, col = gray(0.8))
-    polygon(x = c(thresh.model1, thresh.model2[n.model2:1]), 
+    polygon(x = c(thresh.model1, thresh.model2[n.model2:1]),
             y = c(spec.model1, spec.model2[n.model2:1]), border = NA, col = gray(0.8))
 
     lines(thresh.model1, sens.model1, type = "l", lty = 2, lwd = 2, col = "black")
@@ -246,9 +113,9 @@
 
     text(x = -0.15, y = 0.4, labels = "Sensitivity, ", col = "black", xpd = TRUE, srt = 90)
     text(x = -0.15, y = 0.4 + 0.175, labels = "1-Specificity", col = "red", xpd = TRUE, srt = 90)
-    legend("topleft", c("Event: New model", "Event: Baseline model", 
-                         "No Event: New model", "No Event: Baseline model"), 
-                          col = c("black", "black", "red", "red"), 
+    legend("topleft", c("Event: New model", "Event: Baseline model",
+                         "No Event: New model", "No Event: Baseline model"),
+                          col = c("black", "black", "red", "red"),
                           lty = c(1,2, 1, 2), lwd = 2, bty = "n")
     dev.off()
     return(data.frame("Null.p.sens"=thresh.model1,
@@ -267,8 +134,8 @@
 ### statistics from a raplot (is an adaptation of improveProb() from Hmisc)
 
 statistics.raplot = function(x1, x2, y, threshvec)
-{   
- 
+{
+
     s = is.na(x1 + x2 + y)  ###Remove rows with missing data
     if (any(s)) {
         smiss = sum(s)
@@ -302,7 +169,7 @@
     na = sum(a)
     nb = sum(b)
     d = x2 - x1
-    ### NRI 
+    ### NRI
     n.thresh=length(threshvec)-1
     risk.class.x1.ev=cut2(x1[a],threshvec)
     risk.class.x2.ev=cut2(x2[a],threshvec)
@@ -317,18 +184,18 @@
     pup.ev=0
     pdown.ev=0
     for (i in 1:(n.thresh-1)) { pup.ev = pup.ev + sum(cM.ev\$table[(i+1):n.thresh,i])}
-    for (i in 2:n.thresh) { pdown.ev = pdown.ev + sum(cM.ev\$table[1:(i-1),i])} 
-    pup.ev=pup.ev/na 
+    for (i in 2:n.thresh) { pdown.ev = pdown.ev + sum(cM.ev\$table[1:(i-1),i])}
+    pup.ev=pup.ev/na
     pdown.ev=pdown.ev/na
     risk.class.x1.ne=cut2(x1[b],threshvec)
-    risk.class.x2.ne=cut2(x2[b],threshvec)  
+    risk.class.x2.ne=cut2(x2[b],threshvec)
     levels(risk.class.x1.ne)=thresh
     levels(risk.class.x2.ne)=thresh
     cM.ne=confusionMatrix(risk.class.x2.ne,risk.class.x1.ne)
     pup.ne=0
     pdown.ne=0
     for (i in 1:(n.thresh-1)){pup.ne=pup.ev+sum(cM.ne\$table[(i+1):n.thresh,i])}
-    for (i in 2:n.thresh){pdown.ne=pdown.ne+sum(cM.ne\$table[1:(i-1),i])} 
+    for (i in 2:n.thresh){pdown.ne=pdown.ne+sum(cM.ne\$table[1:(i-1),i])}
     pdown.ne=pdown.ne/nb
     pup.ne=pup.ne/nb
     nri = pup.ev - pdown.ev - (pup.ne - pdown.ne)
@@ -339,7 +206,7 @@
     z.nri.ev = nri.ev/se.nri.ev
     nri.ne = pdown.ne - pup.ne
     se.nri.ne = sqrt((pdown.ne + pup.ne)/nb)
-    z.nri.ne = nri.ne/se.nri.ne 
+    z.nri.ne = nri.ne/se.nri.ne
     ### Category Free NRI calculations
     cfpup.ev = mean(d[a] > 0)
     cfpup.ne = mean(d[b] > 0)
@@ -400,31 +267,31 @@
 
     ### Output
     output = c(n, na, nb, pup.ev, pup.ne, pdown.ev, pdown.ne, nri, se.nri, z.nri,
-                nri.ev, se.nri.ev, z.nri.ev, nri.ne, se.nri.ne, z.nri.ne, 
+                nri.ev, se.nri.ev, z.nri.ev, nri.ne, se.nri.ne, z.nri.ne,
                 cfpup.ev, cfpup.ne, cfpdown.ev, cfpdown.ne, cfnri, se.cfnri, z.cfnri,
-                cfnri.ev, se.cfnri.ev, z.cfnri.ev, cfnri.ne, se.cfnri.ne, z.cfnri.ne, 
-                improveSens, improveSpec, idi.ev, se.idi.ev, z.idi.ev, idi.ne, 
-                se.idi.ne, z.idi.ne, idi, se.idi, z.idi, is.x1, NA, is.x2, NA, 
-                ip.x1, NA, ip.x2, NA, auc.x1, se.auc.x1, auc.x2, se.auc.x2, 
+                cfnri.ev, se.cfnri.ev, z.cfnri.ev, cfnri.ne, se.cfnri.ne, z.cfnri.ne,
+                improveSens, improveSpec, idi.ev, se.idi.ev, z.idi.ev, idi.ne,
+                se.idi.ne, z.idi.ne, idi, se.idi, z.idi, is.x1, NA, is.x2, NA,
+                ip.x1, NA, ip.x2, NA, auc.x1, se.auc.x1, auc.x2, se.auc.x2,
                 roc.test.x1.x2\$p.value,incidence)
-    names(output) = c("n", "na", "nb", "pup.ev", "pup.ne", "pdown.ev", "pdown.ne", 
+    names(output) = c("n", "na", "nb", "pup.ev", "pup.ne", "pdown.ev", "pdown.ne",
                        "nri", "se.nri", "z.nri", "nri.ev", "se.nri.ev", "z.nri.ev",
                        "nri.ne", "se.nri.ne", "z.nri.ne",
-                       "cfpup.ev", "cfpup.ne", "cfpdown.ev", "cfpdown.ne", 
+                       "cfpup.ev", "cfpup.ne", "cfpdown.ev", "cfpdown.ne",
                        "cfnri", "se.cfnri", "z.cfnri", "cfnri.ev", "se.cfnri.ev", "z.cfnri.ev",
                        "cfnri.ne", "se.cfnri.ne", "z.cfnri.ne", "improveSens", "improveSpec",
-                       "idi.ev", "se.idi.ev", "z.idi.ev", "idi.ne", "se.idi.ne", 
-                       "z.idi.ne", "idi", "se.idi", "z.idi", "is.x1", "se.is.x1", 
-                       "is.x2", "se.is.x2", "ip.x1", "se.ip.x1", "ip.x2", "se.ip.x2", 
-                       "auc.x1", "se.auc.x1", "auc.x2", "se.auc.x2", 
+                       "idi.ev", "se.idi.ev", "z.idi.ev", "idi.ne", "se.idi.ne",
+                       "z.idi.ne", "idi", "se.idi", "z.idi", "is.x1", "se.is.x1",
+                       "is.x2", "se.is.x2", "ip.x1", "se.ip.x1", "ip.x2", "se.ip.x2",
+                       "auc.x1", "se.auc.x1", "auc.x2", "se.auc.x2",
                        "roc.test.x1.x2.pvalue","incidence")
     resdf = data.frame(N=n, Na=na, Nb=nb, pup.ev=pup.ev, pup.ne=pup.ne, pdown.ev=pdown.ev, pdown.ne=pdown.ne, NRI=nri, NRI.se=se.nri, NRI.z=z.nri,
-                NRI.ev=nri.ev, NRI.ev.se=se.nri.ev, NRI.ev.z=z.nri.ev, NRI.ne=nri.ne, NRI.ne.se=se.nri.ne, NRI.ne.z=z.nri.ne, 
+                NRI.ev=nri.ev, NRI.ev.se=se.nri.ev, NRI.ev.z=z.nri.ev, NRI.ne=nri.ne, NRI.ne.se=se.nri.ne, NRI.ne.z=z.nri.ne,
                 cfpup.ev=cfpup.ev, cfpup.ne=cfpup.ne, cfpdown.ev=cfpdown.ev, cfpdown.ne=cfpdown.ne, CFNRI=cfnri, CFNRI.se=se.cfnri, CFNRI.z=z.cfnri,
-                CFNRI.ev=cfnri.ev, CFNRI.ev.se=se.cfnri.ev, CFNRI.ev.z=z.cfnri.ev, CFNRI.ne=cfnri.ne, CFNRI.ne.se=se.cfnri.ne, CFNRI.ne.z=z.cfnri.ne, 
-                improvSens=improveSens, improvSpec=improveSpec, IDI.ev=idi.ev, IDI.ev.se=se.idi.ev, IDI.ev.z=z.idi.ev, IDI.ne=idi.ne, 
-                IDI.ne.se=se.idi.ne, IDI.ne.z=z.idi.ne, IDI=idi, IDI.se=se.idi, IDI.z=z.idi, isx1=is.x1, isx2=is.x2, 
-                ipxi=ip.x1, ipx2=ip.x2, AUC.x1=auc.x1, AUC.x1.se=se.auc.x1, AUC.x2=auc.x2, AUC.x2.se=se.auc.x2, 
+                CFNRI.ev=cfnri.ev, CFNRI.ev.se=se.cfnri.ev, CFNRI.ev.z=z.cfnri.ev, CFNRI.ne=cfnri.ne, CFNRI.ne.se=se.cfnri.ne, CFNRI.ne.z=z.cfnri.ne,
+                improvSens=improveSens, improvSpec=improveSpec, IDI.ev=idi.ev, IDI.ev.se=se.idi.ev, IDI.ev.z=z.idi.ev, IDI.ne=idi.ne,
+                IDI.ne.se=se.idi.ne, IDI.ne.z=z.idi.ne, IDI=idi, IDI.se=se.idi, IDI.z=z.idi, isx1=is.x1, isx2=is.x2,
+                ipxi=ip.x1, ipx2=ip.x2, AUC.x1=auc.x1, AUC.x1.se=se.auc.x1, AUC.x2=auc.x2, AUC.x2.se=se.auc.x2,
                 roctestpval=roc.test.x1.x2\$p.value,incidence=incidence)
     tr = t(resdf)
     tresdf = data.frame(measure=colnames(resdf),value=tr[,1])
@@ -453,7 +320,7 @@
                 boot.index = sample(length(y), replace = TRUE)
                 risk.model1.boot = x1[boot.index]
                 risk.model2.boot = x2[boot.index]
-                cc.status.boot = y[boot.index]           
+                cc.status.boot = y[boot.index]
                 r = statistics.raplot(x1 = risk.model1.boot, x2 = risk.model2.boot, y = cc.status.boot)
                 results.boot[i, ] = r\$output
             }
@@ -478,87 +345,87 @@
     results.matrix[2, ] = c("Events (n)", results["na"])
     results.matrix[3, ] = c("Non-events (n)", results["nb"])
     results.matrix[4, ] = c("Category free NRI and summary statistics","-------------------------")
-    results.matrix[5, ] = c("cfNRI events (%)", 
-                             paste(round(100*results["cfnri.ev"], dp-2), " (", 
+    results.matrix[5, ] = c("cfNRI events (%)",
+                             paste(round(100*results["cfnri.ev"], dp-2), " (",
                                    round(100*results["cfnri.ev"] - z * 100*results["se.cfnri.ev"], dp-2),
-                                    " to ", round(100*results["cfnri.ev"] + 
+                                    " to ", round(100*results["cfnri.ev"] +
                                       z * 100*results["se.cfnri.ev"], dp-2), ")", sep = ""))
-    results.matrix[6, ] = c("cfNRI non-events (%)", 
+    results.matrix[6, ] = c("cfNRI non-events (%)",
                              paste(round(100*results["cfnri.ne"], dp-2), " (",
                              round(100*results["cfnri.ne"] - z * 100*results["se.cfnri.ne"], dp)-2,
-                             " to ", round(100*results["cfnri.ne"] +  z * 100*results["se.cfnri.ne"], 
-                                           dp-2), ")", sep = "")) 
-    results.matrix[7, ] = c("cfNRI (%)", 
-                             paste(round(100*results["cfnri"], dp-2), " (", 
-                             round(100*results["cfnri"] - z * 100*results["se.cfnri"], dp-2), 
-                             " to ", round(100*results["cfnri"] + z * 100*results["se.cfnri"], 
+                             " to ", round(100*results["cfnri.ne"] +  z * 100*results["se.cfnri.ne"],
+                                           dp-2), ")", sep = ""))
+    results.matrix[7, ] = c("cfNRI (%)",
+                             paste(round(100*results["cfnri"], dp-2), " (",
+                             round(100*results["cfnri"] - z * 100*results["se.cfnri"], dp-2),
+                             " to ", round(100*results["cfnri"] + z * 100*results["se.cfnri"],
                                            dp-2), ")", sep = ""))
     results.matrix[8, ] = c("NRI and summary statistics","-------------------------")
-    results.matrix[9, ] = c("NRI events (%)", 
-                             paste(round(100*results["nri.ev"], dp-2), " (", 
+    results.matrix[9, ] = c("NRI events (%)",
+                             paste(round(100*results["nri.ev"], dp-2), " (",
                                    round(100*results["nri.ev"] - z * 100*results["se.nri.ev"], dp-2),
-                                   " to ", round(100*results["nri.ev"] + 
+                                   " to ", round(100*results["nri.ev"] +
                                                    z * 100*results["se.nri.ev"], dp-2), ")", sep = ""))
-    results.matrix[10, ] = c("NRI non-events (%)", 
+    results.matrix[10, ] = c("NRI non-events (%)",
                              paste(round(100*results["nri.ne"], dp-2), " (",
                                    round(100*results["nri.ne"] - z * 100*results["se.nri.ne"], dp-2),
-                                   " to ", round(100*results["nri.ne"] +  z * 100*results["se.nri.ne"], 
-                                                 dp-2), ")", sep = "")) 
-    results.matrix[11, ] = c("NRI (%)", 
-                             paste(round(100*results["nri"], dp-2), " (", 
-                                   round(100*results["nri"] - z * 100*results["se.nri"], dp-2), 
-                                   " to ", round(100*results["nri"] + z * 100*results["se.nri"], 
+                                   " to ", round(100*results["nri.ne"] +  z * 100*results["se.nri.ne"],
+                                                 dp-2), ")", sep = ""))
+    results.matrix[11, ] = c("NRI (%)",
+                             paste(round(100*results["nri"], dp-2), " (",
+                                   round(100*results["nri"] - z * 100*results["se.nri"], dp-2),
+                                   " to ", round(100*results["nri"] + z * 100*results["se.nri"],
                                                  dp-2), ")", sep = ""))
     results.matrix[12, ] = c("IDI and summary statistics","-------------------------")
-    results.matrix[13, ] = c("IDI events", 
-                             paste(round(results["idi.ev"], dp), " (", 
-                              round(results["idi.ev"] - z * results["se.idi.ev"], dp), 
-                              " to ", round(results["idi.ev"] + z * results["se.idi.ev"], 
+    results.matrix[13, ] = c("IDI events",
+                             paste(round(results["idi.ev"], dp), " (",
+                              round(results["idi.ev"] - z * results["se.idi.ev"], dp),
+                              " to ", round(results["idi.ev"] + z * results["se.idi.ev"],
                                             dp), ")", sep = ""))
-    results.matrix[14, ] = c("IDI non-events", 
-                              paste(round(results["idi.ne"], dp), " (", 
-                              round(results["idi.ne"] - z * results["se.idi.ne"], dp), 
-                              " to ", round(results["idi.ne"] + z * results["se.idi.ne"], 
+    results.matrix[14, ] = c("IDI non-events",
+                              paste(round(results["idi.ne"], dp), " (",
+                              round(results["idi.ne"] - z * results["se.idi.ne"], dp),
+                              " to ", round(results["idi.ne"] + z * results["se.idi.ne"],
                                             dp), ")", sep = ""))
-    results.matrix[15, ] = c("IDI", 
-                              paste(round(results["idi"], dp), " (", 
-                              round(results["idi"] - z * results["se.idi"], dp), 
-                              " to ", round(results["idi"] + z * results["se.idi"], 
+    results.matrix[15, ] = c("IDI",
+                              paste(round(results["idi"], dp), " (",
+                              round(results["idi"] - z * results["se.idi"], dp),
+                              " to ", round(results["idi"] + z * results["se.idi"],
                               dp), ")", sep = ""))
-    results.matrix[16, ] = c("IS (null model)", 
-                              paste(round(results["is.x1"], dp), " (", 
-                              round(results["is.x1"] - z * results["se.is.x1"], dp), 
-                              " to ", round(results["is.x1"] + z * results["se.is.x1"], 
+    results.matrix[16, ] = c("IS (null model)",
+                              paste(round(results["is.x1"], dp), " (",
+                              round(results["is.x1"] - z * results["se.is.x1"], dp),
+                              " to ", round(results["is.x1"] + z * results["se.is.x1"],
                                             dp), ")", sep = ""))
-    results.matrix[17, ] = c("IS (alt model)", 
-                              paste(round(results["is.x2"], dp), " (", 
-                              round(results["is.x2"] - z * results["se.is.x2"], dp), 
-                              " to ", round(results["is.x2"] + z * results["se.is.x2"], 
+    results.matrix[17, ] = c("IS (alt model)",
+                              paste(round(results["is.x2"], dp), " (",
+                              round(results["is.x2"] - z * results["se.is.x2"], dp),
+                              " to ", round(results["is.x2"] + z * results["se.is.x2"],
                                             dp), ")", sep = ""))
-    results.matrix[18, ] = c("IP (null model)", 
-                              paste(round(results["ip.x1"], dp), " (", 
-                              round(results["ip.x1"] - z * results["se.ip.x1"], dp), 
-                              " to ", round(results["ip.x1"] + z *  results["se.ip.x1"], 
+    results.matrix[18, ] = c("IP (null model)",
+                              paste(round(results["ip.x1"], dp), " (",
+                              round(results["ip.x1"] - z * results["se.ip.x1"], dp),
+                              " to ", round(results["ip.x1"] + z *  results["se.ip.x1"],
                                             dp), ")", sep = ""))
-    results.matrix[19, ] = c("IP (alt model)", 
-                              paste(round(results["ip.x2"], dp), " (", 
-                              round(results["ip.x2"] - z * results["se.ip.x2"], dp), 
-                              " to ", round(results["ip.x2"] + z * results["se.ip.x2"], 
+    results.matrix[19, ] = c("IP (alt model)",
+                              paste(round(results["ip.x2"], dp), " (",
+                              round(results["ip.x2"] - z * results["se.ip.x2"], dp),
+                              " to ", round(results["ip.x2"] + z * results["se.ip.x2"],
                                             dp), ")", sep = ""))
     results.matrix[20, ] = c("AUC","-------------------------")
-    results.matrix[21, ] = c("AUC (null model)", 
-                              paste(round(results["auc.x1"], dp), " (", 
-                              round(results["auc.x1"] - z * results["se.auc.x1"], dp), 
-                              " to ", round(results["auc.x1"] + z * results["se.auc.x1"], 
+    results.matrix[21, ] = c("AUC (null model)",
+                              paste(round(results["auc.x1"], dp), " (",
+                              round(results["auc.x1"] - z * results["se.auc.x1"], dp),
+                              " to ", round(results["auc.x1"] + z * results["se.auc.x1"],
                                             dp), ")", sep = ""))
-    results.matrix[22, ] = c("AUC (alt model)", 
-                              paste(round(results["auc.x2"], dp), " (", 
-                              round(results["auc.x2"] - z * results["se.auc.x2"], dp), 
-                              " to ", round(results["auc.x2"] +  z * results["se.auc.x2"], 
+    results.matrix[22, ] = c("AUC (alt model)",
+                              paste(round(results["auc.x2"], dp), " (",
+                              round(results["auc.x2"] - z * results["se.auc.x2"], dp),
+                              " to ", round(results["auc.x2"] +  z * results["se.auc.x2"],
                                             dp), ")", sep = ""))
     results.matrix[23, ] = c("difference (P)", round(results["roc.test.x1.x2.pvalue"], dp))
     results.matrix[24, ] = c("Incidence", round(results["incidence"], dp))
-    
+
     return(results.matrix)
 }
 
@@ -624,6 +491,139 @@
 sink()
 </configfile>
 </configfiles>
+  <inputs>
+     <param name="title" type="text" value="NRI test" size="80" label="Plot Title" help="Will appear as the title for the comparison plot"/>
+    <param name="input1"  type="data" format="tabular" label="Select a tabular file from the baseline model with predicted and observed outcome column for each subject"
+    multiple='False' help="Observed and predicted status columns must be selected from this file below - NOTE both models must be in same order with exact matches in all observed outcomes" optional="False"/>
+    <param name="input1_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input1" numerical="True"
+         multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
+    <param name="input1_predicted" label="Select column containing predicted event probabilities from baseline model" type="data_column" data_ref="input1" numerical="True"
+         multiple="False" use_header_names="True" optional="False"  help="Must be in range 0-1"/>
+    <param name="input1_id" label="Select column containing subject ID from baseline model" type="data_column" data_ref="input1" numerical="True"
+         multiple="False" use_header_names="True" optional="False" help="Subject IDs are needed to match subjects to compare predictions in the two inputs"/>
+    <param name="input2"  type="data" format="tabular" label="Select a tabular file from the new model with predicted and observed outcome columns for each subject"
+    multiple='False' help="Observed and predicted status columns must be selected from this file below" />
+    <param name="input2_observed" label="Select column containing observed outcome (0 for no event, 1 for an event)" type="data_column" data_ref="input2" numerical="True"
+         multiple="False" use_header_names="True" optional="False" help = "Observed outcomes are compared in the two files to check that the datasets are from the same data"/>
+    <param name="input2_predicted" label="Select column containing predicted event probabilities from the new model" type="data_column" data_ref="input2" numerical="True"
+         multiple="False" use_header_names="True" optional="False" help="Must be in range 0-1"/>
+    <param name="input2_id" label="Select column containing subject ID from the new model" type="data_column" data_ref="input2" numerical="True"
+         multiple="False" use_header_names="True" optional="False"  help="Subject IDs are needed to match subjects to compare predictions in the two inputs"/>
+    <conditional name="CImeth">
+        <param name="cis" type="select" label="CI calculation method"
+             help="Bootstrap will take time - a long time for thousands - asymptotic is quick and informative">
+                <option value="asymptotic" selected="true">Asymptotic estimate</option>
+                <option value="boot">Bootstrap for empirical CIs</option>
+        </param>
+        <when value="boot">
+            <param name="nboot" type="integer" value="1000" label="Number of bootstrap replicates"/>
+        </when>
+        <when value="asymptotic">
+            <param name="nboot" type="hidden" value="1000"/>
+        </when>
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="html" name="html_file" label="${title}.html"/>
+    <data format="tabular" name="nri_file" label="${title}_nrires.xls"/>
+  </outputs>
+ <tests>
+    <test>
+     <param name='title' value='nri_test1' />
+     <param name='input1' value='nri_test1.xls' ftype='tabular' />
+     <param name='input2' value='nri_test1.xls' ftype='tabular' />
+     <param name='input1_id' value="1" />
+     <param name='input1_observed' value="2" />
+     <param name='input1_predicted' value="3" />
+     <param name='input2_observed' value="2" />
+     <param name='input2_predicted' value="4" />
+     <output name='html_file' file='nri_test1_out.html'  compare='diff' lines_diff='10' />
+     <output name='nri_file' file='nri_test1_out.xls' />
+    </test>
+</tests>
+<help>
+
+**Before you start**
+
+This is a simple tool to calculate various measures of improvement in prediction between two models described in pickering_paper_
+It is based on an R script pickering_code_ written by Dr John W Pickering and Dr David Cairns from sunny Otago University which
+has been debugged and slightly adjusted to fit a Galaxy tool wrapper.
+
+
+**What it does**
+
+Copied from the documentation in pickering_code_ ::
+
+
+    Functions to create risk assessment plots and associated summary statistics
+
+
+      (c) 2012 Dr John W Pickering, john.pickering@otago.ac.nz, and Dr David Cairns
+       Last modified August 2014
+
+      Redistribution and use in source and binary forms, with or without
+       modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in
+         the documentation and/or other materials provided with the distribution
+
+     FUNCTIONS
+     raplot
+           Produces a Risk Assessment Plot and outputs the coordinates of the four curves
+           Based on: Pickering, J. W. and Endre, Z. H. (2012). New Metrics for Assessing Diagnostic Potential of
+           Candidate Biomarkers. Clinical Journal of the American Society of Nephrology, 7, 1355–1364. doi:10.2215/CJN.09590911
+
+     statistics.raplot
+           Produces the NRIs, IDIs, IS, IP, AUCs.
+           Based on: Pencina, M. J., D'Agostino, R. B. and Steyerberg, E. W. (2011). Extensions of net reclassification improvement calculations to
+           measure usefulness of new biomarkers. Statistics in Medicine, 30(1), 11–21. doi:10.1002/sim.4085
+           Pencina, M. J., D'Agostino, R. B. and Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: From area under the
+           ROC curve to reclassification and beyond.
+           Statistics in Medicine, 27(2), 157–172. doi:10.1002/sim.2929
+           DeLong, E., DeLong, D. and Clarke-Pearson, D. (1988). Comparing the areas under 2 or more correlated receiver operating characteristic curves - a nonparametric approach.
+           Biometrics, 44(3), 837–845.
+
+     summary.raplot
+           Produces the NRIs, IDIs, IS, IP, AUCs with confidence intervals using a bootstrap or asymptotic procedure. (I prefer bootstrap which is chosen by cis=c("boot"))
+
+
+     Required arguments for all functions:
+       x1 is calculated risk (eg from a glm) for the null model, i.e. predict(,type="response") on a glm object
+       x2 is calculated risk (eg from a glm) for the alternative model
+       y is the case-control indicator (0 for controls, 1 for cases)
+     Optional argument
+       t are the boundaries of the risks for each group (ie 0, 1 and the thresholds between, eg c(0, 0.3, 0.7, 1)). If missing, defaults to c(0, the incidence, 1)
+
+
+**Input**
+
+The observed and predicted outcomes from two models to be compared.
+
+**Output**
+
+Lots'o'measures (TM) see pickering_paper_ for details
+
+**Attributions**
+
+pickering_paper_ is the paper on which the calculations performed by this tool are based
+
+pickering_code_ is the R function from John Pickering exposed by this Galaxy tool with minor modifications and hacks by Ross Lazarus.
+
+Galaxy_ (that's what you are using right now!) for gluing everything together
+
+Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
+licensed to you under the LGPL_ like other rgenetics artefacts
+
+.. _LGPL: http://www.gnu.org/copyleft/lesser.html
+.. _pickering_code: http://www.researchgate.net/publication/264672640_R_function_for_Risk_Assessment_Plot__reclassification_metrics_NRI_IDI_cfNRI
+.. _pickering_paper: http://cjasn.asnjournals.org/content/early/2012/05/24/CJN.09590911.full
+.. _Galaxy: http://getgalaxy.org
+
+
+</help>
+
 <citations>
     <citation type="doi">doi: 10.2215/​CJN.09590911</citation>
 </citations>
--- a/rglasso_cox.xml	Wed Apr 29 12:07:11 2015 -0400
+++ b/rglasso_cox.xml	Mon May 04 22:47:29 2015 -0400
@@ -7,223 +7,9 @@
       <requirement type="package" version="2.14">glmnet_lars_2_14</requirement>
   </requirements>
   <command interpreter="python">
-     rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "rglasso" 
+     rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "rglasso"
     --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
   </command>
-  <inputs>
-     <param name="title" type="text" value="lasso test" size="80" label="Title for job outputs" help="Typing a short, meaningful text here will help remind you (and explain to others) what the outputs represent">
-      <sanitizer invalid_char="">
-        <valid initial="string.letters,string.digits"><add value="_" /> </valid>
-      </sanitizer>
-    </param>
-    <param name="input1"  type="data" format="tabular" label="Select an input tabular text file from your history. Rows represent samples; Columns are measured phenotypes"
-    multiple='False' optional="False" help="Tabular text data with samples as rows, phenotypes as columns with a header row of column identifiers" />
-    <param name="xvar_cols" label="Select columns containing numeric variables to use as predictor (x) variables" type="data_column" data_ref="input1" numerical="False" 
-         multiple="True" use_header_names="True" force_select="True" />
-    <param name="force_xvar_cols" label="Select numeric columns containing variables ALWAYS included as predictors in cross validation" type="data_column" data_ref="input1" numerical="False" 
-         multiple="True" use_header_names="True" force_select="False"/>
-    <conditional name="model">
-        <param name="fam" type="select" label="GLM Link function for models" 
-             help="Binary dependant variables will automatically be set to Binomial no matter what this is set to">
-                <option value="gaussian" selected="true">Gaussian - continuous dependent (y)</option>
-                <option value="binomial">Binomial dependent variables</option>
-                <option value="poisson">Poisson (eg counts)</option>
-                <option value="cox">Cox models - require special setup for y variables - see below</option>
-        </param>
-        <when value="gaussian">
-            <param name="yvar_cols" label="Select numeric columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="False" 
-             multiple="True" use_header_names="True"  help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
-            <param name="output_full" type="hidden" value='F' />
-            <param name="output_pred" type="hidden" value='F' />
-              <param name="cox_id" label="Select column containing a unique sample identifier"
-                 help = "Only really needed for output sample specific predicted values downstream."
-                 type="data_column" data_ref="input1" numerical="False" force_select="True"
-                 multiple="False" use_header_names="True" />
-      </when>
-        <when value="binomial">
-            <param name="yvar_cols" label="Select numeric columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="False" 
-             multiple="True" use_header_names="True"  help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
-             <param name="output_full" type="hidden" value='F' />
-             <param name="output_pred" type="select" label="Create a tabular output with predicted values for each subject from the optimal model for (eg) NRI estimates" >
-                <option value="F" selected="true">No predicted value output file</option>
-                <option value="T">Create a predicted value output file</option>
-             </param>             
-              <param name="cox_id" label="Select column containing a unique sample identifier" 
-                 help = "Only really needed for output sample specific predicted values downstream."
-                 type="data_column" data_ref="input1" numerical="False" force_select="True"
-                 multiple="False" use_header_names="True" />
-             <param name="predict_at" type="hidden" value='' />
-             
-        </when>
-        <when value="poisson">
-            <param name="yvar_cols" label="Select columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="True" 
-             multiple="True" use_header_names="True"  help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
-             <param name="output_full" type="hidden" value='F' />
-             <param name="output_pred" type="hidden" value='F' />
-             <param name="predict_at" type="hidden" value='' />
-              <param name="cox_id" label="Select column containing a unique sample identifier"
-                 help = "Optional. Only really needed for output sample specific predicted values downstream. Free - enjoy"
-                 type="data_column" data_ref="input1" numerical="True" force_select="False"
-                 multiple="False" use_header_names="True" />
-        </when>
-        <when value="cox">
-             <param name="cox_time" label="Select column containing time under observation for Cox regression"
-                 type="data_column" data_ref="input1" numerical="True" force_select="True"
-                 multiple="False" use_header_names="True"  help = "This MUST contain a time period - eg continuous years or days to failure or right censoring"/>
-             <param name="cox_status" label="Select column containing status = 1 for outcome of interest at the end of the time under observation or 0 for right censoring"
-                 type="data_column" data_ref="input1" numerical="True" force_select="True"
-                 multiple="False" use_header_names="True"  help = "This MUST contain 1 for subjects who had an event at that time or 0 for a right censored observation"/>
-              <param name="cox_id" label="Select column containing a unique sample identifier"
-                 help = "Optional. Only really needed for output sample specific predicted values downstream. Free - enjoy"
-                 type="data_column" data_ref="input1" numerical="False" force_select="False"
-                 multiple="False" use_header_names="True" />
-             <param name="output_full" type="select" label="Create a tabular output with coefficients for all predictors" >
-                <option value="F" selected="true">No full model output file</option>
-                <option value="T">Create a full model output file</option>
-             </param>
-             <param name="output_pred" type="select" label="Create a tabular output with predicted values for each subject from the optimal model for (eg) NRI estimates" >
-                <option value="F" selected="true">No predicted value output file</option>
-                <option value="T">Create a predicted value output file</option>
-             </param>
-             <param name="predict_at"  type="text" value='' label="Provide a comma separated list of times to make a prediction for each subject"
-                 optional="True" help="Default (blank) will return predictions at 0%,25%,50%,75%,100% of the observed times which should be informative" />
-             
-        </when>
-    </conditional>
-    <param name="optLambda" type="select" label="Value to use when reporting optimal model and coefficients" help="minLambda will have more predictors - 1SDLambda will be more parsimonious">
-            <option value="lambda.1se" selected="true">Lambda + 1 SE of min MSE or AUC (fewer coefficients - more false negatives)</option>
-            <option value="lambda.min">Lambda at min MSE or max AUC (more coefficients - more false positives)</option>
-    </param>
-    <param name="logxform_cols"  optional="True" label="Select numeric columns to be log transformed before use as predictors or dependent variables" type="data_column"
-        data_ref="input1" numerical="True" multiple="True" use_header_names="True" help = "The wisdom of doing this depends entirely on your predictors - eg can help diminish long-tailed outlier influence"
-        force_select="False"/>
-    <param name="do_standard" type="select" label="Standardise x vars" 
-         help="If all measurements on same scale, may not be needed. Coefficients are always returned on the original scale.">
-            <option value="False" selected="true">No standardisation of predictors</option>l
-            <option value="True">Standardise predictors before model</option>
-    </param>
-    <param name="mdsplots" type="select" label="Generate MDS plots of samples in measurement space and measurements in sample space" >
-            <option value="False" selected="true">No MDS plots</option>l
-            <option value="True">Yes create MDS plots</option>
-    </param>
-    <param name="alpha" type="float" value="0.95" size="5" min="0.01" max="1.0" label="Alpha - see glmnet docs. 1 for pure lasso. 0.0 for pure ridge regression"
-     help="Default 0.95 allows lasso to cope better with expected predictor collinearity. Use (eg) 0.5 for hybrid regularised regression or (eg) 0.025 for ridge regression"/>
-    <param name="nfold" type="integer" value="10" size="5" label="Number of folds for internal cross validation"
-     help="Default of 10 is usually ok"/>
-  </inputs>
-  <outputs>
-    <data format="html" name="html_file" label="${title}.html"/>
-    <data format="tabular" name="model_file" label="${title}_modelres.xls"/>
-    <data format="tabular" name="output_full_file" label="${title}_full_cox_model.xls">
-        <filter>model['output_full'] == 'T'</filter>
-    </data>
-    <data format="tabular" name="output_pred_file" label="${title}_predicted_from_model.xls">
-        <filter>model['output_pred'] == 'T'</filter>
-    </data>
-  </outputs>
- <tests>
-    <test>
-     <param name='input1' value='cox_test.xls' ftype='tabular' />
-     <param name='treatment_name' value='case' />
-     <param name='title' value='Cox glmnet test' />
-     <param name='nfold' value='10' />
-     <param name='logxform_cols' value='' />
-     <param name='alpha' value='0.95' />
-     <param name='do_standard' value="True" />
-     <param name='cox_time' value='1' />
-     <param name='cox_status' value='2' />
-     <param name='cox_id' value='1' />
-     <param name='predict_at' value='' />
-     <param name='fam' value='cox' />
-     <param name='yvar_cols' value='' />
-     <param name='xvar_cols' value='3,4,5' />
-     <param name='force_xvar_cols' value='3' />
-     <param name='output_full' value='F' />
-     <param name='output_pred' value='F' />
-     <output name='model_file' file='coxlassotest_modelres.xls'> 
-          <assert_contents>
-                <has_text text="rhubarb" />
-                <has_text text="TRUE" />
-                <!-- &#009; is XML escape code for tab -->
-                <!-- has_line line="regulator&#009;partial_likelihood&#009;forced_in&#009;glmnet_model&#009;best_lambda" / -->
-                <has_line line="regulator&#009;partial_likelihood&#009;forced_in&#009;glmnet_model&#009;best_lambda&#009;lambdaChoice&#009;alpha" />
-                <has_n_columns n="7" />
-           </assert_contents>
-     </output>
-     <output name='html_file' file='coxlassotest.html'  compare='diff' lines_diff='16' />
-    </test>
-</tests>
-<help>
-
-**Before you start**
-
-Please read the glmnet documentation @ glmnet_
-
-This Galaxy wrapper merely exposes that code and the glmnet_ documentation is essential reading
-before getting useful results here.
-
-**What it does**
-
-From documentation at glmnet_ ::
-
- Glmnet is a package that fits a generalized linear model via penalized maximum likelihood.
- The regularization path is computed for the lasso or elasticnet penalty at a grid of values for the regularization parameter lambda.
- The algorithm is extremely fast, and can exploit sparsity in the input matrix x.
- It fits linear, logistic and multinomial, poisson, and Cox regression models.
- A variety of predictions can be made from the fitted models.
-
-Internal cross validation is used to optimise the choice of lambda based on CV AUC for logistic (binomial outcome) models, or CV mse for gaussian.
-
-**Warning about the tyrany of dimensionality**
-
-Yes, this package will select 'optimal' models even when you (optimistically) supply more predictors than you have cases.
-The model returned is unlikely to represent the only informative regularisation path through your data - if you run repeatedly with
-exactly the same settings, you will probably see many different models being selected.
-This is not a software bug - the real problem is that you just don't have enough information in your data. 
-
-Sufficiently big jobs will take a while (eg each lasso regression with 20k features on 1k samples takes about 2-3 minutes on our aged cluster)
-
-**Input**
-
-Assuming you have more measurements than samples, you supply data as a tabular text file where each row is a sample and columns
-are variables. You specify which columns are dependent (predictors) and which are observations for each sample. Each of multiple
-dependent variable columns will be run and reported independently. Predictors can be forced in to the model.
-
-**Output**
-
-For each selected dependent regression variable, a brief report of the model coefficients predicted at the
-'optimal' nfold CV value of lambda.
-
-**Predicted event probabilities for Cox and Logistic models**
-
-If you want to compare (eg) two competing clinical predictions, there's a companion generic NRI tool
-for predicted event probabilities. Estimates dozens of measures of improvement in prediction. Currently only works for identical id subjects
-but can probably be extended to independent sample predictions.
-
-Given a model, we can generate a predicted p (for status 1) in binomial or cox frameworks so models can be evaluated in terms of NRI.
-Of course, estimates are likely substantially inflated over 'real world' performance by being estimated from the same sample - but you probably
-already knew that since you were smart enough to reach this far down into the on screen help. The author salutes you, intrepid reader!
-
-It may seem an odd thing to do, but we can predict p for an event for each subject from our original data, given a parsimonious model. Doing
-this for two separate models (eg, forcing in an additional known explanatory measurement to the new model) allows comparison of the two models
-predicted status for each subject, or the same model in independent populations to see how badly it does
-
-**Attributions**
-
-glmnet_ is the R package exposed by this Galaxy tool.
-
-Galaxy_ (that's what you are using right now!) for gluing everything together 
-
-Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is 
-licensed to you under the LGPL_ like other rgenetics artefacts
-
-.. _LGPL: http://www.gnu.org/copyleft/lesser.html
-.. _glmnet: http://web.stanford.edu/~hastie/glmnet/glmnet_alpha.html
-.. _Galaxy: http://getgalaxy.org
-
-
-</help>
-
 <configfiles>
 <configfile name="runme">
 <![CDATA[
@@ -236,27 +22,27 @@
 message=function(x) {print.noquote(paste(x,sep=''))}
 
 
-ross.cv.glmnet = function (x, y, weights, offset = NULL, lambda = NULL, type.measure = c("mse", 
-    "deviance", "class", "auc", "mae"), nfolds = 10, foldid, 
-    grouped = TRUE, keep = FALSE, parallel = FALSE, ...) 
+ross.cv.glmnet = function (x, y, weights, offset = NULL, lambda = NULL, type.measure = c("mse",
+    "deviance", "class", "auc", "mae"), nfolds = 10, foldid,
+    grouped = TRUE, keep = FALSE, parallel = FALSE, ...)
 {
-    if (missing(type.measure)) 
+    if (missing(type.measure))
         type.measure = "default"
     else type.measure = match.arg(type.measure)
-    if (!is.null(lambda) && length(lambda) < 2) 
+    if (!is.null(lambda) && length(lambda) < 2)
         stop("Need more than one value of lambda for cv.glmnet")
     N = nrow(x)
-    if (missing(weights)) 
+    if (missing(weights))
         weights = rep(1, N)
     else weights = as.double(weights)
     y = drop(y)
     glmnet.call = match.call(expand.dots = TRUE)
-    sel = match(c("type.measure", "nfolds", "foldid", "grouped", 
+    sel = match(c("type.measure", "nfolds", "foldid", "grouped",
         "keep"), names(glmnet.call), F)
-    if (any(sel)) 
+    if (any(sel))
         glmnet.call = glmnet.call[-sel]
     glmnet.call[[1]] = as.name("glmnet")
-    glmnet.object = glmnet(x, y, weights = weights, offset = offset, 
+    glmnet.object = glmnet(x, y, weights = weights, offset = offset,
         lambda = lambda, ...)
     glmnet.object\$call = glmnet.call
     is.offset = glmnet.object\$offset
@@ -266,56 +52,56 @@
         nz = sapply(nz, function(x) sapply(x, length))
         nz = ceiling(apply(nz, 1, median))
     }
-    else nz = sapply(predict(glmnet.object, type = "nonzero"), 
+    else nz = sapply(predict(glmnet.object, type = "nonzero"),
         length)
-    if (missing(foldid)) 
+    if (missing(foldid))
         foldid = sample(rep(seq(nfolds), length = N))
     else nfolds = max(foldid)
-    if (nfolds < 3) 
+    if (nfolds < 3)
         stop("nfolds must be bigger than 3; nfolds=10 recommended")
     outlist = as.list(seq(nfolds))
     if (parallel && require(foreach)) {
-        outlist = foreach(i = seq(nfolds), .packages = c("glmnet")) %dopar% 
+        outlist = foreach(i = seq(nfolds), .packages = c("glmnet")) %dopar%
             {
                 sel = foldid == i
-                if (is.matrix(y)) 
+                if (is.matrix(y))
                   y_sub = y[!sel, ]
                 else y_sub = y[!sel]
-                if (is.offset) 
+                if (is.offset)
                   offset_sub = as.matrix(offset)[!sel, ]
                 else offset_sub = NULL
-                glmnet(x[!sel, , drop = FALSE], y_sub, lambda = lambda, 
-                  offset = offset_sub, weights = weights[!sel], 
+                glmnet(x[!sel, , drop = FALSE], y_sub, lambda = lambda,
+                  offset = offset_sub, weights = weights[!sel],
                   ...)
             }
     }
     else {
         for (i in seq(nfolds)) {
             sel = foldid == i
-            if (is.matrix(y)) 
+            if (is.matrix(y))
                 y_sub = y[!sel, ]
             else y_sub = y[!sel]
-            if (is.offset) 
+            if (is.offset)
                 offset_sub = as.matrix(offset)[!sel, ]
             else offset_sub = NULL
-            outlist[[i]] = glmnet(x[!sel, , drop = FALSE], 
-                y_sub, lambda = lambda, offset = offset_sub, 
+            outlist[[i]] = glmnet(x[!sel, , drop = FALSE],
+                y_sub, lambda = lambda, offset = offset_sub,
                 weights = weights[!sel], ...)
         }
     }
     fun = paste("cv", class(glmnet.object)[[1]], sep = ".")
-    cvstuff = do.call(fun, list(outlist, lambda, x, y, weights, 
+    cvstuff = do.call(fun, list(outlist, lambda, x, y, weights,
         offset, foldid, type.measure, grouped, keep))
     cvm = cvstuff\$cvm
     cvsd = cvstuff\$cvsd
     cvname = cvstuff\$name
 
-    out = list(lambda = lambda, cvm = cvm, cvsd = cvsd, cvup = cvm + 
+    out = list(lambda = lambda, cvm = cvm, cvsd = cvsd, cvup = cvm +
         cvsd, cvlo = cvm - cvsd, nzero = nz, name = cvname, glmnet.fit = glmnet.object)
-    if (keep) 
+    if (keep)
         out = c(out, list(fit.preval = cvstuff\$fit.preval, foldid = foldid))
 
-    lamin = if (type.measure == "auc") 
+    lamin = if (type.measure == "auc")
         getmin(lambda, -cvm, cvsd)
     else getmin(lambda, cvm, cvsd)
     out = c(out, as.list(lamin))
@@ -342,7 +128,7 @@
 
 mdsPlot = function(dm,myTitle,groups=NA,outpdfname,transpose=T)
 {
-  
+
   samples = colnames(dm)
   mt = myTitle
   pcols=c('maroon')
@@ -355,11 +141,11 @@
   mydata = dm
   if (transpose==T)
   {
-  mydata = t(dm) 
+  mydata = t(dm)
   }
   npred = ncol(mydata)
-  d = dist(mydata) 
-  fit = cmdscale(d,eig=TRUE, k=min(10,npred-2))  
+  d = dist(mydata)
+  fit = cmdscale(d,eig=TRUE, k=min(10,npred-2))
   xmds = fit\$points[,1]
   ymds = fit\$points[,2]
   pdf(outpdfname)
@@ -388,7 +174,7 @@
   formstring=paste("y ~",cn)
   form = as.formula(formstring)
   ok = complete.cases(x)
-  
+
   if (sum(ok) < length(ok)) {
     x = x[ok,]
     yvec = yvec[ok]
@@ -408,9 +194,6 @@
   return(p1)
 }
 
-
-
-
 getpredp_cox = function(x,time,status,id,predict_at)
 {
   cols = colnames(x)
@@ -419,13 +202,13 @@
        return(NA)
        }
   cn = paste(colnames(x), collapse = ' + ')
-  
+
   formstring=paste("Surv(time, status) ~",cn)
-  
+
   form = as.formula(formstring)
-  
+
   ok = complete.cases(x)
-  
+
   if (sum(ok) < length(ok)) {
     x = x[ok,]
     time = time[ok]
@@ -444,7 +227,6 @@
 }
 
 
-
 dolasso_cox = function(x,y,debugOn=F,maxsteps=10000,nfold=10,xcolnames,ycolnames,optLambda='lambda.1se',out_full=F,out_full_file=NA,
                              out_pred=F,out_pred_file=NA,cox_id=NA, descr='Cox test',do_standard=F,alpha=0.9,penalty,predict_at,mdsplots=F)
 {
@@ -457,7 +239,7 @@
       if (class(p) == "try-error")
       {
         print.noquote(paste('Unable to produce predictors in sample space mds plot',p))
-      }  
+      }
       outpdfname = 'cox_samples_in_x_space_MDS.pdf'
       p = try({mdsPlot(x,'samples in measurement space',groups=y,outpdfname=outpdfname,transpose=F) },T)
       if (class(p) == "try-error")
@@ -468,7 +250,7 @@
   if (is.na(predict_at)) { predict_at = quantile(y) }
   message(paste('@@@ Cox model will be predicted at times =',paste(predict_at,collapse=',')))
   do_standard = do_standard
-  standardize = do_standard 
+  standardize = do_standard
   normalize = do_standard
   p = try({larsres = glmnet(x,y,family='cox',standardize=standardize,alpha=alpha,penalty.factor=penalty )},T)
   if (class(p) == "try-error")
@@ -492,13 +274,13 @@
   try(
       {
       pdf(outpdf)
-      plot(larsres,main='cox glmnet',label=T) 
+      plot(larsres,main='cox glmnet',label=T)
       grid()
       dev.off()
       },T)
-  
+
   larscv = NA
- 
+
   p = try({larscv=ross.cv.glmnet(x,y,family=fam,type.measure='deviance',penalty=penalty)},T)
   if (class(p) == "try-error") {
      print.noquote(paste('Unable to cross validate your data',p))
@@ -588,23 +370,23 @@
     sink()
     return(NA)
   }
-  
+
   mt = paste('Glmnet fraction deviance for',target)
   outpdf = paste(target,'glmnetPath.pdf',sep='_')
   pdf(outpdf)
   plot(larsres,main=mt,label=T)
   grid()
   dev.off()
-  
+
   outpdf = paste(target,'glmnetDeviance.pdf',sep='_')
-  
+
   mt2 = paste('Glmnet lambda for',target)
-  
+
   pdf(outpdf)
   plot(larsres,xvar="lambda",main=mt2,label=T)
   grid()
   dev.off()
-  
+
   larscv = NA
   if (fam=="binomial") {
     tmain = paste(target,'AUC')
@@ -621,13 +403,12 @@
     sink()
     return(NA)
   }
-  
-  
+
   pdf(outpdf)
   plot(larscv,main=tmain)
   grid()
   dev.off()
-  
+
   lse = larscv\$cvhits.1se
   lmin = larscv\$cvhits.min
   tot = lse + lmin
@@ -637,7 +418,7 @@
   print.noquote(nzhits)
   out_nz_file = paste(target,'cross_validation_model_counts.xls',sep='_')
   write.table(nzhits,out_nz_file,quote=FALSE, sep="\t",row.names=F)
-  
+
   ipenalty = c(0,penalty)
   if (optLambda == 'lambda.min') {
     best_lambda = larscv\$lambda.min
@@ -747,7 +528,7 @@
 
 corPlot=function(xdat=c(),main='main title',is_raw=T)
 {
-  library(pheatmap)   
+  library(pheatmap)
   library(gplots)
   if (is_raw) {
     cxdat = cor(xdat,method="spearman",use="pairwise.complete.obs")
@@ -762,17 +543,15 @@
 }
 
 
-
-
 runTest = function(n=10)
-{  
+{
   set.seed (NULL)
   Y = data.frame(y1=runif (n),y2=runif(n))
   Xv <- runif(n*n)
   X <- matrix(Xv, nrow = n, ncol = n)
-  
+
   mydf <- data.frame(Y, X)
-  
+
   regres_out = dolasso_generic(predvars=X,depvars=Y,debugOn=T,p.cutoff = 0.05,maxsteps=10000,nfold=10,
                                descr='randomdata',do_standard=do_standard,defaultFam="gaussian",alpha=0.05)
   return(regres_out)
@@ -900,6 +679,218 @@
 
 </configfile>
 </configfiles>
+  <inputs>
+     <param name="title" type="text" value="lasso test" size="80" label="Title for job outputs" help="Typing a short, meaningful text here will help remind you (and explain to others) what the outputs represent">
+      <sanitizer invalid_char="">
+        <valid initial="string.letters,string.digits"><add value="_" /> </valid>
+      </sanitizer>
+    </param>
+    <param name="input1"  type="data" format="tabular" label="Select an input tabular text file from your history. Rows represent samples; Columns are measured phenotypes"
+    multiple='False' optional="False" help="Tabular text data with samples as rows, phenotypes as columns with a header row of column identifiers" />
+    <param name="xvar_cols" label="Select columns containing numeric variables to use as predictor (x) variables" type="data_column" data_ref="input1" numerical="False"
+         multiple="True" use_header_names="True" force_select="True" />
+    <param name="force_xvar_cols" label="Select numeric columns containing variables ALWAYS included as predictors in cross validation" type="data_column" data_ref="input1" numerical="False"
+         multiple="True" use_header_names="True" force_select="False"/>
+    <conditional name="model">
+        <param name="fam" type="select" label="GLM Link function for models"
+             help="Binary dependent variables will automatically be set to Binomial no matter what this is set to">
+                <option value="gaussian" selected="true">Gaussian - continuous dependent (y)</option>
+                <option value="binomial">Binomial dependent variables</option>
+                <option value="poisson">Poisson (eg counts)</option>
+                <option value="cox">Cox models - require special setup for y variables - see below</option>
+        </param>
+        <when value="gaussian">
+            <param name="yvar_cols" label="Select numeric columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="False"
+             multiple="True" use_header_names="True"  help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
+            <param name="output_full" type="hidden" value='F' />
+            <param name="output_pred" type="hidden" value='F' />
+              <param name="cox_id" label="Select column containing a unique sample identifier"
+                 help = "Only really needed for output sample specific predicted values downstream."
+                 type="data_column" data_ref="input1" numerical="False" force_select="True"
+                 multiple="False" use_header_names="True" />
+      </when>
+        <when value="binomial">
+            <param name="yvar_cols" label="Select numeric columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="False"
+             multiple="True" use_header_names="True"  help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
+             <param name="output_full" type="hidden" value='F' />
+             <param name="output_pred" type="select" label="Create a tabular output with predicted values for each subject from the optimal model for (eg) NRI estimates" >
+                <option value="F" selected="true">No predicted value output file</option>
+                <option value="T">Create a predicted value output file</option>
+             </param>
+              <param name="cox_id" label="Select column containing a unique sample identifier"
+                 help = "Only really needed for output sample specific predicted values downstream."
+                 type="data_column" data_ref="input1" numerical="False" force_select="True"
+                 multiple="False" use_header_names="True" />
+             <param name="predict_at" type="hidden" value='' />
+
+        </when>
+        <when value="poisson">
+            <param name="yvar_cols" label="Select columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="True"
+             multiple="True" use_header_names="True"  help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
+             <param name="output_full" type="hidden" value='F' />
+             <param name="output_pred" type="hidden" value='F' />
+             <param name="predict_at" type="hidden" value='' />
+              <param name="cox_id" label="Select column containing a unique sample identifier"
+                 help = "Optional. Only really needed for output sample specific predicted values downstream. Free - enjoy"
+                 type="data_column" data_ref="input1" numerical="True" force_select="False"
+                 multiple="False" use_header_names="True" />
+        </when>
+        <when value="cox">
+             <param name="cox_time" label="Select column containing time under observation for Cox regression"
+                 type="data_column" data_ref="input1" numerical="True" force_select="True"
+                 multiple="False" use_header_names="True"  help = "This MUST contain a time period - eg continuous years or days to failure or right censoring"/>
+             <param name="cox_status" label="Select column containing status = 1 for outcome of interest at the end of the time under observation or 0 for right censoring"
+                 type="data_column" data_ref="input1" numerical="True" force_select="True"
+                 multiple="False" use_header_names="True"  help = "This MUST contain 1 for subjects who had an event at that time or 0 for a right censored observation"/>
+              <param name="cox_id" label="Select column containing a unique sample identifier"
+                 help = "Optional. Only really needed for output sample specific predicted values downstream. Free - enjoy"
+                 type="data_column" data_ref="input1" numerical="False" force_select="False"
+                 multiple="False" use_header_names="True" />
+             <param name="output_full" type="select" label="Create a tabular output with coefficients for all predictors" >
+                <option value="F" selected="true">No full model output file</option>
+                <option value="T">Create a full model output file</option>
+             </param>
+             <param name="output_pred" type="select" label="Create a tabular output with predicted values for each subject from the optimal model for (eg) NRI estimates" >
+                <option value="F" selected="true">No predicted value output file</option>
+                <option value="T">Create a predicted value output file</option>
+             </param>
+             <param name="predict_at"  type="text" value='' label="Provide a comma separated list of times to make a prediction for each subject"
+                 optional="True" help="Default (blank) will return predictions at 0%,25%,50%,75%,100% of the observed times which should be informative" />
+
+        </when>
+    </conditional>
+    <param name="optLambda" type="select" label="Value to use when reporting optimal model and coefficients" help="minLambda will have more predictors - 1SDLambda will be more parsimonious">
+            <option value="lambda.1se" selected="true">Lambda + 1 SE of min MSE or AUC (fewer coefficients - more false negatives)</option>
+            <option value="lambda.min">Lambda at min MSE or max AUC (more coefficients - more false positives)</option>
+    </param>
+    <param name="logxform_cols"  optional="True" label="Select numeric columns to be log transformed before use as predictors or dependent variables" type="data_column"
+        data_ref="input1" numerical="True" multiple="True" use_header_names="True" help = "The wisdom of doing this depends entirely on your predictors - eg can help diminish long-tailed outlier influence"
+        force_select="False"/>
+    <param name="do_standard" type="select" label="Standardise x vars"
+         help="If all measurements on same scale, may not be needed. Coefficients are always returned on the original scale.">
+            <option value="False" selected="true">No standardisation of predictors</option>
+            <option value="True">Standardise predictors before model</option>
+    </param>
+    <param name="mdsplots" type="select" label="Generate MDS plots of samples in measurement space and measurements in sample space" >
+            <option value="False" selected="true">No MDS plots</option>
+            <option value="True">Yes create MDS plots</option>
+    </param>
+    <param name="alpha" type="float" value="0.95" size="5" min="0.01" max="1.0" label="Alpha - see glmnet docs. 1 for pure lasso. 0.0 for pure ridge regression"
+     help="Default 0.95 allows lasso to cope better with expected predictor collinearity. Use (eg) 0.5 for hybrid regularised regression or (eg) 0.025 for ridge regression"/>
+    <param name="nfold" type="integer" value="10" size="5" label="Number of folds for internal cross validation"
+     help="Default of 10 is usually ok"/>
+  </inputs>
+  <outputs>
+    <data format="html" name="html_file" label="${title}.html"/>
+    <data format="tabular" name="model_file" label="${title}_modelres.xls"/>
+    <data format="tabular" name="output_full_file" label="${title}_full_cox_model.xls">
+        <filter>model['output_full'] == 'T'</filter>
+    </data>
+    <data format="tabular" name="output_pred_file" label="${title}_predicted_from_model.xls">
+        <filter>model['output_pred'] == 'T'</filter>
+    </data>
+  </outputs>
+ <tests>
+    <test>
+     <param name='input1' value='cox_test.xls' ftype='tabular' />
+     <param name='treatment_name' value='case' />
+     <param name='title' value='Cox glmnet test' />
+     <param name='nfold' value='10' />
+     <param name='logxform_cols' value='' />
+     <param name='alpha' value='0.95' />
+     <param name='do_standard' value="True" />
+     <param name='cox_time' value='1' />
+     <param name='cox_status' value='2' />
+     <param name='cox_id' value='1' />
+     <param name='predict_at' value='' />
+     <param name='fam' value='cox' />
+     <param name='yvar_cols' value='' />
+     <param name='xvar_cols' value='3,4,5' />
+     <param name='force_xvar_cols' value='3' />
+     <param name='output_full' value='F' />
+     <param name='output_pred' value='F' />
+     <output name='model_file' file='coxlassotest_modelres.xls'>
+          <assert_contents>
+                <has_text text="rhubarb" />
+                <has_text text="TRUE" />
+                <!-- &#009; is XML escape code for tab -->
+                <!-- has_line line="regulator&#009;partial_likelihood&#009;forced_in&#009;glmnet_model&#009;best_lambda" / -->
+                <has_line line="regulator&#009;partial_likelihood&#009;forced_in&#009;glmnet_model&#009;best_lambda&#009;lambdaChoice&#009;alpha" />
+                <has_n_columns n="7" />
+           </assert_contents>
+     </output>
+     <output name='html_file' file='coxlassotest.html'  compare='diff' lines_diff='16' />
+    </test>
+</tests>
+<help>
+
+**Before you start**
+
+Please read the glmnet documentation @ glmnet_
+
+This Galaxy wrapper merely exposes that code and the glmnet_ documentation is essential reading
+before getting useful results here.
+
+**What it does**
+
+From documentation at glmnet_ ::
+
+ Glmnet is a package that fits a generalized linear model via penalized maximum likelihood.
+ The regularization path is computed for the lasso or elasticnet penalty at a grid of values for the regularization parameter lambda.
+ The algorithm is extremely fast, and can exploit sparsity in the input matrix x.
+ It fits linear, logistic and multinomial, poisson, and Cox regression models.
+ A variety of predictions can be made from the fitted models.
+
+Internal cross validation is used to optimise the choice of lambda based on CV AUC for logistic (binomial outcome) models, or CV mse for gaussian.
+
+**Warning about the tyranny of dimensionality**
+
+Yes, this package will select 'optimal' models even when you (optimistically) supply more predictors than you have cases.
+The model returned is unlikely to represent the only informative regularisation path through your data - if you run repeatedly with
+exactly the same settings, you will probably see many different models being selected.
+This is not a software bug - the real problem is that you just don't have enough information in your data.
+
+Sufficiently big jobs will take a while (eg each lasso regression with 20k features on 1k samples takes about 2-3 minutes on our aged cluster)
+
+**Input**
+
+Assuming you have more measurements than samples, you supply data as a tabular text file where each row is a sample and columns
+are variables. You specify which columns are dependent (predictors) and which are observations for each sample. Each of multiple
+dependent variable columns will be run and reported independently. Predictors can be forced in to the model.
+
+**Output**
+
+For each selected dependent regression variable, a brief report of the model coefficients predicted at the
+'optimal' nfold CV value of lambda.
+
+**Predicted event probabilities for Cox and Logistic models**
+
+If you want to compare (eg) two competing clinical predictions, there's a companion generic NRI tool
+for predicted event probabilities. Estimates dozens of measures of improvement in prediction. Currently only works for identical id subjects
+but can probably be extended to independent sample predictions.
+
+Given a model, we can generate a predicted p (for status 1) in binomial or cox frameworks so models can be evaluated in terms of NRI.
+Of course, estimates are likely substantially inflated over 'real world' performance by being estimated from the same sample - but you probably
+already knew that since you were smart enough to reach this far down into the on screen help. The author salutes you, intrepid reader!
+
+It may seem an odd thing to do, but we can predict p for an event for each subject from our original data, given a parsimonious model. Doing
+this for two separate models (eg, forcing in an additional known explanatory measurement to the new model) allows comparison of the two models
+predicted status for each subject, or the same model in independent populations to see how badly it does.
+
+**Attributions**
+
+glmnet_ is the R package exposed by this Galaxy tool.
+
+Galaxy_ (that's what you are using right now!) for gluing everything together
+
+Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
+licensed to you under the LGPL_ like other rgenetics artefacts
+
+.. _LGPL: http://www.gnu.org/copyleft/lesser.html
+.. _glmnet: http://web.stanford.edu/~hastie/glmnet/glmnet_alpha.html
+.. _Galaxy: http://getgalaxy.org
+</help>
+
 <citations>
     <citation type="bibtex">
 @Article{Friedman2010, title = {Regularization Paths for Generalized Linear Models via Coordinate Descent},
@@ -917,6 +908,3 @@
     </citation>
 </citations>
 </tool>
-
-
-