4
|
1 <tool id="rglasso_cox" name="Lasso" version="0.03">
|
|
2 <description>and cox regression using elastic net</description>
|
|
3 <requirements>
|
|
4 <requirement type="package" version="3.1.1">R_3_1_1</requirement>
|
|
5 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
|
|
6 <requirement type="package" version="9.10">ghostscript</requirement>
|
|
7 <requirement type="package" version="2.14">glmnet_lars_2_14</requirement>
|
|
8 </requirements>
|
|
9 <command interpreter="python">
|
|
10 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "rglasso"
|
|
11 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
|
|
12 </command>
|
|
13 <inputs>
|
|
14 <param name="title" type="text" value="lasso test" size="80" label="Title for job outputs" help="Typing a short, meaningful text here will help remind you (and explain to others) what the outputs represent">
|
|
15 <sanitizer invalid_char="">
|
|
16 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
|
|
17 </sanitizer>
|
|
18 </param>
|
|
19 <param name="input1" type="data" format="tabular" label="Select an input tabular text file from your history. Rows represent samples; Columns are measured phenotypes"
|
|
20 multiple='False' optional="False" help="Tabular text data with samples as rows, phenotypes as columns with a header row of column identifiers" />
|
|
21 <param name="xvar_cols" label="Select columns containing numeric variables to use as predictor (x) variables" type="data_column" data_ref="input1" numerical="False"
|
|
22 multiple="True" use_header_names="True" force_select="True" />
|
|
23 <param name="force_xvar_cols" label="Select numeric columns containing variables ALWAYS included as predictors in cross validation" type="data_column" data_ref="input1" numerical="False"
|
|
24 multiple="True" use_header_names="True" force_select="False"/>
|
|
25 <conditional name="model">
|
|
26 <param name="fam" type="select" label="GLM Link function for models"
|
|
27 help="Binary dependant variables will automatically be set to Binomial no matter what this is set to">
|
|
28 <option value="gaussian" selected="true">Gaussian - continuous dependent (y)</option>
|
|
29 <option value="binomial">Binomial dependent variables</option>
|
|
30 <option value="poisson">Poisson (eg counts)</option>
|
|
31 <option value="cox">Cox models - require special setup for y variables - see below</option>
|
|
32 </param>
|
|
33 <when value="gaussian">
|
|
34 <param name="yvar_cols" label="Select numeric columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="False"
|
|
35 multiple="True" use_header_names="True" help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
|
|
36 <param name="output_full" type="hidden" value='F' />
|
|
37 <param name="output_pred" type="hidden" value='F' />
|
|
38 <param name="cox_id" label="Select column containing a unique sample identifier"
|
|
39 help = "Only really needed for output sample specific predicted values downstream."
|
|
40 type="data_column" data_ref="input1" numerical="False" force_select="True"
|
|
41 multiple="False" use_header_names="True" />
|
|
42 </when>
|
|
43 <when value="binomial">
|
|
44 <param name="yvar_cols" label="Select numeric columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="False"
|
|
45 multiple="True" use_header_names="True" help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
|
|
46 <param name="output_full" type="hidden" value='F' />
|
|
47 <param name="output_pred" type="select" label="Create a tabular output with predicted values for each subject from the optimal model for (eg) NRI estimates" >
|
|
48 <option value="F" selected="true">No predicted value output file</option>
|
|
49 <option value="T">Create a predicted value output file</option>
|
|
50 </param>
|
|
51 <param name="cox_id" label="Select column containing a unique sample identifier"
|
|
52 help = "Only really needed for output sample specific predicted values downstream."
|
|
53 type="data_column" data_ref="input1" numerical="False" force_select="True"
|
|
54 multiple="False" use_header_names="True" />
|
|
55 <param name="predict_at" type="hidden" value='' />
|
|
56
|
|
57 </when>
|
|
58 <when value="poisson">
|
|
59 <param name="yvar_cols" label="Select columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="True"
|
|
60 multiple="True" use_header_names="True" help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
|
|
61 <param name="output_full" type="hidden" value='F' />
|
|
62 <param name="output_pred" type="hidden" value='F' />
|
|
63 <param name="predict_at" type="hidden" value='' />
|
|
64 <param name="cox_id" label="Select column containing a unique sample identifier"
|
|
65 help = "Optional. Only really needed for output sample specific predicted values downstream. Free - enjoy"
|
|
66 type="data_column" data_ref="input1" numerical="True" force_select="False"
|
|
67 multiple="False" use_header_names="True" />
|
|
68 </when>
|
|
69 <when value="cox">
|
|
70 <param name="cox_time" label="Select column containing time under observation for Cox regression"
|
|
71 type="data_column" data_ref="input1" numerical="True" force_select="True"
|
|
72 multiple="False" use_header_names="True" help = "This MUST contain a time period - eg continuous years or days to failure or right censoring"/>
|
|
73 <param name="cox_status" label="Select column containing status = 1 for outcome of interest at the end of the time under observation or 0 for right censoring"
|
|
74 type="data_column" data_ref="input1" numerical="True" force_select="True"
|
|
75 multiple="False" use_header_names="True" help = "This MUST contain 1 for subjects who had an event at that time or 0 for a right censored observation"/>
|
|
76 <param name="cox_id" label="Select column containing a unique sample identifier"
|
|
77 help = "Optional. Only really needed for output sample specific predicted values downstream. Free - enjoy"
|
|
78 type="data_column" data_ref="input1" numerical="False" force_select="False"
|
|
79 multiple="False" use_header_names="True" />
|
|
80 <param name="output_full" type="select" label="Create a tabular output with coefficients for all predictors" >
|
|
81 <option value="F" selected="true">No full model output file</option>
|
|
82 <option value="T">Create a full model output file</option>
|
|
83 </param>
|
|
84 <param name="output_pred" type="select" label="Create a tabular output with predicted values for each subject from the optimal model for (eg) NRI estimates" >
|
|
85 <option value="F" selected="true">No predicted value output file</option>
|
|
86 <option value="T">Create a predicted value output file</option>
|
|
87 </param>
|
|
88 <param name="predict_at" type="text" value='' label="Provide a comma separated list of times to make a prediction for each subject"
|
|
89 optional="True" help="Default (blank) will return predictions at 0%,25%,50%,75%,100% of the observed times which should be informative" />
|
|
90
|
|
91 </when>
|
|
92 </conditional>
|
|
93 <param name="optLambda" type="select" label="Value to use when reporting optimal model and coefficients" help="minLambda will have more predictors - 1SDLambda will be more parsimonious">
|
|
94 <option value="lambda.1se" selected="true">Lambda + 1 SE of min MSE or AUC (fewer coefficients - more false negatives)</option>
|
|
95 <option value="lambda.min">Lambda at min MSE or max AUC (more coefficients - more false positives)</option>
|
|
96 </param>
|
|
97 <param name="logxform_cols" optional="True" label="Select numeric columns to be log transformed before use as predictors or dependent variables" type="data_column"
|
|
98 data_ref="input1" numerical="True" multiple="True" use_header_names="True" help = "The wisdom of doing this depends entirely on your predictors - eg can help diminish long-tailed outlier influence"
|
|
99 force_select="False"/>
|
|
100 <param name="do_standard" type="select" label="Standardise x vars"
|
|
101 help="If all measurements on same scale, may not be needed. Coefficients are always returned on the original scale.">
|
|
102 <option value="False" selected="true">No standardisation of predictors</option>l
|
|
103 <option value="True">Standardise predictors before model</option>
|
|
104 </param>
|
|
105 <param name="mdsplots" type="select" label="Generate MDS plots of samples in measurement space and measurements in sample space" >
|
|
106 <option value="False" selected="true">No MDS plots</option>l
|
|
107 <option value="True">Yes create MDS plots</option>
|
|
108 </param>
|
|
109 <param name="alpha" type="float" value="0.95" size="5" min="0.01" max="1.0" label="Alpha - see glmnet docs. 1 for pure lasso. 0.0 for pure ridge regression"
|
|
110 help="Default 0.95 allows lasso to cope better with expected predictor collinearity. Use (eg) 0.5 for hybrid regularised regression or (eg) 0.025 for ridge regression"/>
|
|
111 <param name="nfold" type="integer" value="10" size="5" label="Number of folds for internal cross validation"
|
|
112 help="Default of 10 is usually ok"/>
|
|
113 </inputs>
|
|
114 <outputs>
|
|
115 <data format="html" name="html_file" label="${title}.html"/>
|
|
116 <data format="tabular" name="model_file" label="${title}_modelres.xls"/>
|
|
117 <data format="tabular" name="output_full_file" label="${title}_full_cox_model.xls">
|
|
118 <filter>model['output_full'] == 'T'</filter>
|
|
119 </data>
|
|
120 <data format="tabular" name="output_pred_file" label="${title}_predicted_from_model.xls">
|
|
121 <filter>model['output_pred'] == 'T'</filter>
|
|
122 </data>
|
|
123 </outputs>
|
|
124 <tests>
|
|
125 <test>
|
|
126 <param name='input1' value='cox_test.xls' ftype='tabular' />
|
|
127 <param name='treatment_name' value='case' />
|
|
128 <param name='title' value='Cox glmnet test' />
|
|
129 <param name='nfold' value='10' />
|
|
130 <param name='logxform_cols' value='' />
|
|
131 <param name='alpha' value='0.95' />
|
|
132 <param name='do_standard' value="True" />
|
|
133 <param name='cox_time' value='1' />
|
|
134 <param name='cox_status' value='2' />
|
9
|
135 <param name='cox_id' value='2' />
|
|
136 <param name='predict_at' value='' />
|
4
|
137 <param name='fam' value='cox' />
|
9
|
138 <param name='yvar_cols' value='' />
|
4
|
139 <param name='xvar_cols' value='3,4,5' />
|
|
140 <param name='force_xvar_cols' value='3' />
|
9
|
141 <param name='output_full' value='F' />
|
|
142 <param name='output_pred' value='F' />
|
|
143 <output name='model_file' file='coxlassotest_modelres.xls'>
|
4
|
144 <assert_contents>
|
|
145 <has_text text="rhubarb" />
|
|
146 <has_text text="TRUE" />
|
|
147 <!-- 	 is XML escape code for tab -->
|
9
|
148 <!-- has_line line="regulator	partial_likelihood	forced_in	glmnet_model	best_lambda" / -->
|
|
149 <has_line line="regulator	partial_likelihood	forced_in	glmnet_model	best_lambda	lambdaChoice	alpha" />
|
|
150 <has_n_columns n="7" />
|
4
|
151 </assert_contents>
|
|
152 </output>
|
|
153 <output name='html_file' file='coxlassotest.html' compare='diff' lines_diff='10' />
|
|
154 </test>
|
|
155 </tests>
|
|
156 <help>
|
|
157
|
|
158 **Before you start**
|
|
159
|
|
160 Please read the glmnet documentation @ glmnet_
|
|
161
|
|
162 This Galaxy wrapper merely exposes that code and the glmnet_ documentation is essential reading
|
|
163 before getting useful results here.
|
|
164
|
|
165 **What it does**
|
|
166
|
|
167 From documentation at glmnet_ ::
|
|
168
|
|
169 Glmnet is a package that fits a generalized linear model via penalized maximum likelihood.
|
|
170 The regularization path is computed for the lasso or elasticnet penalty at a grid of values for the regularization parameter lambda.
|
|
171 The algorithm is extremely fast, and can exploit sparsity in the input matrix x.
|
|
172 It fits linear, logistic and multinomial, poisson, and Cox regression models.
|
|
173 A variety of predictions can be made from the fitted models.
|
|
174
|
|
175 Internal cross validation is used to optimise the choice of lambda based on CV AUC for logistic (binomial outcome) models, or CV mse for gaussian.
|
|
176
|
|
177 **Warning about the tyrany of dimensionality**
|
|
178
|
|
179 Yes, this package will select 'optimal' models even when you (optimistically) supply more predictors than you have cases.
|
|
180 The model returned is unlikely to represent the only informative regularisation path through your data - if you run repeatedly with
|
|
181 exactly the same settings, you will probably see many different models being selected.
|
|
182 This is not a software bug - the real problem is that you just don't have enough information in your data.
|
|
183
|
|
184 Sufficiently big jobs will take a while (eg each lasso regression with 20k features on 1k samples takes about 2-3 minutes on our aged cluster)
|
|
185
|
|
186 **Input**
|
|
187
|
|
188 Assuming you have more measurements than samples, you supply data as a tabular text file where each row is a sample and columns
|
|
189 are variables. You specify which columns are dependent (predictors) and which are observations for each sample. Each of multiple
|
|
190 dependent variable columns will be run and reported independently. Predictors can be forced in to the model.
|
|
191
|
|
192 **Output**
|
|
193
|
|
194 For each selected dependent regression variable, a brief report of the model coefficients predicted at the
|
|
195 'optimal' nfold CV value of lambda.
|
|
196
|
|
197 **Predicted event probabilities for Cox and Logistic models**
|
|
198
|
|
199 If you want to compare (eg) two competing clinical predictions, there's a companion generic NRI tool
|
|
200 for predicted event probabilities. Estimates dozens of measures of improvement in prediction. Currently only works for identical id subjects
|
|
201 but can probably be extended to independent sample predictions.
|
|
202
|
|
203 Given a model, we can generate a predicted p (for status 1) in binomial or cox frameworks so models can be evaluated in terms of NRI.
|
|
204 Of course, estimates are likely substantially inflated over 'real world' performance by being estimated from the same sample - but you probably
|
|
205 already knew that since you were smart enough to reach this far down into the on screen help. The author salutes you, intrepid reader!
|
|
206
|
|
207 It may seem an odd thing to do, but we can predict p for an event for each subject from our original data, given a parsimonious model. Doing
|
|
208 this for two separate models (eg, forcing in an additional known explanatory measurement to the new model) allows comparison of the two models
|
|
209 predicted status for each subject, or the same model in independent populations to see how badly it does
|
|
210
|
|
211 **Attributions**
|
|
212
|
|
213 glmnet_ is the R package exposed by this Galaxy tool.
|
|
214
|
|
215 Galaxy_ (that's what you are using right now!) for gluing everything together
|
|
216
|
|
217 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
|
|
218 licensed to you under the LGPL_ like other rgenetics artefacts
|
|
219
|
|
220 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
|
|
221 .. _glmnet: http://web.stanford.edu/~hastie/glmnet/glmnet_alpha.html
|
|
222 .. _Galaxy: http://getgalaxy.org
|
|
223
|
|
224
|
|
225 </help>
|
|
226
|
|
227 <configfiles>
|
|
228 <configfile name="runme">
|
|
229 <![CDATA[
|
|
230 library('glmnet')
|
|
231 library('lars')
|
|
232 library('survival')
|
|
233 library('pec')
|
|
234
|
|
235
|
|
236 message=function(x) {print.noquote(paste(x,sep=''))}
|
|
237
|
|
238
|
|
239 ross.cv.glmnet = function (x, y, weights, offset = NULL, lambda = NULL, type.measure = c("mse",
|
|
240 "deviance", "class", "auc", "mae"), nfolds = 10, foldid,
|
|
241 grouped = TRUE, keep = FALSE, parallel = FALSE, ...)
|
|
242 {
|
|
243 if (missing(type.measure))
|
|
244 type.measure = "default"
|
|
245 else type.measure = match.arg(type.measure)
|
|
246 if (!is.null(lambda) && length(lambda) < 2)
|
|
247 stop("Need more than one value of lambda for cv.glmnet")
|
|
248 N = nrow(x)
|
|
249 if (missing(weights))
|
|
250 weights = rep(1, N)
|
|
251 else weights = as.double(weights)
|
|
252 y = drop(y)
|
|
253 glmnet.call = match.call(expand.dots = TRUE)
|
|
254 sel = match(c("type.measure", "nfolds", "foldid", "grouped",
|
|
255 "keep"), names(glmnet.call), F)
|
|
256 if (any(sel))
|
|
257 glmnet.call = glmnet.call[-sel]
|
|
258 glmnet.call[[1]] = as.name("glmnet")
|
|
259 glmnet.object = glmnet(x, y, weights = weights, offset = offset,
|
|
260 lambda = lambda, ...)
|
|
261 glmnet.object\$call = glmnet.call
|
|
262 is.offset = glmnet.object\$offset
|
|
263 lambda = glmnet.object\$lambda
|
|
264 if (inherits(glmnet.object, "multnet")) {
|
|
265 nz = predict(glmnet.object, type = "nonzero")
|
|
266 nz = sapply(nz, function(x) sapply(x, length))
|
|
267 nz = ceiling(apply(nz, 1, median))
|
|
268 }
|
|
269 else nz = sapply(predict(glmnet.object, type = "nonzero"),
|
|
270 length)
|
|
271 if (missing(foldid))
|
|
272 foldid = sample(rep(seq(nfolds), length = N))
|
|
273 else nfolds = max(foldid)
|
|
274 if (nfolds < 3)
|
|
275 stop("nfolds must be bigger than 3; nfolds=10 recommended")
|
|
276 outlist = as.list(seq(nfolds))
|
|
277 if (parallel && require(foreach)) {
|
|
278 outlist = foreach(i = seq(nfolds), .packages = c("glmnet")) %dopar%
|
|
279 {
|
|
280 sel = foldid == i
|
|
281 if (is.matrix(y))
|
|
282 y_sub = y[!sel, ]
|
|
283 else y_sub = y[!sel]
|
|
284 if (is.offset)
|
|
285 offset_sub = as.matrix(offset)[!sel, ]
|
|
286 else offset_sub = NULL
|
|
287 glmnet(x[!sel, , drop = FALSE], y_sub, lambda = lambda,
|
|
288 offset = offset_sub, weights = weights[!sel],
|
|
289 ...)
|
|
290 }
|
|
291 }
|
|
292 else {
|
|
293 for (i in seq(nfolds)) {
|
|
294 sel = foldid == i
|
|
295 if (is.matrix(y))
|
|
296 y_sub = y[!sel, ]
|
|
297 else y_sub = y[!sel]
|
|
298 if (is.offset)
|
|
299 offset_sub = as.matrix(offset)[!sel, ]
|
|
300 else offset_sub = NULL
|
|
301 outlist[[i]] = glmnet(x[!sel, , drop = FALSE],
|
|
302 y_sub, lambda = lambda, offset = offset_sub,
|
|
303 weights = weights[!sel], ...)
|
|
304 }
|
|
305 }
|
|
306 fun = paste("cv", class(glmnet.object)[[1]], sep = ".")
|
|
307 cvstuff = do.call(fun, list(outlist, lambda, x, y, weights,
|
|
308 offset, foldid, type.measure, grouped, keep))
|
|
309 cvm = cvstuff\$cvm
|
|
310 cvsd = cvstuff\$cvsd
|
|
311 cvname = cvstuff\$name
|
|
312
|
|
313 out = list(lambda = lambda, cvm = cvm, cvsd = cvsd, cvup = cvm +
|
|
314 cvsd, cvlo = cvm - cvsd, nzero = nz, name = cvname, glmnet.fit = glmnet.object)
|
|
315 if (keep)
|
|
316 out = c(out, list(fit.preval = cvstuff\$fit.preval, foldid = foldid))
|
|
317
|
|
318 lamin = if (type.measure == "auc")
|
|
319 getmin(lambda, -cvm, cvsd)
|
|
320 else getmin(lambda, cvm, cvsd)
|
|
321 out = c(out, as.list(lamin))
|
|
322 hitsse = rep(0,ncol(x))
|
|
323 hitsmin = rep(0,ncol(x))
|
|
324 names(hitsse) = colnames(x)
|
|
325 names(hitsmin) = colnames(x)
|
|
326 olmin = lamin\$lambda.min
|
|
327 ol1sd = lamin\$lambda.1se
|
|
328 lambs = c(olmin,ol1sd)
|
|
329 names(lambs) = c('olmin','ol1sd')
|
|
330 for (cvfit in outlist) {
|
|
331 colmin = which(cvfit\$lambda == olmin)
|
|
332 col1se = which(cvfit\$lambda == ol1sd)
|
|
333 nzmin = which(cvfit\$beta[,colmin] != 0)
|
|
334 nz1se = which(cvfit\$beta[,col1se] != 0)
|
|
335 hitsse[nz1se] = hitsse[nz1se] + 1
|
|
336 hitsmin[nzmin] = hitsmin[nzmin] + 1
|
|
337 }
|
|
338 obj = c(out,list(cvhits.1se=hitsse,cvhits.min=hitsmin))
|
|
339 class(obj) = "cv.glmnet"
|
|
340 obj
|
|
341 }
|
|
342
|
|
343 mdsPlot = function(dm,myTitle,groups=NA,outpdfname,transpose=T)
|
|
344 {
|
|
345
|
|
346 samples = colnames(dm)
|
|
347 mt = myTitle
|
|
348 pcols=c('maroon')
|
|
349 if (! is.na(groups))
|
|
350 {
|
|
351 gu = unique(groups)
|
|
352 colours = rainbow(length(gu),start=0.1,end=0.9)
|
|
353 pcols = colours[match(groups,gu)]
|
|
354 }
|
|
355 mydata = dm
|
|
356 if (transpose==T)
|
|
357 {
|
|
358 mydata = t(dm)
|
|
359 }
|
|
360 npred = ncol(mydata)
|
|
361 d = dist(mydata)
|
|
362 fit = cmdscale(d,eig=TRUE, k=min(10,npred-2))
|
|
363 xmds = fit\$points[,1]
|
|
364 ymds = fit\$points[,2]
|
|
365 pdf(outpdfname)
|
|
366 plot(xmds, ymds, xlab="Dimension 1", ylab="Dimension 2",
|
|
367 main=paste(mt,"MDS Plot"),type="n", col=pcols, cex=0.35)
|
|
368 text(xmds, ymds, labels = row.names(mydata), cex=0.35, col=pcols)
|
|
369 grid(col="lightgray",lty="dotted")
|
|
370 dev.off()
|
|
371 }
|
|
372
|
|
373
|
|
374 getpredp_logistic = function(x,yvec,yvarname,id)
|
|
375 {
|
|
376 yvals = unique(yvec)
|
|
377 if (length(yvals) != 2) {
|
|
378 message(c('ERROR: y does not have 2 values =',paste(yvals,collapse=',')))
|
|
379 return(NA)
|
|
380 }
|
|
381 cols = colnames(x)
|
|
382 if (length(cols) == 0) {
|
|
383 message('ERROR: No columns in input x? Cannot predict!')
|
|
384 return(NA)
|
|
385 }
|
|
386 cn = paste(cols, collapse = ' + ')
|
|
387
|
|
388 formstring=paste("y ~",cn)
|
|
389 form = as.formula(formstring)
|
|
390 ok = complete.cases(x)
|
|
391
|
|
392 if (sum(ok) < length(ok)) {
|
|
393 x = x[ok,]
|
|
394 yvec = yvec[ok]
|
|
395 id = id[ok]
|
|
396 }
|
|
397 nx = data.frame(id=id,x,y=yvec)
|
|
398 print('nx,yvec:')
|
|
399 print(head(nx,n=3))
|
|
400 print(yvec)
|
|
401 mdl = glm(form, data=nx, family="binomial", na.action=na.omit)
|
|
402 message(c('Model format =',formstring))
|
|
403 message(paste('Predictive model details used to generate logistic outcome probabilities for',yvarname,':'))
|
|
404 print(summary(md1))
|
|
405 print(anova(md1))
|
|
406 predp = predict(md1,nx,type="response")
|
|
407 p1 = data.frame(id=id,pred_response=predp,obs_response=yvec)
|
|
408 return(p1)
|
|
409 }
|
|
410
|
|
411
|
|
412
|
|
413
|
|
414 getpredp_cox = function(x,time,status,id,predict_at)
|
|
415 {
|
|
416 cols = colnames(x)
|
|
417 if (length(cols) == 0) {
|
|
418 message('ERROR: No columns in input x? Cannot predict!')
|
|
419 return(NA)
|
|
420 }
|
|
421 cn = paste(colnames(x), collapse = ' + ')
|
|
422
|
|
423 formstring=paste("Surv(time, status) ~",cn)
|
|
424
|
|
425 form = as.formula(formstring)
|
|
426
|
|
427 ok = complete.cases(x)
|
|
428
|
|
429 if (sum(ok) < length(ok)) {
|
|
430 x = x[ok,]
|
|
431 time = time[ok]
|
|
432 status = status[ok]
|
|
433 id = id[ok]
|
|
434 }
|
|
435 nx = data.frame(x,time=time,status=status)
|
|
436 m1 = coxph(form, data=nx,singular.ok=TRUE)
|
|
437 print.noquote('Predictive model details used to generate survival probabilities:')
|
|
438 print.noquote(m1)
|
|
439 predpq = predictSurvProb(object=m1, newdata=nx, times=predict_at)
|
|
440 predpq = 1-predpq
|
|
441 colnames(predpq) = paste('p_surv_to',predict_at,sep='_')
|
|
442 p1 = data.frame(id=id,predpq,time=time,status=status)
|
|
443 return(p1)
|
|
444 }
|
|
445
|
|
446
|
|
447
|
|
448 dolasso_cox = function(x,y,debugOn=F,maxsteps=10000,nfold=10,xcolnames,ycolnames,optLambda='lambda.1se',out_full=F,out_full_file=NA,
|
|
449 out_pred=F,out_pred_file=NA,cox_id=NA, descr='Cox test',do_standard=F,alpha=0.9,penalty,predict_at,mdsplots=F)
|
|
450 {
|
|
451 logf = file("cox_rglasso.log", open = "a")
|
|
452 sink(logf,type = c("output", "message"))
|
|
453 res = NULL
|
|
454 if (mdsplots==T) {
|
|
455 outpdfname = 'cox_x_in_sample_space_MDS.pdf'
|
|
456 p = try({ mdsPlot(x,'measurements in sample space',groups=NA,outpdfname=outpdfname,transpose=T) },T)
|
|
457 if (class(p) == "try-error")
|
|
458 {
|
|
459 print.noquote(paste('Unable to produce predictors in sample space mds plot',p))
|
|
460 }
|
|
461 outpdfname = 'cox_samples_in_x_space_MDS.pdf'
|
|
462 p = try({mdsPlot(x,'samples in measurement space',groups=y,outpdfname=outpdfname,transpose=F) },T)
|
|
463 if (class(p) == "try-error")
|
|
464 {
|
|
465 print.noquote(paste('Unable to produce samples in measurement space mds plots',p))
|
|
466 }
|
|
467 }
|
|
468 if (is.na(predict_at)) { predict_at = quantile(y) }
|
|
469 message(paste('@@@ Cox model will be predicted at times =',paste(predict_at,collapse=',')))
|
|
470 do_standard = do_standard
|
|
471 standardize = do_standard
|
|
472 normalize = do_standard
|
|
473 p = try({larsres = glmnet(x,y,family='cox',standardize=standardize,alpha=alpha,penalty.factor=penalty )},T)
|
|
474 if (class(p) == "try-error")
|
|
475 {
|
|
476 print.noquote('Unable to run cox glmnet on your data')
|
|
477 print.noquote(p)
|
|
478 sink()
|
|
479 return(NA)
|
|
480 }
|
|
481 if (out_full == T)
|
|
482 {
|
|
483 b = as.matrix(larsres\$beta)
|
|
484 nb = length(colnames(b))
|
|
485 bcoef = b[,nb]
|
|
486 lastl = larsres\$lambda[length(larsres\$lambda)]
|
|
487 allres = data.frame(x=rownames(b),beta=bcoef,lambda=lastl)
|
|
488 write.table(format(allres,digits=5),out_full_file,quote=FALSE, sep="\t",row.names=F)
|
|
489 }
|
|
490
|
|
491 outpdf = paste('cox',descr,'glmnetdev.pdf',sep='_')
|
|
492 try(
|
|
493 {
|
|
494 pdf(outpdf)
|
|
495 plot(larsres,main='cox glmnet',label=T)
|
|
496 grid()
|
|
497 dev.off()
|
|
498 },T)
|
|
499
|
|
500 larscv = NA
|
|
501
|
|
502 p = try({larscv=ross.cv.glmnet(x,y,family=fam,type.measure='deviance',penalty=penalty)},T)
|
|
503 if (class(p) == "try-error") {
|
|
504 print.noquote(paste('Unable to cross validate your data',p))
|
|
505 sink()
|
|
506 return(NA)
|
|
507 }
|
|
508 lse = larscv\$cvhits.1se
|
|
509 lmin = larscv\$cvhits.min
|
|
510 tot = lse + lmin
|
|
511 allhits = data.frame(hits_lambda_1se = lse,hits_lambda_min = lmin)
|
|
512 nzhits = allhits[which(tot != 0),]
|
|
513 message('Times each predictor was selected in CV models (excluding zero count predictors)')
|
|
514 print.noquote(nzhits)
|
|
515 out_nz_file = 'cox_cross_validation_model_counts.xls'
|
|
516 write.table(nzhits,out_nz_file,quote=FALSE, sep="\t",row.names=F)
|
|
517
|
|
518 outpdf = paste('cox',descr,'glmnet_cvdeviance.pdf',sep='_')
|
|
519
|
|
520 p = try(
|
|
521 {
|
|
522 pdf(outpdf)
|
|
523 plot(larscv,main='Deviance',label=T)
|
|
524 grid()
|
|
525 dev.off()
|
|
526 },T)
|
|
527 if (optLambda == 'lambda.min') {
|
|
528 best_lambda = larscv\$lambda.min
|
|
529 bestcoef = as.matrix(coef(larscv, s = "lambda.min"))
|
|
530 } else {
|
|
531 best_lambda = larscv\$lambda.1se
|
|
532 bestcoef = as.matrix(coef(larscv, s = "lambda.1se"))
|
|
533 }
|
|
534 inmodel = which(bestcoef != 0)
|
|
535 coefs = bestcoef[inmodel]
|
|
536 preds = rownames(bestcoef)[inmodel]
|
|
537
|
|
538 names(coefs) = preds
|
|
539 pen = as.logical( ! penalty[inmodel])
|
|
540 if (out_pred==T)
|
|
541 {
|
|
542 if (length(inmodel) > 0 ) {
|
|
543 predcols = inmodel
|
|
544 xmat = as.matrix(x[,predcols])
|
|
545 colnames(xmat) = preds
|
|
546 bestpred = getpredp_cox(x=xmat,time=y[,'time'],status=y[,'status'],id=cox_id, predict_at=predict_at)
|
|
547 pred = data.frame(responsep=bestpred, best_lambda=best_lambda,lamchoice=optLambda,alpha=alpha)
|
|
548 write.table(pred,out_pred_file,quote=FALSE, sep="\t",row.names=F)
|
|
549 } else { print.noquote('WARNING: No coefficients in selected model to predict with - no predictions made') }
|
|
550 }
|
|
551 if (debugOn) {
|
|
552 print.noquote(paste('best_lambda=',best_lambda,'saving cox respreds=',paste(names(coefs),collapse=','),'as predictors of survival. Coefs=',paste(coefs,collapse=',')))
|
|
553 }
|
|
554 p = try({res = data.frame(regulator=names(coefs),partial_likelihood=coefs,forced_in=pen,glmnet_model='cox',best_lambda=best_lambda,
|
|
555 lambdaChoice=optLambda,alpha=alpha)},T)
|
|
556 if (class(p) == "try-error") {
|
|
557 message(paste('@@@ unable to return a dataframe',p))
|
|
558 sink()
|
|
559 return(NA)
|
|
560 }
|
|
561 print.noquote('@@@ Results preview:')
|
|
562 print.noquote(res,digits=5)
|
|
563 sink()
|
|
564 return(res)
|
|
565
|
|
566 }
|
|
567
|
|
568
|
|
569 do_lasso = function(x=NA,y=NA,do_standard=T,debugOn=T,defaultFam="gaussian",optLambda='minLambda',descr='description', indx=1,target='target',sane=F,
|
|
570 alpha=0.9,nfold=10,penalty=c(),out_pred=F,out_pred_file='outpred',yvarname='yvar',id=c(),mdsplots=F)
|
|
571 {
|
|
572 logf = file(paste(target,"rglasso.log",sep='_'), open = "a")
|
|
573 sink(logf,type = c("output", "message"))
|
|
574 res = NA
|
|
575 phe_is_bin = (length(unique(y)) == 2)
|
|
576 forcedin = paste(colnames(x)[which(penalty == 0)],collapse=',')
|
|
577 fam = "gaussian"
|
|
578 if (defaultFam %in% c("poisson","binomial","gaussian","multinomial")) fam=defaultFam
|
|
579 if (phe_is_bin == T) {
|
|
580 fam = "binomial"
|
|
581 }
|
|
582 print.noquote(paste('target=',target,'is binary=',phe_is_bin,'dim(x)=',paste(dim(x),collapse=','),'length(y)=',length(y),'force=',forcedin,'fam=',fam))
|
|
583 standardize = do_standard
|
|
584 p = try({larsres = glmnet(x,y,family=fam,standardize=standardize,maxit=10000,alpha=alpha,penalty.factor=penalty) },T)
|
|
585 if (class(p) == "try-error")
|
|
586 {
|
|
587 print(paste('ERROR: unable to run glmnet for target',target,'error=',p))
|
|
588 sink()
|
|
589 return(NA)
|
|
590 }
|
|
591
|
|
592 mt = paste('Glmnet fraction deviance for',target)
|
|
593 outpdf = paste(target,'glmnetPath.pdf',sep='_')
|
|
594 pdf(outpdf)
|
|
595 plot(larsres,main=mt,label=T)
|
|
596 grid()
|
|
597 dev.off()
|
|
598
|
|
599 outpdf = paste(target,'glmnetDeviance.pdf',sep='_')
|
|
600
|
|
601 mt2 = paste('Glmnet lambda for',target)
|
|
602
|
|
603 pdf(outpdf)
|
|
604 plot(larsres,xvar="lambda",main=mt2,label=T)
|
|
605 grid()
|
|
606 dev.off()
|
|
607
|
|
608 larscv = NA
|
|
609 if (fam=="binomial") {
|
|
610 tmain = paste(target,'AUC')
|
|
611 outpdf = paste(target,'glmnetCV_AUC.pdf',sep='_')
|
|
612 p = try({larscv = ross.cv.glmnet(x=x,y=y,family=fam,type.measure='auc')},T)
|
|
613 } else {
|
|
614 tmain = paste(target,'CV MSE')
|
|
615 outpdf = paste(target,'glmnetCV_MSE.pdf',sep='_')
|
|
616 p = try({larscv = ross.cv.glmnet(x=x,y=y,family=fam,type.measure='mse')},T)
|
|
617 }
|
|
618 if (class(p) == "try-error")
|
|
619 {
|
|
620 print(paste('ERROR: unable to run cross validation for target',target,'error=',p))
|
|
621 sink()
|
|
622 return(NA)
|
|
623 }
|
|
624
|
|
625
|
|
626 pdf(outpdf)
|
|
627 plot(larscv,main=tmain)
|
|
628 grid()
|
|
629 dev.off()
|
|
630
|
|
631 lse = larscv\$cvhits.1se
|
|
632 lmin = larscv\$cvhits.min
|
|
633 tot = lse + lmin
|
|
634 allhits = data.frame(pred=colnames(x),hits_lambda_1se = lse,hits_lambda_min = lmin)
|
|
635 nzhits = allhits[which(tot != 0),]
|
|
636 message('Total hit count for each predictor over all CV models (excluding zero count predictors)')
|
|
637 print.noquote(nzhits)
|
|
638 out_nz_file = paste(target,'cross_validation_model_counts.xls',sep='_')
|
|
639 write.table(nzhits,out_nz_file,quote=FALSE, sep="\t",row.names=F)
|
|
640
|
|
641 ipenalty = c(0,penalty)
|
|
642 if (optLambda == 'lambda.min') {
|
|
643 best_lambda = larscv\$lambda.min
|
|
644 bestpred = as.matrix(coef(larscv, s = "lambda.min"))
|
|
645 } else {
|
|
646 best_lambda = larscv\$lambda.1se
|
|
647 bestpred = as.matrix(coef(larscv, s = "lambda.1se"))
|
|
648 }
|
|
649 inmodel = which(bestpred != 0)
|
|
650 coefs = bestpred[inmodel,1]
|
|
651 preds = rownames(bestpred)[inmodel]
|
|
652 iforced = ipenalty[inmodel]
|
|
653 forced = ! as.logical(iforced)
|
|
654 names(coefs) = preds
|
|
655 ncoef = length(coefs) - 1
|
|
656 if (out_pred==T && fam=="binomial")
|
|
657 {
|
|
658 print.noquote(paste('Predicting',target,'probabilities from binomial glmnet at alpha',alpha,'and lambda',best_lambda))
|
|
659 bestpred = predict.glmnet(larsres,s=best_lambda,newx=x,type="response")
|
|
660 bestpred = exp(bestpred)/(1+exp(bestpred))
|
|
661 pred = data.frame(id=id,y=y,predp=as.vector(bestpred), best_lambda=best_lambda)
|
|
662 write.table(pred,out_pred_file,quote=FALSE, sep="\t",row.names=F)
|
|
663 }
|
|
664 if (debugOn) {cat(indx,'best_lambda=',best_lambda,'saving',fam,'respreds=',names(coefs),'as predictors of',target,'coefs=',coefs,'\n')}
|
|
665 res = try(data.frame(i=indx,pred=target,regulator=names(coefs),coef=coefs,forced_in=forced,glmnet_model=fam,ncoef=ncoef,
|
|
666 best_lambda=best_lambda,lambdaChoice=optLambda,alpha=alpha),T)
|
|
667 if (class(res) == "try-error") {
|
|
668 sink()
|
|
669 return(NA) }
|
|
670 print.noquote(res)
|
|
671 sink()
|
|
672 return(res)
|
|
673 }
|
|
674
|
|
675
|
|
676 dolasso_generic = function(predvars=NA,depvars=NA,debugOn=T,maxsteps=100, alpha=0.9,nfold=10,xcolnames=c(),ycolnames=c(),optLambda='minLambda', out_pred_file=NA,
|
|
677 descr="describe me",do_standard=F,defaultFam="gaussian",penalty=c(),out_pred=F,cox_id=c(),mdsplots=F,xfilt=0.95)
|
|
678 {
|
|
679 logf = file("rglasso.log", open = "a")
|
|
680 sink(logf,type = c("output", "message"))
|
|
681 xdat = predvars
|
|
682 xm = data.matrix(xdat)
|
|
683 res = NULL
|
|
684 id = cox_id
|
|
685 depnames = ycolnames
|
|
686 ndep = length(depnames)
|
|
687 if (mdsplots==T) {
|
|
688 outpdfname = 'rglasso_x_in_sample_space_MDS.pdf'
|
|
689 p = try({ mdsPlot(xm,'measurements in sample space',groups=NA,outpdfname=outpdfname,transpose=T) },T)
|
|
690 if (class(p) == "try-error")
|
|
691 {
|
|
692 print.noquote(paste('Unable to produce predictors in sample space mds plot. Error:',p))
|
|
693 }
|
|
694 outpdfname = 'rglasso_samples_in_x_space_MDS.pdf'
|
|
695 p = try({mdsPlot(xm,'samples in measurement space',groups=NA,outpdfname=outpdfname,transpose=F) },T)
|
|
696 if (class(p) == "try-error")
|
|
697 {
|
|
698 print.noquote(paste('Unable to produce samples in measurement space mds plot. Error:',p))
|
|
699 }
|
|
700 }
|
|
701 ndat = nrow(xm)
|
|
702 cfracs = colSums(! is.na(xm))/ndat
|
|
703 keepme = (cfracs >= xfilt)
|
|
704 print.noquote(paste('Removing', sum(! keepme), 'xvars with more than',xfilt,'fraction missing'))
|
|
705 vars = apply(xm,2,var,na.rm=T)
|
|
706 xm = xm[,keepme]
|
|
707 for (i in c(1:max(1,ndep))) {
|
|
708 target = depnames[i]
|
|
709 if (length(target) < 1) { target='y' }
|
|
710 if (i %% 100 == 0) { cat(i,target,'\n') }
|
|
711 if (ndep <= 1) {
|
|
712 y=depvars
|
|
713 } else {
|
|
714 y = depvars[,i]
|
|
715 }
|
|
716 if (fam == "binomial") {y = as.factor(y)}
|
|
717 x = xm
|
|
718 id = cox_id
|
|
719 if (fam != "cox") {
|
|
720 ok = complete.cases(x,y)
|
|
721 if (sum(! ok) > 0) {
|
|
722 message(paste('@@@ Removing',sum(! ok),'cases with missing y of',length(y),'@@@'))
|
|
723 y = y[(ok)]
|
|
724 x = x[(ok),]
|
|
725 id = id[(ok)]
|
|
726 }
|
|
727 }
|
|
728 ok = complete.cases(y)
|
|
729 if (sum(ok) == 0 ) {
|
|
730 print(paste("No complete cases found for",target,"in input x dim =",paste(dim(xm),collapse=','),"length y=",length(y)))
|
|
731 } else {
|
|
732 if (i == 1) { outpred = out_pred_file
|
|
733 } else {
|
|
734 outpred = paste(target,'predicted_output.xls')
|
|
735 }
|
|
736 regres = do_lasso(x=x,y=y,do_standard=do_standard,debugOn=debugOn,defaultFam=defaultFam,optLambda=optLambda,out_pred_file=outpred,
|
|
737 descr=descr,indx=i,target=target,alpha=alpha,nfold=nfold,penalty=penalty,out_pred=out_pred,yvarname=target,id=id,mdsplots=mdsplots)
|
|
738 if (! is.na(regres)) { res = rbind(res,regres) }
|
|
739 }
|
|
740 }
|
|
741 print.noquote('@@@ Results preview:')
|
|
742 print.noquote(res,digits=5)
|
|
743 sink()
|
|
744 return(res)
|
|
745 }
|
|
746
|
|
747
|
|
748 corPlot=function(xdat=c(),main='main title',is_raw=T)
|
|
749 {
|
|
750 library(pheatmap)
|
|
751 library(gplots)
|
|
752 if (is_raw) {
|
|
753 cxdat = cor(xdat,method="spearman",use="pairwise.complete.obs")
|
|
754 } else {
|
|
755 cxdat=xdat
|
|
756 }
|
|
757 xro = nrow(cxdat)
|
|
758 if (xro > 1000) stop("Too many rows for heatmap, who can read?!")
|
|
759 fontsize_col = 5.0
|
|
760 pheatmap(cxdat, main=main, show_colnames = F, width=30, height=30,
|
|
761 fontsize_row=fontsize_col, border_color=NA)
|
|
762 }
|
|
763
|
|
764
|
|
765
|
|
766
|
|
767 runTest = function(n=10)
|
|
768 {
|
|
769 set.seed (NULL)
|
|
770 Y = data.frame(y1=runif (n),y2=runif(n))
|
|
771 Xv <- runif(n*n)
|
|
772 X <- matrix(Xv, nrow = n, ncol = n)
|
|
773
|
|
774 mydf <- data.frame(Y, X)
|
|
775
|
|
776 regres_out = dolasso_generic(predvars=X,depvars=Y,debugOn=T,p.cutoff = 0.05,maxsteps=10000,nfold=10,
|
|
777 descr='randomdata',do_standard=do_standard,defaultFam="gaussian",alpha=0.05)
|
|
778 return(regres_out)
|
|
779 }
|
|
780 ]]>
|
|
781 options(width=512)
|
|
782 options(digits=5)
|
|
783 alpha = $alpha
|
|
784 nfold = $nfold
|
|
785 optLambda = "$optLambda"
|
|
786 Out_Dir = "$html_file.files_path"
|
|
787 Input = "$input1"
|
|
788 indat = read.table(Input,head=T,sep='\t')
|
|
789 datcols = colnames(indat)
|
|
790 myTitle = "$title"
|
|
791 outtab = "$model_file"
|
|
792 do_standard = as.logical("$do_standard")
|
|
793 mdsplots = as.logical("$mdsplots")
|
|
794 fam = "$model.fam"
|
|
795 xvar_cols_in = "$xvar_cols"
|
|
796 force_xvar_cols_in = "$force_xvar_cols"
|
|
797 xvar_cols = as.numeric(strsplit(xvar_cols_in,",")[[1]])
|
|
798 force_xvar_cols = c()
|
|
799 penalties = rep(1,length(datcols))
|
|
800 forced_in = NA
|
|
801
|
|
802 logxform = "$logxform_cols"
|
|
803 if (logxform != "None") {
|
|
804 logxform_cols = as.numeric(strsplit(logxform,",")[[1]])
|
|
805 if (length(logxform_cols) > 0) {
|
|
806 small = 1e-10
|
|
807 sset = indat[,logxform_cols]
|
|
808 zeros = which(sset==0,arr.ind=T)
|
|
809 nz = nrow(zeros)
|
|
810 if (nz > 0) {
|
|
811 message(paste('Log transforming encountered',nz,'zeros - added 1e-10'))
|
|
812 sset[zeros] = sset[zeros] + small
|
|
813 lset = log(sset)
|
|
814 indat[,logxform_cols] = lset
|
|
815 }
|
|
816 }
|
|
817 }
|
|
818 if (force_xvar_cols_in != "None")
|
|
819 {
|
|
820 force_xvar_cols = as.numeric(strsplit(force_xvar_cols_in,",")[[1]])
|
|
821 allx = c(xvar_cols,force_xvar_cols)
|
|
822 xvar_cols = unique(allx)
|
|
823 xvar_cols = xvar_cols[order(xvar_cols)]
|
|
824 penalties[force_xvar_cols] = 0
|
|
825 }
|
|
826 penalty = penalties[xvar_cols]
|
|
827 forcedin = paste(datcols[which(penalties == 0)],collapse=',')
|
|
828 cox_id_col = NA
|
|
829 cox_id = NA
|
|
830
|
|
831 message(paste('@@@ Using alpha =',alpha,'for all models'))
|
|
832 x = indat[,xvar_cols]
|
|
833 nx = nrow(x)
|
|
834 cx = ncol(x)
|
|
835 message(paste('@@@@ Input has',nx,'samples and',cx,'predictors'))
|
|
836 if (cx > nx) {
|
|
837 message('@@@ WARNING: Models will have more variables than cases so glmnet will likely return one of many possible solutions! Please DO NOT expect reliable results - glmnet is clever but not magical @@@')
|
|
838 }
|
|
839
|
|
840 xcolnames = datcols[xvar_cols]
|
|
841
|
|
842 if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
|
|
843 out_full = F
|
|
844 out_full_file = NA
|
|
845 out_pred_file = ""
|
|
846 out_pred = as.logical("$model.output_pred")
|
|
847
|
|
848 #if $model.fam == "binomial" or $model.fam == "cox":
|
|
849 cox_id_col = $model.cox_id
|
|
850 cox_id = indat[,cox_id_col]
|
|
851 if (out_pred == T) {
|
|
852 out_pred_file="$output_pred_file"
|
|
853 rownames(x) = cox_id
|
|
854 }
|
|
855 #end if
|
|
856 #if $model.fam == "cox":
|
|
857 cox_time = $model.cox_time
|
|
858 cox_status = $model.cox_status
|
|
859 out_full = as.logical("$model.output_full")
|
|
860 if (out_full == T) { out_full_file="$output_full_file" }
|
|
861 yvar_cols = c(cox_time,cox_status)
|
|
862 ycolnames = c('time','status')
|
|
863 istat = as.double(indat[,cox_status])
|
|
864 itime = as.double(indat[,cox_time])
|
|
865 predict_at = quantile(itime)
|
|
866 if ("$model.predict_at" > "")
|
|
867 {
|
|
868 pa = "$model.predict_at"
|
|
869 predict_at = as.numeric(strsplit(pa,",")[[1]])
|
|
870 }
|
|
871 y = data.frame(time = itime, status = istat)
|
|
872 ustat = unique(istat)
|
|
873 if ((length(ustat) != 2) | (! 1 %in% ustat ) | (! 0 %in% ustat))
|
|
874 {
|
|
875 print.noquote(paste('INPUT ERROR: status must have 0 (censored) or 1 (event) but found',paste(ustat,collapse=',') ))
|
|
876 quit(save='no',status=1)
|
|
877 }
|
|
878 y = as.matrix(y)
|
|
879 x = as.matrix(x)
|
|
880 print.noquote(paste('@@@ Cox model will predict yvar=',datcols[cox_status],'using cols=',paste(xcolnames,collapse=','),'n preds=',length(xcolnames),
|
|
881 'forced in=',forcedin))
|
|
882 regres_out = dolasso_cox(x=x,y=y,debugOn=F,maxsteps=10000,nfold=nfold,xcolnames=xcolnames,ycolnames=ycolnames,optLambda=optLambda,out_full=out_full,out_full_file=out_full_file,
|
|
883 out_pred=out_pred,out_pred_file=out_pred_file,cox_id=cox_id,descr=myTitle,do_standard=do_standard,alpha=alpha,penalty=penalty,predict_at=predict_at,mdsplots=mdsplots)
|
|
884 #else:
|
|
885 yvar_cols = "$model.yvar_cols"
|
|
886 yvar_cols = as.numeric(strsplit(yvar_cols,",")[[1]])
|
|
887 ycolnames = datcols[yvar_cols]
|
|
888 print.noquote(paste('@@@',fam,'model will predict yvar=',paste(ycolnames,collapse=','),'using cols=',paste(xcolnames,collapse=','),'n preds=',length(xcolnames),
|
|
889 'forced in=',forcedin))
|
|
890 y = data.matrix(indat[,yvar_cols])
|
|
891 print.noquote(paste('Model will use',fam,'link function to predict yvar=',paste(ycolnames,collapse=','),'n preds=',length(xcolnames),'forced in=',forcedin))
|
|
892 regres_out = dolasso_generic(predvars=x,depvars=y,debugOn=F, maxsteps=10000,nfold=nfold,xcolnames=xcolnames,ycolnames=ycolnames,optLambda=optLambda,out_pred_file=out_pred_file,
|
|
893 descr=myTitle,do_standard=do_standard,defaultFam=fam,alpha=alpha,penalty=penalty,out_pred=out_pred,cox_id=cox_id,mdsplots=mdsplots)
|
|
894 #end if
|
|
895
|
|
896 write.table(format(regres_out,digits=5),outtab,quote=FALSE, sep="\t",row.names=F)
|
|
897 print.noquote('@@@ SessionInfo for this R session:')
|
|
898 sessionInfo()
|
|
899 warnings()
|
|
900
|
|
901 </configfile>
|
|
902 </configfiles>
|
|
903 <citations>
|
|
904 <citation type="bibtex">
|
|
905 @Article{Friedman2010, title = {Regularization Paths for Generalized Linear Models via Coordinate Descent},
|
|
906 author = {Jerome Friedman and Trevor Hastie and Robert Tibshirani},
|
|
907 journal = {Journal of Statistical Software},
|
|
908 year = {2010},
|
|
909 volume = {33},
|
|
910 number = {1},
|
|
911 pages = {1--22},
|
|
912 url = {http://www.jstatsoft.org/v33/i01/}
|
|
913 }
|
|
914 </citation>
|
|
915 <citation type="doi">
|
|
916 10.1093/bioinformatics/bts573
|
|
917 </citation>
|
|
918 </citations>
|
|
919 </tool>
|
|
920
|
|
921
|
|
922
|