annotate rglasso_cox.xml @ 13:445cb45d1bc0 draft

Uploaded
author fubar
date Thu, 08 Jan 2015 01:05:20 -0500
parents 4b62ea84c318
children a061150f1d94
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
1 <tool id="rglasso_cox" name="Lasso" version="0.03">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
2 <description>and cox regression using elastic net</description>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
3 <requirements>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
4 <requirement type="package" version="3.1.1">R_3_1_1</requirement>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
5 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
6 <requirement type="package" version="9.10">ghostscript</requirement>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
7 <requirement type="package" version="2.14">glmnet_lars_2_14</requirement>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
8 </requirements>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
9 <command interpreter="python">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
10 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "rglasso"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
11 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
12 </command>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
13 <inputs>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
14 <param name="title" type="text" value="lasso test" size="80" label="Title for job outputs" help="Typing a short, meaningful text here will help remind you (and explain to others) what the outputs represent">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
15 <sanitizer invalid_char="">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
16 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
17 </sanitizer>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
18 </param>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
19 <param name="input1" type="data" format="tabular" label="Select an input tabular text file from your history. Rows represent samples; Columns are measured phenotypes"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
20 multiple='False' optional="False" help="Tabular text data with samples as rows, phenotypes as columns with a header row of column identifiers" />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
21 <param name="xvar_cols" label="Select columns containing numeric variables to use as predictor (x) variables" type="data_column" data_ref="input1" numerical="False"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
22 multiple="True" use_header_names="True" force_select="True" />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
23 <param name="force_xvar_cols" label="Select numeric columns containing variables ALWAYS included as predictors in cross validation" type="data_column" data_ref="input1" numerical="False"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
24 multiple="True" use_header_names="True" force_select="False"/>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
25 <conditional name="model">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
26 <param name="fam" type="select" label="GLM Link function for models"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
27 help="Binary dependent variables will automatically be set to Binomial no matter what this is set to">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
28 <option value="gaussian" selected="true">Gaussian - continuous dependent (y)</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
29 <option value="binomial">Binomial dependent variables</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
30 <option value="poisson">Poisson (eg counts)</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
31 <option value="cox">Cox models - require special setup for y variables - see below</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
32 </param>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
33 <when value="gaussian">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
34 <param name="yvar_cols" label="Select numeric columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="False"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
35 multiple="True" use_header_names="True" help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
36 <param name="output_full" type="hidden" value='F' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
37 <param name="output_pred" type="hidden" value='F' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
38 <param name="cox_id" label="Select column containing a unique sample identifier"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
39 help = "Only really needed for output sample specific predicted values downstream."
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
40 type="data_column" data_ref="input1" numerical="False" force_select="True"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
41 multiple="False" use_header_names="True" />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
42 </when>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
43 <when value="binomial">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
44 <param name="yvar_cols" label="Select numeric columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="False"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
45 multiple="True" use_header_names="True" help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
46 <param name="output_full" type="hidden" value='F' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
47 <param name="output_pred" type="select" label="Create a tabular output with predicted values for each subject from the optimal model for (eg) NRI estimates" >
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
48 <option value="F" selected="true">No predicted value output file</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
49 <option value="T">Create a predicted value output file</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
50 </param>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
51 <param name="cox_id" label="Select column containing a unique sample identifier"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
52 help = "Only really needed for output sample specific predicted values downstream."
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
53 type="data_column" data_ref="input1" numerical="False" force_select="True"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
54 multiple="False" use_header_names="True" />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
55 <param name="predict_at" type="hidden" value='' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
56
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
57 </when>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
58 <when value="poisson">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
59 <param name="yvar_cols" label="Select columns containing variables to use as the dependent (y) in elasticnet" type="data_column" data_ref="input1" numerical="True"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
60 multiple="True" use_header_names="True" help = "If multiple, each will be modelled against all the x variables and reported separately." force_select="True"/>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
61 <param name="output_full" type="hidden" value='F' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
62 <param name="output_pred" type="hidden" value='F' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
63 <param name="predict_at" type="hidden" value='' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
64 <param name="cox_id" label="Select column containing a unique sample identifier"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
65 help = "Optional. Only really needed for output sample specific predicted values downstream. Free - enjoy"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
66 type="data_column" data_ref="input1" numerical="True" force_select="False"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
67 multiple="False" use_header_names="True" />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
68 </when>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
69 <when value="cox">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
70 <param name="cox_time" label="Select column containing time under observation for Cox regression"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
71 type="data_column" data_ref="input1" numerical="True" force_select="True"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
72 multiple="False" use_header_names="True" help = "This MUST contain a time period - eg continuous years or days to failure or right censoring"/>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
73 <param name="cox_status" label="Select column containing status = 1 for outcome of interest at the end of the time under observation or 0 for right censoring"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
74 type="data_column" data_ref="input1" numerical="True" force_select="True"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
75 multiple="False" use_header_names="True" help = "This MUST contain 1 for subjects who had an event at that time or 0 for a right censored observation"/>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
76 <param name="cox_id" label="Select column containing a unique sample identifier"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
77 help = "Optional. Only really needed for output sample specific predicted values downstream. Free - enjoy"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
78 type="data_column" data_ref="input1" numerical="False" force_select="False"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
79 multiple="False" use_header_names="True" />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
80 <param name="output_full" type="select" label="Create a tabular output with coefficients for all predictors" >
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
81 <option value="F" selected="true">No full model output file</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
82 <option value="T">Create a full model output file</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
83 </param>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
84 <param name="output_pred" type="select" label="Create a tabular output with predicted values for each subject from the optimal model for (eg) NRI estimates" >
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
85 <option value="F" selected="true">No predicted value output file</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
86 <option value="T">Create a predicted value output file</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
87 </param>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
88 <param name="predict_at" type="text" value='' label="Provide a comma separated list of times to make a prediction for each subject"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
89 optional="True" help="Default (blank) will return predictions at 0%,25%,50%,75%,100% of the observed times which should be informative" />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
90
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
91 </when>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
92 </conditional>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
93 <param name="optLambda" type="select" label="Value to use when reporting optimal model and coefficients" help="minLambda will have more predictors - 1SDLambda will be more parsimonious">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
94 <option value="lambda.1se" selected="true">Lambda + 1 SE of min MSE or AUC (fewer coefficients - more false negatives)</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
95 <option value="lambda.min">Lambda at min MSE or max AUC (more coefficients - more false positives)</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
96 </param>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
97 <param name="logxform_cols" optional="True" label="Select numeric columns to be log transformed before use as predictors or dependent variables" type="data_column"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
98 data_ref="input1" numerical="True" multiple="True" use_header_names="True" help = "The wisdom of doing this depends entirely on your predictors - eg can help diminish long-tailed outlier influence"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
99 force_select="False"/>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
100 <param name="do_standard" type="select" label="Standardise x vars"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
101 help="If all measurements on same scale, may not be needed. Coefficients are always returned on the original scale.">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
102 <option value="False" selected="true">No standardisation of predictors</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
103 <option value="True">Standardise predictors before model</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
104 </param>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
105 <param name="mdsplots" type="select" label="Generate MDS plots of samples in measurement space and measurements in sample space" >
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
106 <option value="False" selected="true">No MDS plots</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
107 <option value="True">Yes create MDS plots</option>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
108 </param>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
109 <param name="alpha" type="float" value="0.95" size="5" min="0.01" max="1.0" label="Alpha - see glmnet docs. 1 for pure lasso. 0.0 for pure ridge regression"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
110 help="Default 0.95 allows lasso to cope better with expected predictor collinearity. Use (eg) 0.5 for hybrid regularised regression or (eg) 0.025 for ridge regression"/>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
111 <param name="nfold" type="integer" value="10" size="5" label="Number of folds for internal cross validation"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
112 help="Default of 10 is usually ok"/>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
113 </inputs>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
114 <outputs>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
115 <data format="html" name="html_file" label="${title}.html"/>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
116 <data format="tabular" name="model_file" label="${title}_modelres.xls"/>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
117 <data format="tabular" name="output_full_file" label="${title}_full_cox_model.xls">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
118 <filter>model['output_full'] == 'T'</filter>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
119 </data>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
120 <data format="tabular" name="output_pred_file" label="${title}_predicted_from_model.xls">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
121 <filter>model['output_pred'] == 'T'</filter>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
122 </data>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
123 </outputs>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
124 <tests>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
125 <test>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
126 <param name='input1' value='cox_test.xls' ftype='tabular' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
127 <param name='treatment_name' value='case' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
128 <param name='title' value='Cox glmnet test' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
129 <param name='nfold' value='10' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
130 <param name='logxform_cols' value='' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
131 <param name='alpha' value='0.95' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
132 <param name='do_standard' value="True" />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
133 <param name='cox_time' value='1' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
134 <param name='cox_status' value='2' />
9
4b62ea84c318 Uploaded
fubar
parents: 4
diff changeset
135 <param name='cox_id' value='2' />
4b62ea84c318 Uploaded
fubar
parents: 4
diff changeset
136 <param name='predict_at' value='' />
4
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
137 <param name='fam' value='cox' />
9
4b62ea84c318 Uploaded
fubar
parents: 4
diff changeset
138 <param name='yvar_cols' value='' />
4
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
139 <param name='xvar_cols' value='3,4,5' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
140 <param name='force_xvar_cols' value='3' />
9
4b62ea84c318 Uploaded
fubar
parents: 4
diff changeset
141 <param name='output_full' value='F' />
4b62ea84c318 Uploaded
fubar
parents: 4
diff changeset
142 <param name='output_pred' value='F' />
4b62ea84c318 Uploaded
fubar
parents: 4
diff changeset
143 <output name='model_file' file='coxlassotest_modelres.xls'>
4
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
144 <assert_contents>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
145 <has_text text="rhubarb" />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
146 <has_text text="TRUE" />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
147 <!-- &#009; is XML escape code for tab -->
9
4b62ea84c318 Uploaded
fubar
parents: 4
diff changeset
148 <!-- has_line line="regulator&#009;partial_likelihood&#009;forced_in&#009;glmnet_model&#009;best_lambda" / -->
4b62ea84c318 Uploaded
fubar
parents: 4
diff changeset
149 <has_line line="regulator&#009;partial_likelihood&#009;forced_in&#009;glmnet_model&#009;best_lambda&#009;lambdaChoice&#009;alpha" />
4b62ea84c318 Uploaded
fubar
parents: 4
diff changeset
150 <has_n_columns n="7" />
4
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
151 </assert_contents>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
152 </output>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
153 <output name='html_file' file='coxlassotest.html' compare='diff' lines_diff='10' />
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
154 </test>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
155 </tests>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
156 <help>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
157
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
158 **Before you start**
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
159
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
160 Please read the glmnet documentation @ glmnet_
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
161
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
162 This Galaxy wrapper merely exposes that code and the glmnet_ documentation is essential reading
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
163 before getting useful results here.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
164
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
165 **What it does**
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
166
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
167 From documentation at glmnet_ ::
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
168
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
169 Glmnet is a package that fits a generalized linear model via penalized maximum likelihood.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
170 The regularization path is computed for the lasso or elasticnet penalty at a grid of values for the regularization parameter lambda.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
171 The algorithm is extremely fast, and can exploit sparsity in the input matrix x.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
172 It fits linear, logistic and multinomial, poisson, and Cox regression models.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
173 A variety of predictions can be made from the fitted models.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
174
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
175 Internal cross validation is used to optimise the choice of lambda based on CV AUC for logistic (binomial outcome) models, or CV mse for gaussian.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
176
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
177 **Warning about the tyranny of dimensionality**
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
178
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
179 Yes, this package will select 'optimal' models even when you (optimistically) supply more predictors than you have cases.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
180 The model returned is unlikely to represent the only informative regularisation path through your data - if you run repeatedly with
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
181 exactly the same settings, you will probably see many different models being selected.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
182 This is not a software bug - the real problem is that you just don't have enough information in your data.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
183
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
184 Sufficiently big jobs will take a while (eg each lasso regression with 20k features on 1k samples takes about 2-3 minutes on our aged cluster)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
185
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
186 **Input**
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
187
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
188 Assuming you have more measurements than samples, you supply data as a tabular text file where each row is a sample and columns
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
189 are variables. You specify which columns are predictors (x) and which are dependent (y) variables for each sample. Each of multiple
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
190 dependent variable columns will be run and reported independently. Predictors can be forced in to the model.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
191
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
192 **Output**
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
193
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
194 For each selected dependent regression variable, a brief report of the model coefficients predicted at the
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
195 'optimal' nfold CV value of lambda.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
196
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
197 **Predicted event probabilities for Cox and Logistic models**
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
198
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
199 If you want to compare (eg) two competing clinical predictions, there's a companion generic NRI tool
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
200 for predicted event probabilities. Estimates dozens of measures of improvement in prediction. Currently only works for identical id subjects
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
201 but can probably be extended to independent sample predictions.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
202
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
203 Given a model, we can generate a predicted p (for status 1) in binomial or cox frameworks so models can be evaluated in terms of NRI.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
204 Of course, estimates are likely substantially inflated over 'real world' performance by being estimated from the same sample - but you probably
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
205 already knew that since you were smart enough to reach this far down into the on screen help. The author salutes you, intrepid reader!
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
206
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
207 It may seem an odd thing to do, but we can predict p for an event for each subject from our original data, given a parsimonious model. Doing
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
208 this for two separate models (eg, forcing in an additional known explanatory measurement to the new model) allows comparison of the two models'
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
209 predicted status for each subject, or the same model in independent populations to see how badly it does.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
210
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
211 **Attributions**
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
212
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
213 glmnet_ is the R package exposed by this Galaxy tool.
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
214
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
215 Galaxy_ (that's what you are using right now!) for gluing everything together
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
216
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
217 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
218 licensed to you under the LGPL_ like other rgenetics artefacts
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
219
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
220 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
221 .. _glmnet: http://web.stanford.edu/~hastie/glmnet/glmnet_alpha.html
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
222 .. _Galaxy: http://getgalaxy.org
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
223
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
224
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
225 </help>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
226
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
227 <configfiles>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
228 <configfile name="runme">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
229 <![CDATA[
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
230 library('glmnet')
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
231 library('lars')
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
232 library('survival')
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
233 library('pec')
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
234
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
235
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
236 message=function(x) {print.noquote(paste(x,sep=''))} # shadows base R message(): prints x unquoted via print.noquote() instead of emitting a message condition on stderr (presumably so progress text reaches the wrapper's stdout log - TODO confirm)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
237
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
238
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
239 ross.cv.glmnet = function (x, y, weights, offset = NULL, lambda = NULL, type.measure = c("mse",
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
240 "deviance", "class", "auc", "mae"), nfolds = 10, foldid,
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
241 grouped = TRUE, keep = FALSE, parallel = FALSE, ...)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
242 {
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
243 if (missing(type.measure))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
244 type.measure = "default"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
245 else type.measure = match.arg(type.measure)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
246 if (!is.null(lambda) && length(lambda) < 2)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
247 stop("Need more than one value of lambda for cv.glmnet")
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
248 N = nrow(x)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
249 if (missing(weights))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
250 weights = rep(1, N)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
251 else weights = as.double(weights)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
252 y = drop(y)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
253 glmnet.call = match.call(expand.dots = TRUE)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
254 sel = match(c("type.measure", "nfolds", "foldid", "grouped",
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
255 "keep"), names(glmnet.call), F)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
256 if (any(sel))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
257 glmnet.call = glmnet.call[-sel]
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
258 glmnet.call[[1]] = as.name("glmnet")
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
259 glmnet.object = glmnet(x, y, weights = weights, offset = offset,
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
260 lambda = lambda, ...)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
261 glmnet.object\$call = glmnet.call
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
262 is.offset = glmnet.object\$offset
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
263 lambda = glmnet.object\$lambda
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
264 if (inherits(glmnet.object, "multnet")) {
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
265 nz = predict(glmnet.object, type = "nonzero")
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
266 nz = sapply(nz, function(x) sapply(x, length))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
267 nz = ceiling(apply(nz, 1, median))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
268 }
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
269 else nz = sapply(predict(glmnet.object, type = "nonzero"),
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
270 length)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
271 if (missing(foldid))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
272 foldid = sample(rep(seq(nfolds), length = N))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
273 else nfolds = max(foldid)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
274 if (nfolds < 3)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
275 stop("nfolds must be bigger than 3; nfolds=10 recommended")
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
276 outlist = as.list(seq(nfolds))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
277 if (parallel && require(foreach)) {
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
278 outlist = foreach(i = seq(nfolds), .packages = c("glmnet")) %dopar%
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
279 {
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
280 sel = foldid == i
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
281 if (is.matrix(y))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
282 y_sub = y[!sel, ]
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
283 else y_sub = y[!sel]
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
284 if (is.offset)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
285 offset_sub = as.matrix(offset)[!sel, ]
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
286 else offset_sub = NULL
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
287 glmnet(x[!sel, , drop = FALSE], y_sub, lambda = lambda,
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
288 offset = offset_sub, weights = weights[!sel],
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
289 ...)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
290 }
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
291 }
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
292 else {
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
293 for (i in seq(nfolds)) {
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
294 sel = foldid == i
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
295 if (is.matrix(y))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
296 y_sub = y[!sel, ]
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
297 else y_sub = y[!sel]
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
298 if (is.offset)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
299 offset_sub = as.matrix(offset)[!sel, ]
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
300 else offset_sub = NULL
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
301 outlist[[i]] = glmnet(x[!sel, , drop = FALSE],
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
302 y_sub, lambda = lambda, offset = offset_sub,
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
303 weights = weights[!sel], ...)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
304 }
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
305 }
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
306 fun = paste("cv", class(glmnet.object)[[1]], sep = ".")
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
307 cvstuff = do.call(fun, list(outlist, lambda, x, y, weights,
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
308 offset, foldid, type.measure, grouped, keep))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
309 cvm = cvstuff\$cvm
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
310 cvsd = cvstuff\$cvsd
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
311 cvname = cvstuff\$name
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
312
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
313 out = list(lambda = lambda, cvm = cvm, cvsd = cvsd, cvup = cvm +
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
314 cvsd, cvlo = cvm - cvsd, nzero = nz, name = cvname, glmnet.fit = glmnet.object)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
315 if (keep)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
316 out = c(out, list(fit.preval = cvstuff\$fit.preval, foldid = foldid))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
317
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
318 lamin = if (type.measure == "auc")
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
319 getmin(lambda, -cvm, cvsd)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
320 else getmin(lambda, cvm, cvsd)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
321 out = c(out, as.list(lamin))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
322 hitsse = rep(0,ncol(x))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
323 hitsmin = rep(0,ncol(x))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
324 names(hitsse) = colnames(x)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
325 names(hitsmin) = colnames(x)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
326 olmin = lamin\$lambda.min
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
327 ol1sd = lamin\$lambda.1se
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
328 lambs = c(olmin,ol1sd)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
329 names(lambs) = c('olmin','ol1sd')
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
330 for (cvfit in outlist) {
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
331 colmin = which(cvfit\$lambda == olmin)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
332 col1se = which(cvfit\$lambda == ol1sd)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
333 nzmin = which(cvfit\$beta[,colmin] != 0)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
334 nz1se = which(cvfit\$beta[,col1se] != 0)
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
335 hitsse[nz1se] = hitsse[nz1se] + 1
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
336 hitsmin[nzmin] = hitsmin[nzmin] + 1
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
337 }
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
338 obj = c(out,list(cvhits.1se=hitsse,cvhits.min=hitsmin))
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
339 class(obj) = "cv.glmnet"
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
340 obj
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
341 }
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
342
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
# Draw a two dimensional classical MDS plot of the rows of a matrix and save it as a PDF.
#
# dm         : numeric matrix (or data frame) to be scaled
# myTitle    : text placed in front of "MDS Plot" in the plot title
# groups     : optional vector used to colour the plotted labels; NA (the default) gives a single colour
# outpdfname : path of the PDF file to write
# transpose  : when TRUE, t(dm) is scaled so the columns of dm become the plotted points
#
# Returns the value of dev.off(). Problems are signalled with stop(); the callers visible in
# this file wrap the call in try().
mdsPlot = function(dm,myTitle,groups=NA,outpdfname,transpose=T)
{
    mt = myTitle
    pcols = c('maroon')
    # groups can be a vector or matrix, in which case is.na() returns one value per element
    # and is not a valid if() condition on its own, so collapse it with all().
    if (! all(is.na(groups)))
    {
        gu = unique(groups)
        colours = rainbow(length(gu),start=0.1,end=0.9)
        pcols = colours[match(groups,gu)]
    }
    mydata = dm
    if (transpose==T)
    {
        mydata = t(dm)
    }
    npred = ncol(mydata)
    d = dist(mydata)
    # cmdscale() needs k no larger than (number of points - 1); the points are the rows of mydata.
    # The original column based bound is kept as well so existing results are unchanged.
    kdim = min(10, npred-2, nrow(mydata)-1)
    if (kdim < 2) {
        stop('Too few rows or columns to draw a 2 dimensional MDS plot')
    }
    fit = cmdscale(d,eig=TRUE, k=kdim)
    xmds = fit\$points[,1]
    ymds = fit\$points[,2]
    pdf(outpdfname)
    devOpen = TRUE
    # make sure the PDF device is closed if any of the plotting calls below fails
    on.exit(if (devOpen) dev.off())
    plot(xmds, ymds, xlab="Dimension 1", ylab="Dimension 2",
        main=paste(mt,"MDS Plot"),type="n", col=pcols, cex=0.35)
    text(xmds, ymds, labels = row.names(mydata), cex=0.35, col=pcols)
    grid(col="lightgray",lty="dotted")
    devOpen = FALSE
    dev.off()
}
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
372
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
373
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
# Fit an ordinary (unpenalised) logistic regression of yvec on every column of x and return
# the fitted outcome probability for each complete row.
#
# x        : matrix or data frame of predictors; its column names are used to build the formula
# yvec     : outcome vector, must contain exactly two distinct values
# yvarname : outcome name, only used in the log message
# id       : row identifiers, same length as yvec
#
# Returns a data frame with columns id, pred_response and obs_response, or NA (after a
# message) when yvec is not two valued or x has no column names.
getpredp_logistic = function(x,yvec,yvarname,id)
{
    yvals = unique(yvec)
    if (length(yvals) != 2) {
        message(c('ERROR: y does not have 2 values =',paste(yvals,collapse=',')))
        return(NA)
    }
    cols = colnames(x)
    if (length(cols) == 0) {
        message('ERROR: No columns in input x? Cannot predict!')
        return(NA)
    }
    cn = paste(cols, collapse = ' + ')

    formstring=paste("y ~",cn)
    form = as.formula(formstring)
    ok = complete.cases(x)

    if (sum(ok) < length(ok)) {
        # drop=FALSE keeps a single predictor as a one column matrix so that its column
        # name, which the formula refers to, survives the row filter
        x = x[ok,,drop=FALSE]
        yvec = yvec[ok]
        id = id[ok]
    }
    nx = data.frame(id=id,x,y=yvec)
    print('nx,yvec:')
    print(head(nx,n=3))
    print(yvec)
    mdl = glm(form, data=nx, family="binomial", na.action=na.omit)
    message(c('Model format =',formstring))
    message(paste('Predictive model details used to generate logistic outcome probabilities for',yvarname,':'))
    # the fitted model is held in mdl; the summaries and predictions below must use that same object
    print(summary(mdl))
    print(anova(mdl))
    predp = predict(mdl,nx,type="response")
    p1 = data.frame(id=id,pred_response=predp,obs_response=yvec)
    return(p1)
}
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
410
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
411
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
412
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
413
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
# Fit an unpenalised Cox model on the columns of x and return one predicted probability
# column per requested time point for every complete row.
#
# x          : matrix or data frame of predictors; its column names are used to build the formula
# time       : follow up time for each row
# status     : event indicator for each row
# id         : row identifiers
# predict_at : vector of times at which predictions are produced
#
# Returns a data frame with id, one p_surv_to_<time> column per entry of predict_at, time and
# status, or NA (after a message) when x has no column names.
# coxph() and predictSurvProb() are not defined in this part of the file; presumably they come
# from the survival and pec packages - TODO confirm.
getpredp_cox = function(x,time,status,id,predict_at)
{
    cols = colnames(x)
    if (length(cols) == 0) {
        message('ERROR: No columns in input x? Cannot predict!')
        return(NA)
    }
    cn = paste(cols, collapse = ' + ')

    formstring=paste("Surv(time, status) ~",cn)

    form = as.formula(formstring)

    ok = complete.cases(x)

    if (sum(ok) < length(ok)) {
        # drop=FALSE keeps a single predictor as a one column matrix so that its column
        # name, which the formula refers to, survives the row filter
        x = x[ok,,drop=FALSE]
        time = time[ok]
        status = status[ok]
        id = id[ok]
    }
    nx = data.frame(x,time=time,status=status)
    m1 = coxph(form, data=nx,singular.ok=TRUE)
    print.noquote('Predictive model details used to generate survival probabilities:')
    print.noquote(m1)
    predpq = predictSurvProb(object=m1, newdata=nx, times=predict_at)
    # NOTE(review): the values written out are 1 minus whatever predictSurvProb() returns, yet
    # the columns are labelled p_surv_to_<time>. If predictSurvProb() returns survival
    # probabilities these columns actually hold event probabilities - confirm which is intended.
    predpq = 1-predpq
    colnames(predpq) = paste('p_surv_to',predict_at,sep='_')
    p1 = data.frame(id=id,predpq,time=time,status=status)
    return(p1)
}
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
445
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
446
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
447
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
# Fit an elastic net Cox model with glmnet, cross validate it with ross.cv.glmnet, and report
# the predictors kept at the chosen lambda.
#
# x             : predictor matrix (rows are samples)
# y             : outcome passed to glmnet with family='cox'; columns 'time' and 'status' are read
#                 by name when predictions are requested
# debugOn       : when TRUE, print the selected predictors and coefficients
# optLambda     : 'lambda.min' selects lambda.min, any other value selects lambda.1se
# out_full      : when TRUE, write the coefficients at the last lambda of the path to out_full_file
# out_pred      : when TRUE, refit an unpenalised Cox model on the selected predictors with
#                 getpredp_cox and write its predictions to out_pred_file
# cox_id        : row identifiers copied into the prediction file
# descr         : text embedded in the names of the PDF files
# do_standard   : passed to glmnet as standardize
# alpha         : elastic net mixing parameter passed to glmnet
# penalty       : per predictor penalty factors; a zero entry is reported as forced_in
# predict_at    : times at which predictions are made; all NA means use quantiles of the times
# mdsplots      : when TRUE, draw two MDS plots of x with mdsPlot
# maxsteps, nfold, xcolnames, ycolnames are accepted but not referenced in this function body.
#
# Output printed while the function runs is diverted to cox_rglasso.log.
# Returns a data frame with one row per selected predictor, or NA when a fitting step fails.
dolasso_cox = function(x,y,debugOn=F,maxsteps=10000,nfold=10,xcolnames,ycolnames,optLambda='lambda.1se',out_full=F,out_full_file=NA,
    out_pred=F,out_pred_file=NA,cox_id=NA, descr='Cox test',do_standard=F,alpha=0.9,penalty,predict_at,mdsplots=F)
{
    logf = file("cox_rglasso.log", open = "a")
    sink(logf,type = c("output", "message"))
    # restore the previous output destination and release the log connection on every exit
    # path, including an error raised part way through
    on.exit({ sink(); close(logf) })
    res = NULL
    if (mdsplots==T) {
        outpdfname = 'cox_x_in_sample_space_MDS.pdf'
        p = try({ mdsPlot(x,'measurements in sample space',groups=NA,outpdfname=outpdfname,transpose=T) },T)
        if (inherits(p, "try-error"))
        {
            print.noquote(paste('Unable to produce predictors in sample space mds plot',p))
        }
        outpdfname = 'cox_samples_in_x_space_MDS.pdf'
        p = try({mdsPlot(x,'samples in measurement space',groups=y,outpdfname=outpdfname,transpose=F) },T)
        if (inherits(p, "try-error"))
        {
            print.noquote(paste('Unable to produce samples in measurement space mds plots',p))
        }
    }
    # predict_at may hold several times, so test the whole vector. The default prediction
    # times are quantiles of the follow up times only; taking quantiles of all of y would mix
    # the status codes in with the times.
    if (all(is.na(predict_at))) {
        if ('time' %in% colnames(y)) {
            predict_at = quantile(y[,'time'])
        } else {
            predict_at = quantile(y)
        }
    }
    message(paste('@@@ Cox model will be predicted at times =',paste(predict_at,collapse=',')))
    standardize = do_standard
    p = try({larsres = glmnet(x,y,family='cox',standardize=standardize,alpha=alpha,penalty.factor=penalty )},T)
    # inherits() is used because a successful fit has a class vector with more than one
    # element, which cannot be compared to a single string inside if()
    if (inherits(p, "try-error"))
    {
        print.noquote('Unable to run cox glmnet on your data')
        print.noquote(p)
        return(NA)
    }
    if (out_full == T)
    {
        b = as.matrix(larsres\$beta)
        nb = length(colnames(b))
        bcoef = b[,nb]
        lastl = larsres\$lambda[length(larsres\$lambda)]
        allres = data.frame(x=rownames(b),beta=bcoef,lambda=lastl)
        write.table(format(allres,digits=5),out_full_file,quote=FALSE, sep="\t",row.names=F)
    }

    outpdf = paste('cox',descr,'glmnetdev.pdf',sep='_')
    try(
    {
        pdf(outpdf)
        plot(larsres,main='cox glmnet',label=T)
        grid()
        dev.off()
    },T)

    larscv = NA

    # the family is fixed to 'cox' here: no variable named fam is defined inside this function
    p = try({larscv=ross.cv.glmnet(x,y,family='cox',type.measure='deviance',penalty=penalty)},T)
    if (inherits(p, "try-error")) {
        print.noquote(paste('Unable to cross validate your data',p))
        return(NA)
    }
    lse = larscv\$cvhits.1se
    lmin = larscv\$cvhits.min
    tot = lse + lmin
    # carry the predictor names as a column, as do_lasso does, because the table is written
    # without row names
    allhits = data.frame(pred=colnames(x),hits_lambda_1se = lse,hits_lambda_min = lmin)
    nzhits = allhits[which(tot != 0),]
    message('Times each predictor was selected in CV models (excluding zero count predictors)')
    print.noquote(nzhits)
    out_nz_file = 'cox_cross_validation_model_counts.xls'
    write.table(nzhits,out_nz_file,quote=FALSE, sep="\t",row.names=F)

    outpdf = paste('cox',descr,'glmnet_cvdeviance.pdf',sep='_')

    p = try(
    {
        pdf(outpdf)
        plot(larscv,main='Deviance',label=T)
        grid()
        dev.off()
    },T)
    if (optLambda == 'lambda.min') {
        best_lambda = larscv\$lambda.min
        bestcoef = as.matrix(coef(larscv, s = "lambda.min"))
    } else {
        best_lambda = larscv\$lambda.1se
        bestcoef = as.matrix(coef(larscv, s = "lambda.1se"))
    }
    inmodel = which(bestcoef != 0)
    coefs = bestcoef[inmodel]
    preds = rownames(bestcoef)[inmodel]

    names(coefs) = preds
    # a penalty factor of zero means the predictor was not penalised, reported as forced_in
    pen = as.logical( ! penalty[inmodel])
    if (out_pred==T)
    {
        if (length(inmodel) > 0 ) {
            predcols = inmodel
            xmat = as.matrix(x[,predcols])
            colnames(xmat) = preds
            bestpred = getpredp_cox(x=xmat,time=y[,'time'],status=y[,'status'],id=cox_id, predict_at=predict_at)
            pred = data.frame(responsep=bestpred, best_lambda=best_lambda,lamchoice=optLambda,alpha=alpha)
            write.table(pred,out_pred_file,quote=FALSE, sep="\t",row.names=F)
        } else { print.noquote('WARNING: No coefficients in selected model to predict with - no predictions made') }
    }
    if (debugOn) {
        print.noquote(paste('best_lambda=',best_lambda,'saving cox respreds=',paste(names(coefs),collapse=','),'as predictors of survival. Coefs=',paste(coefs,collapse=',')))
    }
    p = try({res = data.frame(regulator=names(coefs),partial_likelihood=coefs,forced_in=pen,glmnet_model='cox',best_lambda=best_lambda,
        lambdaChoice=optLambda,alpha=alpha)},T)
    if (inherits(p, "try-error")) {
        message(paste('@@@ unable to return a dataframe',p))
        return(NA)
    }
    print.noquote('@@@ Results preview:')
    print.noquote(res,digits=5)
    return(res)

}
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
567
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
568
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
# Fit an elastic net model for one target with glmnet, cross validate it with
# ross.cv.glmnet, and report the predictors kept at the chosen lambda.
#
# x            : predictor matrix (rows are samples)
# y            : outcome vector; when it has exactly two distinct values the family is forced to binomial
# do_standard  : passed to glmnet as standardize
# debugOn      : when TRUE, cat() a one line summary of the selected model
# defaultFam   : family used for a non binary outcome when it is one of poisson, binomial,
#                gaussian or multinomial; otherwise gaussian
# optLambda    : 'lambda.min' selects lambda.min, any other value (including the default)
#                selects lambda.1se
# indx, target : copied into the result rows; target also prefixes the log and output file names
# alpha        : elastic net mixing parameter passed to glmnet
# penalty      : per predictor penalty factors; a zero entry is reported as forced_in
# out_pred     : when TRUE and the family is binomial, write predicted probabilities to out_pred_file
# id           : row identifiers copied into the prediction file
# descr, sane, nfold, yvarname and mdsplots are accepted but not referenced in this function body.
#
# Output printed while the function runs is diverted to <target>_rglasso.log.
# Returns a data frame with one row per non zero coefficient, or NA when a step fails.
do_lasso = function(x=NA,y=NA,do_standard=T,debugOn=T,defaultFam="gaussian",optLambda='minLambda',descr='description', indx=1,target='target',sane=F,
    alpha=0.9,nfold=10,penalty=c(),out_pred=F,out_pred_file='outpred',yvarname='yvar',id=c(),mdsplots=F)
{
    logf = file(paste(target,"rglasso.log",sep='_'), open = "a")
    sink(logf,type = c("output", "message"))
    # restore the previous output destination and release the log connection on every exit
    # path, including an error raised by one of the unguarded plotting calls below
    on.exit({ sink(); close(logf) })
    res = NA
    phe_is_bin = (length(unique(y)) == 2)
    forcedin = paste(colnames(x)[which(penalty == 0)],collapse=',')
    fam = "gaussian"
    if (defaultFam %in% c("poisson","binomial","gaussian","multinomial")) fam=defaultFam
    if (phe_is_bin == T) {
        fam = "binomial"
    }
    print.noquote(paste('target=',target,'is binary=',phe_is_bin,'dim(x)=',paste(dim(x),collapse=','),'length(y)=',length(y),'force=',forcedin,'fam=',fam))
    standardize = do_standard
    p = try({larsres = glmnet(x,y,family=fam,standardize=standardize,maxit=10000,alpha=alpha,penalty.factor=penalty) },T)
    # inherits() is used because a successful fit has a class vector with more than one
    # element, which cannot be compared to a single string inside if()
    if (inherits(p, "try-error"))
    {
        print(paste('ERROR: unable to run glmnet for target',target,'error=',p))
        return(NA)
    }

    mt = paste('Glmnet fraction deviance for',target)
    outpdf = paste(target,'glmnetPath.pdf',sep='_')
    pdf(outpdf)
    plot(larsres,main=mt,label=T)
    grid()
    dev.off()

    outpdf = paste(target,'glmnetDeviance.pdf',sep='_')

    mt2 = paste('Glmnet lambda for',target)

    pdf(outpdf)
    plot(larsres,xvar="lambda",main=mt2,label=T)
    grid()
    dev.off()

    larscv = NA
    # NOTE(review): alpha, standardize and penalty are not passed to the cross validation
    # calls below, so the cross validated model may differ from the fit above - confirm this
    # is intended.
    if (fam=="binomial") {
        tmain = paste(target,'AUC')
        outpdf = paste(target,'glmnetCV_AUC.pdf',sep='_')
        p = try({larscv = ross.cv.glmnet(x=x,y=y,family=fam,type.measure='auc')},T)
    } else {
        tmain = paste(target,'CV MSE')
        outpdf = paste(target,'glmnetCV_MSE.pdf',sep='_')
        p = try({larscv = ross.cv.glmnet(x=x,y=y,family=fam,type.measure='mse')},T)
    }
    if (inherits(p, "try-error"))
    {
        print(paste('ERROR: unable to run cross validation for target',target,'error=',p))
        return(NA)
    }


    pdf(outpdf)
    plot(larscv,main=tmain)
    grid()
    dev.off()

    lse = larscv\$cvhits.1se
    lmin = larscv\$cvhits.min
    tot = lse + lmin
    allhits = data.frame(pred=colnames(x),hits_lambda_1se = lse,hits_lambda_min = lmin)
    nzhits = allhits[which(tot != 0),]
    message('Total hit count for each predictor over all CV models (excluding zero count predictors)')
    print.noquote(nzhits)
    out_nz_file = paste(target,'cross_validation_model_counts.xls',sep='_')
    write.table(nzhits,out_nz_file,quote=FALSE, sep="\t",row.names=F)

    # a leading zero is added so the penalty vector lines up with the coefficient rows, which
    # presumably start with the intercept - TODO confirm for every family used
    ipenalty = c(0,penalty)
    if (optLambda == 'lambda.min') {
        best_lambda = larscv\$lambda.min
        bestpred = as.matrix(coef(larscv, s = "lambda.min"))
    } else {
        best_lambda = larscv\$lambda.1se
        bestpred = as.matrix(coef(larscv, s = "lambda.1se"))
    }
    inmodel = which(bestpred != 0)
    coefs = bestpred[inmodel,1]
    preds = rownames(bestpred)[inmodel]
    iforced = ipenalty[inmodel]
    forced = ! as.logical(iforced)
    names(coefs) = preds
    ncoef = length(coefs) - 1
    if (out_pred==T && fam=="binomial")
    {
        print.noquote(paste('Predicting',target,'probabilities from binomial glmnet at alpha',alpha,'and lambda',best_lambda))
        # NOTE(review): predict.glmnet is called directly and its result is treated as a linear
        # predictor that still needs the logistic transform - confirm it returns the link scale.
        bestpred = predict.glmnet(larsres,s=best_lambda,newx=x,type="response")
        # plogis() is the logistic function exp(v)/(1+exp(v)) without the overflow to NaN that
        # the explicit formula gives for large v
        bestpred = plogis(bestpred)
        pred = data.frame(id=id,y=y,predp=as.vector(bestpred), best_lambda=best_lambda)
        write.table(pred,out_pred_file,quote=FALSE, sep="\t",row.names=F)
    }
    if (debugOn) {cat(indx,'best_lambda=',best_lambda,'saving',fam,'respreds=',names(coefs),'as predictors of',target,'coefs=',coefs,'\n')}
    res = try(data.frame(i=indx,pred=target,regulator=names(coefs),coef=coefs,forced_in=forced,glmnet_model=fam,ncoef=ncoef,
        best_lambda=best_lambda,lambdaChoice=optLambda,alpha=alpha),T)
    if (inherits(res, "try-error")) {
        return(NA)
    }
    print.noquote(res)
    return(res)
}
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
674
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
675
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
## dolasso_generic: run do_lasso once per dependent variable and stack the
## per-target result tables into a single data frame.
##
## Arguments
##   predvars       predictor table; coerced with data.matrix()
##   depvars        dependent variable(s); used whole when ycolnames has at most
##                  one entry, otherwise column i is taken for target i
##   ycolnames      names of the dependent variables (one model per name)
##   penalty        per-predictor penalty values handed to do_lasso
##   out_pred_file  prediction output path for the first target; later targets
##                  get a name built from the target name
##   cox_id         per-sample identifiers, filtered alongside x and y
##   xfilt          minimum fraction of non-missing values a predictor column
##                  needs in order to be kept
##   debugOn, alpha, nfold, optLambda, descr, do_standard, defaultFam, out_pred,
##   mdsplots       passed through to do_lasso
##   maxsteps, xcolnames  accepted for interface compatibility; unused here
##
## Returns the row-bound do_lasso results, or NULL when no target produced one.
##
## NOTE(review): the default optLambda 'minLambda' is not the 'lambda.min' string
## that do_lasso tests for, so the default ends up on do_lasso's lambda.1se
## branch - confirm that this is intended.
dolasso_generic = function(predvars=NA,depvars=NA,debugOn=T,maxsteps=100, alpha=0.9,nfold=10,xcolnames=c(),ycolnames=c(),optLambda='minLambda', out_pred_file=NA,
    descr="describe me",do_standard=F,defaultFam="gaussian",penalty=c(),out_pred=F,cox_id=c(),mdsplots=F,xfilt=0.95)
{
    logf = file("rglasso.log", open = "a")
    sink(logf,type = c("output", "message"))
    xm = data.matrix(predvars)
    res = NULL
    depnames = ycolnames
    ndep = length(depnames)
    if (mdsplots==T) {
        ## Both plots are best effort: a failure is logged and the run continues.
        outpdfname = 'rglasso_x_in_sample_space_MDS.pdf'
        p = try({ mdsPlot(xm,'measurements in sample space',groups=NA,outpdfname=outpdfname,transpose=T) },T)
        if (inherits(p, "try-error"))
        {
            print.noquote(paste('Unable to produce predictors in sample space mds plot. Error:',p))
        }
        outpdfname = 'rglasso_samples_in_x_space_MDS.pdf'
        p = try({mdsPlot(xm,'samples in measurement space',groups=NA,outpdfname=outpdfname,transpose=F) },T)
        if (inherits(p, "try-error"))
        {
            print.noquote(paste('Unable to produce samples in measurement space mds plot. Error:',p))
        }
    }
    ## Keep only predictor columns whose non-missing fraction reaches xfilt.
    ndat = nrow(xm)
    cfracs = colSums(! is.na(xm))/ndat
    keepme = (cfracs >= xfilt)
    print.noquote(paste('Removing', sum(! keepme), 'xvars with more than',xfilt,'fraction missing'))
    ## Keep the penalty vector aligned with the surviving columns. Only done when
    ## it has one entry per original column; anything else is passed on untouched.
    if (length(penalty) == length(keepme)) { penalty = penalty[keepme] }
    ## drop=FALSE keeps xm a matrix even if a single column survives.
    xm = xm[,keepme,drop=FALSE]
    for (i in c(1:max(1,ndep))) {
        target = depnames[i]
        if (length(target) < 1) { target='y' }
        if (i %% 100 == 0) { cat(i,target,'\n') }
        if (ndep <= 1) {
            y = depvars
        } else {
            y = depvars[,i]
        }
        ## Use the family given as an argument rather than a variable from the
        ## calling environment.
        if (defaultFam == "binomial") { y = as.factor(y) }
        x = xm
        id = cox_id
        if (defaultFam != "cox") {
            ok = complete.cases(x,y)
            if (sum(! ok) > 0) {
                message(paste('@@@ Removing',sum(! ok),'cases with missing y of',length(y),'@@@'))
                y = y[(ok)]
                x = x[(ok),,drop=FALSE]
                id = id[(ok)]
            }
        }
        ok = complete.cases(y)
        if (sum(ok) == 0 ) {
            print(paste("No complete cases found for",target,"in input x dim =",paste(dim(xm),collapse=','),"length y=",length(y)))
        } else {
            if (i == 1) {
                outpred = out_pred_file
            } else {
                outpred = paste(target,'predicted_output.xls')
            }
            regres = do_lasso(x=x,y=y,do_standard=do_standard,debugOn=debugOn,defaultFam=defaultFam,optLambda=optLambda,out_pred_file=outpred,
                descr=descr,indx=i,target=target,alpha=alpha,nfold=nfold,penalty=penalty,out_pred=out_pred,yvarname=target,id=id,mdsplots=mdsplots)
            ## do_lasso signals failure with a bare NA. Testing is.na() directly on
            ## a data frame result yields a condition longer than one element.
            failed = is.atomic(regres) && length(regres) == 1 && is.na(regres)
            if (! failed) { res = rbind(res,regres) }
        }
    }
    print.noquote('@@@ Results preview:')
    print.noquote(res,digits=5)
    sink()
    return(res)
}
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
746
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
747
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
## corPlot: draw a clustered heatmap of a correlation matrix with pheatmap.
##   xdat    raw data (is_raw TRUE) or an already computed correlation matrix
##   main    plot title
##   is_raw  when TRUE, pairwise-complete Spearman correlations of xdat are
##           computed first; when FALSE, xdat is plotted as given
## Stops with an error when the matrix to plot has more than 1000 rows.
corPlot = function(xdat=c(), main='main title', is_raw=T)
{
    library(pheatmap)
    library(gplots)
    cormat = if (is_raw) cor(xdat, use="pairwise.complete.obs", method="spearman") else xdat
    if (nrow(cormat) > 1000) {
        stop("Too many rows for heatmap, who can read?!")
    }
    ## Row labels are drawn small; column labels are switched off entirely.
    label_size = 5.0
    pheatmap(cormat, main=main, show_colnames = F, width=30, height=30, fontsize_row=label_size, border_color=NA)
}
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
763
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
764
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
765
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
766
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
## runTest: smoke test that fits gaussian models to uniform random data.
##   n  number of samples and also number of predictors (X is n by n)
## Returns whatever dolasso_generic returns for the two random targets y1 and y2.
runTest = function(n=10)
{
    set.seed (NULL)
    Y = data.frame(y1=runif (n),y2=runif(n))
    X = matrix(runif(n*n), nrow = n, ncol = n)
    colnames(X) = paste('X', seq_len(n), sep='')
    ## Honour a do_standard already defined by the caller's session, otherwise
    ## fall back to the dolasso_generic default.
    standardise = if (exists("do_standard")) do_standard else F
    ## p.cutoff is not an argument of dolasso_generic, so it is no longer passed.
    ## ycolnames is supplied so that each column of Y is fitted as its own target.
    regres_out = dolasso_generic(predvars=X,depvars=Y,debugOn=T,maxsteps=10000,nfold=10,
        xcolnames=colnames(X),ycolnames=colnames(Y),
        descr='randomdata',do_standard=standardise,defaultFam="gaussian",alpha=0.05)
    return(regres_out)
}
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
780 ]]>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
## Driver section of the generated R script. The tool form values are substituted
## into this text before it is run; lines starting with two hashes are template
## comments and do not reach the generated script.
options(width=512)
options(digits=5)
alpha = $alpha
nfold = $nfold
optLambda = "$optLambda"
Out_Dir = "$html_file.files_path"
Input = "$input1"
indat = read.table(Input,head=T,sep='\t')
datcols = colnames(indat)
myTitle = "$title"
outtab = "$model_file"
do_standard = as.logical("$do_standard")
mdsplots = as.logical("$mdsplots")
fam = "$model.fam"
xvar_cols_in = "$xvar_cols"
force_xvar_cols_in = "$force_xvar_cols"
xvar_cols = as.numeric(strsplit(xvar_cols_in,",")[[1]])
force_xvar_cols = c()
## One penalty per input column; 1 means penalised, 0 means forced into the model.
penalties = rep(1,length(datcols))
forced_in = NA

## Optional log transform of the selected columns. Exact zeros are nudged up by
## a tiny constant first so that log() stays finite.
logxform = "$logxform_cols"
if (logxform != "None") {
    logxform_cols = as.numeric(strsplit(logxform,",")[[1]])
    if (length(logxform_cols) > 0) {
        small = 1e-10
        ## drop=FALSE keeps a single selected column two dimensional, so that
        ## which(arr.ind=T) returns a matrix and nrow() a number.
        sset = indat[,logxform_cols,drop=FALSE]
        zeros = which(sset==0,arr.ind=T)
        nz = nrow(zeros)
        if (nz &gt; 0) {
            message(paste('Log transforming encountered',nz,'zeros - added 1e-10'))
            sset[zeros] = sset[zeros] + small
        }
        ## The transform is applied whether or not any zeros were found.
        indat[,logxform_cols] = log(sset)
    }
}
if (force_xvar_cols_in != "None")
{
    ## Forced columns are merged into the predictor set and given a zero penalty.
    force_xvar_cols = as.numeric(strsplit(force_xvar_cols_in,",")[[1]])
    allx = c(xvar_cols,force_xvar_cols)
    xvar_cols = unique(allx)
    xvar_cols = xvar_cols[order(xvar_cols)]
    penalties[force_xvar_cols] = 0
}
penalty = penalties[xvar_cols]
forcedin = paste(datcols[which(penalties == 0)],collapse=',')
cox_id_col = NA
cox_id = NA

message(paste('@@@ Using alpha =',alpha,'for all models'))
x = indat[,xvar_cols]
nx = nrow(x)
cx = ncol(x)
message(paste('@@@@ Input has',nx,'samples and',cx,'predictors'))
if (cx > nx) {
    message('@@@ WARNING: Models will have more variables than cases so glmnet will likely return one of many possible solutions! Please DO NOT expect reliable results - glmnet is clever but not magical @@@')
}

xcolnames = datcols[xvar_cols]

if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
out_full = F
out_full_file = NA
out_pred_file = ""
out_pred = as.logical("$model.output_pred")

#if $model.fam == "binomial" or $model.fam == "cox":
cox_id_col = $model.cox_id
cox_id = indat[,cox_id_col]
if (out_pred == T) {
    out_pred_file="$output_pred_file"
    rownames(x) = cox_id
}
#end if
#if $model.fam == "cox":
cox_time = $model.cox_time
cox_status = $model.cox_status
out_full = as.logical("$model.output_full")
if (out_full == T) { out_full_file="$output_full_file" }
yvar_cols = c(cox_time,cox_status)
ycolnames = c('time','status')
istat = as.double(indat[,cox_status])
itime = as.double(indat[,cox_time])
## Default prediction times are the quantiles of the observed times unless the
## form supplied an explicit comma separated list.
predict_at = quantile(itime)
if ("$model.predict_at" &gt; "")
{
    pa = "$model.predict_at"
    predict_at = as.numeric(strsplit(pa,",")[[1]])
}
y = data.frame(time = itime, status = istat)
ustat = unique(istat)
## The status column must contain exactly the two values 0 and 1.
if ((length(ustat) != 2) | (! 1 %in% ustat ) | (! 0 %in% ustat))
{
    print.noquote(paste('INPUT ERROR: status must have 0 (censored) or 1 (event) but found',paste(ustat,collapse=',') ))
    quit(save='no',status=1)
}
y = as.matrix(y)
x = as.matrix(x)
print.noquote(paste('@@@ Cox model will predict yvar=',datcols[cox_status],'using cols=',paste(xcolnames,collapse=','),'n preds=',length(xcolnames),
    'forced in=',forcedin))
regres_out = dolasso_cox(x=x,y=y,debugOn=F,maxsteps=10000,nfold=nfold,xcolnames=xcolnames,ycolnames=ycolnames,optLambda=optLambda,out_full=out_full,out_full_file=out_full_file,
    out_pred=out_pred,out_pred_file=out_pred_file,cox_id=cox_id,descr=myTitle,do_standard=do_standard,alpha=alpha,penalty=penalty,predict_at=predict_at,mdsplots=mdsplots)
#else:
yvar_cols = "$model.yvar_cols"
yvar_cols = as.numeric(strsplit(yvar_cols,",")[[1]])
ycolnames = datcols[yvar_cols]
print.noquote(paste('@@@',fam,'model will predict yvar=',paste(ycolnames,collapse=','),'using cols=',paste(xcolnames,collapse=','),'n preds=',length(xcolnames),
    'forced in=',forcedin))
y = data.matrix(indat[,yvar_cols])
print.noquote(paste('Model will use',fam,'link function to predict yvar=',paste(ycolnames,collapse=','),'n preds=',length(xcolnames),'forced in=',forcedin))
regres_out = dolasso_generic(predvars=x,depvars=y,debugOn=F, maxsteps=10000,nfold=nfold,xcolnames=xcolnames,ycolnames=ycolnames,optLambda=optLambda,out_pred_file=out_pred_file,
    descr=myTitle,do_standard=do_standard,defaultFam=fam,alpha=alpha,penalty=penalty,out_pred=out_pred,cox_id=cox_id,mdsplots=mdsplots)
#end if

## Write the stacked model table, then record the session details in the log.
write.table(format(regres_out,digits=5),outtab,quote=FALSE, sep="\t",row.names=F)
print.noquote('@@@ SessionInfo for this R session:')
sessionInfo()
warnings()
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
900
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
901 </configfile>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
902 </configfiles>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
903 <citations>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
904 <citation type="bibtex">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
905 @Article{Friedman2010, title = {Regularization Paths for Generalized Linear Models via Coordinate Descent},
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
906 author = {Jerome Friedman and Trevor Hastie and Robert Tibshirani},
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
907 journal = {Journal of Statistical Software},
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
908 year = {2010},
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
909 volume = {33},
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
910 number = {1},
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
911 pages = {1--22},
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
912 url = {http://www.jstatsoft.org/v33/i01/}
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
913 }
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
914 </citation>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
915 <citation type="doi">
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
916 10.1093/bioinformatics/bts573
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
917 </citation>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
918 </citations>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
919 </tool>
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
920
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
921
5f2db639f8eb Uploaded
fubar
parents:
diff changeset
922