Mercurial > repos > bgruening > sklearn_model_validation
comparison model_validation.xml @ 2:eb4a0fccbb3f draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 4ed8c4f6ef9ece81797a398b17a99bbaf49a6978
author | bgruening |
---|---|
date | Wed, 30 May 2018 08:23:35 -0400 |
parents | 1e778f5614bf |
children | 2e4d425cd108 |
comparison
equal
deleted
inserted
replaced
1:33171e815126 | 2:eb4a0fccbb3f |
---|---|
16 <configfile name="sklearn_model_validation_script"> | 16 <configfile name="sklearn_model_validation_script"> |
17 <![CDATA[ | 17 <![CDATA[ |
18 import sys | 18 import sys |
19 import json | 19 import json |
20 import pandas | 20 import pandas |
21 import ast | |
21 import pickle | 22 import pickle |
22 import numpy as np | 23 import numpy as np |
23 import sklearn.model_selection | 24 import sklearn.model_selection |
24 from sklearn import svm, linear_model, ensemble | 25 from sklearn import svm, linear_model, ensemble |
26 from sklearn.pipeline import Pipeline | |
25 | 27 |
26 @COLUMNS_FUNCTION@ | 28 @COLUMNS_FUNCTION@ |
29 | |
30 @FEATURE_SELECTOR_FUNCTION@ | |
27 | 31 |
28 input_json_path = sys.argv[1] | 32 input_json_path = sys.argv[1] |
29 params = json.load(open(input_json_path, "r")) | 33 params = json.load(open(input_json_path, "r")) |
30 | 34 |
31 input_type = params["input_options"]["selected_input"] | 35 input_type = params["input_options"]["selected_input"] |
49 header=header, | 53 header=header, |
50 parse_dates=True | 54 parse_dates=True |
51 ) | 55 ) |
52 y=y.ravel() | 56 y=y.ravel() |
53 | 57 |
54 validator = params["model_validation_functions"]["selected_function"] | |
55 validator = getattr(sklearn.model_selection, validator) | |
56 options = params["model_validation_functions"]["options"] | 58 options = params["model_validation_functions"]["options"] |
57 if 'scoring' in options and options['scoring'] == '': | 59 if 'scoring' in options and options['scoring'] == '': |
58 options['scoring'] = None | 60 options['scoring'] = None |
59 | 61 if 'pre_dispatch' in options and options['pre_dispatch'] == '': |
62 options['pre_dispatch'] = None | |
63 | |
64 pipeline_steps = [] | |
65 | |
66 ## Set up feature selector and add to pipeline steps. | |
67 if params['feature_selection']['do_feature_selection'] == 'Yes': | |
68 feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms']) | |
69 pipeline_steps.append( ('feature_selector', feature_selector)) | |
70 | |
71 ## Set up estimator and add to pipeline. | |
60 estimator=params["model_validation_functions"]["estimator"] | 72 estimator=params["model_validation_functions"]["estimator"] |
61 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no': | 73 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no': |
62 estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"] | 74 estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"] |
63 estimator = eval(estimator.replace('__dq__', '"').replace("__sq__","'")) | 75 estimator = eval(estimator.replace('__dq__', '"').replace("__sq__","'")) |
64 | 76 |
65 #if $model_validation_functions.selected_function == 'cross_validate': | 77 pipeline_steps.append( ('estimator', estimator) ) |
66 res = validator(estimator, X, y, **options) | 78 |
67 rval = res["$model_validation_functions.return_type"] | 79 pipeline = Pipeline(pipeline_steps) |
68 | 80 |
69 #elif $model_validation_functions.selected_function == 'learning_curve': | 81 ## Set up validator, run pipeline through validator and return results. |
70 options['train_sizes'] = eval(options['train_sizes']) | 82 |
71 train_sizes_abs, train_scores, test_scores = validator(estimator, X, y, **options) | 83 validator = params["model_validation_functions"]["selected_function"] |
72 rval = eval("$model_validation_functions.return_type") | 84 validator = getattr(sklearn.model_selection, validator) |
73 | 85 |
74 #elif $model_validation_functions.selected_function == 'permutation_test_score': | 86 selected_function = params["model_validation_functions"]["selected_function"] |
75 score, permutation_scores, pvalue = validator(estimator, X, y, **options) | 87 rval_type = params["model_validation_functions"].get("return_type", None) |
76 rval = eval("$model_validation_functions.return_type") | 88 |
77 if "$model_validation_functions.return_type" in ["score", "pvalue"]: | 89 if selected_function == 'cross_validate': |
78 rval = [rval] | 90 res = validator(pipeline, X, y, **options) |
79 | 91 rval = res[rval_type] |
80 #elif $model_validation_functions.selected_function == 'validation_curve': | 92 elif selected_function == 'learning_curve': |
81 options['param_range'] = eval(options['param_range']) | 93 options['train_sizes'] = eval(options['train_sizes']) |
82 train_scores, test_scores = validator(estimator, X, y, **options) | 94 train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options) |
83 rval = eval("$model_validation_functions.return_type") | 95 rval = eval(rval_type) |
84 | 96 elif selected_function == 'permutation_test_score': |
85 #else: | 97 score, permutation_scores, pvalue = validator(pipeline, X, y, **options) |
86 rval = validator(estimator, X, y, **options) | 98 rval = eval(rval_type) |
87 #end if | 99 if rval_type in ["score", "pvalue"]: |
100 rval = [rval] | |
101 elif selected_function == 'validation_curve': | |
102 options['param_name'] = 'estimator__' + options['param_name'] | |
103 options['param_range'] = eval(options['param_range']) | |
104 train_scores, test_scores = validator(pipeline, X, y, **options) | |
105 rval = eval(rval_type) | |
106 elif selected_function == 'GridSearchCV': | |
107 param_grid = params["model_validation_functions"]["param_grid"].replace("__sq__","'")\ | |
108 .replace('__dq__','"').replace("__oc__", "{").replace("__cc__", "}")\ | |
109 .replace("__ob__", "[").replace("__cb__", "]") | |
110 param_grid = ast.literal_eval(param_grid) | |
111 grid = validator(pipeline, param_grid, **options) | |
112 grid.fit(X, y) | |
113 rval = getattr(grid, rval_type) | |
114 if rval_type in ["best_estimator_", "best_score_", "best_index_"]: | |
115 rval = [rval] | |
116 else: | |
117 rval = validator(pipeline, X, y, **options) | |
88 | 118 |
89 rval = pandas.DataFrame(rval) | 119 rval = pandas.DataFrame(rval) |
90 rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False) | 120 if rval_type and rval_type == "cv_results_": |
121 rval.to_csv(path_or_buf="$outfile", sep='\t', header=True, index=False) | |
122 else: | |
123 rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False) | |
91 | 124 |
92 ]]> | 125 ]]> |
93 </configfile> | 126 </configfile> |
94 </configfiles> | 127 </configfiles> |
95 <inputs> | 128 <inputs> |
129 <conditional name="feature_selection"> | |
130 <param name="do_feature_selection" type="select" label="Do feature selection?"> | |
131 <option value="No" selected="true"/> | |
132 <option value="Yes"/> | |
133 </param> | |
134 <when value="No"/> | |
135 <when value="Yes"> | |
136 <expand macro="feature_selection_all"/> | |
137 </when> | |
138 </conditional> | |
96 <conditional name="model_validation_functions"> | 139 <conditional name="model_validation_functions"> |
97 <param name="selected_function" type="select" label="Select a model validation function"> | 140 <param name="selected_function" type="select" label="Select a model validation function"> |
141 <option value="GridSearchCV">GridSearchCV - Exhaustive search over specified parameter values for an estimator</option> | |
98 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option> | 142 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option> |
99 <option value="cross_val_predict">cross_val_predict - Generate cross-validated estimates for each input data point</option> | 143 <option value="cross_val_predict">cross_val_predict - Generate cross-validated estimates for each input data point</option> |
100 <option value="cross_val_score">cross_val_score - Evaluate a score by cross-validation</option> | 144 <option value="cross_val_score">cross_val_score - Evaluate a score by cross-validation</option> |
101 <option value="learning_curve">learning_curve - Learning curve</option> | 145 <option value="learning_curve">learning_curve - Learning curve</option> |
102 <option value="permutation_test_score">permutation_test_score - Evaluate the significance of a cross-validated score with permutations</option> | 146 <option value="permutation_test_score">permutation_test_score - Evaluate the significance of a cross-validated score with permutations</option> |
103 <option value="validation_curve">validation_curve - Validation curve</option> | 147 <option value="validation_curve">validation_curve - Validation curve</option> |
104 </param> | 148 </param> |
149 <when value="GridSearchCV"> | |
150 <expand macro="estimator_input_no_fit" /> | |
151 <param argument="param_grid" type="text" value="[{'feature_selector__k': [3, 5, 7, 9], 'estimator__C': [1, 10, 100, 1000]}]" label="param_grid" help="Dictionary with parameter names (string) as keys and lists of parameter settings to try as values, or a list of such dictionaries, in which case the grids spanned by each dictionary in the list are explored"/> | |
152 <section name="options" title="Other Options" expanded="false"> | |
153 <expand macro="scoring"/> | |
154 <expand macro="model_validation_common_options"/> | |
155 <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/> | |
156 <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="Data is identically distributed?"/> | |
157 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/> | |
158 <!--error_score--> | |
159 <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help=""/> | |
160 </section> | |
161 <param name="return_type" type="select" label="Select a return type"> | |
162 <option value="cv_results_" selected="true">cv_results_</option> | |
163 <option value="best_estimator_">best_estimator_</option> | |
164 <option value="best_score_">best_score_</option> | |
165 <option value="best_params_">best_params_</option> | |
166 <option value="best_index_">best_index_</option> | |
167 </param> | |
168 </when> | |
105 <when value="cross_validate"> | 169 <when value="cross_validate"> |
106 <expand macro="feature_selection_estimator" /> | 170 <expand macro="estimator_input_no_fit" /> |
107 <conditional name="extra_estimator"> | |
108 <expand macro="feature_selection_extra_estimator" /> | |
109 <expand macro="feature_selection_estimator_choices" /> | |
110 </conditional> | |
111 <section name="options" title="Other Options" expanded="false"> | 171 <section name="options" title="Other Options" expanded="false"> |
112 <!--groups--> | 172 <!--groups--> |
113 <expand macro="model_validation_common_options"/> | 173 <expand macro="model_validation_common_options"/> |
114 <expand macro="scoring"/> | 174 <expand macro="scoring"/> |
115 <!--fit_params--> | 175 <!--fit_params--> |
121 <option value="fit_time">fit_time</option> | 181 <option value="fit_time">fit_time</option> |
122 <option value="score_time">score_time</option> | 182 <option value="score_time">score_time</option> |
123 </param> | 183 </param> |
124 </when> | 184 </when> |
125 <when value="cross_val_predict"> | 185 <when value="cross_val_predict"> |
126 <expand macro="feature_selection_estimator" /> | 186 <expand macro="estimator_input_no_fit" /> |
127 <conditional name="extra_estimator"> | 187 <section name="options" title="Other Options" expanded="false"> |
128 <expand macro="feature_selection_extra_estimator" /> | 188 <!--groups--> |
129 <expand macro="feature_selection_estimator_choices" /> | 189 <expand macro="model_validation_common_options" /> |
130 </conditional> | |
131 <section name="options" title="Other Options" expanded="false"> | |
132 <!--groups--> | |
133 <param argument="cv" type="integer" value="" optional="true" label="cv" help="The number of folds in a (Stratified)KFold" /> | |
134 <expand macro="n_jobs"/> | |
135 <expand macro="verbose"/> | |
136 <!--fit_params--> | 190 <!--fit_params--> |
137 <param argument="pre_dispatch" type="integer" value="" optional="true" label="pre_dispatch" help="Controls the number of jobs that get dispatched during parallel execution" /> | 191 <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/> |
138 <param argument="method" type="select" label="Invokes the passed method name of the passed estimator"> | 192 <param argument="method" type="select" label="Invokes the passed method name of the passed estimator"> |
139 <option value="predict" selected="true">predict</option> | 193 <option value="predict" selected="true">predict</option> |
140 <option value="predict_proba">predict_proba</option> | 194 <option value="predict_proba">predict_proba</option> |
141 </param> | 195 </param> |
142 </section> | 196 </section> |
143 </when> | 197 </when> |
144 <when value="cross_val_score"> | 198 <when value="cross_val_score"> |
145 <expand macro="feature_selection_estimator" /> | 199 <expand macro="estimator_input_no_fit" /> |
146 <conditional name="extra_estimator"> | |
147 <expand macro="feature_selection_extra_estimator" /> | |
148 <expand macro="feature_selection_estimator_choices" /> | |
149 </conditional> | |
150 <section name="options" title="Other Options" expanded="false"> | 200 <section name="options" title="Other Options" expanded="false"> |
151 <!--groups--> | 201 <!--groups--> |
152 <expand macro="model_validation_common_options"/> | 202 <expand macro="model_validation_common_options"/> |
153 <expand macro="scoring"/> | 203 <expand macro="scoring"/> |
154 <!--fit_params--> | 204 <!--fit_params--> |
155 <expand macro="pre_dispatch"/> | 205 <expand macro="pre_dispatch"/> |
156 </section> | 206 </section> |
157 </when> | 207 </when> |
158 <when value="learning_curve"> | 208 <when value="learning_curve"> |
159 <expand macro="feature_selection_estimator" /> | 209 <expand macro="estimator_input_no_fit" /> |
160 <conditional name="extra_estimator"> | |
161 <expand macro="feature_selection_extra_estimator" /> | |
162 <expand macro="feature_selection_estimator_choices" /> | |
163 </conditional> | |
164 <section name="options" title="Other Options" expanded="false"> | 210 <section name="options" title="Other Options" expanded="false"> |
165 <!--groups--> | 211 <!--groups--> |
166 <expand macro="model_validation_common_options"/> | 212 <expand macro="model_validation_common_options"/> |
167 <param argument="train_sizes" type="text" value="np.linspace(0.1, 1.0, 5)" label="train_sizes" help="Relative or absolute numbers of training examples that will be used to generate the learning curve"/> | 213 <param argument="train_sizes" type="text" value="np.linspace(0.1, 1.0, 5)" label="train_sizes" help="Relative or absolute numbers of training examples that will be used to generate the learning curve"/> |
168 <expand macro="scoring"/> | 214 <expand macro="scoring"/> |
176 <option value="train_scores">train_scores</option> | 222 <option value="train_scores">train_scores</option> |
177 <option value="test_scores">test_scores</option> | 223 <option value="test_scores">test_scores</option> |
178 </param> | 224 </param> |
179 </when> | 225 </when> |
180 <when value="permutation_test_score"> | 226 <when value="permutation_test_score"> |
181 <expand macro="feature_selection_estimator" /> | 227 <expand macro="estimator_input_no_fit" /> |
182 <conditional name="extra_estimator"> | |
183 <expand macro="feature_selection_extra_estimator" /> | |
184 <expand macro="feature_selection_estimator_choices" /> | |
185 </conditional> | |
186 <section name="options" title="Other Options" expanded="false"> | 228 <section name="options" title="Other Options" expanded="false"> |
187 <!--groups--> | 229 <!--groups--> |
188 <expand macro="model_validation_common_options"/> | 230 <expand macro="model_validation_common_options"/> |
189 <expand macro="scoring"/> | 231 <expand macro="scoring"/> |
190 <param name="n_permutations" type="integer" value="100" optional="true" label="n_permutations" help="Number of times to permute y"/> | 232 <param name="n_permutations" type="integer" value="100" optional="true" label="n_permutations" help="Number of times to permute y"/> |
195 <option value="permutation_scores">permutation_scores</option> | 237 <option value="permutation_scores">permutation_scores</option> |
196 <option value="pvalue">pvalue</option> | 238 <option value="pvalue">pvalue</option> |
197 </param> | 239 </param> |
198 </when> | 240 </when> |
199 <when value="validation_curve"> | 241 <when value="validation_curve"> |
200 <expand macro="feature_selection_estimator" /> | 242 <expand macro="estimator_input_no_fit" /> |
201 <conditional name="extra_estimator"> | |
202 <expand macro="feature_selection_extra_estimator" /> | |
203 <expand macro="feature_selection_estimator_choices" /> | |
204 </conditional> | |
205 <section name="options" title="Other Options" expanded="false"> | 243 <section name="options" title="Other Options" expanded="false"> |
206 <param name="param_name" type="text" value="gamma" label="param_name" help="Name of the parameter that will be varied"/> | 244 <param name="param_name" type="text" value="gamma" label="param_name" help="Name of the parameter that will be varied"/> |
207 <param name="param_range" type="text" value="np.logspace(-6, -1, 5)" label="param_range" help="The values of the parameter that will be evaluated."/> | 245 <param name="param_range" type="text" value="np.logspace(-6, -1, 5)" label="param_range" help="The values of the parameter that will be evaluated."/> |
208 <!--groups--> | 246 <!--groups--> |
209 <expand macro="model_validation_common_options"/> | 247 <expand macro="model_validation_common_options"/> |
285 <param name="header2" value="true" /> | 323 <param name="header2" value="true" /> |
286 <param name="col2" value="1"/> | 324 <param name="col2" value="1"/> |
287 <param name="return_type" value="test_scores"/> | 325 <param name="return_type" value="test_scores"/> |
288 <output name="outfile" file="mv_result06.tabular"/> | 326 <output name="outfile" file="mv_result06.tabular"/> |
289 </test> | 327 </test> |
328 <test> | |
329 <param name="do_feature_selection" value="Yes"/> | |
330 <param name="selected_algorithm" value="SelectKBest"/> | |
331 <param name="score_func" value="chi2"/> | |
332 <param name="selected_function" value="GridSearchCV"/> | |
333 <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/> | |
334 <param name="has_estimator" value="yes"/> | |
335 <param name="param_grid" value="[{'feature_selector__k': [3, 7], 'estimator__C': [1, 100]}]"/> | |
336 <param name="return_type" value="best_score_"/> | |
337 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
338 <param name="header1" value="true" /> | |
339 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | |
340 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
341 <param name="header2" value="true" /> | |
342 <param name="col2" value="1"/> | |
343 <output name="outfile" file="mv_result07.tabular"/> | |
344 </test> | |
290 </tests> | 345 </tests> |
291 <help> | 346 <help> |
292 <![CDATA[ | 347 <![CDATA[ |
293 **What it does** | 348 **What it does** |
294 This tool includes model validation functions to evaluate estimator performance in the cross-validation approach. This tool is based on | 349 This tool includes model validation functions to evaluate estimator performance in the cross-validation approach. This tool is based on |