Mercurial > repos > bgruening > sklearn_build_pipeline
comparison pipeline.xml @ 0:f8b431b981fa draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 76583c1fcd9d06a4679cc46ffaee44117b9e22cd
author | bgruening |
---|---|
date | Sat, 04 Aug 2018 12:14:28 -0400 |
parents | |
children | ddd8c51b7302 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f8b431b981fa |
---|---|
1 <tool id="sklearn_build_pipeline" name="Pipeline Builder" version="@VERSION@"> | |
2 <description>constructs a list of transforms and a final estimator</description> | |
3 <macros> | |
4 <import>main_macros.xml</import> | |
5 </macros> | |
6 <expand macro="python_requirements"/> | |
7 <expand macro="macro_stdio"/> | |
8 <version_command>echo "@VERSION@"</version_command> | |
9 <command> | |
10 <![CDATA[ | |
11 python "$sklearn_pipeline_script" '$inputs' | |
12 ]]> | |
13 </command> | |
14 <configfiles> | |
15 <inputs name="inputs" /> | |
16 <configfile name="sklearn_pipeline_script"> | |
17 <![CDATA[ | |
18 import sys | |
19 import json | |
20 import pickle | |
21 import pprint | |
22 import xgboost | |
23 import ast | |
24 import sklearn.feature_selection | |
25 from sklearn import ( preprocessing, svm, linear_model, ensemble, naive_bayes, | |
26 tree, neighbors, decomposition, kernel_approximation, cluster) | |
27 from sklearn.pipeline import Pipeline | |
28 | |
29 @GET_ESTIMATOR_FUNCTION@ | |
30 @FEATURE_SELECTOR_FUNCTION@ | |
31 | |
# Steps are accumulated here as (name, object) tuples before the Pipeline is built.
pipeline_steps = []

# The path of the JSON parameter file is the first command-line argument.
input_json_path = sys.argv[1]
with open(input_json_path, "r") as json_handle:
    params = json.load(json_handle)
37 | |
def _build_from_text_params(module, selector_json):
    """Instantiate the selected algorithm from *module* and apply its text parameters.

    ``selector_json['select_algorithm']`` names the class to look up on *module*;
    the class is created with its default arguments. ``selector_json['text_params']``
    holds the inside of a dict literal (for example ``'n_components': 10``); when it
    is not blank it is parsed with ``ast.literal_eval`` and passed to ``set_params``.
    """
    component = getattr(module, selector_json['select_algorithm'])()
    text_params = selector_json['text_params'].strip()
    if text_params != "":
        # literal_eval only accepts Python literals, so no arbitrary code is run.
        component.set_params(**ast.literal_eval('{' + text_params + '}'))
    return component


def get_component(input_json, check_none=False):
    """Build one pre-processing pipeline step from its JSON description.

    Parameters
    ----------
    input_json : dict
        The ``component_selector`` entry of one pipeline component. Its
        ``component_type`` key decides which sub-dictionary is read.
    check_none : bool, default False
        When true, an empty component type terminates the script instead of
        being skipped.

    Returns
    -------
    The instantiated component, or ``None`` when the component type is empty
    (and ``check_none`` is false) or is not one of the recognised types.

    Raises
    ------
    SystemExit
        If the component type is empty and ``check_none`` is true.
    """
    component_type = input_json['component_type']

    # The select option in the tool form carries the lower-case value "none",
    # while the tool tests use "None"; both spellings mean "no component".
    # Checking only 'None' let a "none" step slip through as a None pipeline step.
    if component_type in ('None', 'none'):
        if check_none:
            sys.exit("The pre-processing component type can't be None when the number of components is greater than 1.")
        return None

    if component_type == 'pre_processor':
        pre_processors = input_json["pre_processors"]
        preprocessor_class = getattr(preprocessing, pre_processors["selected_pre_processor"])
        return preprocessor_class(**pre_processors["options"])

    if component_type == 'feature_selection':
        return feature_selector(input_json['fs_algorithm_selector'])

    # The remaining types share one shape: an algorithm name plus free-text parameters.
    if component_type == 'decomposition':
        return _build_from_text_params(decomposition, input_json['matrix_decomposition_selector'])

    if component_type == 'kernel_approximation':
        return _build_from_text_params(kernel_approximation, input_json['kernel_approximation_selector'])

    if component_type == 'FeatureAgglomeration':
        return _build_from_text_params(cluster, input_json['FeatureAgglomeration_selector'])

    # Unrecognised types keep the original behaviour of yielding no component.
    return None
# Collect the pre-processing steps. A single component may be empty and is then
# left out; with several components each one is requested with check_none=True.
components = params['pipeline_component']
if len(components) != 1:
    for position, component in enumerate(components, start=1):
        step_obj = get_component(component['component_selector'], check_none=True)
        pipeline_steps.append(('preprocessing_' + str(position), step_obj))
else:
    step_obj = get_component(components[0]['component_selector'])
    if step_obj:
        pipeline_steps.append(('preprocessing_1', step_obj))

# The final estimator always closes the pipeline.
estimator = get_estimator(params["final_estimator"]['estimator_selector'])
pipeline_steps.append(('estimator', estimator))

pipeline = Pipeline(pipeline_steps)
pprint.pprint(pipeline.named_steps)

# Serialise the assembled pipeline to the tool output file.
with open("$outfile", 'wb') as pickle_handle:
    pickle.dump(pipeline, pickle_handle, pickle.HIGHEST_PROTOCOL)
96 | |
97 ]]> | |
98 </configfile> | |
99 </configfiles> | |
100 <inputs> | |
101 <repeat name="pipeline_component" min="1" max="5" title="Pre-processing step"> | |
102 <conditional name="component_selector"> | |
103 <param name="component_type" type="select" label="Choose the type of transformation:"> | |
104 <option value="none" selected="true">None</option> | |
105 <option value="pre_processor">Sklearn Preprocessor</option> | |
106 <option value="feature_selection">Feature Selection</option> | |
107 <option value="decomposition">Matrix Decomposition</option> | |
108 <option value="kernel_approximation">Kernel Approximation</option> | |
109 <option value="FeatureAgglomeration">Agglomerate Features</option> | |
110 </param> | |
111 <when value="none"/> | |
112 <when value="pre_processor"> | |
113 <conditional name="pre_processors"> | |
114 <expand macro="sparse_preprocessors_ext" /> | |
115 <expand macro="sparse_preprocessor_options_ext" /> | |
116 </conditional> | |
117 </when> | |
118 <when value="feature_selection"> | |
119 <expand macro="feature_selection_all"> | |
120 <expand macro="fs_selectfrommodel_no_prefitted"/> | |
121 </expand> | |
122 </when> | |
123 <when value="decomposition"> | |
124 <expand macro="matrix_decomposition_all"/> | |
125 </when> | |
126 <when value="kernel_approximation"> | |
127 <expand macro="kernel_approximation_all"/> | |
128 </when> | |
129 <when value="FeatureAgglomeration"> | |
130 <expand macro="FeatureAgglomeration"/> | |
131 </when> | |
132 </conditional> | |
133 </repeat> | |
134 <section name="final_estimator" title="Final Estimator" expanded="true"> | |
135 <expand macro="estimator_selector_all" /> | |
136 </section> | |
137 </inputs> | |
138 <outputs> | |
139 <data format="zip" name="outfile"/> | |
140 </outputs> | |
141 <tests> | |
142 <test> | |
143 <repeat name="pipeline_component"> | |
144 <conditional name="component_selector"> | |
145 <param name="component_type" value="pre_processor"/> | |
146 <conditional name="pre_processors"> | |
147 <param name="selected_pre_processor" value="RobustScaler"/> | |
148 </conditional> | |
149 </conditional> | |
150 </repeat> | |
151 <repeat name="pipeline_component"> | |
152 <conditional name="component_selector"> | |
153 <param name="component_type" value="feature_selection"/> | |
154 <conditional name="fs_algorithm_selector"> | |
155 <param name="selected_algorithm" value="SelectKBest"/> | |
156 <param name="score_func" value="f_classif"/> | |
157 </conditional> | |
158 </conditional> | |
159 </repeat> | |
160 <param name="selected_module" value="svm"/> | |
161 <param name="selected_estimator" value="SVR"/> | |
162 <param name="text_params" value="'kernel': 'linear'"/> | |
163 <output name="outfile" file="pipeline01" compare="sim_size" delta="1"/> | |
164 </test> | |
165 <test> | |
166 <conditional name="component_selector"> | |
167 <param name="component_type" value="pre_processor"/> | |
168 <conditional name="pre_processors"> | |
169 <param name="selected_pre_processor" value="RobustScaler"/> | |
170 </conditional> | |
171 </conditional> | |
172 <param name="selected_module" value="linear_model"/> | |
173 <param name="selected_estimator" value="LassoCV"/> | |
174 <output name="outfile" file="pipeline02" compare="sim_size" delta="1"/> | |
175 </test> | |
176 <test> | |
177 <conditional name="component_selector"> | |
178 <param name="component_type" value="pre_processor"/> | |
179 <conditional name="pre_processors"> | |
180 <param name="selected_pre_processor" value="RobustScaler"/> | |
181 </conditional> | |
182 </conditional> | |
183 <param name="selected_module" value="xgboost"/> | |
184 <param name="selected_estimator" value="XGBClassifier"/> | |
185 <output name="outfile" file="pipeline03" compare="sim_size" delta="1"/> | |
186 </test> | |
187 <test> | |
188 <conditional name="component_selector"> | |
189 <param name="component_type" value="feature_selection"/> | |
190 <conditional name="fs_algorithm_selector"> | |
191 <param name="selected_algorithm" value="SelectFromModel"/> | |
192 <conditional name="model_inputter"> | |
193 <conditional name="estimator_selector"> | |
194 <param name="selected_module" value="ensemble"/> | |
195 <param name="selected_estimator" value="AdaBoostClassifier"/> | |
196 </conditional> | |
197 </conditional> | |
198 </conditional> | |
199 </conditional> | |
200 <section name="final_estimator"> | |
201 <param name="selected_module" value="svm"/> | |
202 <param name="selected_estimator" value="LinearSVC"/> | |
203 </section> | |
204 <output name="outfile" file="pipeline04" compare="sim_size" delta="1"/> | |
205 </test> | |
206 <test> | |
207 <conditional name="component_selector"> | |
208 <param name="component_type" value="None"/> | |
209 </conditional> | |
210 <param name="selected_module" value="ensemble"/> | |
211 <param name="selected_estimator" value="RandomForestRegressor"/> | |
212 <param name="text_params" value="'n_estimators': 100, 'random_state': 42"/> | |
213 <output name="outfile" file="pipeline05" compare="sim_size" delta="1"/> | |
214 </test> | |
215 <test> | |
216 <conditional name="component_selector"> | |
217 <param name="component_type" value="decomposition"/> | |
218 <conditional name="matrix_decomposition_selector"> | |
219 <param name="select_algorithm" value="PCA"/> | |
220 </conditional> | |
221 </conditional> | |
222 <param name="selected_module" value="ensemble"/> | |
223 <param name="selected_estimator" value="AdaBoostRegressor"/> | |
224 <output name="outfile" file="pipeline06" compare="sim_size" delta="1"/> | |
225 </test> | |
226 <test> | |
227 <conditional name="component_selector"> | |
228 <param name="component_type" value="kernel_approximation"/> | |
229 <conditional name="kernel_approximation_selector"> | |
230 <param name="select_algorithm" value="RBFSampler"/> | |
231 <param name="text_params" value="'n_components': 10, 'gamma': 2.0"/> | |
232 </conditional> | |
233 </conditional> | |
234 <param name="selected_module" value="ensemble"/> | |
235 <param name="selected_estimator" value="AdaBoostClassifier"/> | |
236 <output name="outfile" file="pipeline07" compare="sim_size" delta="1"/> | |
237 </test> | |
238 <test> | |
239 <conditional name="component_selector"> | |
240 <param name="component_type" value="FeatureAgglomeration"/> | |
241 <conditional name="FeatureAgglomeration_selector"> | |
242 <param name="select_algorithm" value="FeatureAgglomeration"/> | |
243 <param name="text_params" value="'n_clusters': 3, 'affinity': 'euclidean'"/> | |
244 </conditional> | |
245 </conditional> | |
246 <param name="selected_module" value="ensemble"/> | |
247 <param name="selected_estimator" value="AdaBoostClassifier"/> | |
248 <output name="outfile" file="pipeline08" compare="sim_size" delta="1"/> | |
249 </test> | |
250 </tests> | |
251 <help> | |
252 <![CDATA[ | |
253 **What it does** | |
254 Constructs a pipeline that contains a list of transforms and a final estimator. The pipeline assembles several steps | |
255 that can be cross-validated together while setting different parameters. | |
256 For details, please refer to `Scikit-learn pipeline Pipeline`_. | |
257 | |
258 **Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_ and/or `cluster.FeatureAgglomeration`_. | |
259 | |
260 **Estimator** selector supports estimators from `xgboost`_ and many scikit-learn modules, including `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_ and `neighbors`_. | |
261 | |
262 | |
263 .. _`Scikit-learn pipeline Pipeline`: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html | |
264 .. _`svm`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.svm | |
265 .. _`linear_model`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.linear_model | |
266 .. _`ensemble`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.ensemble | |
267 .. _`naive_bayes`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.naive_bayes | |
268 .. _`tree`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.tree | |
269 .. _`neighbors`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.neighbors | |
270 .. _`xgboost`: https://xgboost.readthedocs.io/en/latest/python/python_api.html | |
271 | |
272 .. _`sklearn.preprocessing`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing | |
273 .. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection | |
274 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition | |
275 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation | |
276 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html | |
277 | |
278 ]]> | |
279 </help> | |
280 <expand macro="sklearn_citation"/> | |
281 </tool> |