Mercurial > repos > bgruening > sklearn_build_pipeline
diff pipeline.xml @ 3:ddd8c51b7302 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
author | bgruening |
---|---|
date | Fri, 17 Aug 2018 12:19:41 -0400 |
parents | f8b431b981fa |
children | 9e189ce7d859 |
line wrap: on
line diff
--- a/pipeline.xml Tue Aug 07 05:39:39 2018 -0400 +++ b/pipeline.xml Fri Aug 17 12:19:41 2018 -0400 @@ -3,7 +3,9 @@ <macros> <import>main_macros.xml</import> </macros> - <expand macro="python_requirements"/> + <expand macro="python_requirements"> + <requirement type="package" version="0.6">skrebate</requirement> + </expand> <expand macro="macro_stdio"/> <version_command>echo "@VERSION@"</version_command> <command> @@ -16,18 +18,18 @@ <configfile name="sklearn_pipeline_script"> <![CDATA[ import sys +import os import json import pickle import pprint -import xgboost -import ast -import sklearn.feature_selection -from sklearn import ( preprocessing, svm, linear_model, ensemble, naive_bayes, +import skrebate +from sklearn import (preprocessing, svm, linear_model, ensemble, naive_bayes, tree, neighbors, decomposition, kernel_approximation, cluster) from sklearn.pipeline import Pipeline -@GET_ESTIMATOR_FUNCTION@ -@FEATURE_SELECTOR_FUNCTION@ +execfile("$__tool_directory__/utils.py") + +safe_eval = SafeEval() input_json_path = sys.argv[1] with open(input_json_path, "r") as param_handler: @@ -45,34 +47,44 @@ preprocessor = input_json["pre_processors"]["selected_pre_processor"] pre_processor_options = input_json["pre_processors"]["options"] my_class = getattr(preprocessing, preprocessor) - return my_class(**pre_processor_options) - if input_json['component_type'] == 'feature_selection': - fs_obj = feature_selector(input_json['fs_algorithm_selector']) - return fs_obj - if input_json['component_type'] == 'decomposition': + obj = my_class(**pre_processor_options) + elif input_json['component_type'] == 'feature_selection': + obj = feature_selector(input_json['fs_algorithm_selector']) + elif input_json['component_type'] == 'decomposition': algorithm = input_json['matrix_decomposition_selector']['select_algorithm'] obj = getattr(decomposition, algorithm)() options = input_json['matrix_decomposition_selector']['text_params'].strip() if options != "": - options = ast.literal_eval('{' + options + '}') + options = safe_eval('dict(' + options + ')') obj.set_params(**options) - return obj - if input_json['component_type'] == 'kernel_approximation': + elif input_json['component_type'] == 'kernel_approximation': algorithm = input_json['kernel_approximation_selector']['select_algorithm'] obj = getattr(kernel_approximation, algorithm)() options = input_json['kernel_approximation_selector']['text_params'].strip() if options != "": - options = ast.literal_eval('{' + options + '}') + options = safe_eval('dict(' + options + ')') obj.set_params(**options) - return obj - if input_json['component_type'] == 'FeatureAgglomeration': + elif input_json['component_type'] == 'FeatureAgglomeration': algorithm = input_json['FeatureAgglomeration_selector']['select_algorithm'] obj = getattr(cluster, algorithm)() options = input_json['FeatureAgglomeration_selector']['text_params'].strip() if options != "": - options = ast.literal_eval('{' + options + '}') + options = safe_eval('dict(' + options + ')') obj.set_params(**options) - return obj + elif input_json['component_type'] == 'skrebate': + algorithm = input_json['skrebate_selector']['select_algorithm'] + if algorithm == 'TuRF': + obj = getattr(skrebate, algorithm)(core_algorithm='ReliefF') + else: + obj = getattr(skrebate, algorithm)() + options = input_json['skrebate_selector']['text_params'].strip() + if options != "": + options = safe_eval('dict(' + options + ')') + obj.set_params(**options) + if 'n_jobs' in obj.get_params(): + obj.set_params( n_jobs=N_JOBS ) + return obj + if len(params['pipeline_component']) == 1: step_obj = get_component( params['pipeline_component'][0]['component_selector']) if step_obj: @@ -101,14 +113,15 @@ <repeat name="pipeline_component" min="1" max="5" title="Pre-processing step"> <conditional name="component_selector"> <param name="component_type" type="select" label="Choose the type of transformation:"> - <option value="none" selected="true">None</option> + <option value="None" selected="true">None</option> <option value="pre_processor">Sklearn Preprocessor</option> <option value="feature_selection">Feature Selection</option> <option value="decomposition">Matrix Decomposition</option> <option value="kernel_approximation">Kernel Approximation</option> <option value="FeatureAgglomeration">Agglomerate Features</option> + <option value="skrebate">Skrebate algorithm</option> </param> - <when value="none"/> + <when value="None"/> <when value="pre_processor"> <conditional name="pre_processors"> <expand macro="sparse_preprocessors_ext" /> @@ -129,6 +142,9 @@ <when value="FeatureAgglomeration"> <expand macro="FeatureAgglomeration"/> </when> + <when value="skrebate"> + <expand macro="skrebate"/> + </when> </conditional> </repeat> <section name="final_estimator" title="Final Estimator" expanded="true"> @@ -159,7 +175,7 @@ </repeat> <param name="selected_module" value="svm"/> <param name="selected_estimator" value="SVR"/> - <param name="text_params" value="'kernel': 'linear'"/> + <param name="text_params" value="kernel='linear'"/> <output name="outfile" file="pipeline01" compare="sim_size" delta="1"/> </test> <test> @@ -209,7 +225,7 @@ </conditional> <param name="selected_module" value="ensemble"/> <param name="selected_estimator" value="RandomForestRegressor"/> - <param name="text_params" value="'n_estimators': 100, 'random_state': 42"/> + <param name="text_params" value="n_estimators=100, random_state=42"/> <output name="outfile" file="pipeline05" compare="sim_size" delta="1"/> </test> <test> @@ -228,7 +244,7 @@ <param name="component_type" value="kernel_approximation"/> <conditional name="kernel_approximation_selector"> <param name="select_algorithm" value="RBFSampler"/> - <param name="text_params" value="'n_components': 10, 'gamma': 2.0"/> + <param name="text_params" value="n_components=10, gamma=2.0"/> </conditional> </conditional> <param name="selected_module" value="ensemble"/> @@ -240,13 +256,37 @@ <param name="component_type" value="FeatureAgglomeration"/> <conditional name="FeatureAgglomeration_selector"> <param name="select_algorithm" value="FeatureAgglomeration"/> - <param name="text_params" value="'n_clusters': 3, 'affinity': 'euclidean'"/> + <param name="text_params" value="n_clusters=3, affinity='euclidean'"/> </conditional> </conditional> <param name="selected_module" value="ensemble"/> <param name="selected_estimator" value="AdaBoostClassifier"/> <output name="outfile" file="pipeline08" compare="sim_size" delta="1"/> </test> + <test> + <conditional name="component_selector"> + <param name="component_type" value="skrebate"/> + <conditional name="skrebate_selector"> + <param name="select_algorithm" value="ReliefF"/> + <param name="text_params" value="n_features_to_select=3, n_neighbors=100"/> + </conditional> + </conditional> + <param name="selected_module" value="ensemble"/> + <param name="selected_estimator" value="RandomForestRegressor"/> + <output name="outfile" file="pipeline09" compare="sim_size" delta="1"/> + </test> + <test> + <conditional name="component_selector"> + <param name="component_type" value="skrebate"/> + <conditional name="skrebate_selector"> + <param name="select_algorithm" value="TuRF"/> + <param name="text_params" value=""/> + </conditional> + </conditional> + <param name="selected_module" value="ensemble"/> + <param name="selected_estimator" value="RandomForestRegressor"/> + <output name="outfile" file="pipeline10" compare="sim_size" delta="1"/> + </test> </tests> <help> <![CDATA[ @@ -255,7 +295,7 @@ that can be cross-validated together while setting different parameters. please refer to `Scikit-learn pipeline Pipeline`_. -**Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_ and/or `cluster.FeatureAgglomeration`_. +**Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_ and/or `skrebate`_. **Estimator** selector supports estimators from `xgboost`_ and many scikit-learn modules, including `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_ and `neighbors`_. @@ -274,6 +314,7 @@ .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html +.. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/ ]]> </help>