Mercurial > repos > bgruening > sklearn_build_pipeline
diff pipeline.xml @ 24:15815a470e6b draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author | bgruening |
---|---|
date | Wed, 09 Aug 2023 12:06:37 +0000 |
parents | 449bd57f70f4 |
children |
line wrap: on
line diff
--- a/pipeline.xml Thu Aug 11 07:59:30 2022 +0000 +++ b/pipeline.xml Wed Aug 09 12:06:37 2023 +0000 @@ -1,4 +1,4 @@ -<tool id="sklearn_build_pipeline" name="Pipeline Builder" version="@VERSION@" profile="20.05"> +<tool id="sklearn_build_pipeline" name="Pipeline Builder" version="@VERSION@" profile="@PROFILE@"> <description>an all-in-one platform to build pipeline, single estimator, preprocessor and custom wrappers</description> <macros> <import>main_macros.xml</import> @@ -18,7 +18,6 @@ import imblearn import json import pandas as pd -import pickle import pprint import skrebate import sys @@ -30,11 +29,9 @@ svm, linear_model, tree, discriminant_analysis) from sklearn.pipeline import make_pipeline from imblearn.pipeline import make_pipeline as imb_make_pipeline +from galaxy_ml.model_persist import dump_model_to_h5, load_model_from_h5 from galaxy_ml.utils import (SafeEval, feature_selector, get_estimator, - try_get_attr, get_search_params, load_model) - -## TODO remove following imports after scikit-learn v0.22 -from sklearn.experimental import enable_hist_gradient_boosting + try_get_attr, get_search_params) N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) @@ -182,10 +179,8 @@ regressor_path = '$final_estimator.estimator_selector.regressor' transformer_path = '$final_estimator.estimator_selector.transformer' #end if - with open(regressor_path, 'rb') as f: - regressor = load_model(f) - with open(transformer_path, 'rb') as f: - transformer = load_model(f) + regressor = load_model_from_h5(regressor_path) + transformer = load_model_from_h5(transformer_path) estimator = compose.TransformedTargetRegressor(regressor=regressor, transformer=transformer) pipeline_steps.append( estimator ) else: @@ -202,14 +197,8 @@ out_obj = make_pipeline(*pipeline_steps) pprint.pprint(out_obj.named_steps) -with open('$outfile', 'wb') as out_handler: - pickle.dump(out_obj, out_handler, pickle.HIGHEST_PROTOCOL) +dump_model_to_h5(out_obj, '$outfile', verbose=0) -#if $get_params -results = get_search_params(out_obj) -df = pd.DataFrame(results, columns=['', 'Parameter', 'Value']) -df.to_csv('$outfile_params', sep='\t', index=False) -#end if ]]> </configfile> </configfiles> @@ -254,7 +243,9 @@ <expand macro="imbalanced_learn_sampling" /> </when> <when value="IRAPS"> - <expand macro="estimator_params_text" label="Type in parameter settings for IRAPSCore if different from default:" help="Default(=blank): n_iter=1000, responsive_thres=-1, resistant_thres=0, random_state=None. No double quotes" /> + <expand macro="estimator_params_text" + label="Type in parameter settings for IRAPSCore if different from default:" + help="Default(=blank): n_iter=1000, responsive_thres=-1, resistant_thres=0, random_state=None. No double quotes" /> <param argument="p_thres" type="float" value="0.001" label="P value threshold" help="Float. default=0.001" /> <param argument="fc_thres" type="float" value="0.1" label="fold change threshold" help="Float. default=0.1" /> <param argument="occurrence" type="float" value="0.7" label="reservation factor" help="Float. default=0.7" /> @@ -267,7 +258,7 @@ </repeat> <section name="final_estimator" title="Final Estimator" expanded="true"> <conditional name="estimator_selector"> - <param name="selected_module" type="select" label="Choose the module that contains target estimator:"> + <param name="selected_module" type="select" label="Choose the module that contains target estimator:" > <expand macro="estimator_module_options"> <option value="sklearn.compose">sklearn.compose</option> <option value="binarize_target">Binarize Target Classifier or Regressor</option> @@ -280,21 +271,21 @@ <param name="selected_estimator" type="select" label="Choose estimator class:"> <option value="TransformedTargetRegressor" selected="true">TransformedTargetRegressor</option> </param> - <param name="regressor" type="data" format="zip" label="Choose the dataset containing the wrapped regressor" /> - <param name="transformer" type="data" format="zip" label="Choose the dataset containing transformer" /> + <param name="regressor" type="data" format="h5mlm" label="Choose the dataset containing the wrapped regressor" /> + <param name="transformer" type="data" format="h5mlm" label="Choose the dataset containing transformer" /> </when> <when value="binarize_target"> <param name="clf_or_regr" type="select" label="Classifier or Regressor:"> <option value="BinarizeTargetClassifier">BinarizeTargetClassifier</option> <option value="BinarizeTargetRegressor">BinarizeTargetRegressor</option> </param> - <param name="wrapped_estimator" type="data" format="zip" label="Choose the dataset containing the wrapped estimator or pipeline" /> + <param name="wrapped_estimator" type="data" format="h5mlm" label="Choose the dataset containing the wrapped estimator or pipeline" /> <param name='z_score' type="float" value="-1" optional="false" label="Discrize target values using z_score" /> <param name='value' type="float" value="" optional="true" label="Discretize target values using a fixed value instead" help="Optional. default: None." /> <param name="less_is_positive" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Are the detecting values smaller than others?" /> </when> <when value="custom_estimator"> - <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the custom estimator or pipeline" /> + <param name="c_estimator" type="data" format="h5mlm" label="Choose the dataset containing the custom estimator or pipeline" /> </when> <when value="none" /> </expand> @@ -304,13 +295,9 @@ <option value="Pipeline_Builder" selected="true">Pipeline</option> <option value="Final_Estimator_Builder">Final Estimator</option> </param>--> - <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Output parameters for searchCV?" help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool." /> </inputs> <outputs> - <data format="zip" name="outfile" label="New Pipleline/Estimator" /> - <data format="tabular" name="outfile_params" label="get_params for Pipleline/Estimator"> - <filter>get_params</filter> - </data> + <data format="h5mlm" name="outfile" label="New Pipleline/Estimator" /> </outputs> <tests> <test> @@ -328,7 +315,7 @@ <param name="selected_module" value="none" /> </conditional> </section> - <output name="outfile" file="pipeline17" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline17" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> @@ -340,12 +327,11 @@ <section name="final_estimator"> <conditional name="estimator_selector"> <param name="selected_module" value="sklearn.compose" /> - <param name="regressor" value="RandomForestRegressor01.zip" ftype="zip" /> - <param name="transformer" value="pipeline17" ftype="zip" /> + <param name="regressor" value="RandomForestRegressor01.h5mlm" ftype="h5mlm" /> + <param name="transformer" value="pipeline17" ftype="h5mlm" /> </conditional> </section> - <param name="get_params" value="true" /> - <output name="outfile_params" file="pipeline_params18" ftype="tabular" /> + <output name="outfile" file="pipeline18" compare="sim_size" delta="5" /> </test> <test> <repeat name="pipeline_component"> @@ -368,7 +354,7 @@ <param name="selected_module" value="svm" /> <param name="selected_estimator" value="SVR" /> <param name="text_params" value="kernel='linear'" /> - <output name="outfile" file="pipeline01" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline01" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> @@ -379,7 +365,7 @@ </conditional> <param name="selected_module" value="linear_model" /> <param name="selected_estimator" value="LassoCV" /> - <output name="outfile" file="pipeline02" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline02" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> @@ -390,7 +376,7 @@ </conditional> <param name="selected_module" value="xgboost" /> <param name="selected_estimator" value="XGBClassifier" /> - <output name="outfile" file="pipeline03" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline03" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> @@ -409,7 +395,7 @@ <param name="selected_module" value="svm" /> <param name="selected_estimator" value="LinearSVC" /> </section> - <output name="outfile" file="pipeline04" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline04" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> @@ -418,56 +404,54 @@ <param name="selected_module" value="ensemble" /> <param name="selected_estimator" value="RandomForestRegressor" /> <param name="text_params" value="n_estimators=100, random_state=42" /> - <param name="get_params" value="true" /> - <output name="outfile" file="pipeline05" compare="sim_size" delta="30" /> - <output name="outfile_params" file="pipeline_params05.tabular" ftype="tabular" /> + <output name="outfile" file="pipeline05" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> <param name="component_type" value="decomposition" /> - <conditional name="matrix_decomposition_selector"> - <param name="select_algorithm" value="PCA" /> - </conditional> + <conditional name="matrix_decomposition_selector"> + <param name="select_algorithm" value="PCA" /> + </conditional> </conditional> <param name="selected_module" value="ensemble" /> <param name="selected_estimator" value="AdaBoostRegressor" /> - <output name="outfile" file="pipeline06" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline06" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> <param name="component_type" value="kernel_approximation" /> - <conditional name="kernel_approximation_selector"> - <param name="select_algorithm" value="RBFSampler" /> - <param name="text_params" value="n_components=10, gamma=2.0" /> - </conditional> + <conditional name="kernel_approximation_selector"> + <param name="select_algorithm" value="RBFSampler" /> + <param name="text_params" value="n_components=10, gamma=2.0" /> + </conditional> </conditional> <param name="selected_module" value="ensemble" /> <param name="selected_estimator" value="AdaBoostClassifier" /> - <output name="outfile" file="pipeline07" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline07" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> <param name="component_type" value="FeatureAgglomeration" /> - <conditional name="FeatureAgglomeration_selector"> - <param name="select_algorithm" value="FeatureAgglomeration" /> - <param name="text_params" value="n_clusters=3, affinity='euclidean'" /> - </conditional> + <conditional name="FeatureAgglomeration_selector"> + <param name="select_algorithm" value="FeatureAgglomeration" /> + <param name="text_params" value="n_clusters=3, affinity='euclidean'" /> + </conditional> </conditional> <param name="selected_module" value="ensemble" /> <param name="selected_estimator" value="AdaBoostClassifier" /> - <output name="outfile" file="pipeline08" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline08" compare="sim_size" delta="20" /> </test> <test> <conditional name="component_selector"> <param name="component_type" value="skrebate" /> - <conditional name="skrebate_selector"> - <param name="select_algorithm" value="ReliefF" /> - <param name="text_params" value="n_features_to_select=3, n_neighbors=100" /> - </conditional> + <conditional name="skrebate_selector"> + <param name="select_algorithm" value="ReliefF" /> + <param name="text_params" value="n_features_to_select=3, n_neighbors=100" /> + </conditional> </conditional> <param name="selected_module" value="ensemble" /> <param name="selected_estimator" value="RandomForestRegressor" /> - <output name="outfile" file="pipeline09" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline09" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> @@ -478,7 +462,7 @@ </conditional> <param name="selected_module" value="ensemble" /> <param name="selected_estimator" value="RandomForestClassifier" /> - <output name="outfile" file="pipeline11" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline11" compare="sim_size" delta="5" /> </test> <test expect_failure="true"> <conditional name="component_selector"> @@ -505,7 +489,7 @@ <param name="selected_module" value="none" /> </conditional> </section> - <output name="outfile" file="pipeline12" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline12" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> @@ -513,7 +497,7 @@ </conditional> <param name="selected_module" value="ensemble" /> <param name="selected_estimator" value="RandomForestClassifier" /> - <output name="outfile" file="RandomForestClassifier.zip" compare="sim_size" delta="30" /> + <output name="outfile" file="RandomForestClassifier.h5mlm" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> @@ -524,7 +508,7 @@ <param name="selected_module" value="none" /> </conditional> </section> - <output name="outfile" file="pipeline14" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline14" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> @@ -534,10 +518,10 @@ <conditional name="estimator_selector"> <param name="selected_module" value="binarize_target" /> <param name="clf_or_regr" value="BinarizeTargetClassifier" /> - <param name="wrapped_estimator" value="RandomForestClassifier.zip" ftype="zip" /> + <param name="wrapped_estimator" value="RandomForestClassifier.h5mlm" ftype="h5mlm" /> </conditional> </section> - <output name="outfile" file="pipeline15" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline15" compare="sim_size" delta="5" /> </test> <test> <conditional name="component_selector"> @@ -551,10 +535,10 @@ <section name="final_estimator"> <conditional name="estimator_selector"> <param name="selected_module" value="custom_estimator" /> - <param name="c_estimator" value="keras_model02" ftype="zip" /> + <param name="c_estimator" value="keras_model02" ftype="h5mlm" /> </conditional> </section> - <output name="outfile" file="pipeline16" compare="sim_size" delta="30" /> + <output name="outfile" file="pipeline16" compare="sim_size" delta="5" /> </test> </tests> <help> @@ -583,9 +567,9 @@ **Output** -- Pickled pipeline/estimator object +- Pipeline/estimator object -- Hyperparameter of the ojbect (optional) +- Hyperparameter of the object (optional) .. _`Scikit-learn pipeline Pipeline`: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html