Mercurial > repos > bgruening > sklearn_ensemble
diff ensemble.xml @ 44:fce065687d98 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author | bgruening |
---|---|
date | Wed, 09 Aug 2023 11:26:43 +0000 |
parents | 142f27ae0806 |
children |
line wrap: on
line diff
--- a/ensemble.xml Thu Aug 11 07:56:55 2022 +0000 +++ b/ensemble.xml Wed Aug 09 11:26:43 2023 +0000 @@ -1,4 +1,4 @@ -<tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@" profile="20.05"> +<tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@" profile="@PROFILE@"> <description>for classification and regression</description> <macros> <import>main_macros.xml</import> @@ -17,12 +17,12 @@ import json import numpy as np import pandas -import pickle import sys from scipy.io import mmread import sklearn.ensemble -from galaxy_ml.utils import load_model, get_X_y +from galaxy_ml.model_persist import dump_model_to_h5, load_model_from_h5 +from galaxy_ml.utils import clean_params, get_X_y N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) @@ -57,11 +57,6 @@ options["select_max_features"].pop("num_max_features") options["max_features"] = options["select_max_features"]["max_features"] options.pop("select_max_features") - if "presort" in options: - if options["presort"] == "true": - options["presort"] = True - if options["presort"] == "false": - options["presort"] = False if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0: options["min_samples_leaf"] = 1 if "min_samples_split" in options and options["min_samples_split"] > 1.0: @@ -72,12 +67,11 @@ my_class = getattr(sklearn.ensemble, algorithm) estimator = my_class(**options) estimator.fit(X,y) - with open(outfile_fit, 'wb') as out_handler: - pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) + dump_model_to_h5(estimator, outfile_fit) else: - with open(infile_model, 'rb') as model_handler: - classifier_object = load_model(model_handler) + classifier_object = load_model_from_h5(infile_model) + classifier_object = clean_params(classifier_object) header = 'infer' if params["selected_tasks"]["header"] else None data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None) prediction = classifier_object.predict(data) @@ -89,7 +83,7 @@ </configfile> </configfiles> <inputs> - <expand macro="sl_Conditional" model="zip"> + <expand macro="sl_Conditional" model="h5mlm"> <param name="selected_algorithm" type="select" label="Select an ensemble method:"> <option value="RandomForestClassifier" selected="true">Random forest classifier</option> <option value="AdaBoostClassifier">Ada boost classifier</option> @@ -153,7 +147,6 @@ <expand macro="verbose" /> <expand macro="warm_start" checked="false" /> <expand macro="random_state" /> - <expand macro="presort" /> </section> </when> <when value="RandomForestRegressor"> @@ -216,7 +209,6 @@ <expand macro="verbose" /> <expand macro="warm_start" checked="false" /> <expand macro="random_state" /> - <expand macro="presort" /> </section> </when> </expand> @@ -236,7 +228,7 @@ <output name="outfile_fit" file="rfc_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="rfc_model01" ftype="zip" /> + <param name="infile_model" value="rfc_model01" ftype="h5mlm" /> <param name="infile_data" value="test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="rfc_result01" /> @@ -252,7 +244,7 @@ <output name="outfile_fit" file="rfr_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="rfr_model01" ftype="zip" /> + <param name="infile_model" value="rfr_model01" ftype="h5mlm" /> <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="rfr_result01" /> @@ -272,7 +264,7 @@ <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="gbr_model01" ftype="zip" /> + <param name="infile_model" value="gbr_model01" ftype="h5mlm" /> <param name="infile_data" value="regression_test_X.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <param name="header" value="True" /> @@ -288,7 +280,7 @@ <output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="gbc_model01" ftype="zip" /> + <param name="infile_model" value="gbc_model01" ftype="h5mlm" /> <param name="infile_data" value="test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="gbc_result01" /> @@ -304,7 +296,7 @@ <output name="outfile_fit" file="abc_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="abc_model01" ftype="zip" /> + <param name="infile_model" value="abc_model01" ftype="h5mlm" /> <param name="infile_data" value="test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="abc_result01" /> @@ -320,7 +312,7 @@ <output name="outfile_fit" file="abr_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="abr_model01" ftype="zip" /> + <param name="infile_model" value="abr_model01" ftype="h5mlm" /> <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="abr_result01" />