Mercurial > repos > bgruening > sklearn_ensemble
diff ensemble.xml @ 38:142f27ae0806 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 208a8d348e7c7a182cfbe1b6f17868146428a7e2"
author | bgruening |
---|---|
date | Tue, 13 Apr 2021 21:05:37 +0000 |
parents | c49ad9558f6a |
children | fce065687d98 |
line wrap: on
line diff
--- a/ensemble.xml Wed Mar 11 17:19:59 2020 +0000 +++ b/ensemble.xml Tue Apr 13 21:05:37 2021 +0000 @@ -1,19 +1,19 @@ -<tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@"> +<tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@" profile="20.05"> <description>for classification and regression</description> <macros> <import>main_macros.xml</import> </macros> - <expand macro="python_requirements"/> - <expand macro="macro_stdio"/> + <expand macro="python_requirements" /> + <expand macro="macro_stdio" /> <version_command>echo "@VERSION@"</version_command> <command><![CDATA[ python "$ensemble_script" '$inputs' ]]> </command> <configfiles> - <inputs name="inputs"/> + <inputs name="inputs" /> <configfile name="ensemble_script"> -<![CDATA[ + <![CDATA[ import json import numpy as np import pandas @@ -99,98 +99,98 @@ <option value="GradientBoostingRegressor">Gradient Boosting Regressor</option> </param> <when value="RandomForestClassifier"> - <expand macro="sl_mixed_input"/> + <expand macro="sl_mixed_input" /> <section name="options" title="Advanced Options" expanded="False"> - <expand macro="n_estimators" default_value="100"/> - <expand macro="criterion"/> - <expand macro="max_features"/> - <expand macro="max_depth"/> - <expand macro="min_samples_split"/> - <expand macro="min_samples_leaf"/> - <expand macro="min_weight_fraction_leaf"/> - <expand macro="max_leaf_nodes"/> - <expand macro="bootstrap"/> - <expand macro="warm_start" checked="false"/> - <expand macro="random_state"/> - <expand macro="oob_score"/> + <expand macro="n_estimators" default_value="100" /> + <expand macro="criterion" /> + <expand macro="max_features" /> + <expand macro="max_depth" /> + <expand macro="min_samples_split" /> + <expand macro="min_samples_leaf" /> + <expand macro="min_weight_fraction_leaf" /> + <expand macro="max_leaf_nodes" /> + <expand macro="bootstrap" /> + <expand macro="warm_start" checked="false" /> + <expand macro="random_state" /> + <expand macro="oob_score" /> <!--class_weight=None--> </section> </when> <when value="AdaBoostClassifier"> - <expand macro="sl_mixed_input"/> + <expand macro="sl_mixed_input" /> <section name="options" title="Advanced Options" expanded="False"> <!--base_estimator=None--> - <expand macro="n_estimators" default_value="50"/> - <expand macro="learning_rate"/> - <param argument="algorithm" type="select" label="Boosting algorithm" help=" "> + <expand macro="n_estimators" default_value="50" /> + <expand macro="learning_rate" /> + <param argument="algorithm" type="select" label="Boosting algorithm" help=" "> <option value="SAMME.R" selected="true">SAMME.R</option> <option value="SAMME">SAMME</option> </param> - <expand macro="random_state"/> + <expand macro="random_state" /> </section> </when> <when value="GradientBoostingClassifier"> - <expand macro="sl_mixed_input"/> + <expand macro="sl_mixed_input" /> <section name="options" title="Advanced Options" expanded="False"> <!--base_estimator=None--> <param argument="loss" type="select" label="Loss function"> <option value="deviance" selected="true">deviance - logistic regression with probabilistic outputs</option> <option value="exponential">exponential - gradient boosting recovers the AdaBoost algorithm</option> </param> - <expand macro="learning_rate" default_value='0.1'/> - <expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform"/> - <expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators"/> + <expand macro="learning_rate" default_value='0.1' /> + <expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform" /> + <expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators" /> <expand macro="criterion2"> <option value="friedman_mse" selected="true">friedman_mse - mean squared error with improvement score by Friedman</option> </expand> - <expand macro="min_samples_split" type="float"/> - <expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node"/> - <expand macro="min_weight_fraction_leaf"/> - <expand macro="subsample"/> - <expand macro="max_features"/> - <expand macro="max_leaf_nodes"/> - <expand macro="min_impurity_decrease"/> - <expand macro="verbose"/> - <expand macro="warm_start" checked="false"/> - <expand macro="random_state"/> - <expand macro="presort"/> + <expand macro="min_samples_split" type="float" /> + <expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node" /> + <expand macro="min_weight_fraction_leaf" /> + <expand macro="subsample" /> + <expand macro="max_features" /> + <expand macro="max_leaf_nodes" /> + <expand macro="min_impurity_decrease" /> + <expand macro="verbose" /> + <expand macro="warm_start" checked="false" /> + <expand macro="random_state" /> + <expand macro="presort" /> </section> </when> <when value="RandomForestRegressor"> - <expand macro="sl_mixed_input"/> + <expand macro="sl_mixed_input" /> <section name="options" title="Advanced Options" expanded="False"> - <expand macro="n_estimators" default_value="100"/> - <expand macro="criterion2"/> - <expand macro="max_features"/> - <expand macro="max_depth"/> - <expand macro="min_samples_split"/> - <expand macro="min_samples_leaf"/> - <expand macro="min_weight_fraction_leaf"/> - <expand macro="max_leaf_nodes"/> - <expand macro="min_impurity_decrease"/> - <expand macro="bootstrap"/> - <expand macro="oob_score"/> - <expand macro="random_state"/> - <expand macro="verbose"/> - <expand macro="warm_start" checked="false"/> + <expand macro="n_estimators" default_value="100" /> + <expand macro="criterion2" /> + <expand macro="max_features" /> + <expand macro="max_depth" /> + <expand macro="min_samples_split" /> + <expand macro="min_samples_leaf" /> + <expand macro="min_weight_fraction_leaf" /> + <expand macro="max_leaf_nodes" /> + <expand macro="min_impurity_decrease" /> + <expand macro="bootstrap" /> + <expand macro="oob_score" /> + <expand macro="random_state" /> + <expand macro="verbose" /> + <expand macro="warm_start" checked="false" /> </section> </when> <when value="AdaBoostRegressor"> - <expand macro="sl_mixed_input"/> + <expand macro="sl_mixed_input" /> <section name="options" title="Advanced Options" expanded="False"> <!--base_estimator=None--> - <expand macro="n_estimators" default_value="50"/> - <expand macro="learning_rate"/> - <param argument="loss" type="select" label="Loss function" optional="true" help="Used when updating the weights after each boosting iteration. "> + <expand macro="n_estimators" default_value="50" /> + <expand macro="learning_rate" /> + <param argument="loss" type="select" label="Loss function" optional="true" help="Used when updating the weights after each boosting iteration. "> <option value="linear" selected="true">linear</option> <option value="square">square</option> <option value="exponential">exponential</option> </param> - <expand macro="random_state"/> + <expand macro="random_state" /> </section> </when> <when value="GradientBoostingRegressor"> - <expand macro="sl_mixed_input"/> + <expand macro="sl_mixed_input" /> <section name="options" title="Advanced Options" expanded="False"> <param argument="loss" type="select" label="Loss function"> <option value="ls" selected="true">ls - least squares regression</option> @@ -198,132 +198,132 @@ <option value="huber">huber - combination of least squares regression and least absolute deviation</option> <option value="quantile">quantile - use alpha to specify the quantile</option> </param> - <expand macro="learning_rate" default_value="0.1"/> - <expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform"/> - <expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators"/> + <expand macro="learning_rate" default_value="0.1" /> + <expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform" /> + <expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators" /> <expand macro="criterion2"> <option value="friedman_mse" selected="true">friedman_mse - mean squared error with improvement score by Friedman</option> </expand> - <expand macro="min_samples_split" type="float"/> - <expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node"/> - <expand macro="min_weight_fraction_leaf"/> - <expand macro="subsample"/> - <expand macro="max_features"/> - <expand macro="max_leaf_nodes"/> - <expand macro="min_impurity_decrease"/> + <expand macro="min_samples_split" type="float" /> + <expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node" /> + <expand macro="min_weight_fraction_leaf" /> + <expand macro="subsample" /> + <expand macro="max_features" /> + <expand macro="max_leaf_nodes" /> + <expand macro="min_impurity_decrease" /> <param argument="alpha" type="float" value="0.9" label="alpha" help="The alpha-quantile of the huber loss function and the quantile loss function" /> <!--base_estimator=None--> - <expand macro="verbose"/> - <expand macro="warm_start" checked="false"/> - <expand macro="random_state"/> - <expand macro="presort"/> + <expand macro="verbose" /> + <expand macro="warm_start" checked="false" /> + <expand macro="random_state" /> + <expand macro="presort" /> </section> </when> </expand> </inputs> - <expand macro="output"/> + <expand macro="output" /> <tests> <test> - <param name="infile1" value="train.tabular" ftype="tabular"/> - <param name="infile2" value="train.tabular" ftype="tabular"/> - <param name="col1" value="1,2,3,4"/> - <param name="col2" value="5"/> - <param name="selected_task" value="train"/> - <param name="selected_algorithm" value="RandomForestClassifier"/> - <param name="random_state" value="10"/> - <output name="outfile_fit" file="rfc_model01" compare="sim_size" delta="5"/> + <param name="infile1" value="train.tabular" ftype="tabular" /> + <param name="infile2" value="train.tabular" ftype="tabular" /> + <param name="col1" value="1,2,3,4" /> + <param name="col2" value="5" /> + <param name="selected_task" value="train" /> + <param name="selected_algorithm" value="RandomForestClassifier" /> + <param name="random_state" value="10" /> + <output name="outfile_fit" file="rfc_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="rfc_model01" ftype="zip"/> - <param name="infile_data" value="test.tabular" ftype="tabular"/> - <param name="selected_task" value="load"/> - <output name="outfile_predict" file="rfc_result01"/> + <param name="infile_model" value="rfc_model01" ftype="zip" /> + <param name="infile_data" value="test.tabular" ftype="tabular" /> + <param name="selected_task" value="load" /> + <output name="outfile_predict" file="rfc_result01" /> </test> <test> - <param name="infile1" value="regression_train.tabular" ftype="tabular"/> - <param name="infile2" value="regression_train.tabular" ftype="tabular"/> - <param name="col1" value="1,2,3,4,5"/> - <param name="col2" value="6"/> - <param name="selected_task" value="train"/> - <param name="selected_algorithm" value="RandomForestRegressor"/> - <param name="random_state" value="10"/> - <output name="outfile_fit" file="rfr_model01" compare="sim_size" delta="5"/> + <param name="infile1" value="regression_train.tabular" ftype="tabular" /> + <param name="infile2" value="regression_train.tabular" ftype="tabular" /> + <param name="col1" value="1,2,3,4,5" /> + <param name="col2" value="6" /> + <param name="selected_task" value="train" /> + <param name="selected_algorithm" value="RandomForestRegressor" /> + <param name="random_state" value="10" /> + <output name="outfile_fit" file="rfr_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="rfr_model01" ftype="zip"/> - <param name="infile_data" value="regression_test.tabular" ftype="tabular"/> - <param name="selected_task" value="load"/> - <output name="outfile_predict" file="rfr_result01"/> + <param name="infile_model" value="rfr_model01" ftype="zip" /> + <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> + <param name="selected_task" value="load" /> + <output name="outfile_predict" file="rfr_result01" /> </test> <test> - <param name="infile1" value="regression_X.tabular" ftype="tabular"/> - <param name="infile2" value="regression_y.tabular" ftype="tabular"/> - <param name="header1" value="True"/> - <param name="selected_column_selector_option" value="all_columns"/> - <param name="header2" value="True"/> - <param name="col2" value="1"/> - <param name="selected_task" value="train"/> - <param name="selected_algorithm" value="GradientBoostingRegressor"/> - <param name="max_features" value="number_input"/> - <param name="num_max_features" value="0.5"/> - <param name="random_state" value="42"/> - <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="5"/> + <param name="infile1" value="regression_X.tabular" ftype="tabular" /> + <param name="infile2" value="regression_y.tabular" ftype="tabular" /> + <param name="header1" value="True" /> + <param name="selected_column_selector_option" value="all_columns" /> + <param name="header2" value="True" /> + <param name="col2" value="1" /> + <param name="selected_task" value="train" /> + <param name="selected_algorithm" value="GradientBoostingRegressor" /> + <param name="max_features" value="number_input" /> + <param name="num_max_features" value="0.5" /> + <param name="random_state" value="42" /> + <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="gbr_model01" ftype="zip"/> - <param name="infile_data" value="regression_test_X.tabular" ftype="tabular"/> - <param name="selected_task" value="load"/> - <param name="header" value="True"/> - <output name="outfile_predict" file="gbr_prediction_result01.tabular"/> + <param name="infile_model" value="gbr_model01" ftype="zip" /> + <param name="infile_data" value="regression_test_X.tabular" ftype="tabular" /> + <param name="selected_task" value="load" /> + <param name="header" value="True" /> + <output name="outfile_predict" file="gbr_prediction_result01.tabular" /> </test> <test> - <param name="infile1" value="train.tabular" ftype="tabular"/> - <param name="infile2" value="train.tabular" ftype="tabular"/> - <param name="col1" value="1,2,3,4"/> - <param name="col2" value="5"/> - <param name="selected_task" value="train"/> - <param name="selected_algorithm" value="GradientBoostingClassifier"/> - <output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="5"/> + <param name="infile1" value="train.tabular" ftype="tabular" /> + <param name="infile2" value="train.tabular" ftype="tabular" /> + <param name="col1" value="1,2,3,4" /> + <param name="col2" value="5" /> + <param name="selected_task" value="train" /> + <param name="selected_algorithm" value="GradientBoostingClassifier" /> + <output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="gbc_model01" ftype="zip"/> - <param name="infile_data" value="test.tabular" ftype="tabular"/> - <param name="selected_task" value="load"/> - <output name="outfile_predict" file="gbc_result01"/> + <param name="infile_model" value="gbc_model01" ftype="zip" /> + <param name="infile_data" value="test.tabular" ftype="tabular" /> + <param name="selected_task" value="load" /> + <output name="outfile_predict" file="gbc_result01" /> </test> <test> - <param name="infile1" value="train.tabular" ftype="tabular"/> - <param name="infile2" value="train.tabular" ftype="tabular"/> - <param name="col1" value="1,2,3,4"/> - <param name="col2" value="5"/> - <param name="selected_task" value="train"/> - <param name="selected_algorithm" value="AdaBoostClassifier"/> - <param name="random_state" value="10"/> - <output name="outfile_fit" file="abc_model01" compare="sim_size" delta="5"/> + <param name="infile1" value="train.tabular" ftype="tabular" /> + <param name="infile2" value="train.tabular" ftype="tabular" /> + <param name="col1" value="1,2,3,4" /> + <param name="col2" value="5" /> + <param name="selected_task" value="train" /> + <param name="selected_algorithm" value="AdaBoostClassifier" /> + <param name="random_state" value="10" /> + <output name="outfile_fit" file="abc_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="abc_model01" ftype="zip"/> - <param name="infile_data" value="test.tabular" ftype="tabular"/> - <param name="selected_task" value="load"/> - <output name="outfile_predict" file="abc_result01"/> + <param name="infile_model" value="abc_model01" ftype="zip" /> + <param name="infile_data" value="test.tabular" ftype="tabular" /> + <param name="selected_task" value="load" /> + <output name="outfile_predict" file="abc_result01" /> </test> <test> - <param name="infile1" value="regression_train.tabular" ftype="tabular"/> - <param name="infile2" value="regression_train.tabular" ftype="tabular"/> - <param name="col1" value="1,2,3,4,5"/> - <param name="col2" value="6"/> - <param name="selected_task" value="train"/> - <param name="selected_algorithm" value="AdaBoostRegressor"/> - <param name="random_state" value="10"/> - <output name="outfile_fit" file="abr_model01" compare="sim_size" delta="5"/> + <param name="infile1" value="regression_train.tabular" ftype="tabular" /> + <param name="infile2" value="regression_train.tabular" ftype="tabular" /> + <param name="col1" value="1,2,3,4,5" /> + <param name="col2" value="6" /> + <param name="selected_task" value="train" /> + <param name="selected_algorithm" value="AdaBoostRegressor" /> + <param name="random_state" value="10" /> + <output name="outfile_fit" file="abr_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="abr_model01" ftype="zip"/> - <param name="infile_data" value="regression_test.tabular" ftype="tabular"/> - <param name="selected_task" value="load"/> - <output name="outfile_predict" file="abr_result01"/> + <param name="infile_model" value="abr_model01" ftype="zip" /> + <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> + <param name="selected_task" value="load" /> + <output name="outfile_predict" file="abr_result01" /> </test> </tests> <help><![CDATA[ @@ -390,6 +390,6 @@ **3 - Prediction output** The tool predicts the class labels for new samples and adds them as the last column to the prediction dataset. The new dataset then is output as a tabular file. The prediction output format should look like the training dataset. - ]]></help> - <expand macro="sklearn_citation"/> + ]]> </help> + <expand macro="sklearn_citation" /> </tool>