view stacking_ensembles.xml @ 4:ba7fb6b33cd0 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 80417bf0158a9b596e485dd66408f738f405145a
author bgruening
date Mon, 02 Oct 2023 08:02:28 +0000
parents 0a1812986bc3
children
line wrap: on
line source

<tool id="stacking_ensemble_models" name="Stacking Ensembles" version="@VERSION@" profile="@PROFILE@">
    <description>builds stacking, voting ensemble models with numerous base options</description>
    <macros>
        <import>main_macros.xml</import>
        <!-- Shared "Advanced Options" section for the mlxtend stacking estimators.
             Parameters supplied by the caller are inserted at the yield point, ahead of
             the two options that every expansion of this macro receives. -->
        <macro name="stacking_ensemble_inputs">
            <section name="options" title="Advanced Options" expanded="false">
                <yield />
                <param argument="use_features_in_secondary" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"
                    help="If true, the meta estimator is trained on both the predictions of the base estimators and the original features. If false, it is trained on the predictions of the base estimators only." />
                <param argument="store_train_meta_features" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"
                    help="If true, the meta-features computed from the training data are stored on the fitted model (`train_meta_features_`)." />
            </section>
        </macro>
        <!-- Estimator chooser used wherever this macro is expanded. The module list and the
             per-module sub-options come from macros not defined in this file (presumably
             main_macros.xml); a "custom_estimator" choice is added here so that a previously
             built estimator or pipeline dataset can be selected instead. -->
        <macro name="stacking_base_estimator">
            <conditional name="estimator_selector">
                <param name="selected_module"
                       type="select"
                       label="Choose the module that contains target estimator:">
                    <expand macro="estimator_module_options">
                        <option value="custom_estimator">Load a custom estimator</option>
                    </expand>
                </param>
                <expand macro="estimator_suboptions">
                    <when value="custom_estimator">
                        <param name="c_estimator"
                               type="data"
                               format="h5mlm"
                               label="Choose the dataset containing the custom estimator or pipeline"/>
                    </when>
                </expand>
            </conditional>
        </macro>
        <!-- "Advanced Options" section for the voting ensembles: a free-text `weights` list,
             followed by any extra parameters the caller yields into the section. -->
        <macro name="stacking_voting_weights">
            <section name="options" title="Advanced Options" expanded="false">
                <param argument="weights"
                       type="text"
                       value="[]"
                       optional="true"
                       help="Sequence of weights (float or int). Uses uniform weights if None (`[]`).">
                    <!-- Square brackets are added to the default valid character set so the list literal is not altered by sanitization. -->
                    <sanitizer>
                        <valid initial="default">
                            <add value="["/>
                            <add value="]"/>
                        </valid>
                    </sanitizer>
                </param>
                <yield/>
            </section>
        </macro>
    </macros>
    <!-- Requirements and stdio handling are expanded from macros that are not defined in this file. -->
    <expand macro="python_requirements" />
    <expand macro="macro_stdio" />
    <!-- NOTE(review): ENSEMBLE_VERSION is a shell variable that nothing in this file sets, so this
         may print an empty line unless the job environment exports it. Confirm, or consider
         echoing the @VERSION@ token that the tool element already uses for its version. -->
    <version_command>echo "$ENSEMBLE_VERSION"</version_command>
    <!-- Command overview:
         1. Loop over the base_est_builder repeat and build a comma-separated shell variable
            named bases: the dataset path for each custom estimator, and the literal None for
            every estimator that was picked from a module.
         2. Run stacking_ensembles.py with the inputs config file, the output path and the
            bases list.
         3. For every ensemble type other than the two voting estimators, pass the meta
            estimator dataset through the meta option when a custom meta estimator was chosen. -->
    <command>
        <![CDATA[
        #for $i, $base in enumerate($base_est_builder)
        #if $i == 0
            #if $base.estimator_selector.selected_module == 'custom_estimator'
            bases='${base.estimator_selector.c_estimator}';
            #else
            bases='None';
            #end if
        #elif $base.estimator_selector.selected_module == 'custom_estimator'
        bases="\$bases,${base.estimator_selector.c_estimator}";
        #else
        bases="\$bases,None";
        #end if
        #end for
        python '$__tool_directory__/stacking_ensembles.py'
            --inputs '$inputs'
            --outfile '$outfile'
            --bases "\$bases"
            #if $algo_selection.estimator_type not in ('sklearn.ensemble_VotingClassifier', 'sklearn.ensemble_VotingRegressor')
            #if $algo_selection.meta_estimator.estimator_selector.selected_module == 'custom_estimator'
            --meta '${algo_selection.meta_estimator.estimator_selector.c_estimator}'
            #end if
            #end if
        ]]>
    </command>
    <!-- Declares the config file named "inputs"; the command hands its path to the script as $inputs.
         Per the Galaxy tool schema, this file holds a JSON dump of the tool parameters. -->
    <configfiles>
        <inputs name="inputs" />
    </configfiles>
    <inputs>
        <!-- Ensemble type selector. Each branch exposes the advanced options of the chosen
             estimator; the four mlxtend stacking branches additionally contain a
             "meta_estimator" section, which the command template reads for non-voting types. -->
        <conditional name="algo_selection">
            <param name="estimator_type" type="select" label="Choose the stacking ensemble type">
                <option value="sklearn.ensemble_VotingClassifier" selected="true">sklearn.ensemble -- VotingClassifier</option>
                <option value="sklearn.ensemble_VotingRegressor">sklearn.ensemble -- VotingRegressor</option>
                <option value="mlxtend.classifier_StackingCVClassifier">mlxtend.classifier -- StackingCVClassifier</option>
                <option value="mlxtend.classifier_StackingClassifier">mlxtend.classifier -- StackingClassifier</option>
                <option value="mlxtend.regressor_StackingCVRegressor">mlxtend.regressor -- StackingCVRegressor</option>
                <option value="mlxtend.regressor_StackingRegressor">mlxtend.regressor -- StackingRegressor</option>
            </param>
            <when value="sklearn.ensemble_VotingClassifier">
                <expand macro="stacking_voting_weights">
                    <param argument="voting" type="select" help="If ‘hard’, uses predicted class labels for majority rule voting. Else if ‘soft’, predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers.">
                        <option value="hard" selected="true">hard</option>
                        <option value="soft">soft</option>
                    </param>
                    <param argument="flatten_transform" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"
                        help="Only affects the shape of the transform output when voting='soft': if true, transform returns a matrix of shape (n_samples, n_classifiers * n_classes); if false, the shape is (n_classifiers, n_samples, n_classes)." />
                </expand>
            </when>
            <when value="sklearn.ensemble_VotingRegressor">
                <expand macro="stacking_voting_weights" />
            </when>
            <when value="mlxtend.classifier_StackingCVClassifier">
                <expand macro="stacking_ensemble_inputs">
                    <expand macro="cv_reduced" />
                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"
                        help="If true, the meta classifier is trained on the predicted class probabilities of the base classifiers instead of their predicted class labels." />
                    <param argument="drop_last_proba" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"
                        help="If true, drops the last probability column of each base classifier from the meta-features, because it is redundant. Only relevant if use_probas is true." />
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator" />
                </section>
            </when>
            <when value="mlxtend.classifier_StackingClassifier">
                <expand macro="stacking_ensemble_inputs">
                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"
                        help="If true, the meta classifier is trained on the predicted class probabilities of the base classifiers instead of their predicted class labels." />
                    <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"
                        help="If true, the probabilities of the base classifiers are averaged to form the meta-features. Only relevant if use_probas is true." />
                    <param argument="drop_last_proba" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"
                        help="If true, drops the last probability column of each base classifier from the meta-features, because it is redundant. Only relevant if use_probas is true." />
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator" />
                </section>
            </when>
            <when value="mlxtend.regressor_StackingCVRegressor">
                <expand macro="stacking_ensemble_inputs">
                    <expand macro="cv_reduced" />
                    <!--TODO support group splitters. Hint: `groups` is a fit_param-->
                    <expand macro="shuffle" label="shuffle" />
                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data." />
                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"
                        help="If true, the regressors are cloned before fitting; if false, the original regressor objects are refitted on the dataset when fit is called." />
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator" />
                </section>
            </when>
            <when value="mlxtend.regressor_StackingRegressor">
                <expand macro="stacking_ensemble_inputs">
                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"
                        help="If true, the regressors are cloned before fitting; if false, the original regressor objects are refitted on the dataset when fit is called." />
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator" />
                </section>
            </when>
        </conditional>
        <!-- Between 1 and 20 base estimators, each configured through the shared estimator chooser. -->
        <repeat name="base_est_builder" title="Base Estimator" min="1" max="20">
            <expand macro="stacking_base_estimator"/>
        </repeat>
    </inputs>
    <outputs>
        <data format="h5mlm" name="outfile" label="${algo_selection.estimator_type} on ${on_string}" />
    </outputs>
    <tests>
        <!-- VotingClassifier with weights [1, 2] over two module-selected base estimators
             (svm SVC and xgboost XGBClassifier); the output is compared with
             StackingVoting03.h5mlm using sim_size with a delta of 5. -->
        <test>
            <conditional name="algo_selection">
                <param name="estimator_type" value="sklearn.ensemble_VotingClassifier" />
                <section name="options">
                    <param name="weights" value="[1, 2]" />
                </section>
            </conditional>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="svm" />
                    <param name="selected_estimator" value="SVC" />
                </conditional>
            </repeat>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="xgboost" />
                    <param name="selected_estimator" value="XGBClassifier" />
                </conditional>
            </repeat>
            <output name="outfile" file="StackingVoting03.h5mlm" compare="sim_size" delta="5" />
        </test>
        <!-- StackingCVRegressor built entirely from custom estimator datasets: a
             LinearRegression meta estimator plus RandomForestRegressor and XGBRegressor
             base estimators. This exercises the custom_estimator branches of the command. -->
        <test>
            <conditional name="algo_selection">
                <param name="estimator_type" value="mlxtend.regressor_StackingCVRegressor" />
                <section name="meta_estimator">
                    <conditional name="estimator_selector">
                        <param name="selected_module" value="custom_estimator" />
                        <param name="c_estimator" value="LinearRegression01.h5mlm" ftype="h5mlm" />
                    </conditional>
                </section>
            </conditional>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator" />
                    <param name="c_estimator" value="RandomForestRegressor01.h5mlm" ftype="h5mlm" />
                </conditional>
            </repeat>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator" />
                    <param name="c_estimator" value="XGBRegressor01.h5mlm" ftype="h5mlm" />
                </conditional>
            </repeat>
            <output name="outfile" file="StackingCVRegressor01.h5mlm" compare="sim_size" delta="5" />
        </test>
        <!-- StackingRegressor mixing both selection styles: a module-selected SVR meta
             estimator, one custom base estimator dataset (RandomForestRegressor01.h5mlm) and
             one module-selected base estimator (xgboost XGBRegressor). -->
        <test>
            <conditional name="algo_selection">
                <param name="estimator_type" value="mlxtend.regressor_StackingRegressor" />
                <section name="meta_estimator">
                    <conditional name="estimator_selector">
                        <param name="selected_module" value="svm" />
                        <param name="selected_estimator" value="SVR" />
                    </conditional>
                </section>
            </conditional>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator" />
                    <param name="c_estimator" value="RandomForestRegressor01.h5mlm" ftype="h5mlm" />
                </conditional>
            </repeat>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="xgboost" />
                    <param name="selected_estimator" value="XGBRegressor" />
                </conditional>
            </repeat>
            <output name="outfile" file="StackingRegressor02.h5mlm" compare="sim_size" delta="5" />
        </test>
    </tests>
    <help>
        <![CDATA[
This tool wraps Stacking Regression, also called Super Learning: several base algorithms are each trained
on the original dataset and produce their own predictions, and a second-level `metalearner` is then fitted on
those predictions to combine the base models into a stronger learner.
Refer to https://h2o-release.s3.amazonaws.com/h2o/rel-ueno/2/docs-website/h2o-docs/data-science/stacked-ensembles.html.


        ]]>
    </help>
    <!-- Citations: the sklearn_citation macro is expanded with additional citation macros and an
         inline BibTeX entry for MLxtend, which provides the stacking estimators offered above. -->
    <expand macro="sklearn_citation">
        <expand macro="skrebate_citation" />
        <expand macro="xgboost_citation" />
        <expand macro="imblearn_citation" />
        <citation type="bibtex">
            @article{raschkas_2018_mlxtend,
                author       = {Sebastian Raschka},
                title        = {MLxtend: Providing machine learning and data science 
                                                utilities and extensions to Python’s  
                                                scientific computing stack},
                journal      = {The Journal of Open Source Software},
                volume       = {3},
                number       = {24},
                month        = apr,
                year         = 2018,
                publisher    = {The Open Journal},
                doi          = {10.21105/joss.00638},
                url          = {http://joss.theoj.org/papers/10.21105/joss.00638}
            }
        </citation>
    </expand>
</tool>