Mercurial > repos > bgruening > stacking_ensemble_models
diff main_macros.xml @ 2:38c4f8a98038 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5b2ac730ec6d3b762faa9034eddd19ad1b347476"
author | bgruening |
---|---|
date | Mon, 16 Dec 2019 10:07:37 +0000 |
parents | c1b0c8232816 |
children | 0a1812986bc3 |
line wrap: on
line diff
--- a/main_macros.xml Fri Aug 09 06:30:16 2019 -0400 +++ b/main_macros.xml Mon Dec 16 10:07:37 2019 +0000 @@ -1,12 +1,10 @@ <macros> - <token name="@VERSION@">1.0.7.10</token> - - <token name="@ENSEMBLE_VERSION@">0.2.0</token> + <token name="@VERSION@">1.0.8.1</token> <xml name="python_requirements"> <requirements> <requirement type="package" version="3.6">python</requirement> - <requirement type="package" version="0.7.10">Galaxy-ML</requirement> + <requirement type="package" version="0.8.1">Galaxy-ML</requirement> <yield/> </requirements> </xml> @@ -235,8 +233,8 @@ <param argument="fit_intercept" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Estimate the intercept" help="If false, the data is assumed to be already centered."/> </xml> - <xml name="n_iter" token_default_value="5" token_help_text="The number of passes over the training data (aka epochs). "> - <param argument="n_iter" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of iterations" help="@HELP_TEXT@"/> + <xml name="n_iter_no_change" token_default_value="5" token_help_text="Number of iterations with no improvement to wait before early stopping. 
"> + <param argument="n_iter_no_change" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of iterations" help="@HELP_TEXT@"/> </xml> <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration"> @@ -328,8 +326,8 @@ <!--Data interface--> - <xml name="samples_tabular" token_multiple1="false" token_multiple2="false"> - <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/> + <xml name="samples_tabular" token_label1="Training samples dataset:" token_multiple1="false" token_multiple2="false"> + <param name="infile1" type="data" format="tabular" label="@LABEL1@"/> <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" /> <conditional name="column_selector_options_1"> <expand macro="samples_column_selector_options" multiple="@MULTIPLE1@"/> @@ -421,27 +419,46 @@ <xml name="sl_mixed_input"> <conditional name="input_options"> - <param name="selected_input" type="select" label="Select input type:"> - <option value="tabular" selected="true">tabular data</option> - <option value="sparse">sparse matrix</option> - <option value="seq_fasta">sequnences in a fasta file</option> - <option value="refseq_and_interval">reference genome and intervals</option> - </param> - <when value="tabular"> - <expand macro="samples_tabular" multiple1="true" multiple2="false"/> - </when> - <when value="sparse"> - <expand macro="sparse_target"/> - </when> - <when value="seq_fasta"> - <expand macro="inputs_seq_fasta"/> - </when> - <when value="refseq_and_interval"> - <expand macro="inputs_refseq_and_interval"/> - </when> + <expand macro="data_input_options"/> + <expand macro="data_input_whens"/> </conditional> </xml> + <xml name="sl_mixed_input_plus_sequence"> + <conditional name="input_options"> + <expand macro="data_input_options"> + <option value="seq_fasta">sequnences in a fasta file</option> + <option 
value="refseq_and_interval">reference genome and intervals</option> + </expand> + <expand macro="data_input_whens"> + <when value="seq_fasta"> + <expand macro="inputs_seq_fasta"/> + </when> + <when value="refseq_and_interval"> + <expand macro="inputs_refseq_and_interval"/> + </when> + </expand> + </conditional> + </xml> + + <xml name="data_input_options"> + <param name="selected_input" type="select" label="Select input type:"> + <option value="tabular" selected="true">tabular data</option> + <option value="sparse">sparse matrix</option> + <yield/> + </param> + </xml> + + <xml name="data_input_whens"> + <when value="tabular"> + <expand macro="samples_tabular" multiple1="true" multiple2="false"/> + </when> + <when value="sparse"> + <expand macro="sparse_target"/> + </when> + <yield/> + </xml> + <xml name="input_tabular_target"> <param name="infile2" type="data" format="tabular" label="Dataset containing class labels or target values:"/> <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" /> @@ -744,6 +761,9 @@ <option value="MinMaxScaler">Minmax Scaler (Scales features to a range)</option> <option value="PolynomialFeatures">Polynomial Features (Generates polynomial and interaction features)</option> <option value="RobustScaler">Robust Scaler (Scales features using outlier-invariance statistics)</option> + <option value="QuantileTransformer">QuantileTransformer (Transform features using quantiles information)</option> + <option value="PowerTransformer">PowerTransformer (Apply a power transform featurewise to make data more Gaussian-like)</option> + <option value="KBinsDiscretizer">KBinsDiscretizer (Bin continuous data into intervals.)</option> </expand> </xml> @@ -818,6 +838,42 @@ label="Use a copy of data for inplace scaling" help=" "/> </section> </when> + <when value="QuantileTransformer"> + <section name="options" title="Advanced Options" expanded="False"> + 
<param name="n_quantiles" type="integer" value="1000" min="0" label="Number of quantiles to be computed" /> + <param name="output_distribution" type="select" label="Marginal distribution for the transformed data"> + <option value="uniform" selected="true">uniform</option> + <option value="normal">normal</option> + </param> + <param name="ignore_implicit_zeros" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to discard sparse entries" help="Only applies to sparse matrices. If False, sparse entries are treated as zeros"/> + <param name="subsample" type="integer" value="100000" label="Maximum number of samples used to estimate the quantiles for computational efficiency" help="Note that the subsampling procedure may differ for value-identical sparse and dense matrices."/> + <expand macro="random_state" help_text="This is used by subsampling and smoothing noise"/> + </section> + </when> + <when value="PowerTransformer"> + <section name="options" title="Advanced Options" expanded="False"> + <param name="method" type="select" label="The power transform method"> + <option value="yeo-johnson" selected="true">yeo-johnson (works with positive and negative values)</option> + <option value="box-cox">box-cox (might perform better, but only works with strictly positive values)</option> + </param> + <param name="standardize" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Whether to apply zero-mean, unit-variance normalization to the transformed output."/> + </section> + </when> + <when value="KBinsDiscretizer"> + <section name="options" title="Advanced Options" expanded="False"> + <param name="n_bins" type="integer" value="5" min="2" label="The number of bins to produce"/> + <param name="encode" type="select" label="Method used to encode the transformed result"> + <option value="onehot" selected="true">onehot (encode the transformed result with one-hot encoding and return a sparse matrix)</option> + 
<option value="onehot-dense">onehot-dense (encode the transformed result with one-hot encoding and return a dense array)</option> + <option value="ordinal">ordinal (return the bin identifier encoded as an integer value)</option> + </param> + <param name="strategy" type="select" label="Strategy used to define the widths of the bins"> + <option value="uniform">uniform (all bins in each feature have identical widths)</option> + <option value="quantile" selected="true">quantile (all bins in each feature have the same number of points)</option> + <option value="kmeans">kmeans (values in each bin have the same nearest center of a 1D k-means cluster)</option> + </param> + </section> + </when> </expand> </xml> @@ -1242,6 +1298,7 @@ <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option> <option value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option> <option value="r2">Regression -- 'r2'</option> + <option value="max_error">Regression -- 'max_error'</option> <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option> <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option> </param> @@ -1272,6 +1329,7 @@ <when value="neg_mean_squared_log_error"><expand macro="secondary_scoring_selection_regression"/></when> <when value="neg_median_absolute_error"><expand macro="secondary_scoring_selection_regression"/></when> <when value="r2"><expand macro="secondary_scoring_selection_regression"/></when> + <when value="max_error"><expand macro="secondary_scoring_selection_regression"/></when> <when value="binarize_auc_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when> <when value="binarize_average_precision_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when> </conditional> @@ -1310,6 +1368,7 @@ <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option> <option 
value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option> <option value="r2">Regression -- 'r2'</option> + <option value="max_error">Regression -- 'max_error'</option> </param> </xml> @@ -1324,32 +1383,6 @@ <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/> </xml> - <xml name="search_cv_estimator"> - <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/> - <section name="search_params_builder" title="Search parameters Builder" expanded="true"> - <param name="infile_params" type="data" format="tabular" optional="true" label="Choose the dataset containing parameter names" help="This dataset could be the output of `get_params` in the `Estimator Attributes` tool."/> - <repeat name="param_set" min="1" max="30" title="Parameter settings for search:"> - <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)"> - <options from_dataset="infile_params" startswith="@"> - <column name="name" index="2"/> - <column name="value" index="1"/> - <filter type="unique_value" name="unique_param" column="1"/> - </options> - </param> - <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. 
See `help` section for more examples"> - <sanitizer> - <valid initial="default"> - <add value="'"/> - <add value="&quot;"/> - <add value="["/> - <add value="]"/> - </valid> - </sanitizer> - </param> - </repeat> - </section> - </xml> - <xml name="estimator_and_hyperparameter"> <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/> <section name="hyperparams_swapping" title="Hyperparameter Swapping" expanded="false"> @@ -1379,7 +1412,7 @@ <expand macro="model_validation_common_options"/> <!--expand macro="pre_dispatch" default_value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/--> <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/> - <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/> + <!--param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset. 
Be aware that `refit=True` invokes extra computation, but it's REQUIRED for outputting the best estimator!"/> --> <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised."/> <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help=""/> </xml> @@ -1456,6 +1489,8 @@ <option value="GradientBoostingClassifier">GradientBoostingClassifier</option> <option value="GradientBoostingRegressor">GradientBoostingRegressor</option> <option value="IsolationForest">IsolationForest</option> + <option value="HistGradientBoostingClassifier">HistGradientBoostingClassifier</option> + <option value="HistGradientBoostingRegressor">HistGradientBoostingRegressor</option> <option value="RandomForestClassifier">RandomForestClassifier</option> <option value="RandomForestRegressor">RandomForestRegressor</option> <option value="RandomTreesEmbedding">RandomTreesEmbedding</option>