Mercurial > repos > bgruening > sklearn_pairwise_metrics
diff main_macros.xml @ 4:1573e8255a34 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 0e582cf1f3134c777cce3aa57d71b80ed95e6ba9
author | bgruening |
---|---|
date | Fri, 16 Feb 2018 09:13:01 -0500 |
parents | f8cd85c496c9 |
children | 08d8af6abdbc |
line wrap: on
line diff
--- a/main_macros.xml Thu Jun 23 15:27:24 2016 -0400 +++ b/main_macros.xml Fri Feb 16 09:13:01 2018 -0500 @@ -12,10 +12,12 @@ </token> <xml name="python_requirements"> - <requirements> - <requirement type="package" version="0.2.1b">eden</requirement> - <yield /> - </requirements> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="0.19.1">scikit-learn</requirement> + <requirement type="package" version="0.22.0">pandas</requirement> + <yield /> + </requirements> </xml> <xml name="macro_stdio"> @@ -90,6 +92,79 @@ </xml> + <!--Generalized Linear Models--> + <xml name="loss" token_help=" " token_select="false"> + <param argument="loss" type="select" label="Loss function" help="@HELP@"> + <option value="squared_loss" selected="@SELECT@">squared loss</option> + <option value="huber">huber</option> + <option value="epsilon_insensitive">epsilon insensitive</option> + <option value="squared_epsilon_insensitive">squared epsilon insensitive</option> + <yield/> + </param> + </xml> + + <xml name="penalty" token_help=" "> + <param argument="penalty" type="select" label="Penalty (regularization term)" help="@HELP@"> + <option value="l2" selected="true">l2</option> + <option value="l1">l1</option> + <option value="elasticnet">elastic net</option> + <option value="none">none</option> + <yield/> + </param> + </xml> + + <xml name="l1_ratio" token_default_value="0.15" token_help=" "> + <param argument="l1_ratio" type="float" value="@DEFAULT_VALUE@" label="Elastic Net mixing parameter" help="@HELP@"/> + </xml> + + <xml name="epsilon" token_default_value="0.1" token_help="Used if loss is ‘huber’, ‘epsilon_insensitive’, or ‘squared_epsilon_insensitive’. "> + <param argument="epsilon" type="float" value="@DEFAULT_VALUE@" label="Epsilon (epsilon-sensitive loss functions only)" help="@HELP@"/> + </xml> + + <xml name="learning_rate_s" token_help=" " token_selected1="false" token_selected2="false"> + <param argument="learning_rate" type="select" optional="true" label="Learning rate schedule" help="@HELP@"> + <option value="optimal" selected="@SELECTED1@">optimal</option> + <option value="constant">constant</option> + <option value="invscaling" selected="@SELECTED2@">inverse scaling</option> + <yield/> + </param> + </xml> + + <xml name="eta0" token_default_value="0.0" token_help="Used with ‘constant’ or ‘invscaling’ schedules. "> + <param argument="eta0" type="float" value="@DEFAULT_VALUE@" label="Initial learning rate" help="@HELP@"/> + </xml> + + <xml name="power_t" token_default_value="0.5" token_help=" "> + <param argument="power_t" type="float" value="@DEFAULT_VALUE@" label="Exponent for inverse scaling learning rate" help="@HELP@"/> + </xml> + + <xml name="normalize" token_checked="false" token_help=" "> + <param argument="normalize" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Normalize samples before training" help=" "/> + </xml> + + <xml name="copy_X" token_checked="true" token_help=" "> + <param argument="copy_X" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use a copy of samples" help="If false, samples would be overwritten. "/> + </xml> + + <xml name="ridge_params"> + <expand macro="normalize"/> + <expand macro="alpha" default_value="1.0"/> + <expand macro="fit_intercept"/> + <expand macro="max_iter" default_value=""/> + <expand macro="tol" default_value="0.001" help_text="Precision of the solution. "/> + <!--class_weight--> + <expand macro="copy_X"/> + <param argument="solver" type="select" value="" label="Solver to use in the computational routines" help=" "> + <option value="auto" selected="true">auto</option> + <option value="svd">svd</option> + <option value="cholesky">cholesky</option> + <option value="lsqr">lsqr</option> + <option value="sparse_cg">sparse_cg</option> + <option value="sag">sag</option> + </param> + <expand macro="random_state"/> + </xml> + <!--Ensemble methods--> <xml name="n_estimators" token_default_value="10" token_help=" "> <param argument="n_estimators" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of trees in the forest" help="@HELP@"/> @@ -106,18 +181,18 @@ <xml name="min_samples_leaf" token_default_value="1" token_help=" "> <param argument="min_samples_leaf" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Minimum number of samples in newly created leaves" help="@HELP@"/> </xml> - + <xml name="min_weight_fraction_leaf" token_default_value="0.0" token_help=" "> <param argument="min_weight_fraction_leaf" type="float" optional="true" value="@DEFAULT_VALUE@" label="Minimum weighted fraction of the input samples required to be at a leaf node" help="@HELP@"/> </xml> <xml name="max_leaf_nodes" token_default_value="" token_help=" "> <param argument="max_leaf_nodes" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Maximum number of leaf nodes in best-first method" help="@HELP@"/> - </xml> + </xml> <xml name="bootstrap" token_checked="true" token_help=" "> <param argument="bootstrap" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Use bootstrap samples for building trees." help="@HELP@"/> - </xml> + </xml> <xml name="criterion" token_help=" "> <param argument="criterion" type="select" label="Function to measure the quality of a split" help=" "> @@ -127,13 +202,13 @@ </param> </xml> - <xml name="oob_score" token_checked="flase" token_help=" "> - <param argument="oob_score" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Use out-of-bag samples to estimate the generalization error" help="@HELP@"/> + <xml name="oob_score" token_checked="false" token_help=" "> + <param argument="oob_score" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use out-of-bag samples to estimate the generalization error" help="@HELP@"/> </xml> <xml name="max_features" token_default_value="auto" token_help="This could be an integer, float, string, or None. For more information please refer to help. "> <param argument="max_features" type="text" optional="true" value="@DEFAULT_VALUE@" label="Number of features for finding the best split" help="@HELP@"/> - </xml> + </xml> <xml name="learning_rate" token_default_value="1.0" token_help=" "> <param argument="learning_rate" type="float" optional="true" value="@DEFAULT_VALUE@" label="Learning rate" help="@HELP@"/> @@ -150,7 +225,7 @@ </xml> <xml name="fit_intercept" token_checked="true"> - <param argument="fit_intercept" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Estimate the intercept" help="If false, the data is assumed to be already centered."/> + <param argument="fit_intercept" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Estimate the intercept" help="If false, the data is assumed to be already centered."/> </xml> <xml name="n_iter" token_default_value="5" token_help_text="The number of passes over the training data (aka epochs). "> @@ -158,7 +233,7 @@ </xml> <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration"> - <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@"/> + <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@"/> </xml> <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results."> @@ -166,7 +241,7 @@ </xml> <xml name="warm_start" token_checked="true" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization,otherwise, just erase the previous solution."> - <param argument="warm_start" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Perform warm start" help="@HELP_TEXT@"/> + <param argument="warm_start" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Perform warm start" help="@HELP_TEXT@"/> </xml> <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term."> @@ -230,10 +305,11 @@ <xml name="average"> <param argument="average" type="select" optional="true" label="Averaging type" help=" "> - <option value="micro" help="Calculate metrics globally by counting the total true positives, false negatives and false positives.">micro</option> - <option value="samples" help="Calculate metrics for each instance, and find their average (only meaningful for multilabel).">samples</option> - <!--option value="macro" help=""></option--> - <!--option value="weighted" help=""></option--> + <option value="micro">Calculate metrics globally by counting the total true positives, false negatives and false positives. (micro)</option> + <option value="samples">Calculate metrics for each instance, and find their average. Only meaningful for multilabel. (samples)</option> + <option value="macro">Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. (macro)</option> + <option value="weighted">Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters ‘macro’ to account for label imbalance; it can result in an F-score that is not between precision and recall. (weighted)</option> + <option value="None">None</option> <yield/> </param> </xml> @@ -242,7 +318,7 @@ <param argument="beta" type="float" value="1.0" label="The strength of recall versus precision in the F-score" help=" "/> </xml> - + <!--Data interface--> <xml name="tabular_input"> <param name="infile" type="data" format="tabular" label="Data file with numeric values"/> @@ -258,6 +334,13 @@ <yield/> </xml> + <xml name="samples_tabular" token_multiple1="False" token_multiple2="False"> + <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/> + <param name="col1" multiple="@MULTIPLE1@" type="data_column" data_ref="infile1" label="Select target column(s):"/> + <param name="infile2" type="data" format="tabular" label="Dataset containing class labels:"/> + <param name="col2" multiple="@MULTIPLE2@" type="data_column" data_ref="infile2" label="Select target column(s):"/> + <yield/> + </xml> <xml name="clf_inputs_extended" token_label1=" " token_label2=" " token_multiple="False"> <conditional name="true_columns"> @@ -299,7 +382,7 @@ <repeat name="@NAME@" min="1" max="@MAX_NUM@" title="Select input file(s):"> <param name="input" type="data" format="@FORMAT@" label="@LABEL@" help="@HELP_TEXT@"/> </repeat> - </xml> + </xml> <xml name="sparse_target" token_label1="Select a sparse matrix:" token_label2="Select the tabular containing true labels:" token_multiple="False" token_format1="txt" token_format2="tabular" token_help1="" token_help2=""> <param name="infile1" type="data" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/> @@ -314,7 +397,7 @@ <option value="sparse">sparse matrix</option> </param> <when value="tabular"> - <expand macro="sample_cols" multiple1="true"/> + <expand macro="samples_tabular" multiple1="true"/> </when> <when value="sparse"> <expand macro="sparse_target"/> @@ -332,8 +415,8 @@ <section name="options" title="Advanced Options" expanded="False"> <yield/> <param argument="weights" type="select" label="Weight function" help="Used in prediction."> - <option value="uniform" selected="true" help="Uniform weights. All points in each neighborhood are weighted equally.">Uniform</option> - <option value="distance" help="Weight points by the inverse of their distance.">Distance</option> + <option value="uniform" selected="true">Uniform weights. All points in each neighborhood are weighted equally. (Uniform)</option> + <option value="distance">Weight points by the inverse of their distance. (Distance)</option> </param> <param argument="algorithm" type="select" label="Neighbor selection algorithm" help=" "> <option value="auto" selected="true">Auto</option> @@ -360,12 +443,15 @@ </param> <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. dafault : 3 "/> <!--TODO: param argument="gamma" float, optional (default=’auto’) --> - <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Independent term in kernel function. dafault: 0.0 "/> - <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use the shrinking heuristic" help=" "/> - <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method."/> + <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)" + help="Independent term in kernel function. dafault: 0.0 "/> + <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" + label="Use the shrinking heuristic" help=" "/> + <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" + label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method."/> <!-- param argument="cache_size"--> <!--expand macro="class_weight"/--> - <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/> + <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/> <expand macro="max_iter" default_value="-1" label="Solver maximum number of iterations" help_text="Hard limit on iterations within solver, or -1 for no limit."/> <!--param argument="decision_function_shape"--> <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results."/> @@ -395,8 +481,10 @@ <option value="kmeans" selected="true">kmeans</option> <option value="discretize">discretize</option> </param> - <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. dafault : 3 "/> - <param argument="coef0" type="integer" optional="true" value="1" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. dafault : 1 "/> + <param argument="degree" type="integer" optional="true" value="3" + label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. dafault : 3 "/> + <param argument="coef0" type="integer" optional="true" value="1" + label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. dafault : 1 "/> <!--param argument="kernel_params"--> </section> </xml> @@ -407,7 +495,7 @@ <expand macro="init"/> <expand macro="n_init" default_value="3"/> <expand macro="max_iter" default_value="100"/> - <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ."/> + <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ."/> <expand macro="random_state"/> <param argument="batch_size" type="integer" optional="true" value="100" label="Batch size" help="Size of the mini batches."/> <!--param argument="compute_labels"--> @@ -428,7 +516,7 @@ <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence."/> <!--param argument="precompute_distances"/--> <expand macro="random_state"/> - <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing distances" help="Mofifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/> + <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Mofifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/> </section> </xml> @@ -557,7 +645,8 @@ </when> <when value="euclidean_distances"> <section name="options" title="Advanced Options" expanded="False"> - <param argument="squared" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Return squared Euclidean distances" help=" "/> + <param argument="squared" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" + label="Return squared Euclidean distances" help=" "/> </section> </when> </xml> @@ -589,21 +678,27 @@ <when value="Binarizer"> <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/> <section name="options" title="Advanced Options" expanded="False"> - <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing binarization" help=" "/> - <param argument="threshold" type="float" optional="true" value="0.0" label="Threshold" help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/> + <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" + label="Use a copy of data for precomputing binarization" help=" "/> + <param argument="threshold" type="float" optional="true" value="0.0" + label="Threshold" + help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/> </section> </when> <when value="Imputer"> <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/> <section name="options" title="Advanced Options" expanded="False"> - <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing imputation" help=" "/> + <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" + label="Use a copy of data for precomputing imputation" help=" "/> <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" "> <option value="mean" selected="true">Replace missing values using the mean along the axis</option> <option value="median">Replace missing values using the median along the axis</option> <option value="most_frequent">Replace missing using the most frequent value along the axis</option> </param> - <param argument="missing_values" type="text" optional="true" value="NaN" label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/> - <param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0" label="Impute along axis = 1" help="If fasle, axis = 0 is selected for imputation. "/> + <param argument="missing_values" type="text" optional="true" value="NaN" + label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/> + <param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0" + label="Impute along axis = 1" help="If fasle, axis = 0 is selected for imputation. "/> <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" "> <option value="0" selected="true">Impute along columns</option> <option value="1">Impute along rows</option> @@ -613,15 +708,19 @@ <when value="StandardScaler"> <expand macro="multitype_input"/> <section name="options" title="Advanced Options" expanded="False"> - <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for performing inplace scaling" help=" "/> - <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Center the data before scaling" help=" "/> - <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Scale the data to unit variance (or unit standard deviation)" help=" "/> + <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" + label="Use a copy of data for performing inplace scaling" help=" "/> + <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" + label="Center the data before scaling" help=" "/> + <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" + label="Scale the data to unit variance (or unit standard deviation)" help=" "/> </section> </when> <when value="MaxAbsScaler"> <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/> <section name="options" title="Advanced Options" expanded="False"> - <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing scaling" help=" "/> + <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" + label="Use a copy of data for precomputing scaling" help=" "/> </section> </when> <when value="Normalizer"> @@ -631,13 +730,27 @@ <option value="l1" selected="true">l1</option> <option value="l2">l2</option> <option value="max">max</option> - <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing row normalization" help=" "/> </param> + <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" + label="Use a copy of data for precomputing row normalization" help=" "/> </section> </when> <yield/> </xml> + <!-- Outputs --> + + <xml name="output"> + <outputs> + <data format="tabular" name="outfile_predict"> + <filter>selected_tasks['selected_task'] == 'load'</filter> + </data> + <data format="zip" name="outfile_fit"> + <filter>selected_tasks['selected_task'] == 'train'</filter> + </data> + </outputs> + </xml> + <!--Citations--> <xml name="eden_citation"> @@ -691,4 +804,4 @@ </citations> </xml> -</macros> \ No newline at end of file +</macros>