Mercurial > repos > bgruening > sklearn_pairwise_metrics
diff main_macros.xml @ 0:18afd0c1cc56 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 620159e95d2fae693b5f25d591528be159130f25
author | bgruening |
---|---|
date | Wed, 04 May 2016 13:09:52 -0400 |
parents | |
children | 887b1f85a4b6 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/main_macros.xml Wed May 04 13:09:52 2016 -0400 @@ -0,0 +1,445 @@ +<macros> + <token name="@VERSION@">0.9</token> + + <xml name="python_requirements"> + <requirements> + <requirement type="package" version="0.2.1b">eden</requirement> + <yield /> + </requirements> + </xml> + + <xml name="macro_stdio"> + <stdio> + <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> + </stdio> + </xml> + + <xml name="train_loadConditional" token_train="tabular" token_data="tabular" token_model="txt"> + <conditional name="selected_tasks"> + <param name="selected_task" type="select" label="Select a Classification Task"> + <option value="load">Load a model and predict</option> + <option value="train" selected="true">Train a model</option> + </param> + <when value="load"> + <param name="infile_model" type="data" format="@MODEL@" label="Models" help="Select a model file." /> + <param name="infile_data" type="data" format="@DATA@" label="Data (tabular)" help="Select the dataset you want to classify."/> + <conditional name="prediction_options"> + <param name="prediction_option" type="select" label="Select the type of prediction"> + <option value="predict">Predict class labels</option> + <option value="advanced">Include advanced options</option> + </param> + <when value="predict"> + </when> + <when value="advanced"> + </when> + </conditional> + </when> + <when value="train"> + <param name="infile_train" type="data" format="@TRAIN@" label="Training samples (tabular)" /> + <conditional name="selected_algorithms"> + <yield /> + </conditional> + </when> + </conditional> + </xml> + + <xml name="advanced_section"> + <section name="options" title="Advanced Options" expanded="False"> + <yield /> + </section> + </xml> + + <xml name="tabular_input"> + <param name="infile" type="data" format="tabular" label="Data file with numeric values"/> + <param name="start_column" type="data_column" data_ref="infile" optional="True" label="Select a subset of data. Start column:" /> + <param name="end_column" type="data_column" data_ref="infile" optional="True" label="End column:" /> + </xml> + + <xml name="tol" token_default_value="0.0" token_help_text="Early stopping heuristics based on the relative center changes. Set to default (0.0) to disable this convergence detection."> + <param argument="tol" type="float" optional="true" value="@DEFAULT_VALUE@" label="Tolerance" help="@HELP_TEXT@"/> + </xml> + + <xml name="n_clusters" token_default_value="8"> + <param argument="n_clusters" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of clusters" help=" "/> + </xml> + + <xml name="fit_intercept" token_checked="true"> + <param argument="fit_intercept" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Estimate the intercept" help="If false, the data is assumed to be already centered."/> + </xml> + + <xml name="n_iter" token_default_value="5" token_help_text="The number of passes over the training data (aka epochs). "> + <param argument="n_iter" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of iterations" help="@HELP_TEXT@"/> + </xml> + + <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration"> + <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@"/> + </xml> + + <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results."> + <param argument="random_state" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Random seed number" help="@HELP_TEXT@"/> + </xml> + + <xml name="warm_start" token_checked="true" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization,otherwise, just erase the previous solution."> + <param argument="warm_start" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Perform warm start" help="@HELP_TEXT@"/> + </xml> + + <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term."> + <param argument="C" type="float" optional="true" value="@DEFAULT_VALUE@" label="Penalty parameter" help="@HELP_TEXT@"/> + </xml> + + <!--xml name="class_weight" token_default_value="" token_help_text=""> + <param argument="class_weight" type="" optional="true" value="@DEFAULT_VALUE@" label="" help="@HELP_TEXT@"/> + </xml--> + + <xml name="alpha" token_default_value="0.0001" token_help_text="Constant that multiplies the regularization term if regularization is used. "> + <param argument="alpha" type="float" optional="true" value="@DEFAULT_VALUE@" label="Regularization coefficient" help="@HELP_TEXT@"/> + </xml> + + <xml name="n_samples" token_default_value="100" token_help_text="The total number of points equally divided among clusters."> + <param argument="n_samples" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of samples" help="@HELP_TEXT@"/> + </xml> + + <xml name="n_features" token_default_value="2" token_help_text="Number of different numerical properties produced for each sample."> + <param argument="n_features" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of features" help="@HELP_TEXT@"/> + </xml> + + <xml name="noise" token_default_value="0.0" token_help_text="Floating point number. "> + <param argument="noise" type="float" optional="true" value="@DEFAULT_VALUE@" label="Standard deviation of the Gaussian noise added to the data" help="@HELP_TEXT@"/> + </xml> + + <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term. "> + <param argument="C" type="float" optional="true" value="@DEFAULT_VALUE@" label="Penalty parameter" help="@HELP_TEXT@"/> + </xml> + + <xml name="max_iter" token_default_value="300" token_label="Maximum number of iterations per single run" token_help_text=" "> + <param argument="max_iter" type="integer" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@"/> + </xml> + + <xml name="n_init" token_default_value="10" > + <param argument="n_init" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of runs with different centroid seeds" help=" "/> + </xml> + + <xml name="init"> + <param argument="init" type="select" label="Centroid initialization method" help="''k-means++'' selects initial cluster centers that speed up convergence. ''random'' chooses k observations (rows) at random from data as initial centroids."> + <option value="k-means++">k-means++</option> + <option value="random">random</option> + </param> + </xml> + + <xml name="gamma" token_default_value="1.0" token_label="Scaling parameter" token_help_text=" "> + <param argument="gamma" type="float" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@"/> + </xml> + + <xml name="degree" token_default_value="3" token_label="Degree of the polynomial" token_help_text=" "> + <param argument="degree" type="integer" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@"/> + </xml> + + <xml name="coef0" token_default_value="1" token_label="Zero coefficient" token_help_text=" "> + <param argument="coef0" type="integer" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@"/> + </xml> + + <xml name="multiple_input" token_name="input_files" token_max_num="10" token_format="txt" token_label="Sparse matrix file (.mtx, .txt)" token_help_text="Specify a sparse matrix file in .txt format."> + <repeat name="@NAME@" min="1" max="@MAX_NUM@" title="Select input file(s):"> + <param name="input" type="data" format="@FORMAT@" label="@LABEL@" help="@HELP_TEXT@"/> + </repeat> + </xml> + + <xml name="eden_citation"> + <citations> + <citation type="bibtex"> + @misc{fabrizio_costa_2015_15094, + author = {Fabrizio Costa and + Björn Grüning and + gigolo}, + title = {EDeN: EDeN - Graph Vectorizer}, + month = feb, + year = 2015, + doi = {10.5281/zenodo.15094}, + url = {http://dx.doi.org/10.5281/zenodo.15094} + } + } + </citation> + </citations> + </xml> + + <xml name="sklearn_citation"> + <citations> + <citation type="bibtex"> + @article{scikit-learn, + title={Scikit-learn: Machine Learning in {P}ython}, + author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. + and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. + and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and + Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, + journal={Journal of Machine Learning Research}, + volume={12}, + pages={2825--2830}, + year={2011} + url = {https://github.com/scikit-learn/scikit-learn} + } + </citation> + </citations> + </xml> + + <xml name="scipy_citation"> + <citations> + <citation type="bibtex"> + @Misc{, + author = {Eric Jones and Travis Oliphant and Pearu Peterson and others}, + title = {{SciPy}: Open source scientific tools for {Python}}, + year = {2001--}, + url = "http://www.scipy.org/", + note = {[Online; accessed 2016-04-09]} + } + </citation> + </citations> + </xml> + + + <xml name="nn_advanced_options"> + <section name="options" title="Advanced Options" expanded="False"> + <yield/> + <param argument="weights" type="select" label="Weight function" help="Used in prediction."> + <option value="uniform" selected="true" help="Uniform weights. All points in each neighborhood are weighted equally.">Uniform</option> + <option value="distance" help="Weight points by the inverse of their distance.">Distance</option> + </param> + <param argument="algorithm" type="select" label="Neighbor selection algorithm" help=" "> + <option value="auto" selected="true">Auto</option> + <option value="ball_tree">BallTree</option> + <option value="kd_tree">KDTree</option> + <option value="brute">Brute-force</option> + </param> + <param argument="leaf_size" type="integer" value="30" label="Leaf size" help="Used with BallTree and KDTree. Affects the time and memory usage of the constructed tree."/> + <!--param name="metric"--> + <!--param name="p"--> + <!--param name="metric_params"--> + </section> + </xml> + + <xml name="svc_advanced_options"> + <section name="options" title="Advanced Options" expanded="False"> + <yield/> + <param argument="kernel" type="select" optional="true" label="Kernel type" help="Kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used."> + <option value="rbf" selected="true">rbf</option> + <option value="linear">linear</option> + <option value="poly">poly</option> + <option value="sigmoid">sigmoid</option> + <option value="precomputed">precomputed</option> + </param> + <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. dafault : 3 "/> + <!--TODO: param argument="gamma" float, optional (default=’auto’) --> + <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Independent term in kernel function. dafault: 0.0 "/> + <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use the shrinking heuristic" help=" "/> + <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method."/> + <!-- param argument="cache_size"--> + <!--expand macro="class_weight"/--> + <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/> + <expand macro="max_iter" default_value="-1" label="Solver maximum number of iterations" help_text="Hard limit on iterations within solver, or -1 for no limit."/> + <!--param argument="decision_function_shape"--> + <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results."/> + </section> + </xml> + + <xml name="spectral_clustering_advanced_options"> + <section name="options" title="Advanced Options" expanded="False"> + <expand macro="n_clusters"/> + <param argument="eigen_solver" type="select" value="" label="Eigen solver" help="The eigenvalue decomposition strategy to use."> + <option value="arpack" selected="true">arpack</option> + <option value="lobpcg">lobpcg</option> + <option value="amg">amg</option> + <!--None--> + </param> + <expand macro="random_state"/> + <expand macro="n_init"/> + <param argument="gamma" type="float" optional="true" value="1.0" label="Kernel scaling factor" help="Scaling factor of RBF, polynomial, exponential chi^2 and sigmoid affinity kernel. Ignored for affinity=''nearest_neighbors''."/> + <param argument="affinity" type="select" label="Affinity" help="Affinity kernel to use. "> + <option value="rbf" selected="true">RBF</option> + <option value="precomputed">precomputed</option> + <option value="nearest_neighbors">Nearset neighbors</option> + </param> + <param argument="n_neighbors" type="integer" optional="true" value="10" label="Number of neighbors" help="Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for affinity=''rbf''"/> + <!--param argument="eigen_tol"--> + <param argument="assign_labels" type="select" label="Assign labels" help="The strategy to use to assign labels in the embedding space."> + <option value="kmeans" selected="true">kmeans</option> + <option value="discretize">discretize</option> + </param> + <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. dafault : 3 "/> + <param argument="coef0" type="integer" optional="true" value="1" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. dafault : 1 "/> + <!--param argument="kernel_params"--> + </section> + </xml> + + <xml name="minibatch_kmeans_advanced_options"> + <section name="options" title="Advanced Options" expanded="False"> + <expand macro="n_clusters"/> + <expand macro="init"/> + <expand macro="n_init" default_value="3"/> + <expand macro="max_iter" default_value="100"/> + <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ."/> + <expand macro="random_state"/> + <param argument="batch_size" type="integer" optional="true" value="100" label="Batch size" help="Size of the mini batches."/> + <!--param argument="compute_labels"--> + <param argument="max_no_improvement" type="integer" optional="true" value="10" label="Maximum number of improvement attempts" help=" + Convergence detection based on inertia (the consecutive number of mini batches that doe not yield an improvement on the smoothed inertia). + To disable, set max_no_improvement to None. "/> + <param argument="init_size" type="integer" optional="true" value="" label="Number of random initialization samples" help="Number of samples to randomly sample for speeding up the initialization . ( default: 3 * batch_size )"/> + <param argument="reassignment_ratio" type="float" optional="true" value="0.01" label="Re-assignment ratio" help="Controls the fraction of the maximum number of counts for a center to be reassigned. Higher values yield better clustering results."/> + </section> + </xml> + + <xml name="kmeans_advanced_options"> + <section name="options" title="Advanced Options" expanded="False"> + <expand macro="n_clusters"/> + <expand macro="init"/> + <expand macro="n_init"/> + <expand macro="max_iter"/> + <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence."/> + <!--param argument="precompute_distances"/--> + <expand macro="random_state"/> + <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing distances" help="Mofifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/> + </section> + </xml> + + <xml name="birch_advanced_options"> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="threshold" type="float" optional="true" value="0.5" label="Subcluster radius threshold" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster."/> + <param argument="branching_factor" type="integer" optional="true" value="50" label="Maximum number of subclusters per branch" help="Maximum number of CF subclusters in each node."/> + <expand macro="n_clusters" default_value="3"/> + <!--param argument="compute_labels"/--> + </section> + </xml> + + <xml name="dbscan_advanced_options"> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="eps" type="float" optional="true" value="0.5" label="Maximum neighborhood distance" help="The maximum distance between two samples for them to be considered as in the same neighborhood."/> + <param argument="min_samples" type="integer" optional="true" value="5" label="Minimal core point density" help="The number of samples (or total weight) in a neighborhood for a point (including the point itself) to be considered as a core point."/> + <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" help="The metric to use when calculating distance between instances in a feature array."/> + <param argument="algorithm" type="select" label="Pointwise distance computation algorithm" help="The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors."> + <option value="auto" selected="true">auto</option> + <option value="ball_tree">ball_tree</option> + <option value="kd_tree">kd_tree</option> + <option value="brute">brute</option> + </param> + <param argument="leaf_size" type="integer" optional="true" value="30" label="Leaf size" help="Leaf size passed to BallTree or cKDTree. Memory and time efficieny factor in tree constrution and querying."/> + </section> + </xml> + + <xml name="clustering_algorithms_options"> + <conditional name="algorithm_options"> + <param name="selected_algorithm" type="select" label="Clustering Algorithm"> + <option value="KMeans" selected="true">KMeans</option> + <option value="SpectralClustering">Spectral Clustering</option> + <option value="MiniBatchKMeans">Mini Batch KMeans</option> + <option value="DBSCAN">DBSCAN</option> + <option value="Birch">Birch</option> + </param> + <when value="KMeans"> + <expand macro="kmeans_advanced_options"/> + </when> + <when value="DBSCAN"> + <expand macro="dbscan_advanced_options"/> + </when> + <when value="Birch"> + <expand macro="birch_advanced_options"/> + </when> + <when value="SpectralClustering"> + <expand macro="spectral_clustering_advanced_options"/> + </when> + <when value="MiniBatchKMeans"> + <expand macro="minibatch_kmeans_advanced_options"/> + </when> + </conditional> + </xml> + + <xml name="distance_metrics"> + <param argument="metric" type="select" label="Distance metric" help=" "> + <option value="euclidean" selected="true">euclidean</option> + <option value="cityblock">cityblock</option> + <option value="cosine">cosine</option> + <option value="l1">l1</option> + <option value="l2">l2</option> + <option value="manhattan">manhattan</option> + <yield/> + </param> + </xml> + + <xml name="distance_nonsparse_metrics"> + <option value="braycurtis">braycurtis</option> + <option value="canberra">canberra</option> + <option value="chebyshev">chebyshev</option> + <option value="correlation">correlation</option> + <option value="dice">dice</option> + <option value="hamming">hamming</option> + <option value="jaccard">jaccard</option> + <option value="kulsinski">kulsinski</option> + <option value="mahalanobis">mahalanobis</option> + <option value="matching">matching</option> + <option value="minkowski">minkowski</option> + <option value="rogerstanimoto">rogerstanimoto</option> + <option value="russellrao">russellrao</option> + <option value="seuclidean">seuclidean</option> + <option value="sokalmichener">sokalmichener</option> + <option value="sokalsneath">sokalsneath</option> + <option value="sqeuclidean">sqeuclidean</option> + <option value="yule">yule</option> + </xml> + + <xml name="pairwise_kernel_metrics"> + <param argument="metric" type="select" label="Pirwise Kernel metric" help=" "> + <option value="rbf" selected="true">rbf</option> + <option value="sigmoid">sigmoid</option> + <option value="polynomial">polynomial</option> + <option value="linear" selected="true">linear</option> + <option value="chi2">chi2</option> + <option value="additive_chi2">additive_chi2</option> + </param> + </xml> + + <xml name="sparse_pairwise_metric_functions"> + <param name="selected_metric_function" type="select" label="Select the pairwise metric you want to compute:"> + <option value="euclidean_distances" selected="true">Euclidean distance matrix</option> + <option value="pairwise_distances">Distance matrix</option> + <option value="pairwise_distances_argmin">Minimum distances between one point and a set of points</option> + <yield/> + </param> + </xml> + + <xml name="pairwise_metric_functions"> + <option value="additive_chi2_kernel" >Additive chi-squared kernel</option> + <option value="chi2_kernel">Exponential chi-squared kernel</option> + <option value="linear_kernel">Linear kernel</option> + <option value="manhattan_distances">L1 distances</option> + <option value="pairwise_kernels">Kernel</option> + <option value="polynomial_kernel">Polynomial kernel</option> + <option value="rbf_kernel">Gaussian (rbf) kernel</option> + <option value="laplacian_kernel">Laplacian kernel</option> + </xml> + + <xml name="sparse_pairwise_condition"> + <when value="pairwise_distances"> + <section name="options" title="Advanced Options" expanded="False"> + <expand macro="distance_metrics"> + <yield/> + </expand> + </section> + </when> + <when value="euclidean_distances"> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="squared" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Return squared Euclidean distances" help=" "/> + </section> + </when> + </xml> + + <xml name="argmin_distance_condition"> + <when value="pairwise_distances_argmin"> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="axis" type="integer" optional="true" value="1" label="Axis" help="Axis along which the argmin and distances are to be computed."/> + <expand macro="distance_metrics"> + <yield/> + </expand> + <param argument="batch_size" type="integer" optional="true" value="500" label="Batch size" help="Number of rows to be processed in each batch run."/> + </section> + </when> + </xml> + +</macros> \ No newline at end of file