Mercurial > repos > bgruening > sklearn_numeric_clustering
comparison main_macros.xml @ 0:dac8a9712939 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit a6e80305ed0892c8163d690a2d376d6b454824de-dirty
author | bgruening |
---|---|
date | Mon, 02 May 2016 16:16:42 -0400 |
parents | |
children | d938b80a954f |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dac8a9712939 |
---|---|
1 <macros> | |
2 <token name="@VERSION@">0.9</token> | |
3 | |
4 <!-- Shared requirements list: declares the eden package and lets the caller append further requirements through the yield. --> <xml name="python_requirements"> | |
5 <requirements> | |
6 <requirement version="0.2.1b" type="package">eden</requirement> | |
7 <yield/> | |
8 </requirements> | |
9 </xml> | |
10 | |
11 <!-- Standard stdio handling: any exit code of 1 or above is reported as a fatal error. --> <xml name="macro_stdio"> | |
12 <stdio> | |
13 <exit_code level="fatal" range="1:" description="Error occurred. Please check Tool Standard Error"/> | |
14 </stdio> | |
15 </xml> | |
16 | |
17 <!-- Train-or-load task switch. The TRAIN, DATA and MODEL tokens set the accepted dataset formats; the yield supplies the contents of the algorithm conditional. --> <xml name="train_loadConditional" token_train="tabular" token_data="tabular" token_model="txt"> | |
18 <conditional name="selected_tasks"> | |
19 <param type="select" name="selected_task" label="Select a Classification Task"> | |
20 <option value="load">Load a model and predict</option> | |
21 <option selected="true" value="train">Train a model</option> | |
22 </param> | |
23 <when value="load"> | |
24 <param type="data" name="infile_model" format="@MODEL@" help="Select a model file." label="Models"/> | |
25 <param type="data" name="infile_data" format="@DATA@" help="Select the dataset you want to classify." label="Data (tabular)"/> | |
26 <conditional name="prediction_options"> | |
27 <param type="select" name="prediction_option" label="Select the type of prediction"> | |
28 <option value="predict">Predict class labels</option> | |
29 <option value="advanced">Include advanced options</option> | |
30 </param> | |
31 <when value="predict"> | |
32 </when> | |
33 <when value="advanced"> | |
34 </when> | |
35 </conditional> | |
36 </when> | |
37 <when value="train"> | |
38 <param type="data" name="infile_train" format="@TRAIN@" label="Training samples (tabular)"/> | |
39 <conditional name="selected_algorithms"> | |
40 <yield/> | |
41 </conditional> | |
42 </when> | |
43 </conditional> | |
44 </xml> | |
45 | |
46 <!-- Generic collapsed "Advanced Options" section; its parameters are provided by the caller through the yield. --> <xml name="advanced_section"> | |
47 <section title="Advanced Options" name="options" expanded="False"> | |
48 <yield/> | |
49 </section> | |
50 </xml> | |
51 | |
52 <!-- Tabular dataset input plus two optional column selectors that both refer to that dataset. --> <xml name="tabular_input"> | |
53 <param type="data" name="infile" format="tabular" label="Data file with numeric values"/> | |
54 <param type="data_column" name="start_column" data_ref="infile" label="Select a subset of data. Start column:" optional="True"/> | |
55 <param type="data_column" name="end_column" data_ref="infile" label="End column:" optional="True"/> | |
56 </xml> | |
57 | |
58 <!-- Optional float "tol" parameter; the default value and help text come from overridable tokens. --> <xml name="tol" token_default_value="0.0" token_help_text="Early stopping heuristics based on the relative center changes. Set to default (0.0) to disable this convergence detection."> | |
59 <param type="float" argument="tol" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Tolerance"/> | |
60 </xml> | |
61 | |
62 <!-- Optional integer "n_clusters" parameter; the default (8) can be overridden by the caller. --> <xml name="n_clusters" token_default_value="8"> | |
63 <param type="integer" argument="n_clusters" value="@DEFAULT_VALUE@" optional="true" help=" " label="Number of clusters"/> | |
64 </xml> | |
65 | |
66 <!-- Optional boolean "fit_intercept" parameter, checked by default. NOTE(review): falsevalue is spelled "boolflase" throughout this file; confirm what consumers match on before correcting it. --> <xml name="fit_intercept" token_checked="true"> | |
67 <param type="boolean" argument="fit_intercept" checked="@CHECKED@" truevalue="booltrue" falsevalue="boolflase" optional="true" help="If false, the data is assumed to be already centered." label="Estimate the intercept"/> | |
68 </xml> | |
69 | |
70 <!-- Optional integer "n_iter" parameter; default value (5) and help text are overridable tokens. --> <xml name="n_iter" token_default_value="5" token_help_text="The number of passes over the training data (aka epochs). "> | |
71 <param type="integer" argument="n_iter" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Number of iterations"/> | |
72 </xml> | |
73 | |
74 <!-- Optional boolean "shuffle" parameter; checked state, label and help text are overridable tokens. --> <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration"> | |
75 <param type="boolean" argument="shuffle" checked="@CHECKED@" truevalue="booltrue" falsevalue="boolflase" optional="true" help="@HELP_TEXT@" label="@LABEL@"/> | |
76 </xml> | |
77 | |
78 <!-- Optional integer "random_state" parameter with an empty default; the help text is an overridable token. --> <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results."> | |
79 <param type="integer" argument="random_state" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Random seed number"/> | |
80 </xml> | |
81 | |
82 <!-- Optional boolean "warm_start" parameter; checked state and help text are overridable tokens. --> <xml name="warm_start" token_checked="true" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization; otherwise, just erase the previous solution."> | |
83 <param argument="warm_start" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Perform warm start" help="@HELP_TEXT@"/> | |
84 </xml> | |
85 | |
86 <!-- Optional float "C" parameter; default value (1.0) and help text are overridable tokens. --> <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term."> | |
87 <param type="float" argument="C" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Penalty parameter"/> | |
88 </xml> | |
89 | |
90 <!--xml name="class_weight" token_default_value="" token_help_text=""> | |
91 <param argument="class_weight" type="" optional="true" value="@DEFAULT_VALUE@" label="" help="@HELP_TEXT@"/> | |
92 </xml--> | |
93 | |
94 <!-- Optional float "alpha" parameter; default value (0.0001) and help text are overridable tokens. --> <xml name="alpha" token_default_value="0.0001" token_help_text="Constant that multiplies the regularization term if regularization is used. "> | |
95 <param type="float" argument="alpha" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Regularization coefficient"/> | |
96 </xml> | |
97 | |
98 <!-- Optional integer "n_samples" parameter; default value (100) and help text are overridable tokens. --> <xml name="n_samples" token_default_value="100" token_help_text="The total number of points equally divided among clusters."> | |
99 <param type="integer" argument="n_samples" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Number of samples"/> | |
100 </xml> | |
101 | |
102 <!-- Optional integer "n_features" parameter; default value (2) and help text are overridable tokens. --> <xml name="n_features" token_default_value="2" token_help_text="Number of different numerical properties produced for each sample."> | |
103 <param type="integer" argument="n_features" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Number of features"/> | |
104 </xml> | |
105 | |
106 <!-- Optional float "noise" parameter; default value (0.0) and help text are overridable tokens. --> <xml name="noise" token_default_value="0.0" token_help_text="Floating point number. "> | |
107 <param type="float" argument="noise" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="Standard deviation of the Gaussian noise added to the data"/> | |
108 </xml> | |
109 | |
110 <!-- Optional float "C" parameter. NOTE(review): a macro named "C" with the same param is also defined earlier in this file; confirm which definition the macro loader keeps before removing either. --> <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term. "> | |
111 <param argument="C" type="float" optional="true" value="@DEFAULT_VALUE@" label="Penalty parameter" help="@HELP_TEXT@"/> | |
112 </xml> | |
113 | |
114 <!-- Optional integer "max_iter" parameter; default value (300), label and help text are overridable tokens. --> <xml name="max_iter" token_default_value="300" token_label="Maximum number of iterations per single run" token_help_text=" "> | |
115 <param type="integer" argument="max_iter" value="@DEFAULT_VALUE@" optional="true" help="@HELP_TEXT@" label="@LABEL@"/> | |
116 </xml> | |
117 | |
118 <!-- Optional integer "n_init" parameter; the default (10) can be overridden by the caller. --> <xml name="n_init" token_default_value="10"> | |
119 <param type="integer" argument="n_init" value="@DEFAULT_VALUE@" optional="true" help=" " label="Number of runs with different centroid seeds"/> | |
120 </xml> | |
121 | |
122 <!-- Select parameter "init" offering the two centroid initialization methods listed below. --> <xml name="init"> | |
123 <param type="select" argument="init" help="''k-means++'' selects initial cluster centers that speed up convergence. ''random'' chooses k observations (rows) at random from data as initial centroids." label="Centroid initialization method"> | |
124 <option value="k-means++">k-means++</option> | |
125 <option value="random">random</option> | |
126 </param> | |
127 </xml> | |
128 | |
129 <!-- Repeat block accepting between 1 and 10 txt-format dataset inputs. --> <xml name="multiple_sparse"> | |
130 <repeat name="sparse_inputs" title="Inputs" min="1" max="10"> | |
131 <param type="data" name="input" format="txt" help="Specify a sparse matrix file in .txt format." label="Sparse matrix file (.mtx, .txt)"/> | |
132 </repeat> | |
133 </xml> | |
134 | |
135 <!-- BibTeX citation for the EDeN graph vectorizer (Zenodo DOI record). The entry is closed by a single brace; row 148 is intentionally blank where a stray extra brace was removed. --> <xml name="eden_citation"> | |
136 <citations> | |
137 <citation type="bibtex"> | |
138 @misc{fabrizio_costa_2015_15094, | |
139 author = {Fabrizio Costa and | |
140 Björn Grüning and | |
141 gigolo}, | |
142 title = {EDeN: EDeN - Graph Vectorizer}, | |
143 month = feb, | |
144 year = 2015, | |
145 doi = {10.5281/zenodo.15094}, | |
146 url = {http://dx.doi.org/10.5281/zenodo.15094} | |
147 } | |
148 | |
149 </citation> | |
150 </citations> | |
151 </xml> | |
152 | |
153 <!-- BibTeX citation for the scikit-learn JMLR article. Every field except the last must end with a comma, including the year field that precedes url. --> <xml name="sklearn_citation"> | |
154 <citations> | |
155 <citation type="bibtex"> | |
156 @article{scikit-learn, | |
157 title={Scikit-learn: Machine Learning in {P}ython}, | |
158 author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. | |
159 and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. | |
160 and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and | |
161 Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, | |
162 journal={Journal of Machine Learning Research}, | |
163 volume={12}, | |
164 pages={2825--2830}, | |
165 year={2011}, | |
166 url = {https://github.com/scikit-learn/scikit-learn} | |
167 } | |
168 </citation> | |
169 </citations> | |
170 </xml> | |
171 | |
172 <!-- BibTeX citation for SciPy. The entry carries an explicit citation key because an empty key is not valid for strict BibTeX parsers. --> <xml name="scipy_citation"> | |
173 <citations> | |
174 <citation type="bibtex"> | |
175 @Misc{scipy, | |
176 author = {Eric Jones and Travis Oliphant and Pearu Peterson and others}, | |
177 title = {{SciPy}: Open source scientific tools for {Python}}, | |
178 year = {2001--}, | |
179 url = "http://www.scipy.org/", | |
180 note = {[Online; accessed 2016-04-09]} | |
181 } | |
182 </citation> | |
183 </citations> | |
184 </xml> | |
185 | |
186 | |
187 <!-- Nearest-neighbors advanced options section: caller content from the yield comes first, followed by the weights, algorithm and leaf_size parameters. --> <xml name="nn_advanced_options"> | |
188 <section title="Advanced Options" name="options" expanded="False"> | |
189 <yield/> | |
190 <param type="select" argument="weights" help="Used in prediction." label="Weight function"> | |
191 <option selected="true" value="uniform" help="Uniform weights. All points in each neighborhood are weighted equally.">Uniform</option> | |
192 <option value="distance" help="Weight points by the inverse of their distance.">Distance</option> | |
193 </param> | |
194 <param type="select" argument="algorithm" help=" " label="Neighbor selection algorithm"> | |
195 <option selected="true" value="auto">Auto</option> | |
196 <option value="ball_tree">BallTree</option> | |
197 <option value="kd_tree">KDTree</option> | |
198 <option value="brute">Brute-force</option> | |
199 </param> | |
200 <param type="integer" argument="leaf_size" value="30" help="Used with BallTree and KDTree. Affects the time and memory usage of the constructed tree." label="Leaf size"/> | |
201 <!--param name="metric"--> | |
202 <!--param name="p"--> | |
203 <!--param name="metric_params"--> | |
204 </section> | |
205 </xml> | |
206 | |
207 <!-- SVC advanced options section: caller content from the yield, then kernel, degree, coef0, shrinking and probability parameters, plus the shared tol, max_iter and random_state macros with SVC-specific defaults and help text. --> <xml name="svc_advanced_options"> | |
208 <section name="options" title="Advanced Options" expanded="False"> | |
209 <yield/> | |
210 <param argument="kernel" type="select" optional="true" label="Kernel type" help="Kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used."> | |
211 <option value="rbf" selected="true">rbf</option> | |
212 <option value="linear">linear</option> | |
213 <option value="poly">poly</option> | |
214 <option value="sigmoid">sigmoid</option> | |
215 <option value="precomputed">precomputed</option> | |
216 </param> | |
217 <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/> | |
218 <!--TODO: param argument="gamma" float, optional (default=’auto’) --> | |
219 <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Independent term in kernel function. default: 0.0 "/> | |
220 <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use the shrinking heuristic" help=" "/> | |
221 <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method."/> | |
222 <!-- param argument="cache_size"--> | |
223 <!--expand macro="class_weight"/--> | |
224 <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/> | |
225 <expand macro="max_iter" default_value="-1" label="Solver maximum number of iterations" help_text="Hard limit on iterations within solver, or -1 for no limit."/> | |
226 <!--param argument="decision_function_shape"--> | |
227 <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results."/> | |
228 </section> | |
229 </xml> | |
230 | |
231 <!-- Spectral clustering advanced options section. coef0 is declared as float (matching the coef0 parameter of the SVC macro in this file) so that non-integer kernel coefficients can be entered; its default stays 1. --> <xml name="spectral_clustering_advanced_options"> | |
232 <section name="options" title="Advanced Options" expanded="False"> | |
233 <expand macro="n_clusters"/> | |
234 <param argument="eigen_solver" type="select" value="" label="Eigen solver" help="The eigenvalue decomposition strategy to use."> | |
235 <option value="arpack" selected="true">arpack</option> | |
236 <option value="lobpcg">lobpcg</option> | |
237 <option value="amg">amg</option> | |
238 <!--None--> | |
239 </param> | |
240 <expand macro="random_state"/> | |
241 <expand macro="n_init"/> | |
242 <param argument="gamma" type="float" optional="true" value="1.0" label="Kernel scaling factor" help="Scaling factor of RBF, polynomial, exponential chi^2 and sigmoid affinity kernel. Ignored for affinity=''nearest_neighbors''."/> | |
243 <param argument="affinity" type="select" label="Affinity" help="Affinity kernel to use. "> | |
244 <option value="rbf" selected="true">RBF</option> | |
245 <option value="precomputed">precomputed</option> | |
246 <option value="nearest_neighbors">Nearest neighbors</option> | |
247 </param> | |
248 <param argument="n_neighbors" type="integer" optional="true" value="10" label="Number of neighbors" help="Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for affinity=''rbf''"/> | |
249 <!--param argument="eigen_tol"--> | |
250 <param argument="assign_labels" type="select" label="Assign labels" help="The strategy to use to assign labels in the embedding space."> | |
251 <option value="kmeans" selected="true">kmeans</option> | |
252 <option value="discretize">discretize</option> | |
253 </param> | |
254 <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/> | |
255 <param argument="coef0" type="float" optional="true" value="1" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. default : 1 "/> | |
256 <!--param argument="kernel_params"--> | |
257 </section> | |
258 </xml> | |
259 | |
260 <!-- Mini-batch KMeans advanced options section: shared n_clusters, init, n_init (default 3), max_iter (default 100), tol and random_state macros, plus batch_size, max_no_improvement, init_size and reassignment_ratio parameters. --> <xml name="minibatch_kmeans_advanced_options"> | |
261 <section name="options" title="Advanced Options" expanded="False"> | |
262 <expand macro="n_clusters"/> | |
263 <expand macro="init"/> | |
264 <expand macro="n_init" default_value="3"/> | |
265 <expand macro="max_iter" default_value="100"/> | |
266 <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ."/> | |
267 <expand macro="random_state"/> | |
268 <param argument="batch_size" type="integer" optional="true" value="100" label="Batch size" help="Size of the mini batches."/> | |
269 <!--param argument="compute_labels"--> | |
270 <param argument="max_no_improvement" type="integer" optional="true" value="10" label="Maximum number of improvement attempts" help=" | |
271 Convergence detection based on inertia (the consecutive number of mini batches that do not yield an improvement on the smoothed inertia). | |
272 To disable, set max_no_improvement to None. "/> | |
273 <param argument="init_size" type="integer" optional="true" value="" label="Number of random initialization samples" help="Number of samples to randomly sample for speeding up the initialization (default: 3 * batch_size)."/> | |
274 <param argument="reassignment_ratio" type="float" optional="true" value="0.01" label="Re-assignment ratio" help="Controls the fraction of the maximum number of counts for a center to be reassigned. Higher values yield better clustering results."/> | |
275 </section> | |
276 </xml> | |
277 | |
278 <!-- KMeans advanced options section: shared n_clusters, init, n_init, max_iter, tol (default 0.0001) and random_state macros, plus the copy_x boolean. --> <xml name="kmeans_advanced_options"> | |
279 <section name="options" title="Advanced Options" expanded="False"> | |
280 <expand macro="n_clusters"/> | |
281 <expand macro="init"/> | |
282 <expand macro="n_init"/> | |
283 <expand macro="max_iter"/> | |
284 <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence."/> | |
285 <!--param argument="precompute_distances"/--> | |
286 <expand macro="random_state"/> | |
287 <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing distances" help="Modifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/> | |
288 </section> | |
289 </xml> | |
290 | |
291 <!-- Birch advanced options section: threshold and branching_factor parameters plus the shared n_clusters macro with a default of 3. --> <xml name="birch_advanced_options"> | |
292 <section title="Advanced Options" name="options" expanded="False"> | |
293 <param type="float" argument="threshold" value="0.5" optional="true" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster." label="Subcluster radius threshold"/> | |
294 <param type="integer" argument="branching_factor" value="50" optional="true" help="Maximum number of CF subclusters in each node." label="Maximum number of subclusters per branch"/> | |
295 <expand default_value="3" macro="n_clusters"/> | |
296 <!--param argument="compute_labels"/--> | |
297 </section> | |
298 </xml> | |
299 | |
300 <!-- DBSCAN advanced options section: eps, min_samples, metric (free text), algorithm (select) and leaf_size parameters. --> <xml name="dbscan_advanced_options"> | |
301 <section name="options" title="Advanced Options" expanded="False"> | |
302 <param argument="eps" type="float" optional="true" value="0.5" label="Maximum neighborhood distance" help="The maximum distance between two samples for them to be considered as in the same neighborhood."/> | |
303 <param argument="min_samples" type="integer" optional="true" value="5" label="Minimal core point density" help="The number of samples (or total weight) in a neighborhood for a point (including the point itself) to be considered as a core point."/> | |
304 <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" help="The metric to use when calculating distance between instances in a feature array."/> | |
305 <param argument="algorithm" type="select" label="Pointwise distance computation algorithm" help="The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors."> | |
306 <option value="auto" selected="true">auto</option> | |
307 <option value="ball_tree">ball_tree</option> | |
308 <option value="kd_tree">kd_tree</option> | |
309 <option value="brute">brute</option> | |
310 </param> | |
311 <param argument="leaf_size" type="integer" optional="true" value="30" label="Leaf size" help="Leaf size passed to BallTree or cKDTree. Memory and time efficiency factor in tree construction and querying."/> | |
312 </section> | |
313 </xml> | |
314 | |
315 <!-- Algorithm selector: a conditional whose chosen value (KMeans by default) expands the matching advanced-options macro defined in this file. --> <xml name="clustering_algorithms_options"> | |
316 <conditional name="algorithm_options"> | |
317 <param type="select" name="selected_algorithm" label="Clustering Algorithm"> | |
318 <option selected="true" value="KMeans">KMeans</option> | |
319 <option value="SpectralClustering">Spectral Clustering</option> | |
320 <option value="MiniBatchKMeans">Mini Batch KMeans</option> | |
321 <option value="DBSCAN">DBSCAN</option> | |
322 <option value="Birch">Birch</option> | |
323 </param> | |
324 <when value="KMeans"> | |
325 <expand macro="kmeans_advanced_options"/> | |
326 </when> | |
327 <when value="DBSCAN"> | |
328 <expand macro="dbscan_advanced_options"/> | |
329 </when> | |
330 <when value="Birch"> | |
331 <expand macro="birch_advanced_options"/> | |
332 </when> | |
333 <when value="SpectralClustering"> | |
334 <expand macro="spectral_clustering_advanced_options"/> | |
335 </when> | |
336 <when value="MiniBatchKMeans"> | |
337 <expand macro="minibatch_kmeans_advanced_options"/> | |
338 </when> | |
339 </conditional> | |
340 </xml> | |
341 </macros> |