Mercurial > repos > bgruening > sklearn_pairwise_metrics

diff main_macros.xml @ 4:1573e8255a34 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 0e582cf1f3134c777cce3aa57d71b80ed95e6ba9
author: bgruening
date: Fri, 16 Feb 2018 09:13:01 -0500
parents: f8cd85c496c9
children: 08d8af6abdbc
--- a/main_macros.xml	Thu Jun 23 15:27:24 2016 -0400
+++ b/main_macros.xml	Fri Feb 16 09:13:01 2018 -0500
@@ -12,10 +12,12 @@
   </token>
 
   <xml name="python_requirements">
-    <requirements>
-        <requirement type="package" version="0.2.1b">eden</requirement>
-        <yield />
-    </requirements>
+      <requirements>
+          <requirement type="package" version="2.7">python</requirement>
+          <requirement type="package" version="0.19.1">scikit-learn</requirement>
+          <requirement type="package" version="0.22.0">pandas</requirement>
+          <yield />
+      </requirements>
   </xml>
 
   <xml name="macro_stdio">
@@ -90,6 +92,79 @@
   </xml>
 
 
+  <!--Generalized Linear Models-->
+  <xml name="loss" token_help=" " token_select="false">
+    <param argument="loss" type="select" label="Loss function"  help="@HELP@">
+        <option value="squared_loss" selected="@SELECT@">squared loss</option>
+        <option value="huber">huber</option>
+        <option value="epsilon_insensitive">epsilon insensitive</option>
+        <option value="squared_epsilon_insensitive">squared epsilon insensitive</option>
+        <yield/>
+    </param>
+  </xml>
+
+  <xml name="penalty" token_help=" ">
+    <param argument="penalty" type="select" label="Penalty (regularization term)"  help="@HELP@">
+        <option value="l2" selected="true">l2</option>
+        <option value="l1">l1</option>
+        <option value="elasticnet">elastic net</option>
+        <option value="none">none</option>
+        <yield/>
+    </param>
+  </xml>
+
+  <xml name="l1_ratio" token_default_value="0.15" token_help=" ">
+    <param argument="l1_ratio" type="float" value="@DEFAULT_VALUE@" label="Elastic Net mixing parameter" help="@HELP@"/>
+  </xml>
+
+  <xml name="epsilon" token_default_value="0.1" token_help="Used if loss is ‘huber’, ‘epsilon_insensitive’, or ‘squared_epsilon_insensitive’. ">
+    <param argument="epsilon" type="float" value="@DEFAULT_VALUE@" label="Epsilon (epsilon-sensitive loss functions only)" help="@HELP@"/>
+  </xml>
+
+  <xml name="learning_rate_s" token_help=" " token_selected1="false" token_selected2="false">
+    <param argument="learning_rate" type="select" optional="true" label="Learning rate schedule"  help="@HELP@">
+        <option value="optimal" selected="@SELECTED1@">optimal</option>
+        <option value="constant">constant</option>
+        <option value="invscaling" selected="@SELECTED2@">inverse scaling</option>
+        <yield/>
+    </param>
+  </xml>
+
+  <xml name="eta0" token_default_value="0.0" token_help="Used with ‘constant’ or ‘invscaling’ schedules. ">
+    <param argument="eta0" type="float" value="@DEFAULT_VALUE@" label="Initial learning rate" help="@HELP@"/>
+  </xml>
+
+  <xml name="power_t" token_default_value="0.5" token_help=" ">
+    <param argument="power_t" type="float" value="@DEFAULT_VALUE@" label="Exponent for inverse scaling learning rate" help="@HELP@"/>
+  </xml>
+
+  <xml name="normalize" token_checked="false" token_help=" "><!-- help text is taken from the "help" token -->
+    <param argument="normalize" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Normalize samples before training" help="@HELP@"/>
+  </xml>
+
+  <xml name="copy_X" token_checked="true" token_help="If false, samples would be overwritten. "><!-- default help lives in the "help" token so callers can override it -->
+    <param argument="copy_X" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use a copy of samples" help="@HELP@"/>
+  </xml>
+
+  <xml name="ridge_params"><!-- parameter bundle: expands the macros listed below and adds a solver select -->
+    <expand macro="normalize"/>
+    <expand macro="alpha" default_value="1.0"/>
+    <expand macro="fit_intercept"/>
+    <expand macro="max_iter" default_value=""/>
+    <expand macro="tol" default_value="0.001" help_text="Precision of the solution. "/>
+    <!--class_weight-->
+    <expand macro="copy_X"/>
+    <param argument="solver" type="select" label="Solver to use in the computational routines" help=" ">
+        <option value="auto" selected="true">auto</option>
+        <option value="svd">svd</option>
+        <option value="cholesky">cholesky</option>
+        <option value="lsqr">lsqr</option>
+        <option value="sparse_cg">sparse_cg</option>
+        <option value="sag">sag</option>
+    </param>
+    <expand macro="random_state"/>
+  </xml>
+
   <!--Ensemble methods-->
   <xml name="n_estimators" token_default_value="10" token_help=" ">
     <param argument="n_estimators" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of trees in the forest" help="@HELP@"/>
@@ -106,18 +181,18 @@
   <xml name="min_samples_leaf" token_default_value="1" token_help=" ">
     <param argument="min_samples_leaf" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Minimum number of samples in newly created leaves" help="@HELP@"/>
   </xml>
-  
+
   <xml name="min_weight_fraction_leaf" token_default_value="0.0" token_help=" ">
     <param argument="min_weight_fraction_leaf" type="float" optional="true" value="@DEFAULT_VALUE@" label="Minimum weighted fraction of the input samples required to be at a leaf node" help="@HELP@"/>
   </xml>
 
   <xml name="max_leaf_nodes" token_default_value="" token_help=" ">
     <param argument="max_leaf_nodes" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Maximum number of leaf nodes in best-first method" help="@HELP@"/>
-  </xml>  
+  </xml>
 
   <xml name="bootstrap" token_checked="true" token_help=" ">
     <param argument="bootstrap" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Use bootstrap samples for building trees." help="@HELP@"/>
-  </xml>  
+  </xml>
 
   <xml name="criterion" token_help=" ">
     <param argument="criterion" type="select" label="Function to measure the quality of a split"  help=" ">
@@ -127,13 +202,13 @@
     </param>
   </xml>
 
-  <xml name="oob_score" token_checked="flase" token_help=" ">
-    <param argument="oob_score" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Use out-of-bag samples to estimate the generalization error" help="@HELP@"/>
+  <xml name="oob_score" token_checked="false" token_help=" ">
+    <param argument="oob_score" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use out-of-bag samples to estimate the generalization error" help="@HELP@"/>
   </xml>
 
   <xml name="max_features" token_default_value="auto" token_help="This could be an integer, float, string, or None. For more information please refer to help. ">
     <param argument="max_features" type="text" optional="true" value="@DEFAULT_VALUE@" label="Number of features for finding the best split" help="@HELP@"/>
-  </xml>   
+  </xml>
 
   <xml name="learning_rate" token_default_value="1.0" token_help=" ">
     <param argument="learning_rate" type="float" optional="true" value="@DEFAULT_VALUE@" label="Learning rate" help="@HELP@"/>
@@ -150,7 +225,7 @@
   </xml>
 
   <xml name="fit_intercept" token_checked="true">
-    <param argument="fit_intercept" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Estimate the intercept" help="If false, the data is assumed to be already centered."/>
+    <param argument="fit_intercept" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Estimate the intercept" help="If false, the data is assumed to be already centered."/>
   </xml>
 
   <xml name="n_iter" token_default_value="5" token_help_text="The number of passes over the training data (aka epochs). ">
@@ -158,7 +233,7 @@
   </xml>
 
   <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration">
-    <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@"/>
+    <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@"/>
   </xml>
 
   <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results.">
@@ -166,7 +241,7 @@
   </xml>
 
   <xml name="warm_start" token_checked="true" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization,otherwise, just erase the previous solution.">
-    <param argument="warm_start" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Perform warm start" help="@HELP_TEXT@"/>
+    <param argument="warm_start" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Perform warm start" help="@HELP_TEXT@"/>
   </xml>
 
   <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term.">
@@ -230,10 +305,11 @@
 
   <xml name="average">
     <param argument="average" type="select" optional="true" label="Averaging type" help=" ">
-      <option value="micro" help="Calculate metrics globally by counting the total true positives, false negatives and false positives.">micro</option>
-      <option value="samples" help="Calculate metrics for each instance, and find their average (only meaningful for multilabel).">samples</option>
-      <!--option value="macro" help=""></option-->
-      <!--option value="weighted" help=""></option-->
+      <option value="micro">Calculate metrics globally by counting the total true positives, false negatives and false positives. (micro)</option>
+      <option value="samples">Calculate metrics for each instance, and find their average. Only meaningful for multilabel. (samples)</option>
+      <option value="macro">Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. (macro)</option>
+      <option value="weighted">Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters ‘macro’ to account for label imbalance; it can result in an F-score that is not between precision and recall. (weighted)</option>
+      <option value="None">None</option>
       <yield/>
     </param>
   </xml>
@@ -242,7 +318,7 @@
     <param argument="beta" type="float" value="1.0" label="The strength of recall versus precision in the F-score" help=" "/>
   </xml>
 
-  
+
   <!--Data interface-->
   <xml name="tabular_input">
     <param name="infile" type="data" format="tabular" label="Data file with numeric values"/>
@@ -258,6 +334,13 @@
     <yield/>
   </xml>
 
+  <xml name="samples_tabular" token_multiple1="False" token_multiple2="False"><!-- infile1/col1: training samples; infile2/col2: class labels -->
+    <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/>
+    <param name="col1" multiple="@MULTIPLE1@" type="data_column" data_ref="infile1" label="Select feature column(s):"/>
+    <param name="infile2" type="data" format="tabular" label="Dataset containing class labels:"/>
+    <param name="col2" multiple="@MULTIPLE2@" type="data_column" data_ref="infile2" label="Select target column(s):"/>
+    <yield/>
+  </xml>
 
   <xml name="clf_inputs_extended" token_label1=" " token_label2=" " token_multiple="False">
     <conditional name="true_columns">
@@ -299,7 +382,7 @@
     <repeat name="@NAME@" min="1" max="@MAX_NUM@" title="Select input file(s):">
         <param name="input" type="data" format="@FORMAT@" label="@LABEL@" help="@HELP_TEXT@"/>
     </repeat>
-  </xml> 
+  </xml>
 
   <xml name="sparse_target" token_label1="Select a sparse matrix:" token_label2="Select the tabular containing true labels:" token_multiple="False" token_format1="txt" token_format2="tabular" token_help1="" token_help2="">
     <param name="infile1" type="data" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/>
@@ -314,7 +397,7 @@
           <option value="sparse">sparse matrix</option>
       </param>
       <when value="tabular">
-          <expand macro="sample_cols" multiple1="true"/>
+          <expand macro="samples_tabular" multiple1="true"/>
       </when>
       <when value="sparse">
           <expand macro="sparse_target"/>
@@ -332,8 +415,8 @@
     <section name="options" title="Advanced Options" expanded="False">
       <yield/>
       <param argument="weights" type="select" label="Weight function" help="Used in prediction.">
-          <option value="uniform" selected="true" help="Uniform weights. All points in each neighborhood are weighted equally.">Uniform</option>
-          <option value="distance" help="Weight points by the inverse of their distance.">Distance</option>
+          <option value="uniform" selected="true">Uniform weights. All points in each neighborhood are weighted equally. (Uniform)</option>
+          <option value="distance">Weight points by the inverse of their distance. (Distance)</option>
       </param>
       <param argument="algorithm" type="select" label="Neighbor selection algorithm" help=" ">
           <option value="auto" selected="true">Auto</option>
@@ -360,12 +443,15 @@
         </param>
         <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. dafault : 3 "/>
         <!--TODO: param argument="gamma" float, optional (default=’auto’) -->
-        <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Independent term in kernel function. dafault: 0.0 "/>
-        <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use the shrinking heuristic" help=" "/>
-        <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method."/>
+        <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)"
+            help="Independent term in kernel function. default: 0.0 "/>
+        <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Use the shrinking heuristic" help=" "/>
+        <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false"
+            label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method."/>
         <!-- param argument="cache_size"-->
         <!--expand macro="class_weight"/-->
-        <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/> 
+        <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/>
         <expand macro="max_iter" default_value="-1" label="Solver maximum number of iterations" help_text="Hard limit on iterations within solver, or -1 for no limit."/>
         <!--param argument="decision_function_shape"-->
         <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results."/>
@@ -395,8 +481,10 @@
             <option value="kmeans" selected="true">kmeans</option>
             <option value="discretize">discretize</option>
         </param>
-        <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. dafault : 3 "/>
-        <param argument="coef0" type="integer" optional="true" value="1" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. dafault : 1 "/>
+        <param argument="degree" type="integer" optional="true" value="3"
+            label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/>
+        <param argument="coef0" type="float" optional="true" value="1"
+            label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. default : 1 "/>
         <!--param argument="kernel_params"-->
     </section>
   </xml>
@@ -407,7 +495,7 @@
         <expand macro="init"/>
         <expand macro="n_init" default_value="3"/>
         <expand macro="max_iter" default_value="100"/>
-        <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ."/> 
+        <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ."/>
         <expand macro="random_state"/>
         <param argument="batch_size" type="integer" optional="true" value="100" label="Batch size" help="Size of the mini batches."/>
         <!--param argument="compute_labels"-->
@@ -428,7 +516,7 @@
       <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence."/>
       <!--param argument="precompute_distances"/-->
       <expand macro="random_state"/>
-      <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing distances" help="Mofifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/>
+      <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Modifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/>
     </section>
   </xml>
 
@@ -557,7 +645,8 @@
     </when>
     <when value="euclidean_distances">
       <section name="options" title="Advanced Options" expanded="False">
-          <param argument="squared" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Return squared Euclidean distances" help=" "/>
+          <param argument="squared" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false"
+            label="Return squared Euclidean distances" help=" "/>
       </section>
     </when>
   </xml>
@@ -589,21 +678,27 @@
     <when value="Binarizer">
         <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/>
         <section name="options" title="Advanced Options" expanded="False">
-          <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing binarization" help=" "/>
-          <param argument="threshold" type="float" optional="true" value="0.0" label="Threshold" help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/>
+            <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+                label="Use a copy of data for precomputing binarization" help=" "/>
+            <param argument="threshold" type="float" optional="true" value="0.0"
+                label="Threshold"
+                help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/>
         </section>
     </when>
     <when value="Imputer">
       <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/>
       <section name="options" title="Advanced Options" expanded="False">
-          <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing imputation" help=" "/>
+          <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Use a copy of data for precomputing imputation" help=" "/>
           <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" ">
               <option value="mean" selected="true">Replace missing values using the mean along the axis</option>
               <option value="median">Replace missing values using the median along the axis</option>
               <option value="most_frequent">Replace missing using the most frequent value along the axis</option>
           </param>
-          <param argument="missing_values" type="text" optional="true" value="NaN" label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/>
-          <param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0" label="Impute along axis = 1" help="If fasle, axis = 0 is selected for imputation. "/>
+          <param argument="missing_values" type="text" optional="true" value="NaN"
+                label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/>
+          <param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0"
+                label="Impute along axis = 1" help="If false, axis = 0 is selected for imputation. "/>
           <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" ">
               <option value="0" selected="true">Impute along columns</option>
               <option value="1">Impute along rows</option>
@@ -613,15 +708,19 @@
     <when value="StandardScaler">
       <expand macro="multitype_input"/>
       <section name="options" title="Advanced Options" expanded="False">
-        <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for performing inplace scaling" help=" "/>
-        <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Center the data before scaling" help=" "/>
-        <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Scale the data to unit variance (or unit standard deviation)" help=" "/>
+        <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Use a copy of data for performing inplace scaling" help=" "/>
+        <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Center the data before scaling" help=" "/>
+        <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Scale the data to unit variance (or unit standard deviation)" help=" "/>
       </section>
     </when>
     <when value="MaxAbsScaler">
       <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supporetd."/>
       <section name="options" title="Advanced Options" expanded="False">
-        <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing scaling" help=" "/>
+        <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Use a copy of data for precomputing scaling" help=" "/>
       </section>
     </when>
     <when value="Normalizer">
@@ -631,13 +730,27 @@
           <option value="l1" selected="true">l1</option>
           <option value="l2">l2</option>
           <option value="max">max</option>
-          <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Use a copy of data for precomputing row normalization" help=" "/>
         </param>
+        <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Use a copy of data for precomputing row normalization" help=" "/>
       </section>
     </when>
     <yield/>
   </xml>
 
+  <!-- Outputs -->
+
+  <xml name="output">
+    <outputs>
+      <data format="tabular" name="outfile_predict">
+          <filter>selected_tasks['selected_task'] == 'load'</filter>
+      </data>
+      <data format="zip" name="outfile_fit">
+          <filter>selected_tasks['selected_task'] == 'train'</filter>
+      </data>
+    </outputs>
+  </xml>
+
 
   <!--Citations-->
   <xml name="eden_citation">
@@ -691,4 +804,4 @@
     </citations>
   </xml>
 
-</macros>
\ No newline at end of file
+</macros>
author	bgruening
date	Fri, 16 Feb 2018 09:13:01 -0500
parents	f8cd85c496c9
children	08d8af6abdbc