diff main_macros.xml @ 13:9a701331ddd4 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 79fe42239dcf077b13f85cbcd6c6e30d7e1e4832
author bgruening
date Tue, 22 May 2018 19:30:00 -0400
parents 712b1cac1686
children 98d91207d60c
line wrap: on
line diff
--- a/main_macros.xml	Sat Apr 28 18:06:37 2018 -0400
+++ b/main_macros.xml	Tue May 22 19:30:00 2018 -0400
@@ -2,12 +2,17 @@
   <token name="@VERSION@">0.9</token>
 
   <token name="@COLUMNS_FUNCTION@">
-def read_columns(f, c, **args):
+def read_columns(f, c, return_df=False, **args):
+  """Read CSV f and return the values of the 1-based, comma-separated
+  column numbers in c; with return_df=True also return that DataFrame."""
   data = pandas.read_csv(f, **args)
   cols = c.split (',')
   cols = map(int, cols)
   cols = list(map(lambda x: x - 1, cols))
-  y = data.iloc[:,cols].values
+  data = data.iloc[:,cols]
+  y = data.values
+  if return_df:
+    return y, data
   return y
   </token>
 
@@ -789,6 +794,128 @@
     </when>
     <yield/>
   </xml>
+  <xml name="feature_selection_all">
+    <conditional name="feature_selection_algorithms"> <!-- one when-branch per option of selected_algorithm; only the first option is preselected -->
+      <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
+        <option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>
+        <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>
+        <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option>
+        <option value="SelectKBest">SelectKBest - Select features according to the k highest scores</option>
+        <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option>
+        <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option>
+        <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option>
+        <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
+        <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
+        <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option>
+        <!--option value="chi2">Compute chi-squared stats between each non-negative feature and class</option-->
+        <!--option value="f_classif">Compute the ANOVA F-value for the provided sample</option-->
+        <!--option value="f_regression">Univariate linear regression tests</option-->
+        <!--option value="mutual_info_classif">Estimate mutual information for a discrete target variable</option-->
+        <!--option value="mutual_info_regression">Estimate mutual information for a continuous target variable</option-->
+      </param>
+      <when value="SelectFromModel">
+        <expand macro="feature_selection_estimator" />
+        <conditional name="extra_estimator">
+          <expand macro="feature_selection_extra_estimator" >
+            <option value="no_load">No, I will load a prefitted estimator</option>
+          </expand>
+          <expand macro="feature_selection_estimator_choices" >
+            <when value="no_load">
+              <param name="fitted_estimator" type="data" format="zip" label="Load a prefitted estimator" />
+            </when>
+          </expand>
+        </conditional>
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." />
+          <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " />
+        </section>
+      </when>
+      <when value="GenericUnivariateSelect">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="mode" type="select" label="Feature selection mode">
+            <option value="percentile">percentile</option>
+            <option value="k_best">k_best</option>
+            <option value="fpr">fpr</option>
+            <option value="fdr">fdr</option>
+            <option value="fwe">fwe</option>
+          </param>
+          <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" />
+        </section>
+      </when>
+      <when value="SelectPercentile">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="percentile" type="integer" value="10" optional="true" label="Percent of features to keep" />
+        </section>
+      </when>
+      <when value="SelectKBest">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="k" type="integer" value="10" optional="true" label="Number of top features to select" help="No 'all' option is supported." />
+        </section>
+      </when>
+      <when value="SelectFpr">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="alpha" type="float" value="" optional="true" label="Alpha" help="The highest p-value for features to be kept."/>
+        </section>
+      </when>
+      <when value="SelectFdr">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="alpha" type="float" value="" optional="true" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
+        </section>
+      </when>
+      <when value="SelectFwe">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="alpha" type="float" value="" optional="true" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
+        </section>
+      </when>
+      <when value="RFE">
+        <expand macro="feature_selection_estimator" />
+        <conditional name="extra_estimator">
+          <expand macro="feature_selection_extra_estimator" />
+          <expand macro="feature_selection_estimator_choices" />
+        </conditional>
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." />
+          <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
+          <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+        </section>
+      </when>
+      <when value="RFECV">
+        <expand macro="feature_selection_estimator" />
+        <conditional name="extra_estimator">
+          <expand macro="feature_selection_extra_estimator" />
+          <expand macro="feature_selection_estimator_choices" />
+        </conditional>
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
+          <param argument="cv" type="integer" value="" optional="true" label="cv" help="Determines the cross-validation splitting strategy" />
+          <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A string (see model evaluation documentation) or a scorer callable object / function with signature scorer(estimator, X, y)."/>
+          <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+          <param argument="n_jobs" type="integer" value="1" label="n_jobs" help="Number of cores to run in parallel while fitting across folds. Defaults to 1 core."/>
+        </section>
+      </when>
+      <when value="VarianceThreshold">
+        <section name="options" title="Options" expanded="true">
+          <param argument="threshold" type="float" value="" optional="true" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>
+        </section>
+      </when>
+      <!--when value="chi2">
+      </when>
+      <when value="f_classif">
+      </when>
+      <when value="f_regression">
+      </when>
+      <when value="mutual_info_classif">
+      </when>
+      <when value="mutual_info_regression">
+      </when-->
+    </conditional>
+  </xml>
   <xml name="feature_selection_score_function">
     <param argument="score_func" type="select" label="Select a score function">
       <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option>