Mercurial > repos > bgruening > sklearn_searchcv

diff search_model_validation.xml @ 2:79f41472b53f draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit b1e5fa3170484d2cc3396f2abe99bb8cfcfa9c65
author: bgruening
date: Tue, 07 Aug 2018 05:39:16 -0400
parents: f6802e2b5bc7
children: a58ad596036f
--- a/search_model_validation.xml	Sat Aug 04 17:28:53 2018 -0400
+++ b/search_model_validation.xml	Tue Aug 07 05:39:16 2018 -0400
@@ -20,6 +20,7 @@
 import sys
 import json
 import pandas
+import re
 import pickle
 import numpy as np
 import xgboost
@@ -30,7 +31,9 @@
 
 @COLUMNS_FUNCTION@
 @GET_ESTIMATOR_FUNCTION@
+@SAFE_EVAL_FUNCTION@
 @GET_SEARCH_PARAMS_FUNCTION@
+@GET_CV_FUNCTION@
 
 input_json_path = sys.argv[1]
 with open(input_json_path, "r") as param_handler:
@@ -42,9 +45,6 @@
 infile_pipeline = "$search_schemes.infile_pipeline"
 outfile_result = "$outfile_result"
 outfile_estimator = "$outfile_estimator"
-#if $search_schemes.selected_search_scheme == "RandomizedSearchCV":
-np.random.seed($search_schemes.random_seed)
-#end if
 
 params_builder = params['search_schemes']['search_params_builder']
 
@@ -87,6 +87,7 @@
 optimizers = getattr(model_selection, optimizers)
 
 options = params["search_schemes"]["options"]
+options['cv'] = get_cv( options['cv'].strip() )
 if 'scoring' in options and options['scoring'] == '':
     options['scoring'] = None
 if 'pre_dispatch' in options and options['pre_dispatch'] == '':
@@ -123,7 +124,6 @@
                 </section>
             </when>
             <when value="RandomizedSearchCV">
-                <param name="random_seed" type="integer" value="65535" min="0" max="65535" label="Set up random seed:"/>
                 <expand macro="search_cv_estimator"/>
                 <section name="options" title="Advanced Options for SearchCV" expanded="false">
                     <expand macro="search_cv_options"/>
@@ -347,6 +347,22 @@
                 </assert_contents>
             </output>
         </test>
+        <test>
+            <param name="selected_search_scheme" value="GridSearchCV"/>
+            <param name="infile_pipeline" value="pipeline01"/>
+            <conditional name="search_param_selector">
+                <param name="search_p" value="C: [1, 10, 100, 1000]"/>
+                <param name="selected_param_type" value="final_estimator_p"/>
+            </conditional>
+            <param name='cv' value="StratifiedKFold(n_splits=3, shuffle=True, random_state=10)"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true" />
+            <param name="selected_column_selector_option" value="all_columns"/>
+            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="header2" value="true" />
+            <param name="selected_column_selector_option2" value="all_columns"/>
+            <output name="outfile_estimator" file="searchCV01" compare="sim_size" delta="1"/>
+        </test>
     </tests>
     <help>
         <![CDATA[
author	bgruening
date	Tue, 07 Aug 2018 05:39:16 -0400
parents	f6802e2b5bc7
children	a58ad596036f