Mercurial > repos > bgruening > sklearn_searchcv
diff search_model_validation.xml @ 2:79f41472b53f draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit b1e5fa3170484d2cc3396f2abe99bb8cfcfa9c65
author | bgruening |
---|---|
date | Tue, 07 Aug 2018 05:39:16 -0400 |
parents | f6802e2b5bc7 |
children | a58ad596036f |
line wrap: on
line diff
--- a/search_model_validation.xml Sat Aug 04 17:28:53 2018 -0400
+++ b/search_model_validation.xml Tue Aug 07 05:39:16 2018 -0400
@@ -20,6 +20,7 @@
 import sys
 import json
 import pandas
+import re
 import pickle
 import numpy as np
 import xgboost
@@ -30,7 +31,9 @@
 
 @COLUMNS_FUNCTION@
 @GET_ESTIMATOR_FUNCTION@
+@SAFE_EVAL_FUNCTION@
 @GET_SEARCH_PARAMS_FUNCTION@
+@GET_CV_FUNCTION@
 
 input_json_path = sys.argv[1]
 with open(input_json_path, "r") as param_handler:
@@ -42,9 +45,6 @@
 infile_pipeline = "$search_schemes.infile_pipeline"
 outfile_result = "$outfile_result"
 outfile_estimator = "$outfile_estimator"
-#if $search_schemes.selected_search_scheme == "RandomizedSearchCV":
-np.random.seed($search_schemes.random_seed)
-#end if
 
 params_builder = params['search_schemes']['search_params_builder']
 
@@ -87,6 +87,7 @@
 optimizers = getattr(model_selection, optimizers)
 
 options = params["search_schemes"]["options"]
+options['cv'] = get_cv( options['cv'].strip() )
 if 'scoring' in options and options['scoring'] == '':
     options['scoring'] = None
 if 'pre_dispatch' in options and options['pre_dispatch'] == '':
@@ -123,7 +124,6 @@
                     </section>
                 </when>
                 <when value="RandomizedSearchCV">
-                    <param name="random_seed" type="integer" value="65535" min="0" max="65535" label="Set up random seed:"/>
                     <expand macro="search_cv_estimator"/>
                     <section name="options" title="Advanced Options for SearchCV" expanded="false">
                         <expand macro="search_cv_options"/>
@@ -347,6 +347,22 @@
                 </assert_contents>
             </output>
         </test>
+        <test>
+            <param name="selected_search_scheme" value="GridSearchCV"/>
+            <param name="infile_pipeline" value="pipeline01"/>
+            <conditional name="search_param_selector">
+                <param name="search_p" value="C: [1, 10, 100, 1000]"/>
+                <param name="selected_param_type" value="final_estimator_p"/>
+            </conditional>
+            <param name='cv' value="StratifiedKFold(n_splits=3, shuffle=True, random_state=10)"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true" />
+            <param name="selected_column_selector_option" value="all_columns"/>
+            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="header2" value="true" />
+            <param name="selected_column_selector_option2" value="all_columns"/>
+            <output name="outfile_estimator" file="searchCV01" compare="sim_size" delta="1"/>
+        </test>
     </tests>
     <help>
         <![CDATA[