Mercurial > repos > bgruening > sklearn_feature_selection
comparison main_macros.xml @ 10:d00e89558c18 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 76583c1fcd9d06a4679cc46ffaee44117b9e22cd
author | bgruening |
---|---|
date | Sat, 04 Aug 2018 12:17:30 -0400 |
parents | 7701da597d1d |
children | 10ceccee183e |
comparison
equal
deleted
inserted
replaced
9:7701da597d1d | 10:d00e89558c18 |
---|---|
32 options = inputs["options"] | 32 options = inputs["options"] |
33 | 33 |
34 if inputs['selected_algorithm'] == 'SelectFromModel': | 34 if inputs['selected_algorithm'] == 'SelectFromModel': |
35 if not options['threshold'] or options['threshold'] == 'None': | 35 if not options['threshold'] or options['threshold'] == 'None': |
36 options['threshold'] = None | 36 options['threshold'] = None |
37 if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load': | 37 if inputs['model_inputter']['input_mode'] == 'prefitted': |
38 with open("inputs['extra_estimator']['fitted_estimator']", 'rb') as model_handler: | 38 model_file = inputs['model_inputter']['fitted_estimator'] |
39 fitted_estimator = pickle.load(model_handler) | 39 with open(model_file, 'rb') as model_handler: |
40 new_selector = selector(fitted_estimator, prefit=True, **options) | 40 fitted_estimator = pickle.load(model_handler) |
41 else: | 41 new_selector = selector(fitted_estimator, prefit=True, **options) |
42 estimator=inputs["estimator"] | 42 else: |
43 if inputs["extra_estimator"]["has_estimator"]=='no': | 43 estimator_json = inputs['model_inputter']["estimator_selector"] |
44 estimator=inputs["extra_estimator"]["new_estimator"] | 44 estimator = get_estimator(estimator_json) |
45 estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) | 45 new_selector = selector(estimator, **options) |
46 new_selector = selector(estimator, **options) | |
47 | 46 |
48 elif inputs['selected_algorithm'] in ['RFE', 'RFECV']: | 47 elif inputs['selected_algorithm'] in ['RFE', 'RFECV']: |
49 if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'): | 48 if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'): |
50 options['scoring'] = None | 49 options['scoring'] = None |
51 estimator=inputs["estimator"] | 50 estimator=get_estimator(inputs["estimator_selector"]) |
52 if inputs["extra_estimator"]["has_estimator"]=='no': | |
53 estimator=inputs["extra_estimator"]["new_estimator"] | |
54 estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) | |
55 new_selector = selector(estimator, **options) | 51 new_selector = selector(estimator, **options) |
56 | 52 |
57 elif inputs['selected_algorithm'] == "VarianceThreshold": | 53 elif inputs['selected_algorithm'] == "VarianceThreshold": |
58 new_selector = selector(**options) | 54 new_selector = selector(**options) |
59 | 55 |
102 ) | 98 ) |
103 y=y.ravel() | 99 y=y.ravel() |
104 return X, y | 100 return X, y |
105 </token> | 101 </token> |
106 | 102 |
103 <token name="@GET_SEARCH_PARAMS_FUNCTION@"> | |
def get_search_params(params_builder):
    """Turn the search-parameter form entries into a parameter dictionary.

    Each entry of ``params_builder['param_set']`` carries a
    ``search_param_selector`` mapping with two values that are read here:

    * ``search_p`` -- free text of the form ``name: value``. Blank entries
      are skipped.
    * ``selected_param_type`` -- ``final_estimator_p`` or one of the
      ``prep_N_p`` choices.

    The value text is evaluated by a restricted asteval interpreter (see
    ``safe_eval``). The resulting key is ``estimator__NAME`` when the type is
    ``final_estimator_p``; otherwise it is ``preprocessing_N__NAME`` where N
    is the sixth character of the selected type.

    Returns the dictionary mapping those keys to the evaluated values.

    Raises ValueError when an entry has no colon, no name, no value, or a
    value the restricted interpreter cannot evaluate.
    """
    search_params = {}

    def safe_eval(literal):
        """Evaluate ``literal`` using only the whitelisted symbols below."""

        FROM_SCIPY_STATS = [ 'bernoulli', 'binom', 'boltzmann', 'dlaplace', 'geom', 'hypergeom',
                            'logser', 'nbinom', 'planck', 'poisson', 'randint', 'skellam', 'zipf' ]

        FROM_NUMPY_RANDOM = [ 'beta', 'binomial', 'bytes', 'chisquare', 'choice', 'dirichlet', 'division',
                            'exponential', 'f', 'gamma', 'geometric', 'gumbel', 'hypergeometric',
                            'laplace', 'logistic', 'lognormal', 'logseries', 'mtrand', 'multinomial',
                            'multivariate_normal', 'negative_binomial', 'noncentral_chisquare', 'noncentral_f',
                            'normal', 'pareto', 'permutation', 'poisson', 'power', 'rand', 'randint',
                            'randn', 'random', 'random_integers', 'random_sample', 'ranf', 'rayleigh',
                            'sample', 'seed', 'set_state', 'shuffle', 'standard_cauchy', 'standard_exponential',
                            'standard_gamma', 'standard_normal', 'standard_t', 'triangular', 'uniform',
                            'vonmises', 'wald', 'weibull', 'zipf' ]

        # File opening and other unneeded functions could be dropped
        UNWANTED = ['open', 'type', 'dir', 'id', 'str', 'repr']

        # Allowed symbol table. Add more if needed.
        new_syms = {
            'np_arange': getattr(np, 'arange'),
            'ensemble_ExtraTreesClassifier': getattr(ensemble, 'ExtraTreesClassifier')
        }

        syms = make_symbol_table(use_numpy=False, **new_syms)

        for method in FROM_SCIPY_STATS:
            syms['scipy_stats_' + method] = getattr(scipy.stats, method)

        for func in FROM_NUMPY_RANDOM:
            syms['np_random_' + func] = getattr(np.random, func)

        for key in UNWANTED:
            syms.pop(key, None)

        aeval = Interpreter(symtable=syms, use_numpy=False, minimal=False,
                            no_if=True, no_for=True, no_while=True, no_try=True,
                            no_functiondef=True, no_ifexp=True, no_listcomp=False,
                            no_augassign=False, no_assert=True, no_delete=True,
                            no_raise=True, no_print=True)

        value = aeval(literal)
        # The interpreter records failures on itself and hands back None
        # rather than raising, so surface them here instead of passing a
        # meaningless None on as a search value.
        if aeval.error:
            raise ValueError("Error, could not evaluate search parameter value: `%s`" % literal)
        return value

    for p in params_builder['param_set']:
        search_p = p['search_param_selector']['search_p']
        if search_p.strip() == '':
            continue
        param_type = p['search_param_selector']['selected_param_type']

        # Split on the first colon only: the value part may legitimately
        # contain colons of its own (dictionary literals, slices).
        name, colon, literal = search_p.partition(":")
        name = name.strip()
        literal = literal.strip()
        if not (colon and name and literal):
            raise ValueError("Error, make sure the search parameter input looks like `name: value`.")

        ev = safe_eval(literal)
        if param_type == "final_estimator_p":
            search_params["estimator__" + name] = ev
        else:
            search_params["preprocessing_" + param_type[5:6] + "__" + name] = ev

    return search_params
166 </token> | |
167 | |
168 <token name="@GET_ESTIMATOR_FUNCTION@"> | |
def get_estimator(estimator_json):
    """Instantiate the estimator chosen in the estimator selector.

    ``estimator_json`` is a mapping with three values that are read here:

    * ``selected_module`` -- ``xgboost``, or the name of an attribute of the
      ``sklearn`` package (for example ``svm``).
    * ``selected_estimator`` -- the class name looked up in that module.
    * ``text_params`` -- optional text holding the inside of a dictionary
      literal, for example ``'C': 1.0, 'kernel': 'linear'``. It is wrapped
      in braces and parsed with ``ast.literal_eval``.

    Returns the estimator, built with default arguments and then updated
    through ``set_params`` when ``text_params`` is not blank.

    Exits the process with an explanatory message when ``text_params``
    cannot be parsed as literal key/value pairs.
    """
    estimator_module = estimator_json['selected_module']
    estimator_cls = estimator_json['selected_estimator']

    if estimator_module == "xgboost":
        cls = getattr(xgboost, estimator_cls)
    else:
        module = getattr(sklearn, estimator_module)
        cls = getattr(module, estimator_cls)

    estimator = cls()

    estimator_params = estimator_json['text_params'].strip()
    if estimator_params != "":
        try:
            params = ast.literal_eval('{' + estimator_params + '}')
        except (ValueError, SyntaxError):
            # Malformed text such as `C=1` is reported by literal_eval as a
            # SyntaxError, not a ValueError; both mean unusable input.
            sys.exit("Unsupported parameter input: `%s`" % estimator_params)
        estimator.set_params(**params)

    return estimator
190 </token> | |
191 | |
107 <xml name="python_requirements"> | 192 <xml name="python_requirements"> |
108 <requirements> | 193 <requirements> |
109 <requirement type="package" version="2.7">python</requirement> | 194 <requirement type="package" version="2.7">python</requirement> |
110 <requirement type="package" version="0.19.1">scikit-learn</requirement> | 195 <requirement type="package" version="0.19.1">scikit-learn</requirement> |
111 <requirement type="package" version="0.22.0">pandas</requirement> | 196 <requirement type="package" version="0.22.0">pandas</requirement> |
197 <requirement type="package" version="0.72.1">xgboost</requirement> | |
112 <yield /> | 198 <yield /> |
113 </requirements> | 199 </requirements> |
114 </xml> | 200 </xml> |
115 | 201 |
116 <xml name="macro_stdio"> | 202 <xml name="macro_stdio"> |
905 </section> | 991 </section> |
906 </when> | 992 </when> |
907 </expand> | 993 </expand> |
908 </xml> | 994 </xml> |
909 | 995 |
910 <xml name="estimator_input_no_fit"> | 996 <xml name="fs_selectfrommodel_prefitted"> |
911 <expand macro="feature_selection_estimator" /> | 997 <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" > |
912 <conditional name="extra_estimator"> | 998 <option value="new" selected="true">Yes</option> |
913 <expand macro="feature_selection_extra_estimator" /> | 999 <option value="prefitted">No. Load a prefitted estimator</option> |
914 <expand macro="feature_selection_estimator_choices" /> | 1000 </param> |
915 </conditional> | 1001 <when value="new"> |
1002 <expand macro="estimator_selector_all"/> | |
1003 </when> | |
1004 <when value="prefitted"> | |
1005 <param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" /> | |
1006 </when> | |
1007 </xml> | |
1008 | |
1009 <xml name="fs_selectfrommodel_no_prefitted"> | |
1010 <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" > | |
1011 <option value="new" selected="true">Yes</option> | |
1012 </param> | |
1013 <when value="new"> | |
1014 <expand macro="estimator_selector_all"/> | |
1015 </when> | |
916 </xml> | 1016 </xml> |
917 | 1017 |
918 <xml name="feature_selection_all"> | 1018 <xml name="feature_selection_all"> |
919 <conditional name="feature_selection_algorithms"> | 1019 <conditional name="fs_algorithm_selector"> |
920 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> | 1020 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> |
921 <option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> | 1021 <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option> |
922 <option value="GenericUnivariateSelect" selected="true">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> | 1022 <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> |
1023 <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> | |
923 <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option> | 1024 <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option> |
924 <option value="SelectKBest">SelectKBest - Select features according to the k highest scores</option> | |
925 <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option> | 1025 <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option> |
926 <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option> | 1026 <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option> |
927 <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option> | 1027 <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option> |
928 <option value="RFE">RFE - Feature ranking with recursive feature elimination</option> | 1028 <option value="RFE">RFE - Feature ranking with recursive feature elimination</option> |
929 <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option> | 1029 <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option> |
930 <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option> | 1030 <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option> |
931 <!--option value="chi2">Compute chi-squared stats between each non-negative feature and class</option--> | |
932 <!--option value="f_classif">Compute the ANOVA F-value for the provided sample</option--> | |
933 <!--option value="f_regression">Univariate linear regression tests</option--> | |
934 <!--option value="mutual_info_classif">Estimate mutual information for a discrete target variable</option--> | |
935 <!--option value="mutual_info_regression">Estimate mutual information for a continuous target variable</option--> | |
936 </param> | 1031 </param> |
937 <when value="SelectFromModel"> | 1032 <when value="SelectFromModel"> |
938 <expand macro="feature_selection_estimator" /> | 1033 <conditional name="model_inputter"> |
939 <conditional name="extra_estimator"> | 1034 <yield/> |
940 <expand macro="feature_selection_extra_estimator" > | |
941 <option value="no_load">No, I will load a prefitted estimator</option> | |
942 </expand> | |
943 <expand macro="feature_selection_estimator_choices" > | |
944 <when value="no_load"> | |
945 <param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" /> | |
946 </when> | |
947 </expand> | |
948 </conditional> | 1035 </conditional> |
949 <section name="options" title="Other Options" expanded="True"> | 1036 <section name="options" title="Advanced Options" expanded="False"> |
950 <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." /> | 1037 <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." /> |
951 <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " /> | 1038 <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " /> |
952 </section> | 1039 </section> |
953 </when> | 1040 </when> |
954 <when value="GenericUnivariateSelect"> | 1041 <when value="GenericUnivariateSelect"> |
955 <expand macro="feature_selection_score_function" /> | 1042 <expand macro="feature_selection_score_function" /> |
956 <section name="options" title="Other Options" expanded="True"> | 1043 <section name="options" title="Advanced Options" expanded="False"> |
957 <param argument="mode" type="select" label="Feature selection mode"> | 1044 <param argument="mode" type="select" label="Feature selection mode"> |
958 <option value="percentile">percentile</option> | 1045 <option value="percentile">percentile</option> |
959 <option value="k_best">k_best</option> | 1046 <option value="k_best">k_best</option> |
960 <option value="fpr">fpr</option> | 1047 <option value="fpr">fpr</option> |
961 <option value="fdr">fdr</option> | 1048 <option value="fdr">fdr</option> |
964 <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" /> | 1051 <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" /> |
965 </section> | 1052 </section> |
966 </when> | 1053 </when> |
967 <when value="SelectPercentile"> | 1054 <when value="SelectPercentile"> |
968 <expand macro="feature_selection_score_function" /> | 1055 <expand macro="feature_selection_score_function" /> |
969 <section name="options" title="Other Options" expanded="True"> | 1056 <section name="options" title="Advanced Options" expanded="False"> |
970 <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" /> | 1057 <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" /> |
971 </section> | 1058 </section> |
972 </when> | 1059 </when> |
973 <when value="SelectKBest"> | 1060 <when value="SelectKBest"> |
974 <expand macro="feature_selection_score_function" /> | 1061 <expand macro="feature_selection_score_function" /> |
975 <section name="options" title="Other Options" expanded="True"> | 1062 <section name="options" title="Advanced Options" expanded="False"> |
976 <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." /> | 1063 <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." /> |
977 </section> | 1064 </section> |
978 </when> | 1065 </when> |
979 <when value="SelectFpr"> | 1066 <when value="SelectFpr"> |
980 <expand macro="feature_selection_score_function" /> | 1067 <expand macro="feature_selection_score_function" /> |
981 <section name="options" title="Other Options" expanded="True"> | 1068 <section name="options" title="Advanced Options" expanded="False"> |
982 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/> | 1069 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/> |
983 </section> | 1070 </section> |
984 </when> | 1071 </when> |
985 <when value="SelectFdr"> | 1072 <when value="SelectFdr"> |
986 <expand macro="feature_selection_score_function" /> | 1073 <expand macro="feature_selection_score_function" /> |
987 <section name="options" title="Other Options" expanded="True"> | 1074 <section name="options" title="Advanced Options" expanded="False"> |
988 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> | 1075 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> |
989 </section> | 1076 </section> |
990 </when> | 1077 </when> |
991 <when value="SelectFwe"> | 1078 <when value="SelectFwe"> |
992 <expand macro="feature_selection_score_function" /> | 1079 <expand macro="feature_selection_score_function" /> |
993 <section name="options" title="Other Options" expanded="True"> | 1080 <section name="options" title="Advanced Options" expanded="False"> |
994 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> | 1081 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> |
995 </section> | 1082 </section> |
996 </when> | 1083 </when> |
997 <when value="RFE"> | 1084 <when value="RFE"> |
998 <expand macro="feature_selection_estimator" /> | 1085 <expand macro="estimator_selector_all"/> |
999 <conditional name="extra_estimator"> | 1086 <section name="options" title="Advanced Options" expanded="False"> |
1000 <expand macro="feature_selection_extra_estimator" /> | |
1001 <expand macro="feature_selection_estimator_choices" /> | |
1002 </conditional> | |
1003 <section name="options" title="Other Options" expanded="True"> | |
1004 <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." /> | 1087 <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." /> |
1005 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> | 1088 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> |
1006 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> | 1089 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> |
1007 </section> | 1090 </section> |
1008 </when> | 1091 </when> |
1009 <when value="RFECV"> | 1092 <when value="RFECV"> |
1010 <expand macro="feature_selection_estimator" /> | 1093 <expand macro="estimator_selector_all"/> |
1011 <conditional name="extra_estimator"> | 1094 <section name="options" title="Advanced Options" expanded="False"> |
1012 <expand macro="feature_selection_extra_estimator" /> | |
1013 <expand macro="feature_selection_estimator_choices" /> | |
1014 </conditional> | |
1015 <section name="options" title="Other Options" expanded="True"> | |
1016 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> | 1095 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> |
1017 <param argument="cv" type="integer" value="" optional="true" label="cv" help="Determines the cross-validation splitting strategy" /> | 1096 <param argument="cv" type="integer" value="" optional="true" label="cv" help="Determines the cross-validation splitting strategy" /> |
1018 <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A string (see model evaluation documentation) or a scorer callable object / function with signature scorer(estimator, X, y)."/> | 1097 <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A string (see model evaluation documentation) or a scorer callable object / function with signature scorer(estimator, X, y)."/> |
1019 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> | 1098 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> |
1020 <param argument="n_jobs" type="integer" value="1" label="n_jobs" help="Number of cores to run in parallel while fitting across folds. Defaults to 1 core."/> | 1099 <param argument="n_jobs" type="integer" value="1" label="n_jobs" help="Number of cores to run in parallel while fitting across folds. Defaults to 1 core."/> |
1021 </section> | 1100 </section> |
1022 </when> | 1101 </when> |
1023 <when value="VarianceThreshold"> | 1102 <when value="VarianceThreshold"> |
1024 <section name="options" title="Options" expanded="True"> | 1103 <section name="options" title="Options" expanded="False"> |
1025 <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/> | 1104 <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/> |
1026 </section> | 1105 </section> |
1027 </when> | 1106 </when> |
1028 <!--when value="chi2"> | 1107 <!--when value="chi2"> |
1029 </when> | 1108 </when> |
1046 <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option> | 1125 <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option> |
1047 <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option> | 1126 <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option> |
1048 </param> | 1127 </param> |
1049 </xml> | 1128 </xml> |
1050 | 1129 |
1051 <xml name="feature_selection_estimator"> | 1130 <xml name="feature_selection_output_mothods"> |
1052 <param argument="estimator" type="select" label="Select an estimator" help="The base estimator from which the transformer is built."> | 1131 <conditional name="output_method_selector"> |
1053 <option value="svm.SVR(kernel="linear")">svm.SVR(kernel="linear")</option> | 1132 <param name="selected_method" type="select" label="Select an output method:"> |
1054 <option value="svm.SVC(kernel="linear")">svm.SVC(kernel="linear")</option> | |
1055 <option value="svm.LinearSVC(penalty="l1", dual=False, tol=1e-3)">svm.LinearSVC(penalty="l1", dual=False, tol=1e-3)</option> | |
1056 <option value="linear_model.LassoCV()">linear_model.LassoCV()</option> | |
1057 <option value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)">ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)</option> | |
1058 </param> | |
1059 </xml> | |
1060 | |
1061 <xml name="feature_selection_extra_estimator"> | |
1062 <param name="has_estimator" type="select" label="Does your estimator on the list above?"> | |
1063 <option value="yes">Yes, my estimator is on the list</option> | |
1064 <option value="no">No, I need make a new estimator</option> | |
1065 <yield/> | |
1066 </param> | |
1067 </xml> | |
1068 | |
1069 <xml name="feature_selection_estimator_choices"> | |
1070 <when value="yes"> | |
1071 </when> | |
1072 <when value="no"> | |
1073 <param name="new_estimator" type="text" value="" label="Make a new estimator" /> | |
1074 </when> | |
1075 <yield/> | |
1076 </xml> | |
1077 | |
1078 <xml name="feature_selection_methods"> | |
1079 <conditional name="select_methods"> | |
1080 <param name="selected_method" type="select" label="Select an operation"> | |
1081 <option value="fit_transform">fit_transform - Fit to data, then transform it</option> | 1133 <option value="fit_transform">fit_transform - Fit to data, then transform it</option> |
1082 <option value="get_support">get_support - Get a mask, or integer index, of the features selected</option> | 1134 <option value="get_support">get_support - Get a mask, or integer index, of the features selected</option> |
1083 </param> | 1135 </param> |
1084 <when value="fit_transform"> | 1136 <when value="fit_transform"> |
1085 <!--**fit_params--> | 1137 <!--**fit_params--> |
1099 | 1151 |
1100 <xml name="scoring"> | 1152 <xml name="scoring"> |
1101 <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A metric used to evaluate the estimator"/> | 1153 <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A metric used to evaluate the estimator"/> |
1102 </xml> | 1154 </xml> |
1103 | 1155 |
1104 <xml name="pre_dispatch" token_type="text" token_default_value="all" token_help="Number of predispatched jobs for parallel execution"> | 1156 <xml name="pre_dispatch" token_type="hidden" token_default_value="all" token_help="Number of predispatched jobs for parallel execution"> |
1105 <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/> | 1157 <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/> |
1106 </xml> | 1158 </xml> |
1107 | 1159 |
1160 <xml name="search_cv_estimator"> | |
1161 <param name="infile_pipeline" type="data" format="zip" label="Choose the dataset containing pipeline object:"/> | |
1162 <section name="search_params_builder" title="Search parameters Builder" expanded="true"> | |
1163 <repeat name="param_set" min="1" max="20" title="Parameter setting for search:"> | |
1164 <conditional name="search_param_selector"> | |
1165 <param name="selected_param_type" type="select" label="Choose the transformation the parameter belongs to"> | |
1166 <option value="final_estimator_p" selected="true">Final estimator</option> | |
1167 <option value="prep_1_p">Pre-processing step #1</option> | |
1168 <option value="prep_2_p">Pre-processing step #2</option> | |
1169 <option value="prep_3_p">Pre-processing step #3</option> | |
1170 <option value="prep_4_p">Pre-processing step #4</option> | |
1171 <option value="prep_5_p">Pre-processing step #5</option> | |
1172 </param> | |
1173 <when value="final_estimator_p"> | |
1174 <expand macro="search_param_input" /> | |
1175 </when> | |
1176 <when value="prep_1_p"> | |
1177 <expand macro="search_param_input" label="Pre_processing component #1 parameter:" help="One parameter per box. For example: with_centering: [True, False]."/> | |
1178 </when> | |
1179 <when value="prep_2_p"> | |
1180 <expand macro="search_param_input" label="Pre_processing component #2 parameter:" help="One parameter per box. For example: k: [3, 5, 7, 9]. See bottom for more examples"/> | |
1181 </when> | |
1182 <when value="prep_3_p"> | |
1183 <expand macro="search_param_input" label="Pre_processing component #3 parameter:" help="One parameter per box. For example: n_components: [1, 10, 100, 1000]. See bottom for more examples"/> | |
1184 </when> | |
1185 <when value="prep_4_p"> | |
1186 <expand macro="search_param_input" label="Pre_processing component #4 parameter:" help="One parameter per box. For example: n_components: [1, 10, 100, 1000]. See bottom for more examples"/> | |
1187 </when> | |
1188 <when value="prep_5_p"> | |
1189 <expand macro="search_param_input" label="Pre_processing component #5 parameter:" help="One parameter per box. For example: affinity: ['euclidean', 'l1', 'l2', 'manhattan']. See bottom for more examples"/> | |
1190 </when> | |
1191 </conditional> | |
1192 </repeat> | |
1193 </section> | |
1194 </xml> | |
1195 | |
1196 <xml name="search_param_input" token_label="Estimator parameter:" token_help="One parameter per box. For example: C: [1, 10, 100, 1000]. See bottom for more examples"> | |
1197 <param name="search_p" type="text" value="" size="100" optional="true" label="@LABEL@" help="@HELP@"> | |
1198 <sanitizer> | |
1199 <valid initial="default"> | |
1200 <add value="'"/> | |
1201 <add value="""/> | |
1202 <add value="["/> | |
1203 <add value="]"/> | |
1204 </valid> | |
1205 </sanitizer> | |
1206 </param> | |
1207 </xml> | |
1208 | |
1209 <xml name="search_cv_options"> | |
1210 <expand macro="scoring"/> | |
1211 <expand macro="model_validation_common_options"/> | |
1212 <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/> | |
1213 <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/> | |
1214 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/> | |
1215 <!--error_score--> | |
1216 <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help=""/> | |
1217 </xml> | |
1218 | |
1219 <xml name="estimator_selector_all"> <!-- Macro: two-level estimator picker. The outer select chooses a module; each <when> branch lists that module's classes and expands estimator_params_text for free-text constructor parameters. --> | |
1220 <conditional name="estimator_selector"> | |
1221 <param name="selected_module" type="select" label="Choose the module that contains target estimator:" > | |
1222 <option value="svm" selected="true">sklearn.svm</option> | |
1223 <option value="linear_model">sklearn.linear_model</option> | |
1224 <option value="ensemble">sklearn.ensemble</option> | |
1225 <option value="naive_bayes">sklearn.naive_bayes</option> | |
1226 <option value="tree">sklearn.tree</option> | |
1227 <option value="neighbors">sklearn.neighbors</option> | |
1228 <option value="xgboost">xgboost</option> | |
1229 <!--more--> | |
1230 </param> | |
1231 <when value="svm"> | |
1232 <param name="selected_estimator" type="select" label="Choose estimator class:"> | |
1233 <option value="LinearSVC" selected="true">LinearSVC</option> | |
1234 <option value="LinearSVR">LinearSVR</option> | |
1235 <option value="NuSVC">NuSVC</option> | |
1236 <option value="NuSVR">NuSVR</option> | |
1237 <option value="OneClassSVM">OneClassSVM</option> | |
1238 <option value="SVC">SVC</option> | |
1239 <option value="SVR">SVR</option> | |
1240 </param> | |
1241 <expand macro="estimator_params_text"/> | |
1242 </when> | |
1243 <when value="linear_model"> | |
1244 <param name="selected_estimator" type="select" label="Choose estimator class:"> | |
1245 <option value="ARDRegression" selected="true">ARDRegression</option> | |
1246 <option value="BayesianRidge">BayesianRidge</option> | |
1247 <option value="ElasticNet">ElasticNet</option> | |
1248 <option value="ElasticNetCV">ElasticNetCV</option> | |
1249 <option value="HuberRegressor">HuberRegressor</option> | |
1250 <option value="Lars">Lars</option> | |
1251 <option value="LarsCV">LarsCV</option> | |
1252 <option value="Lasso">Lasso</option> | |
1253 <option value="LassoCV">LassoCV</option> | |
1254 <option value="LassoLars">LassoLars</option> | |
1255 <option value="LassoLarsCV">LassoLarsCV</option> | |
1256 <option value="LassoLarsIC">LassoLarsIC</option> | |
1257 <option value="LinearRegression">LinearRegression</option> | |
1258 <option value="LogisticRegression">LogisticRegression</option> | |
1259 <option value="LogisticRegressionCV">LogisticRegressionCV</option> | |
1260 <option value="MultiTaskLasso">MultiTaskLasso</option> | |
1261 <option value="MultiTaskElasticNet">MultiTaskElasticNet</option> | |
1262 <option value="MultiTaskLassoCV">MultiTaskLassoCV</option> | |
1263 <option value="MultiTaskElasticNetCV">MultiTaskElasticNetCV</option> | |
1264 <option value="OrthogonalMatchingPursuit">OrthogonalMatchingPursuit</option> | |
1265 <option value="OrthogonalMatchingPursuitCV">OrthogonalMatchingPursuitCV</option> | |
1266 <option value="PassiveAggressiveClassifier">PassiveAggressiveClassifier</option> | |
1267 <option value="PassiveAggressiveRegressor">PassiveAggressiveRegressor</option> | |
1268 <option value="Perceptron">Perceptron</option> | |
1269 <option value="RANSACRegressor">RANSACRegressor</option> | |
1270 <option value="Ridge">Ridge</option> | |
1271 <option value="RidgeClassifier">RidgeClassifier</option> | |
1272 <option value="RidgeClassifierCV">RidgeClassifierCV</option> | |
1273 <option value="RidgeCV">RidgeCV</option> | |
1274 <option value="SGDClassifier">SGDClassifier</option> | |
1275 <option value="SGDRegressor">SGDRegressor</option> | |
1276 <option value="TheilSenRegressor">TheilSenRegressor</option> | |
1277 </param> | |
1278 <expand macro="estimator_params_text"/> | |
1279 </when> | |
1280 <when value="ensemble"> | |
1281 <param name="selected_estimator" type="select" label="Choose estimator class:"> | |
1282 <option value="AdaBoostClassifier" selected="true">AdaBoostClassifier</option> | |
1283 <option value="AdaBoostRegressor">AdaBoostRegressor</option> | |
1284 <option value="BaggingClassifier">BaggingClassifier</option> | |
1285 <option value="BaggingRegressor">BaggingRegressor</option> | |
1286 <option value="ExtraTreesClassifier">ExtraTreesClassifier</option> | |
1287 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option> | |
1288 <option value="GradientBoostingClassifier">GradientBoostingClassifier</option> | |
1289 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option> | |
1290 <option value="IsolationForest">IsolationForest</option> | |
1291 <option value="RandomForestClassifier">RandomForestClassifier</option> | |
1292 <option value="RandomForestRegressor">RandomForestRegressor</option> | |
1293 <option value="RandomTreesEmbedding">RandomTreesEmbedding</option> | |
1294 <option value="VotingClassifier">VotingClassifier</option> | |
1295 </param> | |
1296 <expand macro="estimator_params_text"/> | |
1297 </when> | |
1298 <when value="naive_bayes"> | |
1299 <param name="selected_estimator" type="select" label="Choose estimator class:"> | |
1300 <option value="BernoulliNB" selected="true">BernoulliNB</option> | |
1301 <option value="GaussianNB">GaussianNB</option> | |
1302 <option value="MultinomialNB">MultinomialNB</option> | |
1303 </param> | |
1304 <expand macro="estimator_params_text"/> | |
1305 </when> | |
1306 <when value="tree"> | |
1307 <param name="selected_estimator" type="select" label="Choose estimator class:"> | |
1308 <option value="DecisionTreeClassifier" selected="true">DecisionTreeClassifier</option> | |
1309 <option value="DecisionTreeRegressor">DecisionTreeRegressor</option> | |
1310 <option value="ExtraTreeClassifier">ExtraTreeClassifier</option> | |
1311 <option value="ExtraTreeRegressor">ExtraTreeRegressor</option> | |
1312 </param> | |
1313 <expand macro="estimator_params_text"/> | |
1314 </when> | |
1315 <when value="neighbors"> <!-- NOTE(review): BallTree, KDTree and DistanceMetric are not scikit-learn estimators (they expose no fit()); confirm the consuming script can handle them, especially BallTree as the preselected default. --> | |
1316 <param name="selected_estimator" type="select" label="Choose estimator class:"> | |
1317 <option value="BallTree" selected="true">BallTree</option> | |
1318 <option value="DistanceMetric">DistanceMetric</option> | |
1319 <option value="KDTree">KDTree</option> | |
1320 <option value="KernelDensity">KernelDensity</option> | |
1321 <option value="KNeighborsClassifier">KNeighborsClassifier</option> | |
1322 <option value="KNeighborsRegressor">KNeighborsRegressor</option> | |
1323 <option value="LocalOutlierFactor">LocalOutlierFactor</option> | |
1324 <option value="RadiusNeighborsClassifier">RadiusNeighborsClassifier</option> | |
1325 <option value="RadiusNeighborsRegressor">RadiusNeighborsRegressor</option> | |
1326 <option value="NearestCentroid">NearestCentroid</option> | |
1327 <option value="NearestNeighbors">NearestNeighbors</option> | |
1328 </param> | |
1329 <expand macro="estimator_params_text"/> | |
1330 </when> | |
1331 <when value="xgboost"> | |
1332 <param name="selected_estimator" type="select" label="Choose estimator class:"> | |
1333 <option value="XGBRegressor" selected="true">XGBRegressor</option> | |
1334 <option value="XGBClassifier">XGBClassifier</option> | |
1335 </param> | |
1336 <expand macro="estimator_params_text"/> | |
1337 </when> | |
1338 </conditional> | |
1339 </xml> | |
1340 | |
1341 <xml name="estimator_params_text" token_label="Type in estimator parameters:" | |
1342 token_help="Parameters in dictionary without braces ('{}'), e.g., 'C': 1, 'kernel': 'linear'. No double quotes. Leave this box blank for default estimator."> <!-- Macro: optional free-text box for constructor parameters. @LABEL@ and @HELP@ take the token_label/token_help defaults above unless an <expand> call passes its own label=/help=. --> | |
1343 <param name="text_params" type="text" value="" size="50" optional="true" label="@LABEL@" help="@HELP@"> | |
1344 <sanitizer> <!-- Starts from the default set of valid characters and additionally allows the single quote; presumably so quoted keys and values such as 'kernel': 'linear' reach the script unchanged (confirm against the consuming script). --> | |
1345 <valid initial="default"> | |
1346 <add value="'"/> | |
1347 </valid> | |
1348 </sanitizer> | |
1349 </param> | |
1350 </xml> | |
1351 | |
1352 <xml name="kernel_approximation_all"> <!-- Macro: picker for a kernel approximation class. Every <when> branch expands estimator_params_text with a label and a class-specific help example for the free-text parameter box. --> | |
1353 <conditional name="kernel_approximation_selector"> | |
1354 <param name="select_algorithm" type="select" label="Choose a kernel approximation algorithm:"> | |
1355 <option value="Nystroem" selected="true">Nystroem</option> | |
1356 <option value="RBFSampler">RBFSampler</option> | |
1357 <option value="AdditiveChi2Sampler">AdditiveChi2Sampler</option> | |
1358 <option value="SkewedChi2Sampler">SkewedChi2Sampler</option> | |
1359 </param> | |
1360 <when value="Nystroem"> | |
1361 <expand macro="estimator_params_text" label="Type in kernel approximator parameters:" | |
1362 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'kernel': 'rbf'. No double quotes. Leave this box blank for class default."/> | |
1363 </when> | |
1364 <when value="RBFSampler"> | |
1365 <expand macro="estimator_params_text" label="Type in kernel approximator parameters:" | |
1366 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'gamma': 1.0. No double quotes. Leave this box blank for class default."/> | |
1367 </when> | |
1368 <when value="AdditiveChi2Sampler"> | |
1369 <expand macro="estimator_params_text" label="Type in kernel approximator parameters:" | |
1370 help="Parameters in dictionary without braces ('{}'), e.g., 'sample_steps': 2, 'sample_interval': None. No double quotes. Leave this box blank for class default."/> | |
1371 </when> | |
1372 <when value="SkewedChi2Sampler"> | |
1373 <expand macro="estimator_params_text" label="Type in kernel approximator parameters:" | |
1374 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'skewedness': 1.0. No double quotes. Leave this box blank for class default."/> | |
1375 </when> | |
1376 </conditional> | |
1377 </xml> | |
1378 | |
1379 <xml name="matrix_decomposition_all"> <!-- Macro: picker for a matrix decomposition class. Every <when> branch expands estimator_params_text with a label and a class-specific help example for the free-text parameter box. --> | |
1380 <conditional name="matrix_decomposition_selector"> | |
1381 <param name="select_algorithm" type="select" label="Choose a matrix decomposition algorithm:"> | |
1382 <option value="DictionaryLearning" selected="true">DictionaryLearning</option> | |
1383 <option value="FactorAnalysis">FactorAnalysis</option> | |
1384 <option value="FastICA">FastICA</option> | |
1385 <option value="IncrementalPCA">IncrementalPCA</option> | |
1386 <option value="KernelPCA">KernelPCA</option> | |
1387 <option value="LatentDirichletAllocation">LatentDirichletAllocation</option> | |
1388 <option value="MiniBatchDictionaryLearning">MiniBatchDictionaryLearning</option> | |
1389 <option value="MiniBatchSparsePCA">MiniBatchSparsePCA</option> | |
1390 <option value="NMF">NMF</option> | |
1391 <option value="PCA">PCA</option> | |
1392 <option value="SparsePCA">SparsePCA</option> | |
1393 <option value="SparseCoder">SparseCoder</option> | |
1394 <option value="TruncatedSVD">TruncatedSVD</option> | |
1395 </param> | |
1396 <when value="DictionaryLearning"> | |
1397 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1398 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': None, 'alpha': 1.0. No double quotes. Leave this box blank for class default."/> | |
1399 </when> | |
1400 <when value="FactorAnalysis"> | |
1401 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1402 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'random_state': 42. No double quotes. Leave this box blank for class default."/> | |
1403 </when> | |
1404 <when value="FastICA"> | |
1405 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1406 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'random_state': 42. No double quotes. Leave this box blank for class default."/> | |
1407 </when> | |
1408 <when value="IncrementalPCA"> | |
1409 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1410 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'whiten': False. No double quotes. Leave this box blank for class default."/> | |
1411 </when> | |
1412 <when value="KernelPCA"> | |
1413 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1414 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'random_state': 42. No double quotes. Leave this box blank for class default."/> | |
1415 </when> | |
1416 <when value="LatentDirichletAllocation"> | |
1417 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1418 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'random_state': 42. No double quotes. Leave this box blank for class default."/> | |
1419 </when> | |
1420 <when value="MiniBatchDictionaryLearning"> | |
1421 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1422 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'random_state': 42. No double quotes. Leave this box blank for class default."/> | |
1423 </when> | |
1424 <when value="MiniBatchSparsePCA"> | |
1425 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1426 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'random_state': 42. No double quotes. Leave this box blank for class default."/> | |
1427 </when> | |
1428 <when value="NMF"> | |
1429 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1430 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'init': 'random'. No double quotes. Leave this box blank for class default."/> | |
1431 </when> | |
1432 <when value="PCA"> | |
1433 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1434 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'random_state': 42. No double quotes. Leave this box blank for class default."/> | |
1435 </when> | |
1436 <when value="SparsePCA"> | |
1437 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1438 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 100, 'random_state': 42. No double quotes. Leave this box blank for class default."/> | |
1439 </when> | |
1440 <when value="SparseCoder"> <!-- NOTE(review): scikit-learn's SparseCoder constructor requires a dictionary argument, so a blank box probably cannot build it; confirm how the consuming script handles this class. --> | |
1441 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1442 help="Parameters in dictionary without braces ('{}'), e.g., 'transform_algorithm': 'omp', 'transform_alpha': 1.0. No double quotes. Leave this box blank for class default."/> | |
1443 </when> | |
1444 <when value="TruncatedSVD"> | |
1445 <expand macro="estimator_params_text" label="Type in matrix decomposition parameters:" | |
1446 help="Parameters in dictionary without braces ('{}'), e.g., 'n_components': 2, 'algorithm': 'randomized'. No double quotes. Leave this box blank for class default."/> | |
1447 </when> | |
1448 </conditional> | |
1449 </xml> | |
1450 | |
1451 <xml name="FeatureAgglomeration"> <!-- Macro: single-option conditional for FeatureAgglomeration; its only branch expands estimator_params_text with a generic label and a class-specific help example. --> | |
1452 <conditional name="FeatureAgglomeration_selector"> | |
1453 <param name="select_algorithm" type="select" label="Choose the algorithm:"> | |
1454 <option value="FeatureAgglomeration" selected="true">FeatureAgglomeration</option> | |
1455 </param> | |
1456 <when value="FeatureAgglomeration"> | |
1457 <expand macro="estimator_params_text" label="Type in parameters:" | |
1458 help="Parameters in dictionary without braces ('{}'), e.g., 'n_clusters': 2, 'affinity': 'euclidean'. No double quotes. Leave this box blank for class default."/> | |
1459 </when> | |
1460 </conditional> | |
1461 </xml> | |
1108 <!-- Outputs --> | 1462 <!-- Outputs --> |
1109 | 1463 |
1110 <xml name="output"> | 1464 <xml name="output"> |
1111 <outputs> | 1465 <outputs> |
1112 <data format="tabular" name="outfile_predict"> | 1466 <data format="tabular" name="outfile_predict"> |
1115 <data format="zip" name="outfile_fit"> | 1469 <data format="zip" name="outfile_fit"> |
1116 <filter>selected_tasks['selected_task'] == 'train'</filter> | 1470 <filter>selected_tasks['selected_task'] == 'train'</filter> |
1117 </data> | 1471 </data> |
1118 </outputs> | 1472 </outputs> |
1119 </xml> | 1473 </xml> |
1120 | |
1121 | 1474 |
1122 <!--Citations--> | 1475 <!--Citations--> |
1123 <xml name="eden_citation"> | 1476 <xml name="eden_citation"> |
1124 <citations> | 1477 <citations> |
1125 <citation type="doi">10.5281/zenodo.15094</citation> | 1478 <citation type="doi">10.5281/zenodo.15094</citation> |