# HG changeset patch # User bgruening # Date 1527031665 14400 # Node ID 5a06c81f044daba934a7d1072e710a358fb3b8cf # Parent f017e93ceda78f86e146d57b97392a0b9becdeb1 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 79fe42239dcf077b13f85cbcd6c6e30d7e1e4832 diff -r f017e93ceda7 -r 5a06c81f044d feature_selection.xml --- a/feature_selection.xml Sat Apr 28 18:04:04 2018 -0400 +++ b/feature_selection.xml Tue May 22 19:27:45 2018 -0400 @@ -1,4 +1,4 @@ - + module, including univariate filter selection methods and recursive feature elimination algorithm main_macros.xml @@ -28,12 +28,16 @@ input_json_path = sys.argv[1] params = json.load(open(input_json_path, "r")) +## Read features +features_has_header = params["input_options"]["header1"] input_type = params["input_options"]["selected_input"] if input_type=="tabular": + header = 'infer' if features_has_header else None header = 'infer' if params["input_options"]["header1"] else None - X = read_columns( + X, input_df = read_columns( "$input_options.infile1", "$input_options.col1", + return_df = True, sep='\t', header=header, parse_dates=True @@ -41,6 +45,7 @@ else: X = mmread(open("$input_options.infile1", 'r')) +## Read labels header = 'infer' if params["input_options"]["header2"] else None y = read_columns( "$input_options.infile2", @@ -51,54 +56,55 @@ ) y=y.ravel() +## Create feature selector selector = params["feature_selection_algorithms"]["selected_algorithm"] selector = getattr(sklearn.feature_selection, selector) options = params["feature_selection_algorithms"]["options"] -#if $feature_selection_algorithms.selected_algorithm == 'SelectFromModel': -if not options['threshold'] or options['threshold'] == 'None': - options['threshold'] = None -#if $feature_selection_algorithms.extra_estimator.has_estimator == 'no_load': -fitted_estimator = pickle.load(open("$feature_selection_algorithms.extra_estimator.fitted_estimator", 'r')) -new_selector = selector(fitted_estimator, prefit=True, **options) -#else: -estimator=params["feature_selection_algorithms"]["estimator"] -if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': - estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] -estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) -new_selector = selector(estimator, **options) -new_selector.fit(X, y) -#end if +if params['feature_selection_algorithms']['selected_algorithm'] == 'SelectFromModel': + if not options['threshold'] or options['threshold'] == 'None': + options['threshold'] = None + if 'extra_estimator' in params['feature_selection_algorithms'] and params['feature_selection_algorithms']['extra_estimator']['has_estimator'] == 'no_load': + fitted_estimator = pickle.load(open("params['feature_selection_algorithms']['extra_estimator']['fitted_estimator']", 'r')) + new_selector = selector(fitted_estimator, prefit=True, **options) + else: + estimator=params["feature_selection_algorithms"]["estimator"] + if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': + estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] + estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) + new_selector = selector(estimator, **options) + new_selector.fit(X, y) -#elif $feature_selection_algorithms.selected_algorithm in ['RFE', 'RFECV']: -if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'): - options['scoring'] = None -estimator=params["feature_selection_algorithms"]["estimator"] -if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': - estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] -estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) -new_selector = selector(estimator, **options) -new_selector.fit(X, y) +elif params['feature_selection_algorithms']['selected_algorithm'] in ['RFE', 'RFECV']: + if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'): + options['scoring'] = None + estimator=params["feature_selection_algorithms"]["estimator"] + if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': + estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] + estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) + new_selector = selector(estimator, **options) + new_selector.fit(X, y) -#elif $feature_selection_algorithms.selected_algorithm == "VarianceThreshold": -new_selector = selector(**options) -new_selector.fit(X, y) +elif params['feature_selection_algorithms']['selected_algorithm'] == "VarianceThreshold": + new_selector = selector(**options) + new_selector.fit(X, y) -#else: -score_func = params["feature_selection_algorithms"]["score_func"] -score_func = getattr(sklearn.feature_selection, score_func) -new_selector = selector(score_func, **options) -new_selector.fit(X, y) -#end if +else: + score_func = params["feature_selection_algorithms"]["score_func"] + score_func = getattr(sklearn.feature_selection, score_func) + new_selector = selector(score_func, **options) + new_selector.fit(X, y) -#if $select_methods.selected_method == "fit_transform": -res = new_selector.transform(X) +## Transform to select features +selected_names = None +if "$select_methods.selected_method" == "fit_transform": + res = new_selector.transform(X) + if features_has_header: + selected_names = input_df.columns[new_selector.get_support(indices=True)] +else: + res = new_selector.get_support(params["select_methods"]["indices"]) -#else: -res = new_selector.get_support(params["select_methods"]["indices"]) -#end if - -res = pandas.DataFrame(res) +res = pandas.DataFrame(res, columns = selected_names) res.to_csv(path_or_buf="$outfile", sep='\t', index=False) @@ -106,131 +112,12 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - -
-
- - -
- - - - - - - - -
-
- - -
- -
-
- - -
- -
-
- - -
- -
-
- - -
- -
-
- - -
- -
-
- - - - - - -
- - - -
-
- - - - - - -
- - - - - -
-
- -
- -
-
- -
+
- + diff -r f017e93ceda7 -r 5a06c81f044d main_macros.xml --- a/main_macros.xml Sat Apr 28 18:04:04 2018 -0400 +++ b/main_macros.xml Tue May 22 19:27:45 2018 -0400 @@ -2,12 +2,17 @@ 0.9 -def read_columns(f, c, **args): +def read_columns(f, c, return_df=False, **args): data = pandas.read_csv(f, **args) cols = c.split (',') cols = map(int, cols) cols = list(map(lambda x: x - 1, cols)) - y = data.iloc[:,cols].values + data = data.iloc[:,cols] + y = data.values + if return_df: + return y, data + else: + return y return y @@ -789,6 +794,128 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+
+ + +
+ + + + + + + + +
+
+ + +
+ +
+
+ + +
+ +
+
+ + +
+ +
+
+ + +
+ +
+
+ + +
+ +
+
+ + + + + + +
+ + + +
+
+ + + + + + +
+ + + + + +
+
+ +
+ +
+
+ +
+
diff -r f017e93ceda7 -r 5a06c81f044d test-data/feature_selection_result01 --- a/test-data/feature_selection_result01 Sat Apr 28 18:04:04 2018 -0400 +++ b/test-data/feature_selection_result01 Tue May 22 19:27:45 2018 -0400 @@ -1,4 +1,4 @@ -0 1 +temp_1 average 69.0 69.7 59.0 58.1 88.0 77.3 diff -r f017e93ceda7 -r 5a06c81f044d test-data/feature_selection_result02 --- a/test-data/feature_selection_result02 Sat Apr 28 18:04:04 2018 -0400 +++ b/test-data/feature_selection_result02 Tue May 22 19:27:45 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 +temp_2 temp_1 forecast_noaa friend 68.0 69.0 65.0 88.0 60.0 59.0 57.0 66.0 85.0 88.0 75.0 70.0 diff -r f017e93ceda7 -r 5a06c81f044d test-data/feature_selection_result03 --- a/test-data/feature_selection_result03 Sat Apr 28 18:04:04 2018 -0400 +++ b/test-data/feature_selection_result03 Tue May 22 19:27:45 2018 -0400 @@ -1,4 +1,4 @@ -0 1 +temp_1 friend 69.0 88.0 59.0 66.0 88.0 70.0 diff -r f017e93ceda7 -r 5a06c81f044d test-data/feature_selection_result04 --- a/test-data/feature_selection_result04 Sat Apr 28 18:04:04 2018 -0400 +++ b/test-data/feature_selection_result04 Tue May 22 19:27:45 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 9 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Mon 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 1.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0 diff -r f017e93ceda7 -r 5a06c81f044d test-data/feature_selection_result05 --- a/test-data/feature_selection_result05 Sat Apr 28 18:04:04 2018 -0400 +++ b/test-data/feature_selection_result05 Tue May 22 19:27:45 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 diff -r f017e93ceda7 -r 5a06c81f044d test-data/feature_selection_result06 --- a/test-data/feature_selection_result06 Sat Apr 28 18:04:04 2018 -0400 +++ b/test-data/feature_selection_result06 Tue May 22 19:27:45 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 diff -r f017e93ceda7 -r 5a06c81f044d test-data/feature_selection_result07 --- a/test-data/feature_selection_result07 Sat Apr 28 18:04:04 2018 -0400 +++ b/test-data/feature_selection_result07 Tue May 22 19:27:45 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 diff -r f017e93ceda7 -r 5a06c81f044d test-data/feature_selection_result08 --- a/test-data/feature_selection_result08 Sat Apr 28 18:04:04 2018 -0400 +++ b/test-data/feature_selection_result08 Tue May 22 19:27:45 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 +day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 diff -r f017e93ceda7 -r 5a06c81f044d test-data/feature_selection_result09 --- a/test-data/feature_selection_result09 Sat Apr 28 18:04:04 2018 -0400 +++ b/test-data/feature_selection_result09 Tue May 22 19:27:45 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 9 10 11 12 13 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Fri week_Mon week_Sat week_Sun week_Tues 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 0.0 1.0 0.0 0.0 0.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0 0.0 0.0 0.0 0.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0 0.0 1.0 0.0 0.0 diff -r f017e93ceda7 -r 5a06c81f044d test-data/feature_selection_result10 --- a/test-data/feature_selection_result10 Sat Apr 28 18:04:04 2018 -0400 +++ b/test-data/feature_selection_result10 Tue May 22 19:27:45 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Fri week_Mon week_Sat week_Sun week_Thurs week_Tues week_Wed 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0