annotate fitted_model_eval.py @ 33:e270c53b5df6 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5b2ac730ec6d3b762faa9034eddd19ad1b347476"
author bgruening
date Mon, 16 Dec 2019 09:39:33 +0000
parents 4d8c3191289d
children ff2a7f89069e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
31
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
1 import argparse
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
2 import json
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
3 import pandas as pd
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
4 import warnings
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
5
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
6 from scipy.io import mmread
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
7 from sklearn.pipeline import Pipeline
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
8 from sklearn.metrics.scorer import _check_multimetric_scoring
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
9 from sklearn.model_selection._validation import _score
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
10 from galaxy_ml.utils import get_scoring, load_model, read_columns
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
11
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
12
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
13 def _get_X_y(params, infile1, infile2):
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
14 """ read from inputs and output X and y
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
15
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
16 Parameters
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
17 ----------
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
18 params : dict
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
19 Tool inputs parameter
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
20 infile1 : str
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
21 File path to dataset containing features
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
22 infile2 : str
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
23 File path to dataset containing target values
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
24
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
25 """
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
26 # store read dataframe object
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
27 loaded_df = {}
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
28
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
29 input_type = params['input_options']['selected_input']
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
30 # tabular input
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
31 if input_type == 'tabular':
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
32 header = 'infer' if params['input_options']['header1'] else None
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
33 column_option = (params['input_options']['column_selector_options_1']
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
34 ['selected_column_selector_option'])
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
35 if column_option in ['by_index_number', 'all_but_by_index_number',
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
36 'by_header_name', 'all_but_by_header_name']:
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
37 c = params['input_options']['column_selector_options_1']['col1']
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
38 else:
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
39 c = None
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
40
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
41 df_key = infile1 + repr(header)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
42 df = pd.read_csv(infile1, sep='\t', header=header,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
43 parse_dates=True)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
44 loaded_df[df_key] = df
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
45
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
46 X = read_columns(df, c=c, c_option=column_option).astype(float)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
47 # sparse input
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
48 elif input_type == 'sparse':
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
49 X = mmread(open(infile1, 'r'))
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
50
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
51 # Get target y
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
52 header = 'infer' if params['input_options']['header2'] else None
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
53 column_option = (params['input_options']['column_selector_options_2']
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
54 ['selected_column_selector_option2'])
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
55 if column_option in ['by_index_number', 'all_but_by_index_number',
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
56 'by_header_name', 'all_but_by_header_name']:
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
57 c = params['input_options']['column_selector_options_2']['col2']
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
58 else:
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
59 c = None
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
60
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
61 df_key = infile2 + repr(header)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
62 if df_key in loaded_df:
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
63 infile2 = loaded_df[df_key]
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
64 else:
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
65 infile2 = pd.read_csv(infile2, sep='\t',
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
66 header=header, parse_dates=True)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
67 loaded_df[df_key] = infile2
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
68
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
69 y = read_columns(
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
70 infile2,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
71 c=c,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
72 c_option=column_option,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
73 sep='\t',
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
74 header=header,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
75 parse_dates=True)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
76 if len(y.shape) == 2 and y.shape[1] == 1:
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
77 y = y.ravel()
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
78
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
79 return X, y
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
80
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
81
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
82 def main(inputs, infile_estimator, outfile_eval,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
83 infile_weights=None, infile1=None,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
84 infile2=None):
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
85 """
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
86 Parameter
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
87 ---------
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
88 inputs : str
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
89 File path to galaxy tool parameter
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
90
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
91 infile_estimator : strgit
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
92 File path to trained estimator input
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
93
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
94 outfile_eval : str
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
95 File path to save the evalulation results, tabular
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
96
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
97 infile_weights : str
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
98 File path to weights input
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
99
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
100 infile1 : str
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
101 File path to dataset containing features
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
102
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
103 infile2 : str
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
104 File path to dataset containing target values
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
105 """
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
106 warnings.filterwarnings('ignore')
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
107
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
108 with open(inputs, 'r') as param_handler:
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
109 params = json.load(param_handler)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
110
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
111 X_test, y_test = _get_X_y(params, infile1, infile2)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
112
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
113 # load model
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
114 with open(infile_estimator, 'rb') as est_handler:
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
115 estimator = load_model(est_handler)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
116
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
117 main_est = estimator
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
118 if isinstance(estimator, Pipeline):
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
119 main_est = estimator.steps[-1][-1]
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
120 if hasattr(main_est, 'config') and hasattr(main_est, 'load_weights'):
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
121 if not infile_weights or infile_weights == 'None':
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
122 raise ValueError("The selected model skeleton asks for weights, "
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
123 "but no dataset for weights was provided!")
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
124 main_est.load_weights(infile_weights)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
125
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
126 # handle scorer, convert to scorer dict
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
127 scoring = params['scoring']
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
128 scorer = get_scoring(scoring)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
129 scorer, _ = _check_multimetric_scoring(estimator, scoring=scorer)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
130
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
131 if hasattr(estimator, 'evaluate'):
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
132 scores = estimator.evaluate(X_test, y_test=y_test,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
133 scorer=scorer,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
134 is_multimetric=True)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
135 else:
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
136 scores = _score(estimator, X_test, y_test, scorer,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
137 is_multimetric=True)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
138
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
139 # handle output
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
140 for name, score in scores.items():
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
141 scores[name] = [score]
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
142 df = pd.DataFrame(scores)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
143 df = df[sorted(df.columns)]
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
144 df.to_csv(path_or_buf=outfile_eval, sep='\t',
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
145 header=True, index=False)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
146
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
147
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
148 if __name__ == '__main__':
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
149 aparser = argparse.ArgumentParser()
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
150 aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
151 aparser.add_argument("-e", "--infile_estimator", dest="infile_estimator")
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
152 aparser.add_argument("-w", "--infile_weights", dest="infile_weights")
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
153 aparser.add_argument("-X", "--infile1", dest="infile1")
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
154 aparser.add_argument("-y", "--infile2", dest="infile2")
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
155 aparser.add_argument("-O", "--outfile_eval", dest="outfile_eval")
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
156 args = aparser.parse_args()
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
157
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
158 main(args.inputs, args.infile_estimator, args.outfile_eval,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
159 infile_weights=args.infile_weights, infile1=args.infile1,
4d8c3191289d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
bgruening
parents:
diff changeset
160 infile2=args.infile2)