comparison pre_process.xml @ 28:a12d5eae322e draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author bgruening
date Fri, 09 Aug 2019 06:25:31 -0400
parents 55b36adb2dc7
children e270c53b5df6
comparison
equal deleted inserted replaced
27:f7e85579f2e6 28:a12d5eae322e
17 <![CDATA[ 17 <![CDATA[
18 import sys 18 import sys
19 import json 19 import json
20 import pandas 20 import pandas
21 import pickle 21 import pickle
22
22 from scipy.io import mmread 23 from scipy.io import mmread
23 from scipy.io import mmwrite 24 from scipy.io import mmwrite
24 from sklearn import preprocessing 25 from sklearn import preprocessing
25 26 from galaxy_ml.utils import read_columns, SafeEval
26 sys.path.insert(0, '$__tool_directory__') 27
27 from utils import read_columns 28
29 safe_eval = SafeEval()
28 30
29 input_json_path = sys.argv[1] 31 input_json_path = sys.argv[1]
30 with open(input_json_path, "r") as param_handler: 32 with open(input_json_path, "r") as param_handler:
31 params = json.load(param_handler) 33 params = json.load(param_handler)
32 34
37 column_option = params["input_type"]["column_selector_options_1"]["selected_column_selector_option"] 39 column_option = params["input_type"]["column_selector_options_1"]["selected_column_selector_option"]
38 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: 40 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
39 c = params["input_type"]["column_selector_options_1"]["col1"] 41 c = params["input_type"]["column_selector_options_1"]["col1"]
40 else: 42 else:
41 c = None 43 c = None
42 X = read_columns( 44 X, input_df = read_columns(
43 "$input_type.infile", 45 "$input_type.infile",
44 c = c, 46 c=c,
45 c_option = column_option, 47 c_option=column_option,
48 return_df=True,
46 sep='\t', 49 sep='\t',
47 header=header, 50 header=header,
48 parse_dates=True, 51 parse_dates=True,
49 encoding=None, 52 encoding=None,
50 index_col=None, 53 index_col=None,
51 tupleize_cols=False).astype(float) 54 tupleize_cols=False)
55 X = X.astype(float)
52 #end if 56 #end if
53 57
54 preprocessor = params["input_type"]["pre_processors"]["selected_pre_processor"] 58 preprocessor = params["input_type"]["pre_processors"]["selected_pre_processor"]
55 options = params["input_type"]["pre_processors"]["options"] 59 options = params["input_type"]["pre_processors"]["options"]
60 if 'feature_range' in options:
61 feature_range = safe_eval(options['feature_range'].strip())
62 if not feature_range:
63 feature_range = (0, 1)
64 options['feature_range'] = feature_range
56 65
57 my_class = getattr(preprocessing, preprocessor) 66 my_class = getattr(preprocessing, preprocessor)
58 estimator = my_class(**options) 67 estimator = my_class(**options)
59 estimator.fit(X) 68 estimator.fit(X)
60 result = estimator.transform(X) 69 result = estimator.transform(X)
61 70
62 #if $input_type.selected_input_type == "sparse": 71 #if $input_type.selected_input_type == "sparse":
63 with open("$outfile_transform", "wb") as transform_handler: 72 with open("$outfile_transform", "wb") as transform_handler:
64 mmwrite(transform_handler, result) 73 mmwrite(transform_handler, result)
65 #else: 74 #else:
66 res = pandas.DataFrame(result) 75 columns = input_df.columns
67 res.to_csv(path_or_buf = "$outfile_transform", sep="\t", index=False, header=None) 76 if preprocessor == 'PolynomialFeatures':
77 columns = None
78 header = False
79 res = pandas.DataFrame(result, columns=columns)
80 res.to_csv(path_or_buf = "$outfile_transform", sep="\t",
81 index=False, header=True if header else False)
68 #end if 82 #end if
69 83
70 #if $save: 84 #if $save:
71 with open("$outfile_fit", 'wb') as out_handler: 85 with open("$outfile_fit", 'wb') as out_handler:
72 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) 86 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL)
153 <param name="save" value="true"/> 167 <param name="save" value="true"/>
154 <output name="outfile_transform" file="prp_result05" ftype="tabular"/> 168 <output name="outfile_transform" file="prp_result05" ftype="tabular"/>
155 <output name="outfile_fit" file="prp_model05" ftype="zip" compare="sim_size" delta="5"/> 169 <output name="outfile_fit" file="prp_model05" ftype="zip" compare="sim_size" delta="5"/>
156 </test> 170 </test>
157 <test> 171 <test>
158 <param name="infile" value="csr_sparse2.mtx" ftype="txt"/>
159 <param name="selected_input_type" value="sparse"/>
160 <param name="selected_pre_processor" value="Imputer"/>
161 <param name="save" value="true"/>
162 <param name="axis" value="true"/>
163 <output name="outfile_transform" file="prp_result06" ftype="tabular"/>
164 <output name="outfile_fit" file="prp_model06" ftype="zip" compare="sim_size" delta="50"/>
165 </test>
166 <test>
167 <param name="infile" value="train.tabular" ftype="tabular"/> 172 <param name="infile" value="train.tabular" ftype="tabular"/>
168 <param name="selected_input_type" value="tabular"/> 173 <param name="selected_input_type" value="tabular"/>
169 <param name="selected_column_selector_option" value="all_columns"/> 174 <param name="selected_column_selector_option" value="all_columns"/>
170 <param name="selected_pre_processor" value="StandardScaler"/> 175 <param name="selected_pre_processor" value="StandardScaler"/>
171 <param name="save" value="true"/> 176 <param name="save" value="true"/>
185 <param name="selected_input_type" value="sparse"/> 190 <param name="selected_input_type" value="sparse"/>
186 <param name="selected_pre_processor" value="Normalizer"/> 191 <param name="selected_pre_processor" value="Normalizer"/>
187 <param name="save" value="true"/> 192 <param name="save" value="true"/>
188 <output name="outfile_transform" file="prp_result09" ftype="tabular"/> 193 <output name="outfile_transform" file="prp_result09" ftype="tabular"/>
189 <output name="outfile_fit" file="prp_model09" ftype="zip" compare="sim_size" delta="5"/> 194 <output name="outfile_fit" file="prp_model09" ftype="zip" compare="sim_size" delta="5"/>
195 </test>
196 <test>
197 <param name="infile" value="regression_X.tabular" ftype="tabular"/>
198 <param name="header1" value="true"/>
199 <param name="selected_column_selector_option" value="all_columns"/>
200 <param name="selected_input_type" value="tabular"/>
201 <param name="selected_pre_processor" value="MinMaxScaler"/>
202 <param name="feature_range" value="(-1, 1)"/>
203 <param name="save" value="false"/>
204 <output name="outfile_transform" file="prp_result10" ftype="tabular"/>
190 </test> 205 </test>
191 </tests> 206 </tests>
192 <help> 207 <help>
193 <![CDATA[ 208 <![CDATA[
194 **What it does** 209 **What it does**