comparison pre_process.xml @ 42:6c23a44a3c4f draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 12:12:48 +0000
parents ff2a7f89069e
children
comparison
equal deleted inserted replaced
41:61a68849b815 42:6c23a44a3c4f
1 <tool id="sklearn_data_preprocess" name="Preprocess" version="@VERSION@" profile="20.05"> 1 <tool id="sklearn_data_preprocess" name="Preprocess" version="@VERSION@" profile="@PROFILE@">
2 <description>raw feature vectors into standardized datasets</description> 2 <description>raw feature vectors into standardized datasets</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="python_requirements" /> 6 <expand macro="python_requirements" />
16 <configfile name="pre_processor_script"> 16 <configfile name="pre_processor_script">
17 <![CDATA[ 17 <![CDATA[
18 import sys 18 import sys
19 import json 19 import json
20 import pandas 20 import pandas
21 import pickle
22 21
23 from scipy.io import mmread 22 from scipy.io import mmread
24 from scipy.io import mmwrite 23 from scipy.io import mmwrite
25 from sklearn import preprocessing 24 from sklearn import preprocessing
25 from galaxy_ml.model_persist import dump_model_to_h5
26 from galaxy_ml.utils import read_columns, SafeEval 26 from galaxy_ml.utils import read_columns, SafeEval
27 27
28 28
29 safe_eval = SafeEval() 29 safe_eval = SafeEval()
30 30
79 res.to_csv(path_or_buf = "$outfile_transform", sep="\t", 79 res.to_csv(path_or_buf = "$outfile_transform", sep="\t",
80 index=False, header=True if header else False) 80 index=False, header=True if header else False)
81 #end if 81 #end if
82 82
83 #if $save: 83 #if $save:
84 with open("$outfile_fit", 'wb') as out_handler: 84 dump_model_to_h5(estimator, "$outfile_fit")
85 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL)
86 #end if 85 #end if
87 ]]> 86 ]]>
88 </configfile> 87 </configfile>
89 </configfiles> 88 </configfiles>
90 <inputs> 89 <inputs>
114 </conditional> 113 </conditional>
115 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Save the preprocessor" help="Saves the preprocessor after fitting to the data. The preprocessor can then be passed to other tools and used in later operations." /> 114 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Save the preprocessor" help="Saves the preprocessor after fitting to the data. The preprocessor can then be passed to other tools and used in later operations." />
116 </inputs> 115 </inputs>
117 <outputs> 116 <outputs>
118 <data format="tabular" name="outfile_transform" from_work_dir="./output" /> 117 <data format="tabular" name="outfile_transform" from_work_dir="./output" />
119 <data format="zip" name="outfile_fit"> 118 <data format="h5mlm" name="outfile_fit">
120 <filter>save</filter> 119 <filter>save</filter>
121 </data> 120 </data>
122 </outputs> 121 </outputs>
123 <tests> 122 <tests>
124 <test> 123 <test>
125 <param name="infile" value="train.tabular" ftype="tabular" /> 124 <param name="infile" value="train.tabular" ftype="tabular" />
126 <param name="selected_column_selector_option" value="all_columns" /> 125 <param name="selected_column_selector_option" value="all_columns" />
127 <param name="selected_input_type" value="tabular" /> 126 <param name="selected_input_type" value="tabular" />
128 <param name="selected_pre_processor" value="KernelCenterer" /> 127 <param name="selected_pre_processor" value="QuantileTransformer" />
129 <param name="save" value="true" /> 128 <param name="save" value="true" />
129 <param name="random_state" value="200" />
130 <param name="n_quantiles" value="10" />
131 <param name="subsample" value="100" />
130 <output name="outfile_transform" file="prp_result01" ftype="tabular" /> 132 <output name="outfile_transform" file="prp_result01" ftype="tabular" />
131 <output name="outfile_fit" file="prp_model01" ftype="zip" compare="sim_size" delta="5" /> 133 <output name="outfile_fit" file="prp_model01" ftype="h5mlm" compare="sim_size" delta="5" />
132 </test> 134 </test>
133 <test> 135 <test>
134 <param name="infile" value="train.tabular" ftype="tabular" /> 136 <param name="infile" value="train.tabular" ftype="tabular" />
135 <param name="selected_column_selector_option" value="all_columns" /> 137 <param name="selected_column_selector_option" value="all_columns" />
136 <param name="selected_input_type" value="tabular" /> 138 <param name="selected_input_type" value="tabular" />
137 <param name="selected_pre_processor" value="MinMaxScaler" /> 139 <param name="selected_pre_processor" value="MinMaxScaler" />
138 <param name="save" value="true" /> 140 <param name="save" value="true" />
139 <output name="outfile_transform" file="prp_result02" ftype="tabular" /> 141 <output name="outfile_transform" file="prp_result02" ftype="tabular" />
140 <output name="outfile_fit" file="prp_model02" ftype="zip" compare="sim_size" delta="5" /> 142 <output name="outfile_fit" file="prp_model02" ftype="h5mlm" compare="sim_size" delta="5" />
141 </test> 143 </test>
142 <test> 144 <test>
143 <param name="infile" value="train.tabular" ftype="tabular" /> 145 <param name="infile" value="train.tabular" ftype="tabular" />
144 <param name="selected_column_selector_option" value="all_columns" /> 146 <param name="selected_column_selector_option" value="all_columns" />
145 <param name="selected_input_type" value="tabular" /> 147 <param name="selected_input_type" value="tabular" />
146 <param name="selected_pre_processor" value="PolynomialFeatures" /> 148 <param name="selected_pre_processor" value="PolynomialFeatures" />
147 <param name="save" value="true" /> 149 <param name="save" value="true" />
148 <output name="outfile_transform" file="prp_result03" ftype="tabular" /> 150 <output name="outfile_transform" file="prp_result03" ftype="tabular" />
149 <output name="outfile_fit" file="prp_model03" ftype="zip" compare="sim_size" delta="5" /> 151 <output name="outfile_fit" file="prp_model03" ftype="h5mlm" compare="sim_size" delta="5" />
150 </test> 152 </test>
151 <test> 153 <test>
152 <param name="infile" value="train.tabular" ftype="tabular" /> 154 <param name="infile" value="train.tabular" ftype="tabular" />
153 <param name="selected_column_selector_option" value="all_columns" /> 155 <param name="selected_column_selector_option" value="all_columns" />
154 <param name="selected_input_type" value="tabular" /> 156 <param name="selected_input_type" value="tabular" />
155 <param name="selected_pre_processor" value="RobustScaler" /> 157 <param name="selected_pre_processor" value="RobustScaler" />
156 <param name="save" value="true" /> 158 <param name="save" value="true" />
157 <output name="outfile_transform" file="prp_result04" ftype="tabular" /> 159 <output name="outfile_transform" file="prp_result04" ftype="tabular" />
158 <output name="outfile_fit" file="prp_model04" ftype="zip" compare="sim_size" delta="5" /> 160 <output name="outfile_fit" file="prp_model04" ftype="h5mlm" compare="sim_size" delta="5" />
159 </test> 161 </test>
160 <test> 162 <test>
161 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> 163 <param name="infile" value="csr_sparse2.mtx" ftype="txt" />
162 <param name="selected_input_type" value="sparse" /> 164 <param name="selected_input_type" value="sparse" />
163 <param name="selected_pre_processor" value="Binarizer" /> 165 <param name="selected_pre_processor" value="Binarizer" />
164 <param name="save" value="true" /> 166 <param name="save" value="true" />
165 <output name="outfile_transform" file="prp_result05" ftype="tabular" /> 167 <output name="outfile_transform" file="prp_result05" ftype="tabular" />
166 <output name="outfile_fit" file="prp_model05" ftype="zip" compare="sim_size" delta="5" /> 168 <output name="outfile_fit" file="prp_model05" ftype="h5mlm" compare="sim_size" delta="5" />
167 </test> 169 </test>
168 <test> 170 <test>
169 <param name="infile" value="train.tabular" ftype="tabular" /> 171 <param name="infile" value="train.tabular" ftype="tabular" />
170 <param name="selected_input_type" value="tabular" /> 172 <param name="selected_input_type" value="tabular" />
171 <param name="selected_column_selector_option" value="all_columns" /> 173 <param name="selected_column_selector_option" value="all_columns" />
172 <param name="selected_pre_processor" value="StandardScaler" /> 174 <param name="selected_pre_processor" value="StandardScaler" />
173 <param name="save" value="true" /> 175 <param name="save" value="true" />
174 <output name="outfile_transform" file="prp_result07" ftype="tabular" /> 176 <output name="outfile_transform" file="prp_result07" ftype="tabular" />
175 <output name="outfile_fit" file="prp_model07" ftype="zip" compare="sim_size" delta="5" /> 177 <output name="outfile_fit" file="prp_model07" ftype="h5mlm" compare="sim_size" delta="5" />
176 </test> 178 </test>
177 <test> 179 <test>
178 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> 180 <param name="infile" value="csr_sparse2.mtx" ftype="txt" />
179 <param name="selected_input_type" value="sparse" /> 181 <param name="selected_input_type" value="sparse" />
180 <param name="selected_pre_processor" value="MaxAbsScaler" /> 182 <param name="selected_pre_processor" value="MaxAbsScaler" />
181 <param name="save" value="true" /> 183 <param name="save" value="true" />
182 <output name="outfile_transform" file="prp_result08" ftype="tabular" /> 184 <output name="outfile_transform" file="prp_result08" ftype="tabular" />
183 <output name="outfile_fit" file="prp_model08" ftype="zip" compare="sim_size" delta="5" /> 185 <output name="outfile_fit" file="prp_model08" ftype="h5mlm" compare="sim_size" delta="5" />
184 </test> 186 </test>
185 <test> 187 <test>
186 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> 188 <param name="infile" value="csr_sparse2.mtx" ftype="txt" />
187 <param name="selected_input_type" value="sparse" /> 189 <param name="selected_input_type" value="sparse" />
188 <param name="selected_pre_processor" value="Normalizer" /> 190 <param name="selected_pre_processor" value="Normalizer" />
189 <param name="save" value="true" /> 191 <param name="save" value="true" />
190 <output name="outfile_transform" file="prp_result09" ftype="tabular" /> 192 <output name="outfile_transform" file="prp_result09" ftype="tabular" />
191 <output name="outfile_fit" file="prp_model09" ftype="zip" compare="sim_size" delta="5" /> 193 <output name="outfile_fit" file="prp_model09" ftype="h5mlm" compare="sim_size" delta="5" />
192 </test> 194 </test>
193 <test> 195 <test>
194 <param name="infile" value="regression_X.tabular" ftype="tabular" /> 196 <param name="infile" value="regression_X.tabular" ftype="tabular" />
195 <param name="header1" value="true" /> 197 <param name="header1" value="true" />
196 <param name="selected_column_selector_option" value="all_columns" /> 198 <param name="selected_column_selector_option" value="all_columns" />