Mercurial > repos > bgruening > sklearn_data_preprocess
comparison pre_process.xml @ 42:6c23a44a3c4f draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
| author | bgruening |
|---|---|
| date | Wed, 09 Aug 2023 12:12:48 +0000 |
| parents | ff2a7f89069e |
| children | |
Comparison legend: equal, deleted, inserted, replaced
| 41:61a68849b815 | 42:6c23a44a3c4f |
|---|---|
| 1 <tool id="sklearn_data_preprocess" name="Preprocess" version="@VERSION@" profile="20.05"> | 1 <tool id="sklearn_data_preprocess" name="Preprocess" version="@VERSION@" profile="@PROFILE@"> |
| 2 <description>raw feature vectors into standardized datasets</description> | 2 <description>raw feature vectors into standardized datasets</description> |
| 3 <macros> | 3 <macros> |
| 4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
| 5 </macros> | 5 </macros> |
| 6 <expand macro="python_requirements" /> | 6 <expand macro="python_requirements" /> |
| 16 <configfile name="pre_processor_script"> | 16 <configfile name="pre_processor_script"> |
| 17 <![CDATA[ | 17 <![CDATA[ |
| 18 import sys | 18 import sys |
| 19 import json | 19 import json |
| 20 import pandas | 20 import pandas |
| 21 import pickle | |
| 22 | 21 |
| 23 from scipy.io import mmread | 22 from scipy.io import mmread |
| 24 from scipy.io import mmwrite | 23 from scipy.io import mmwrite |
| 25 from sklearn import preprocessing | 24 from sklearn import preprocessing |
| 25 from galaxy_ml.model_persist import dump_model_to_h5 | |
| 26 from galaxy_ml.utils import read_columns, SafeEval | 26 from galaxy_ml.utils import read_columns, SafeEval |
| 27 | 27 |
| 28 | 28 |
| 29 safe_eval = SafeEval() | 29 safe_eval = SafeEval() |
| 30 | 30 |
| 79 res.to_csv(path_or_buf = "$outfile_transform", sep="\t", | 79 res.to_csv(path_or_buf = "$outfile_transform", sep="\t", |
| 80 index=False, header=True if header else False) | 80 index=False, header=True if header else False) |
| 81 #end if | 81 #end if |
| 82 | 82 |
| 83 #if $save: | 83 #if $save: |
| 84 with open("$outfile_fit", 'wb') as out_handler: | 84 dump_model_to_h5(estimator, "$outfile_fit") |
| 85 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) | |
| 86 #end if | 85 #end if |
| 87 ]]> | 86 ]]> |
| 88 </configfile> | 87 </configfile> |
| 89 </configfiles> | 88 </configfiles> |
| 90 <inputs> | 89 <inputs> |
| 114 </conditional> | 113 </conditional> |
| 115 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Save the preprocessor" help="Saves the preprocessor after fitting to the data. The preprocessor can then be passed to other tools and used in later operations." /> | 114 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Save the preprocessor" help="Saves the preprocessor after fitting to the data. The preprocessor can then be passed to other tools and used in later operations." /> |
| 116 </inputs> | 115 </inputs> |
| 117 <outputs> | 116 <outputs> |
| 118 <data format="tabular" name="outfile_transform" from_work_dir="./output" /> | 117 <data format="tabular" name="outfile_transform" from_work_dir="./output" /> |
| 119 <data format="zip" name="outfile_fit"> | 118 <data format="h5mlm" name="outfile_fit"> |
| 120 <filter>save</filter> | 119 <filter>save</filter> |
| 121 </data> | 120 </data> |
| 122 </outputs> | 121 </outputs> |
| 123 <tests> | 122 <tests> |
| 124 <test> | 123 <test> |
| 125 <param name="infile" value="train.tabular" ftype="tabular" /> | 124 <param name="infile" value="train.tabular" ftype="tabular" /> |
| 126 <param name="selected_column_selector_option" value="all_columns" /> | 125 <param name="selected_column_selector_option" value="all_columns" /> |
| 127 <param name="selected_input_type" value="tabular" /> | 126 <param name="selected_input_type" value="tabular" /> |
| 128 <param name="selected_pre_processor" value="KernelCenterer" /> | 127 <param name="selected_pre_processor" value="QuantileTransformer" /> |
| 129 <param name="save" value="true" /> | 128 <param name="save" value="true" /> |
| 129 <param name="random_state" value="200" /> | |
| 130 <param name="n_quantiles" value="10" /> | |
| 131 <param name="subsample" value="100" /> | |
| 130 <output name="outfile_transform" file="prp_result01" ftype="tabular" /> | 132 <output name="outfile_transform" file="prp_result01" ftype="tabular" /> |
| 131 <output name="outfile_fit" file="prp_model01" ftype="zip" compare="sim_size" delta="5" /> | 133 <output name="outfile_fit" file="prp_model01" ftype="h5mlm" compare="sim_size" delta="5" /> |
| 132 </test> | 134 </test> |
| 133 <test> | 135 <test> |
| 134 <param name="infile" value="train.tabular" ftype="tabular" /> | 136 <param name="infile" value="train.tabular" ftype="tabular" /> |
| 135 <param name="selected_column_selector_option" value="all_columns" /> | 137 <param name="selected_column_selector_option" value="all_columns" /> |
| 136 <param name="selected_input_type" value="tabular" /> | 138 <param name="selected_input_type" value="tabular" /> |
| 137 <param name="selected_pre_processor" value="MinMaxScaler" /> | 139 <param name="selected_pre_processor" value="MinMaxScaler" /> |
| 138 <param name="save" value="true" /> | 140 <param name="save" value="true" /> |
| 139 <output name="outfile_transform" file="prp_result02" ftype="tabular" /> | 141 <output name="outfile_transform" file="prp_result02" ftype="tabular" /> |
| 140 <output name="outfile_fit" file="prp_model02" ftype="zip" compare="sim_size" delta="5" /> | 142 <output name="outfile_fit" file="prp_model02" ftype="h5mlm" compare="sim_size" delta="5" /> |
| 141 </test> | 143 </test> |
| 142 <test> | 144 <test> |
| 143 <param name="infile" value="train.tabular" ftype="tabular" /> | 145 <param name="infile" value="train.tabular" ftype="tabular" /> |
| 144 <param name="selected_column_selector_option" value="all_columns" /> | 146 <param name="selected_column_selector_option" value="all_columns" /> |
| 145 <param name="selected_input_type" value="tabular" /> | 147 <param name="selected_input_type" value="tabular" /> |
| 146 <param name="selected_pre_processor" value="PolynomialFeatures" /> | 148 <param name="selected_pre_processor" value="PolynomialFeatures" /> |
| 147 <param name="save" value="true" /> | 149 <param name="save" value="true" /> |
| 148 <output name="outfile_transform" file="prp_result03" ftype="tabular" /> | 150 <output name="outfile_transform" file="prp_result03" ftype="tabular" /> |
| 149 <output name="outfile_fit" file="prp_model03" ftype="zip" compare="sim_size" delta="5" /> | 151 <output name="outfile_fit" file="prp_model03" ftype="h5mlm" compare="sim_size" delta="5" /> |
| 150 </test> | 152 </test> |
| 151 <test> | 153 <test> |
| 152 <param name="infile" value="train.tabular" ftype="tabular" /> | 154 <param name="infile" value="train.tabular" ftype="tabular" /> |
| 153 <param name="selected_column_selector_option" value="all_columns" /> | 155 <param name="selected_column_selector_option" value="all_columns" /> |
| 154 <param name="selected_input_type" value="tabular" /> | 156 <param name="selected_input_type" value="tabular" /> |
| 155 <param name="selected_pre_processor" value="RobustScaler" /> | 157 <param name="selected_pre_processor" value="RobustScaler" /> |
| 156 <param name="save" value="true" /> | 158 <param name="save" value="true" /> |
| 157 <output name="outfile_transform" file="prp_result04" ftype="tabular" /> | 159 <output name="outfile_transform" file="prp_result04" ftype="tabular" /> |
| 158 <output name="outfile_fit" file="prp_model04" ftype="zip" compare="sim_size" delta="5" /> | 160 <output name="outfile_fit" file="prp_model04" ftype="h5mlm" compare="sim_size" delta="5" /> |
| 159 </test> | 161 </test> |
| 160 <test> | 162 <test> |
| 161 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> | 163 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> |
| 162 <param name="selected_input_type" value="sparse" /> | 164 <param name="selected_input_type" value="sparse" /> |
| 163 <param name="selected_pre_processor" value="Binarizer" /> | 165 <param name="selected_pre_processor" value="Binarizer" /> |
| 164 <param name="save" value="true" /> | 166 <param name="save" value="true" /> |
| 165 <output name="outfile_transform" file="prp_result05" ftype="tabular" /> | 167 <output name="outfile_transform" file="prp_result05" ftype="tabular" /> |
| 166 <output name="outfile_fit" file="prp_model05" ftype="zip" compare="sim_size" delta="5" /> | 168 <output name="outfile_fit" file="prp_model05" ftype="h5mlm" compare="sim_size" delta="5" /> |
| 167 </test> | 169 </test> |
| 168 <test> | 170 <test> |
| 169 <param name="infile" value="train.tabular" ftype="tabular" /> | 171 <param name="infile" value="train.tabular" ftype="tabular" /> |
| 170 <param name="selected_input_type" value="tabular" /> | 172 <param name="selected_input_type" value="tabular" /> |
| 171 <param name="selected_column_selector_option" value="all_columns" /> | 173 <param name="selected_column_selector_option" value="all_columns" /> |
| 172 <param name="selected_pre_processor" value="StandardScaler" /> | 174 <param name="selected_pre_processor" value="StandardScaler" /> |
| 173 <param name="save" value="true" /> | 175 <param name="save" value="true" /> |
| 174 <output name="outfile_transform" file="prp_result07" ftype="tabular" /> | 176 <output name="outfile_transform" file="prp_result07" ftype="tabular" /> |
| 175 <output name="outfile_fit" file="prp_model07" ftype="zip" compare="sim_size" delta="5" /> | 177 <output name="outfile_fit" file="prp_model07" ftype="h5mlm" compare="sim_size" delta="5" /> |
| 176 </test> | 178 </test> |
| 177 <test> | 179 <test> |
| 178 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> | 180 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> |
| 179 <param name="selected_input_type" value="sparse" /> | 181 <param name="selected_input_type" value="sparse" /> |
| 180 <param name="selected_pre_processor" value="MaxAbsScaler" /> | 182 <param name="selected_pre_processor" value="MaxAbsScaler" /> |
| 181 <param name="save" value="true" /> | 183 <param name="save" value="true" /> |
| 182 <output name="outfile_transform" file="prp_result08" ftype="tabular" /> | 184 <output name="outfile_transform" file="prp_result08" ftype="tabular" /> |
| 183 <output name="outfile_fit" file="prp_model08" ftype="zip" compare="sim_size" delta="5" /> | 185 <output name="outfile_fit" file="prp_model08" ftype="h5mlm" compare="sim_size" delta="5" /> |
| 184 </test> | 186 </test> |
| 185 <test> | 187 <test> |
| 186 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> | 188 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> |
| 187 <param name="selected_input_type" value="sparse" /> | 189 <param name="selected_input_type" value="sparse" /> |
| 188 <param name="selected_pre_processor" value="Normalizer" /> | 190 <param name="selected_pre_processor" value="Normalizer" /> |
| 189 <param name="save" value="true" /> | 191 <param name="save" value="true" /> |
| 190 <output name="outfile_transform" file="prp_result09" ftype="tabular" /> | 192 <output name="outfile_transform" file="prp_result09" ftype="tabular" /> |
| 191 <output name="outfile_fit" file="prp_model09" ftype="zip" compare="sim_size" delta="5" /> | 193 <output name="outfile_fit" file="prp_model09" ftype="h5mlm" compare="sim_size" delta="5" /> |
| 192 </test> | 194 </test> |
| 193 <test> | 195 <test> |
| 194 <param name="infile" value="regression_X.tabular" ftype="tabular" /> | 196 <param name="infile" value="regression_X.tabular" ftype="tabular" /> |
| 195 <param name="header1" value="true" /> | 197 <param name="header1" value="true" /> |
| 196 <param name="selected_column_selector_option" value="all_columns" /> | 198 <param name="selected_column_selector_option" value="all_columns" /> |
