Mercurial repository: bgruening / keras_train_and_eval

changeset    10:78d35085054d (draft, default, tip)
description  planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 80417bf0158a9b596e485dd66408f738f405145a
author       bgruening
date         Mon, 02 Oct 2023 08:37:07 +0000
parents      427b563a44a2
children     (none)
files        keras_train_and_eval.py  keras_train_and_eval.xml  ml_visualization_ex.py
diffstat     3 files changed, 105 insertions(+), 8 deletions(-)
--- a/keras_train_and_eval.py	Wed Aug 09 12:22:47 2023 +0000
+++ b/keras_train_and_eval.py	Mon Oct 02 08:37:07 2023 +0000
@@ -188,6 +188,7 @@
     infile1,
     infile2,
     outfile_result,
+    outfile_history=None,
     outfile_object=None,
     outfile_y_true=None,
     outfile_y_preds=None,
@@ -215,6 +216,9 @@
     outfile_result : str
         File path to save the results, either cv_results or test result.

+    outfile_history : str, optional
+        File path to save the training history.
+
     outfile_object : str, optional
         File path to save searchCV object.

@@ -253,9 +257,7 @@
     swapping = params["experiment_schemes"]["hyperparams_swapping"]
     swap_params = _eval_swap_params(swapping)
     estimator.set_params(**swap_params)
-
     estimator_params = estimator.get_params()
-
     # store read dataframe object
     loaded_df = {}

@@ -448,12 +450,20 @@
     # train and eval
     if hasattr(estimator, "config") and hasattr(estimator, "model_type"):
         if exp_scheme == "train_val_test":
-            estimator.fit(X_train, y_train, validation_data=(X_val, y_val))
+            history = estimator.fit(X_train, y_train, validation_data=(X_val, y_val))
         else:
-            estimator.fit(X_train, y_train, validation_data=(X_test, y_test))
+            history = estimator.fit(X_train, y_train, validation_data=(X_test, y_test))
     else:
-        estimator.fit(X_train, y_train)
-
+        history = estimator.fit(X_train, y_train)
+    if "callbacks" in estimator_params:
+        for cb in estimator_params["callbacks"]:
+            if cb["callback_selection"]["callback_type"] == "CSVLogger":
+                hist_df = pd.DataFrame(history.history)
+                hist_df["epoch"] = np.arange(1, estimator_params["epochs"] + 1)
+                epo_col = hist_df.pop('epoch')
+                hist_df.insert(0, 'epoch', epo_col)
+                hist_df.to_csv(path_or_buf=outfile_history, sep="\t", header=True, index=False)
+                break
     if isinstance(estimator, KerasGBatchClassifier):
         scores = {}
         steps = estimator.prediction_steps
@@ -526,6 +536,7 @@
     aparser.add_argument("-X", "--infile1", dest="infile1")
     aparser.add_argument("-y", "--infile2", dest="infile2")
     aparser.add_argument("-O", "--outfile_result", dest="outfile_result")
+    aparser.add_argument("-hi", "--outfile_history", dest="outfile_history")
     aparser.add_argument("-o", "--outfile_object", dest="outfile_object")
     aparser.add_argument("-l", "--outfile_y_true", dest="outfile_y_true")
     aparser.add_argument("-p", "--outfile_y_preds", dest="outfile_y_preds")
@@ -542,6 +553,7 @@
         args.infile1,
         args.infile2,
         args.outfile_result,
+        outfile_history=args.outfile_history,
        outfile_object=args.outfile_object,
        outfile_y_true=args.outfile_y_true,
        outfile_y_preds=args.outfile_y_preds,
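Note on the history export added above: the TSV is written only when a CSVLogger callback was configured in the Keras model builder, and the epoch column is filled from estimator_params["epochs"], so it assumes training ran for the full configured number of epochs. Below is a minimal, self-contained sketch of the same history-to-TSV idea outside the Galaxy wrapper; the toy model, data, and output filename are placeholders, and the epoch count is derived from the recorded history rather than from a configured value.

# Illustrative sketch only (not part of the tool): flatten a Keras History
# object into a tab-separated table with "epoch" as the first column.
import numpy as np
import pandas as pd
from tensorflow import keras

# Toy data and model, stand-ins for the estimator built by the Galaxy tool.
X = np.random.rand(64, 8)
y = (X.sum(axis=1) > 4).astype(int)

model = keras.Sequential([
    keras.Input(shape=(8,)),
    keras.layers.Dense(16, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid"),
])
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

history = model.fit(X, y, epochs=5, validation_split=0.25, verbose=0)

# history.history maps metric names (loss, accuracy, val_loss, ...) to
# per-epoch lists; write one row per epoch, epoch number first.
hist_df = pd.DataFrame(history.history)
hist_df.insert(0, "epoch", np.arange(1, len(hist_df) + 1))
hist_df.to_csv("training_history.tsv", sep="\t", index=False)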
--- a/keras_train_and_eval.xml	Wed Aug 09 12:22:47 2023 +0000
+++ b/keras_train_and_eval.xml	Mon Oct 02 08:37:07 2023 +0000
@@ -8,7 +8,7 @@
     <expand macro="macro_stdio" />
     <version_command>echo "@VERSION@"</version_command>
     <command>
-        <![CDATA[
+    <![CDATA[
        export HDF5_USE_FILE_LOCKING='FALSE';
        #if $input_options.selected_input == 'refseq_and_interval'
        bgzip -c '$input_options.target_file' > '${target_file.element_identifier}.gz' &&
@@ -29,6 +29,9 @@
        #end if
        --infile2 '$input_options.infile2'
        --outfile_result '$outfile_result'
+        #if $save and 'save_csvlogger' in str($save)
+        --outfile_history '$outfile_history'
+        #end if
        #if $save and 'save_estimator' in str($save)
        --outfile_object '$outfile_object'
        #end if
@@ -39,7 +42,6 @@
        #if $experiment_schemes.test_split.split_algos.shuffle == 'group'
        --groups '$experiment_schemes.test_split.split_algos.groups_selector.infile_g'
        #end if
-
        ]]>
    </command>
    <configfiles>
@@ -81,10 +83,14 @@
        <param name="save" type="select" multiple='true' display="checkboxes" label="Save the fitted model" optional="true" help="Evaluation scores will be output by default.">
            <option value="save_estimator" selected="true">Fitted estimator</option>
            <option value="save_prediction">True labels and prediction results from evaluation for downstream analysis</option>
+            <option value="save_csvlogger">Display CSVLogger if selected as a callback in the Keras model builder tool</option>
        </param>
    </inputs>
    <outputs>
        <data format="tabular" name="outfile_result" />
+        <data format="tabular" name="outfile_history" label="Deep learning training history log on ${on_string}">
+            <filter>str(save) and 'save_csvlogger' in str(save)</filter>
+        </data>
        <data format="h5mlm" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}">
            <filter>str(save) and 'save_estimator' in str(save)</filter>
        </data>
--- a/ml_visualization_ex.py	Wed Aug 09 12:22:47 2023 +0000
+++ b/ml_visualization_ex.py	Mon Oct 02 08:37:07 2023 +0000
@@ -15,6 +15,7 @@
 from sklearn.metrics import (
     auc,
     average_precision_score,
+    confusion_matrix,
     precision_recall_curve,
     roc_curve,
 )
@@ -258,6 +259,30 @@
         os.rename(os.path.join(folder, "output.svg"), os.path.join(folder, "output"))


+def get_dataframe(file_path, plot_selection, header_name, column_name):
+    header = "infer" if plot_selection[header_name] else None
+    column_option = plot_selection[column_name]["selected_column_selector_option"]
+    if column_option in [
+        "by_index_number",
+        "all_but_by_index_number",
+        "by_header_name",
+        "all_but_by_header_name",
+    ]:
+        col = plot_selection[column_name]["col1"]
+    else:
+        col = None
+    _, input_df = read_columns(
+        file_path,
+        c=col,
+        c_option=column_option,
+        return_df=True,
+        sep="\t",
+        header=header,
+        parse_dates=True,
+    )
+    return input_df
+
+
 def main(
     inputs,
     infile_estimator=None,
@@ -271,6 +296,10 @@
     targets=None,
     fasta_path=None,
     model_config=None,
+    true_labels=None,
+    predicted_labels=None,
+    plot_color=None,
+    title=None,
 ):
     """
     Parameter
@@ -311,6 +340,18 @@

     model_config : str, default is None
         File path to dataset containing JSON config for neural networks
+
+    true_labels : str, default is None
+        File path to dataset containing true labels
+
+    predicted_labels : str, default is None
+        File path to dataset containing true predicted labels
+
+    plot_color : str, default is None
+        Color of the confusion matrix heatmap
+
+    title : str, default is None
+        Title of the confusion matrix heatmap
     """
     warnings.simplefilter("ignore")

@@ -534,6 +575,36 @@

         return 0

+    elif plot_type == "classification_confusion_matrix":
+        plot_selection = params["plotting_selection"]
+        input_true = get_dataframe(
+            true_labels, plot_selection, "header_true", "column_selector_options_true"
+        )
+        header_predicted = "infer" if plot_selection["header_predicted"] else None
+        input_predicted = pd.read_csv(
+            predicted_labels, sep="\t", parse_dates=True, header=header_predicted
+        )
+        true_classes = input_true.iloc[:, -1].copy()
+        predicted_classes = input_predicted.iloc[:, -1].copy()
+        axis_labels = list(set(true_classes))
+        c_matrix = confusion_matrix(true_classes, predicted_classes)
+        fig, ax = plt.subplots(figsize=(7, 7))
+        im = plt.imshow(c_matrix, cmap=plot_color)
+        for i in range(len(c_matrix)):
+            for j in range(len(c_matrix)):
+                ax.text(j, i, c_matrix[i, j], ha="center", va="center", color="k")
+        ax.set_ylabel("True class labels")
+        ax.set_xlabel("Predicted class labels")
+        ax.set_title(title)
+        ax.set_xticks(axis_labels)
+        ax.set_yticks(axis_labels)
+        fig.colorbar(im, ax=ax)
+        fig.tight_layout()
+        plt.savefig("output.png", dpi=125)
+        os.rename("output.png", "output")
+
+        return 0
+
     # save pdf file to disk
     # fig.write_image("image.pdf", format='pdf')
     # fig.write_image("image.pdf", format='pdf', width=340*2, height=226*2)
@@ -553,6 +624,10 @@
     aparser.add_argument("-t", "--targets", dest="targets")
     aparser.add_argument("-f", "--fasta_path", dest="fasta_path")
     aparser.add_argument("-c", "--model_config", dest="model_config")
+    aparser.add_argument("-tl", "--true_labels", dest="true_labels")
+    aparser.add_argument("-pl", "--predicted_labels", dest="predicted_labels")
+    aparser.add_argument("-pc", "--plot_color", dest="plot_color")
+    aparser.add_argument("-pt", "--title", dest="title")
     args = aparser.parse_args()

     main(
@@ -568,4 +643,8 @@
         targets=args.targets,
         fasta_path=args.fasta_path,
         model_config=args.model_config,
+        true_labels=args.true_labels,
+        predicted_labels=args.predicted_labels,
+        plot_color=args.plot_color,
+        title=args.title,
     )
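For reference, a standalone sketch of the heatmap drawn by the new classification_confusion_matrix branch; the labels, colormap, and output filename here are illustrative only. Unlike the tool code, which passes the class labels to set_xticks directly, the sketch sets integer tick positions with sorted label names, matching the row/column ordering that sklearn's confusion_matrix uses.

# Illustrative sketch only: plot a labelled confusion-matrix heatmap in the
# same style as the new branch, using toy labels instead of Galaxy datasets.
import matplotlib
matplotlib.use("Agg")  # headless backend, as a Galaxy job would require
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

y_true = ["cat", "dog", "dog", "cat", "bird", "dog"]
y_pred = ["cat", "dog", "cat", "cat", "bird", "dog"]

# confusion_matrix orders rows/columns by sorted unique label, so build the
# tick labels the same way.
labels = sorted(set(y_true) | set(y_pred))
c_matrix = confusion_matrix(y_true, y_pred, labels=labels)

fig, ax = plt.subplots(figsize=(7, 7))
im = ax.imshow(c_matrix, cmap="viridis")
for i in range(len(c_matrix)):
    for j in range(len(c_matrix)):
        ax.text(j, i, c_matrix[i, j], ha="center", va="center", color="k")
ax.set_xticks(range(len(labels)))
ax.set_yticks(range(len(labels)))
ax.set_xticklabels(labels)
ax.set_yticklabels(labels)
ax.set_xlabel("Predicted class labels")
ax.set_ylabel("True class labels")
ax.set_title("Confusion matrix")
fig.colorbar(im, ax=ax)
fig.tight_layout()
fig.savefig("confusion_matrix.png", dpi=125)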