Mercurial > repos > bgruening > flexynesis
annotate flexynesis_plot.py @ 6:33816f44fc7d draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
author | bgruening |
---|---|
date | Wed, 23 Jul 2025 07:49:41 +0000 |
parents | 466b593fd87e |
children |
rev | line source |
---|---|
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
2 """Generate plots using flexynesis |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
3 This script generates dimensionality reduction plots, Kaplan-Meier survival curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
4 and Cox proportional hazards models from data processed by flexynesis.""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
5 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
6 import argparse |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
7 import os |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
8 from pathlib import Path |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
9 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
10 import matplotlib.pyplot as plt |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
11 import numpy as np |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
12 import pandas as pd |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
13 import seaborn as sns |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
14 from flexynesis import ( |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
15 build_cox_model, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
16 plot_dim_reduced, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
17 plot_hazard_ratios, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
18 plot_kaplan_meier_curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
19 plot_pr_curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
20 plot_roc_curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
21 plot_scatter |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
22 ) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
23 from scipy.stats import kruskal, mannwhitneyu |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
24 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
25 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_embeddings(embeddings_path):
    """Read an embeddings table and return it with its row labels.

    The reader is chosen from the (case-insensitive) file suffix: ``.csv`` is
    parsed as comma separated, ``.tsv``/``.txt``/``.tab``/``.tabular`` as tab
    separated. The first column becomes the DataFrame index.

    Returns a tuple ``(dataframe, list_of_index_values)``.

    Raises ValueError for an unsupported suffix or for any failure while
    reading; the underlying exception is chained as the cause.
    """
    tab_suffixes = ['.tsv', '.txt', '.tab', '.tabular']
    try:
        suffix = Path(embeddings_path).suffix.lower()

        # Only the separator differs between the supported formats.
        if suffix == '.csv':
            embeddings = pd.read_csv(embeddings_path, index_col=0)
        elif suffix in tab_suffixes:
            embeddings = pd.read_csv(embeddings_path, sep='\t', index_col=0)
        else:
            raise ValueError(f"Unsupported file extension: {suffix}")

        sample_ids = embeddings.index.tolist()
        return embeddings, sample_ids

    except Exception as e:
        # Every failure (including the unsupported-suffix error above) is
        # reported with the offending path in the message.
        raise ValueError(f"Error loading embeddings from {embeddings_path}: {e}") from e
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
43 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
44 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_labels(labels_input):
    """Load predicted labels from flexynesis.

    The reader is chosen from the (case-insensitive) file suffix: ``.csv`` is
    parsed as comma separated, ``.tsv``/``.txt``/``.tab``/``.tabular`` as tab
    separated. No column is used as the index, so every column of the file
    stays a regular column.

    The column names of the loaded table are printed to stdout.

    Returns the loaded DataFrame.

    Raises ValueError for an unsupported suffix or for any failure while
    reading; the underlying exception is chained as the cause.
    """
    try:
        # Determine file extension
        file_ext = Path(labels_input).suffix.lower()

        if file_ext == '.csv':
            df = pd.read_csv(labels_input)
        elif file_ext in ['.tsv', '.txt', '.tab', '.tabular']:
            df = pd.read_csv(labels_input, sep='\t')
        else:
            # Without this branch `df` would stay unbound and the caller
            # would see a confusing "referenced before assignment" message.
            raise ValueError(f"Unsupported file extension: {file_ext}")

        print(f"available columns: {df.columns.tolist()}")
        return df

    except Exception as e:
        raise ValueError(f"Error loading labels from {labels_input}: {e}") from e
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
61 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
62 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_omics(omics_path):
    """Read an omics table whose first column holds the feature names.

    ``.csv`` files are parsed as comma separated; ``.tsv``, ``.txt``, ``.tab``
    and ``.tabular`` files as tab separated (suffix compared
    case-insensitively). The first column becomes the DataFrame index.

    Returns the loaded DataFrame.

    Raises ValueError for an unsupported suffix or for any failure while
    reading; the underlying exception is chained as the cause.
    """
    try:
        suffix = Path(omics_path).suffix.lower()
        is_comma_separated = suffix == '.csv'
        is_tab_separated = suffix in ['.tsv', '.txt', '.tab', '.tabular']

        if not (is_comma_separated or is_tab_separated):
            raise ValueError(f"Unsupported file extension: {suffix}")

        if is_comma_separated:
            omics = pd.read_csv(omics_path, index_col=0)
        else:
            omics = pd.read_csv(omics_path, sep='\t', index_col=0)
        return omics

    except Exception as e:
        # The unsupported-suffix error raised above is wrapped here as well,
        # so the message always names the offending path.
        raise ValueError(f"Error loading omics data from {omics_path}: {e}") from e
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
79 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
80 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
def match_samples_to_embeddings(sample_names, labels):
    """Filter label data to match sample names in the embeddings.

    Parameters:
    - sample_names: sample identifiers, in the order the result should follow.
    - labels: DataFrame whose first column holds the sample identifiers.

    Returns a DataFrame with the same columns as ``labels``. Rows follow the
    order of ``sample_names`` (left join): a sample without a match in
    ``labels`` yields a row of missing values, and a sample matching several
    label rows yields all of them.
    """
    first_column = labels.columns[0]

    # Pick a temporary join-key name that cannot clash with a column of
    # `labels`. A hard-coded 'sample_names' key would be merged into (or
    # suffixed against) a same-named label column, and the drop below would
    # then delete real data or fail.
    join_key = 'sample_names'
    while join_key in labels.columns:
        join_key = f'_{join_key}'

    # Create a DataFrame from sample_names to preserve order
    sample_df = pd.DataFrame({join_key: sample_names})

    # left_join
    df_matched = sample_df.merge(labels, left_on=join_key, right_on=first_column, how='left')

    # remove the temporary key to keep the initial structure
    df_matched = df_matched.drop(join_key, axis=1)
    return df_matched
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
93 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
94 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def detect_color_type(labels_series):
    """Decide whether a label column is 'categorical' or 'numerical'.

    Missing values are ignored. The result is 'numerical' only when every
    remaining value converts to a number AND there are at least 10 distinct
    values AND the distinct values make up at least 10% of the entries.
    In every other case, including an empty column or an unexpected error
    during the check, the result is 'categorical'.
    """
    observed = labels_series.dropna()

    # Nothing to inspect: fall back to the default.
    if observed.empty:
        return 'categorical'

    try:
        as_numbers = pd.to_numeric(observed, errors='coerce')

        # A value that could not be parsed was coerced to NaN.
        if as_numbers.isna().any():
            return 'categorical'

        n_distinct = len(observed.unique())
        n_entries = len(observed)

        # Few distinct values, in absolute terms or relative to the number
        # of entries, are treated as discrete classes.
        looks_discrete = n_distinct < 10 or (n_distinct / n_entries) < 0.1
        return 'categorical' if looks_discrete else 'numerical'

    except Exception:
        return 'categorical'
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
124 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
125 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def plot_label_concordance_heatmap(labels1, labels2, figsize=(12, 10)):
    """
    Draw a heatmap showing how two label sets agree, based on pandas crosstab.

    Parameters:
    - labels1: The first set of labels (heatmap rows, 'Labels Set 1').
    - labels2: The second set of labels (heatmap columns, 'Labels Set 2').
    - figsize: Size passed to the new matplotlib figure.

    Returns the current matplotlib figure after drawing.
    """
    row_labels = pd.Series(labels1, name='Labels Set 1')
    col_labels = pd.Series(labels2, name='Labels Set 2')

    # Count co-occurrences, then divide each row by its own total so every
    # row of the heatmap sums to 1 (row-wise normalization).
    counts = pd.crosstab(row_labels, col_labels)
    row_totals = counts.sum(axis=1)
    fractions = counts.div(row_totals, axis=0)

    # Plot the heatmap on a fresh figure
    plt.figure(figsize=figsize)
    sns.heatmap(fractions, annot=True, cmap='viridis', linewidths=.5)
    plt.title('Concordance between label groups')

    return plt.gcf()
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
145 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
146 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def plot_boxplot(categorical_x, numerical_y, title_x='Categories', title_y='Values', figsize=(10, 6), jittersize=4):
    """
    Create a boxplot with jittered points showing the distribution of a numerical variable per category.

    A non-parametric test compares the categories and its p-value is annotated
    in the top-left corner of the plot: a two-sided Mann-Whitney U test for
    exactly two categories, a Kruskal-Wallis test for more than two. With fewer
    than two categories no test can be computed, so the plot is drawn without
    a p-value annotation.

    Parameters:
    - categorical_x: Category label of each observation (x axis).
    - numerical_y: Numerical value of each observation (y axis).
    - title_x: Label of the x axis; also used as the internal column name.
    - title_y: Label of the y axis; also used as the internal column name.
    - figsize: Figure size passed to matplotlib.
    - jittersize: Marker size of the jittered points.

    Returns:
    - The current matplotlib figure containing the plot.
    """
    df = pd.DataFrame({title_x: categorical_x, title_y: numerical_y})

    # Collect the values of every category. Missing labels are dropped first:
    # NaN never compares equal to itself, so it would yield an empty sample.
    groups = df[title_x].dropna().unique()
    group_data = [df.loc[df[title_x] == group, title_y] for group in groups]

    # Compute p-value (only defined when there are at least two categories)
    if len(group_data) == 2:
        _, p = mannwhitneyu(group_data[0], group_data[1], alternative='two-sided')
        test_name = "Mann-Whitney U"
    elif len(group_data) > 2:
        _, p = kruskal(*group_data)
        test_name = "Kruskal-Wallis"
    else:
        p = None
        test_name = None

    # Create a boxplot with jittered points
    plt.figure(figsize=figsize)
    sns.boxplot(x=title_x, y=title_y, hue=title_x, data=df, palette='Set2', legend=False, fill=False)
    sns.stripplot(x=title_x, y=title_y, data=df, color='black', size=jittersize, jitter=True, dodge=True, alpha=0.4)

    # Labels and p-value annotation
    plt.xlabel(title_x)
    plt.ylabel(title_y)
    if p is not None:
        plt.text(
            x=-0.4,
            y=plt.ylim()[1],  # anchor the box at the top of the y axis
            s=f'{test_name} p = {p:.3e}',
            verticalalignment='top',
            horizontalalignment='left',
            fontsize=12,
            bbox=dict(boxstyle='round,pad=0.3', facecolor='white', edgecolor='gray')
        )

    plt.tight_layout()
    return plt.gcf()
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
186 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
187 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
188 def generate_dimred_plots(embeddings, matched_labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
189 """Generate dimensionality reduction plots""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
190 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
191 # Check if this is the specific format with sample_id, known_label, predicted_label |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
192 required_cols = ['sample_id', 'variable', 'class_label', 'probability', 'known_label', 'predicted_label'] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
193 is_flexynesis_format = all(col in matched_labels.columns for col in required_cols) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
194 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
195 if not args.color: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
196 if is_flexynesis_format: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
197 print("Detected flexynesis labels format") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
198 print(f"Generating {args.method.upper()} plots for known and predicted labels...") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
199 else: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
200 print("Labels are not in flexynesis format (Custom labels), please specify a color variable with --color") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
201 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
202 # Parse target values from comma-separated string |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
203 if args.target_value: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
204 target_values = [val.strip() for val in args.target_value.split(',')] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
205 else: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
206 # If no target values specified, use all unique variables |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
207 target_values = matched_labels['variable'].unique().tolist() |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
208 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
209 print(f"Generating {args.method.upper()} plots for {len(target_values)} target variable(s): {', '.join(target_values)}") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
210 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
211 # Check variables |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
212 available_vars = matched_labels['variable'].unique() |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
213 missing_vars = [var for var in target_values if var not in available_vars] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
214 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
215 if missing_vars: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
216 print(f"Warning: The following target variables were not found in the data: {', '.join(missing_vars)}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
217 print(f"Available variables: {', '.join(available_vars)}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
218 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
219 # Filter to only process available variables |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
220 valid_vars = [var for var in target_values if var in available_vars] |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
221 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
222 if not valid_vars: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
223 raise ValueError(f"None of the specified target variables were found in the data. Available: {', '.join(available_vars)}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
224 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
225 # Generate plots for each valid target variable |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
226 for var in valid_vars: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
227 print(f"\nPlotting variable: {var}") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
228 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
229 # Filter matched labels for current variable |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
230 var_labels = matched_labels[matched_labels['variable'] == var].copy() |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
231 var_labels = var_labels.drop_duplicates(subset='sample_id') |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
232 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
233 if var_labels.empty: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
234 print(f"Warning: No data found for variable '{var}', skipping...") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
235 continue |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
236 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
237 # Auto-detect color type |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
238 known_color_type = detect_color_type(var_labels['known_label']) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
239 predicted_color_type = detect_color_type(var_labels['predicted_label']) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
240 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
241 print(f" Auto-detected color types - Known: {known_color_type}, Predicted: {predicted_color_type}") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
242 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
243 try: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
244 # Plot 1: Known labels |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
245 print(f" Creating known labels plot for {var}...") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
246 fig_known = plot_dim_reduced( |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
247 matrix=embeddings, |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
248 labels=var_labels['known_label'], |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
249 method=args.method, |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
250 color_type=known_color_type |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
251 ) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
252 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
253 output_path_known = output_dir / f"{output_name_base}_{var}_known.{args.format}" |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
254 print(f" Saving known labels plot to: {output_path_known.name}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
255 fig_known.save(output_path_known, dpi=args.dpi, bbox_inches='tight') |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
256 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
257 # Plot 2: Predicted labels |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
258 print(f" Creating predicted labels plot for {var}...") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
259 fig_predicted = plot_dim_reduced( |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
260 matrix=embeddings, |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
261 labels=var_labels['predicted_label'], |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
262 method=args.method, |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
263 color_type=predicted_color_type |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
264 ) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
265 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
266 output_path_predicted = output_dir / f"{output_name_base}_{var}_predicted.{args.format}" |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
267 print(f" Saving predicted labels plot to: {output_path_predicted.name}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
268 fig_predicted.save(output_path_predicted, dpi=args.dpi, bbox_inches='tight') |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
269 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
270 print(f" ✓ Successfully created plots for variable '{var}'") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
271 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
272 except Exception as e: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
273 print(f" ✗ Error creating plots for variable '{var}': {e}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
274 continue |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
275 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
276 print(f"\nDimensionality reduction plots completed for {len(valid_vars)} variable(s)!") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
277 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
278 else: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
279 # check if the color variable exists in matched_labels |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
280 if args.color not in matched_labels.columns: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
281 raise ValueError(f"Color variable '{args.color}' not found in matched labels. Available columns: {matched_labels.columns.tolist()}") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
282 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
283 # Auto-detect color type |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
284 color_type = detect_color_type(matched_labels[args.color]) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
285 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
286 print(f" Auto-detected color type: {color_type}") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
287 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
288 # Plot: Specified color column |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
289 print(f" Creating plot for {args.color}...") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
290 fig = plot_dim_reduced( |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
291 matrix=embeddings, |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
292 labels=matched_labels[args.color], |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
293 method=args.method, |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
294 color_type=color_type |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
295 ) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
296 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
297 output_path = output_dir / f"{output_name_base}_{args.color}.{args.format}" |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
298 print(f" Saving plot to: {output_path.name}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
299 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
300 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
301 print(f" ✓ Successfully created plot for variable '{args.color}'") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
302 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
303 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
def generate_km_plots(survival_data, labels, args, output_dir, output_name_base):
    """Generate a Kaplan-Meier plot of low-risk vs. high-risk samples.

    Samples are split into two groups at the median of their predicted risk
    score (the ``predicted_label`` column of ``labels``); scores below the
    median are labelled ``low_risk``, all others ``high_risk``. The resulting
    figure is written to
    ``output_dir / f"{output_name_base}_km_risk_subtypes.{args.format}"``.

    Args:
        survival_data: DataFrame with survival information. Its first column
            is treated as the sample identifier and renamed to ``sample_id``
            when it has a different name.
        labels: DataFrame that must contain the columns ``sample_id``,
            ``variable``, ``class_label``, ``probability``, ``known_label``
            and ``predicted_label``.
        args: Object providing ``surv_event_var``, ``surv_time_var``,
            ``format`` and ``dpi`` attributes.
        output_dir: Directory for the output file; must support the ``/``
            operator (presumably a ``pathlib.Path`` - confirm against caller).
        output_name_base: Prefix of the output file name.

    Raises:
        ValueError: If ``labels`` lacks a required column, if the event or
            time column is missing from ``survival_data``, if no samples
            remain after merging, or if no numeric risk score is available.
    """
    # Only label tables carrying all of these columns are accepted.
    required_cols = ['sample_id', 'variable', 'class_label', 'probability', 'known_label', 'predicted_label']
    if not all(col in labels.columns for col in required_cols):
        raise ValueError(f"Labels are not in flexynesis format (Custom labels). Please provide a valid label file with the required columns, {required_cols}.")

    print("Generating Kaplan-Meier curves of risk subtypes...")

    # The first column of the survival table is used as the merge key.
    if survival_data.columns[0] != 'sample_id':
        survival_data = survival_data.rename(columns={survival_data.columns[0]: 'sample_id'})

    # Check that both survival columns exist before they are used below, so a
    # missing column yields a descriptive error instead of a bare KeyError.
    if args.surv_event_var not in survival_data.columns:
        raise ValueError(f"Column '{args.surv_event_var}' not found in survival data")
    if args.surv_time_var not in survival_data.columns:
        raise ValueError(f"Column '{args.surv_time_var}' not found in survival data")

    # Keep only the label rows that belong to the survival variable.
    labels = labels[labels['variable'] == args.surv_event_var]

    # Merge survival data with labels and drop samples lacking time or event.
    df_deceased = pd.merge(survival_data, labels, on='sample_id', how='inner')
    df_deceased = df_deceased.dropna(subset=[args.surv_time_var, args.surv_event_var])

    if df_deceased.empty:
        raise ValueError("No matching samples found after merging survival and label data.")

    # Risk scores must be numeric for the median split. The column can arrive
    # as text (e.g. when it is shared with string-valued variables), so coerce
    # it and discard rows whose score cannot be interpreted as a number.
    numeric_scores = pd.to_numeric(df_deceased['predicted_label'], errors='coerce')
    has_score = numeric_scores.notna()
    if not has_score.any():
        raise ValueError(f"No numeric risk scores found in 'predicted_label' for variable '{args.surv_event_var}'.")
    df_deceased = df_deceased[has_score]
    risk_scores = numeric_scores[has_score].to_numpy(dtype=float)

    # Median split: np.digitize returns 0 for scores below the median and 1
    # for scores at or above it.
    quantiles = np.quantile(risk_scores, [0.5])
    groups = np.digitize(risk_scores, quantiles)
    group_labels = ['low_risk' if g == 0 else 'high_risk' for g in groups]

    fig_known = plot_kaplan_meier_curves(
        durations=df_deceased[args.surv_time_var],
        events=df_deceased[args.surv_event_var],
        categorical_variable=group_labels
    )

    output_path_known = output_dir / f"{output_name_base}_km_risk_subtypes.{args.format}"
    print(f"Saving Kaplan-Meier plot to: {output_path_known.absolute()}")
    fig_known.save(output_path_known, dpi=args.dpi, bbox_inches='tight')

    print("Kaplan-Meier plot saved successfully!")
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
351 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
352 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
353 def generate_cox_plots(important_features, clinical_train, clinical_test, omics_train, omics_test, args, output_dir, output_name_base): |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
354 """Generate Cox proportional hazards plots""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
355 print("Generating Cox proportional hazards analysis...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
356 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
357 # Check if this is the specific format with target_variable, importance |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
358 required_cols = ['target_variable', 'layer', 'importance'] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
359 is_flexynesis_format = all(col in important_features.columns for col in required_cols) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
360 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
361 if not is_flexynesis_format: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
362 raise ValueError(f"Labels are not in flexynesis format (Custom labels). Please provide a valid important_features file with the required columns, {required_cols}.") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
363 |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
364 # Parse clinical variables |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
365 clinical_vars = [] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
366 if args.clinical_variables: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
367 clinical_vars = [var.strip() for var in args.clinical_variables.split(',')] |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
368 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
369 # Validate that survival variables are included |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
370 required_vars = [args.surv_time_var, args.surv_event_var] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
371 for var in required_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
372 if var not in clinical_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
373 clinical_vars.append(var) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
374 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
375 print(f"Using clinical variables: {', '.join(clinical_vars)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
376 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
377 # filter datasets for clinical variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
378 if all(var in clinical_train.columns and var in clinical_test.columns for var in clinical_vars): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
379 df_clin_train = clinical_train[clinical_vars] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
380 df_clin_test = clinical_test[clinical_vars] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
381 # Drop rows with NaN in clinical variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
382 df_clin_train = df_clin_train.dropna(subset=clinical_vars) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
383 df_clin_test = df_clin_test.dropna(subset=clinical_vars) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
384 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
385 raise ValueError(f"Not all clinical variables found in datasets. Available in train dataset: {clinical_train.columns.tolist()}, Available in test dataset: {clinical_test.columns.tolist()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
386 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
387 # Combine |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
388 df_clin = pd.concat([df_clin_train, df_clin_test], axis=0) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
389 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
390 # Get top survival markers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
391 print(f"Extracting top {args.top_features} important features for {args.surv_event_var}...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
392 try: |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
393 print(f"Loading {args.top_features} important features from: {args.important_features}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
394 imp_features = load_labels(args.important_features) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
395 imp_features = imp_features[imp_features['target_variable'] == args.surv_event_var] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
396 if args.layer not in imp_features['layer'].unique(): |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
397 print(f"Available class labels: {imp_features['layer'].unique()}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
398 raise ValueError(f"Class label '{args.layer}' not found in important features data: {args.important_features}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
399 imp_features = imp_features[imp_features['layer'] == args.layer] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
400 if imp_features.empty: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
401 raise ValueError(f"No important features found for target variable '{args.surv_event_var}' in {args.important_features}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
402 imp_features = imp_features.sort_values(by='importance', ascending=False) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
403 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
404 if len(imp_features) < args.top_features: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
405 raise ValueError(f"Requested top {args.top_features} features, but only {len(imp_features)} available in {args.important_features}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
406 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
407 imp = imp_features['name'].unique().tolist()[0:args.top_features] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
408 |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
409 print(f"Top features: {', '.join(imp)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
410 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
411 raise ValueError(f"Error getting important features: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
412 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
413 # Extract feature data from omics datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
414 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
415 omics_test = omics_test.loc[omics_test.index.isin(imp)] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
416 omics_train = omics_train.loc[omics_train.index.isin(imp)] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
417 # Drop rows with NaN in omics datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
418 omics_test = omics_test.dropna(subset=omics_test.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
419 omics_train = omics_train.dropna(subset=omics_train.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
420 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
421 df_imp = pd.concat([omics_train, omics_test], axis=1) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
422 df_imp = df_imp.T # Transpose to have samples as rows |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
423 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
424 print(f"Feature data shape: {df_imp.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
425 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
426 raise ValueError(f"Error extracting feature subset: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
427 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
428 # Combine markers with clinical variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
429 df = pd.merge(df_imp, df_clin, left_index=True, right_index=True) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
430 print(f"Combined data shape: {df.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
431 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
432 # Remove samples without survival endpoints |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
433 initial_samples = len(df) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
434 df = df[df[args.surv_event_var].notna()] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
435 final_samples = len(df) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
436 print(f"Removed {initial_samples - final_samples} samples without survival data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
437 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
438 if df.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
439 raise ValueError("No samples remain after filtering for survival data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
440 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
441 # Build Cox model |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
442 print(f"Building Cox model with time variable: {args.surv_time_var}, event variable: {args.surv_event_var}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
443 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
444 coxm = build_cox_model(df, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
445 duration_col=args.surv_time_var, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
446 event_col=args.surv_event_var, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
447 crossval=args.crossval, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
448 n_splits=args.n_splits, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
449 random_state=args.random_state) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
450 print("Cox model built successfully") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
451 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
452 raise ValueError(f"Error building Cox model: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
453 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
454 # Generate hazard ratios plot |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
455 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
456 print("Generating hazard ratios plot...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
457 fig = plot_hazard_ratios(coxm) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
458 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
459 output_path = output_dir / f"{output_name_base}_hazard_ratios.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
460 print(f"Saving hazard ratios plot to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
461 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
462 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
463 print("Cox proportional hazards analysis completed successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
464 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
465 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
466 raise ValueError(f"Error generating hazard ratios plot: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
467 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
468 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
469 def generate_plot_scatter(labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
470 """Generate scatter plot of known vs predicted labels""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
471 print("Generating scatter plots of known vs predicted labels...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
472 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
473 # Check if this is the specific format with sample_id, known_label, predicted_label |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
474 required_cols = ['sample_id', 'variable', 'class_label', 'probability', 'known_label', 'predicted_label'] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
475 is_flexynesis_format = all(col in labels.columns for col in required_cols) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
476 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
477 if is_flexynesis_format: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
478 # Parse target values from comma-separated string |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
479 if args.target_value: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
480 target_values = [val.strip() for val in args.target_value.split(',')] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
481 else: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
482 # If no target values specified, use all unique variables |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
483 target_values = labels['variable'].unique().tolist() |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
484 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
485 print(f"Processing target values: {target_values}") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
486 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
487 successful_plots = 0 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
488 skipped_plots = 0 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
489 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
490 for target_value in target_values: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
491 print(f"\nProcessing target value: '{target_value}'") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
492 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
493 # Filter labels for the current target value |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
494 target_labels = labels[labels['variable'] == target_value] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
495 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
496 if target_labels.empty: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
497 print(f" Warning: No data found for target value '{target_value}' - skipping") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
498 skipped_plots += 1 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
499 continue |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
500 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
501 # Check if labels are numeric and convert |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
502 true_values = pd.to_numeric(target_labels['known_label'], errors='coerce') |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
503 predicted_values = pd.to_numeric(target_labels['predicted_label'], errors='coerce') |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
504 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
505 if true_values.isna().all() or predicted_values.isna().all(): |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
506 print(f"No valid numeric values found for known or predicted labels in '{target_value}'") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
507 skipped_plots += 1 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
508 continue |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
509 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
510 try: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
511 print(f" Generating scatter plot for '{target_value}'...") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
512 fig = plot_scatter(true_values, predicted_values) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
513 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
514 # Create output filename with target value |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
515 safe_target_name = target_value.replace('/', '_').replace('\\', '_').replace(' ', '_') |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
516 output_filename = f"{output_name_base}_{safe_target_name}.{args.format}" |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
517 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
518 output_path = output_dir / output_filename |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
519 print(f" Saving scatter plot to: {output_path.absolute()}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
520 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
521 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
522 successful_plots += 1 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
523 print(f" Scatter plot for '{target_value}' generated successfully!") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
524 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
525 except Exception as e: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
526 print(f" Error generating plot for '{target_value}': {str(e)}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
527 skipped_plots += 1 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
528 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
529 # Summary |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
530 print(" Summary:") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
531 print(f" Successfully generated: {successful_plots} plots") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
532 print(f" Skipped: {skipped_plots} plots") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
533 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
534 if successful_plots == 0: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
535 raise ValueError("No scatter plots could be generated. Check your data and target values.") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
536 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
537 print("Scatter plot generation completed!") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
538 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
539 if not is_flexynesis_format: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
540 print("Labels are not in flexynesis format (Custom labels)") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
541 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
542 if not args.true_label or not args.predicted_label: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
543 raise ValueError("For custom labels, please specify --true_label and --predicted_label arguments.") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
544 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
545 # Check if labels are numeric and convert |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
546 true_values = pd.to_numeric(labels[args.true_label], errors='coerce') |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
547 predicted_values = pd.to_numeric(labels[args.predicted_label], errors='coerce') |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
548 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
549 if true_values.isna().all() or predicted_values.isna().all(): |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
550 print("No valid numeric values found for known or predicted labels") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
551 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
552 try: |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
553 print(" Generating scatter plot...") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
554 fig = plot_scatter(true_values, predicted_values) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
555 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
556 # Create output filename with target value |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
557 output_filename = f"{output_name_base}.{args.format}" |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
558 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
559 output_path = output_dir / output_filename |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
560 print(f" Saving scatter plot to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
561 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
562 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
563 except Exception as e: |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
564 print(f" Error generating plot: {str(e)}") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
565 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
566 print("Scatter plot generation completed!") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
567 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
568 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def _save_concordance_heatmap(true_values, predicted_values, output_path, dpi):
    """Plot one concordance heatmap, write it to ``output_path`` and release it.

    NOTE(review): assumes ``plot_label_concordance_heatmap`` returns an object
    with a matplotlib-style ``savefig`` and that ``plt`` is ``matplotlib.pyplot``
    (both are defined/imported elsewhere in this file) -- confirm.
    """
    fig = plot_label_concordance_heatmap(true_values, predicted_values)
    try:
        print(f" Saving heatmap to: {output_path.absolute()}")
        fig.savefig(output_path, dpi=dpi, bbox_inches='tight')
    finally:
        # Close only after saving, and also when saving fails, so figures do
        # not pile up while looping over many target values.
        plt.close(fig)


def generate_label_concordance_heatmap(labels, args, output_dir, output_name_base):
    """Generate label concordance heatmap(s) from a labels table.

    Two input layouts are supported:

    * flexynesis format -- the table has the columns ``sample_id``,
      ``variable``, ``class_label``, ``probability``, ``known_label`` and
      ``predicted_label``. One heatmap is written per target value, named
      ``<output_name_base>_<target>.<format>``.
    * custom labels -- any other table. ``args.true_label`` and
      ``args.predicted_label`` name the columns to compare and a single
      ``<output_name_base>.<format>`` file is written.

    Args:
        labels: DataFrame holding the label columns described above.
        args: Parsed arguments; reads ``target_value`` (comma-separated
            string or empty), ``true_label``, ``predicted_label``, ``format``
            and ``dpi``.
        output_dir: Directory path object the plots are written into.
        output_name_base: File name stem for the generated plots.

    Raises:
        ValueError: If the labels are in custom format and ``--true_label``
            or ``--predicted_label`` was not given.

    Plotting/saving errors are reported on stdout and do not abort the run;
    the success message is only printed when a heatmap was actually written.
    """
    print("Generating label concordance heatmaps...")

    # Check if this is the specific format with sample_id, known_label, predicted_label
    required_cols = ['sample_id', 'variable', 'class_label', 'probability', 'known_label', 'predicted_label']
    is_flexynesis_format = all(col in labels.columns for col in required_cols)

    if not is_flexynesis_format:
        print("Labels are not in flexynesis format (Custom labels)")

        if not args.true_label or not args.predicted_label:
            raise ValueError("For custom labels, please specify --true_label and --predicted_label arguments.")

        true_values = labels[args.true_label].tolist()
        predicted_values = labels[args.predicted_label].tolist()

        try:
            print(" Generating heatmap for...")
            output_path = output_dir / f"{output_name_base}.{args.format}"
            _save_concordance_heatmap(true_values, predicted_values, output_path, args.dpi)
        except Exception as e:
            print(f" Error generating heatmap: {str(e)}")
        else:
            print("Label concordance heatmap generated successfully!")
        return

    # Parse target values from comma-separated string
    if args.target_value:
        target_values = [val.strip() for val in args.target_value.split(',')]
    else:
        # If no target values specified, use all unique variables
        target_values = labels['variable'].unique().tolist()

    print(f"Processing target values: {target_values}")

    generated = 0
    for target_value in target_values:
        print(f"\nProcessing target value: '{target_value}'")

        # Filter labels for the current target value
        target_labels = labels[labels['variable'] == target_value]

        if target_labels.empty:
            print(f" Warning: No data found for target value '{target_value}' - skipping")
            continue

        true_values = target_labels['known_label'].tolist()
        predicted_values = target_labels['predicted_label'].tolist()

        try:
            print(f" Generating heatmap for '{target_value}'...")

            # Target values taken from the 'variable' column are not
            # guaranteed to be strings, so coerce before sanitising.
            safe_target_name = str(target_value).replace('/', '_').replace('\\', '_').replace(' ', '_')
            output_path = output_dir / f"{output_name_base}_{safe_target_name}.{args.format}"
            _save_concordance_heatmap(true_values, predicted_values, output_path, args.dpi)
        except Exception as e:
            print(f" Error generating heatmap for '{target_value}': {str(e)}")
            continue

        generated += 1

    if generated:
        print("Label concordance heatmap generated successfully!")
    else:
        print("No label concordance heatmaps could be generated. Check your data and target values.")
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
644 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
645 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
646 def generate_pr_curves(labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
647 """Generate precision-recall curves""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
648 print("Generating precision-recall curves...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
649 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
650 # Check if this is the specific format with sample_id, known_label, predicted_label |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
651 required_cols = ['sample_id', 'variable', 'class_label', 'probability', 'known_label', 'predicted_label'] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
652 is_flexynesis_format = all(col in labels.columns for col in required_cols) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
653 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
654 if not is_flexynesis_format: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
655 raise ValueError(f"Labels are not in flexynesis format (Custom labels). Please provide a valid label file with the required columns, {required_cols}.") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
656 |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
657 # Parse target values from comma-separated string |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
658 if args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
659 target_values = [val.strip() for val in args.target_value.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
660 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
661 # If no target values specified, use all unique variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
662 target_values = labels['variable'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
663 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
664 print(f"Processing target values: {target_values}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
665 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
666 for target_value in target_values: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
667 print(f"\nProcessing target value: '{target_value}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
668 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
669 # Filter labels for the current target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
670 target_labels = labels[labels['variable'] == target_value] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
671 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
672 # Check if this is a regression problem (no class probabilities) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
673 prob_columns = target_labels['class_label'].unique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
674 non_na_probs = target_labels['probability'].notna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
675 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
676 print(f" Class labels found: {list(prob_columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
677 print(f" Non-NaN probabilities: {non_na_probs}/{len(target_labels)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
678 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
679 # If most probabilities are NaN, this is likely a regression problem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
680 if non_na_probs < len(target_labels) * 0.1: # Less than 10% valid probabilities |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
681 print(" Detected regression problem - precision-recall curves not applicable") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
682 print(f" Skipping '{target_value}' (use regression evaluation metrics instead)") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
683 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
684 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
685 # Debug: Check data quality |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
686 total_rows = len(target_labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
687 missing_labels = target_labels['known_label'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
688 missing_probs = target_labels['probability'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
689 unique_samples = target_labels['sample_id'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
690 unique_classes = target_labels['class_label'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
691 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
692 print(f" Data summary: {total_rows} total rows, {unique_samples} unique samples, {unique_classes} unique classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
693 print(f" Missing data: {missing_labels} missing known_label, {missing_probs} missing probability") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
694 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
695 if missing_labels > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
696 print(f" Warning: Found {missing_labels} missing known_label values") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
697 missing_samples = target_labels[target_labels['known_label'].isna()]['sample_id'].unique()[:5] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
698 print(f" Sample IDs with missing known_label: {list(missing_samples)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
699 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
700 # Remove rows with missing known_label |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
701 target_labels = target_labels.dropna(subset=['known_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
702 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
703 print(f" Error: No valid known_label data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
704 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
705 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
706 # 1. Pivot to wide format |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
707 prob_df = target_labels.pivot(index='sample_id', columns='class_label', values='probability') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
708 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
709 print(f" After pivot: {prob_df.shape[0]} samples x {prob_df.shape[1]} classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
710 print(f" Class columns: {list(prob_df.columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
711 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
712 # Check for NaN values in probability data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
713 nan_counts = prob_df.isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
714 if nan_counts.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
715 print(f" NaN counts per class: {dict(nan_counts)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
716 print(f" Samples with any NaN: {prob_df.isna().any(axis=1).sum()}/{len(prob_df)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
717 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
718 # Drop only rows where ALL probabilities are NaN |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
719 all_nan_rows = prob_df.isna().all(axis=1) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
720 if all_nan_rows.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
721 print(f" Dropping {all_nan_rows.sum()} samples with all NaN probabilities") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
722 prob_df = prob_df[~all_nan_rows] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
723 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
724 remaining_nans = prob_df.isna().sum().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
725 if remaining_nans > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
726 print(f" Warning: {remaining_nans} individual NaN values remain - filling with 0") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
727 prob_df = prob_df.fillna(0) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
728 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
729 if prob_df.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
730 print(f" Error: No valid probability data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
731 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
732 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
733 # 2. Get true labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
734 true_labels_df = target_labels.drop_duplicates('sample_id')[['sample_id', 'known_label']].set_index('sample_id') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
735 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
736 # 3. Align indices - only keep samples that exist in both datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
737 common_indices = prob_df.index.intersection(true_labels_df.index) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
738 if len(common_indices) == 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
739 print(f" Error: No common sample_ids between probability and true label data for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
740 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
741 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
742 print(f" Found {len(common_indices)} samples with both probability and true label data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
743 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
744 # Filter both datasets to common indices |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
745 prob_df_aligned = prob_df.loc[common_indices] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
746 y_true = true_labels_df.loc[common_indices]['known_label'] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
747 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
748 # 4. Final check for NaN values |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
749 if y_true.isna().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
750 print(f" Error: True labels still contain NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
751 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
752 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
753 if prob_df_aligned.isna().any().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
754 print(f" Error: Probability data still contains NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
755 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
756 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
757 # 5. Convert categorical labels to integer labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
758 # Create a mapping from class names to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
759 class_names = list(prob_df_aligned.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
760 class_to_int = {class_name: i for i, class_name in enumerate(class_names)} |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
761 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
762 print(f" Class mapping: {class_to_int}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
763 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
764 # Convert true labels to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
765 y_true_np = y_true.map(class_to_int).to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
766 y_probs_np = prob_df_aligned.to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
767 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
768 print(f" Data shape: y_true={y_true_np.shape}, y_probs={y_probs_np.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
769 print(f" Unique true labels (integers): {set(y_true_np)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
770 print(f" Class labels (columns): {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
771 print(f" Label distribution: {dict(zip(*np.unique(y_true_np, return_counts=True)))}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
772 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
773 # Check for any unmapped labels (will be NaN) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
774 if pd.isna(y_true_np).any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
775 print(" Error: Some true labels could not be mapped to class columns") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
776 unmapped_labels = set(y_true[y_true.map(class_to_int).isna()]) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
777 print(f" Unmapped labels: {unmapped_labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
778 print(f" Available classes: {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
779 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
780 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
781 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
782 print(f" Generating precision-recall curve for '{target_value}'...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
783 fig = plot_pr_curves(y_true_np, y_probs_np) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
784 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
785 # Create output filename with target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
786 safe_target_name = target_value.replace('/', '_').replace('\\', '_').replace(' ', '_') |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
787 output_filename = f"{output_name_base}_{safe_target_name}.{args.format}" |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
788 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
789 output_path = output_dir / output_filename |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
790 print(f" Saving precision-recall curve to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
791 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
792 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
793 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
794 print(f" Error generating precision-recall curve for '{target_value}': {str(e)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
795 print(f" Debug info - y_true type: {type(y_true_np)}, contains NaN: {pd.isna(y_true_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
796 print(f" Debug info - y_probs type: {type(y_probs_np)}, contains NaN: {pd.isna(y_probs_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
797 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
798 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
799 print("Precision-recall curves generated successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
800 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
801 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
802 def generate_roc_curves(labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
803 """Generate ROC curves""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
804 print("Generating ROC curves...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
805 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
806 # Check that the labels table has every column of the flexynesis prediction format (all names in required_cols) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
807 required_cols = ['sample_id', 'variable', 'class_label', 'probability', 'known_label', 'predicted_label'] |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
808 is_flexynesis_format = all(col in labels.columns for col in required_cols) |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
809 |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
810 if not is_flexynesis_format: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
811 raise ValueError(f"Labels are not in flexynesis format (Custom labels). Please provide a valid label file with the required columns, {required_cols}.") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
812 |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
813 # Parse target values from comma-separated string |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
814 if args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
815 target_values = [val.strip() for val in args.target_value.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
816 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
817 # If no target values specified, use all unique variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
818 target_values = labels['variable'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
819 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
820 print(f"Processing target values: {target_values}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
821 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
822 for target_value in target_values: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
823 print(f"\nProcessing target value: '{target_value}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
824 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
825 # Filter labels for the current target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
826 target_labels = labels[labels['variable'] == target_value] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
827 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
828 # Check if this is a regression problem (no class probabilities) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
829 prob_columns = target_labels['class_label'].unique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
830 non_na_probs = target_labels['probability'].notna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
831 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
832 print(f" Class labels found: {list(prob_columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
833 print(f" Non-NaN probabilities: {non_na_probs}/{len(target_labels)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
834 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
835 # If most probabilities are NaN, this is likely a regression problem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
836 if non_na_probs < len(target_labels) * 0.1: # Less than 10% valid probabilities |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
837 print(" Detected regression problem - ROC curves not applicable") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
838 print(f" Skipping '{target_value}' (use regression evaluation metrics instead)") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
839 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
840 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
841 # Debug: Check data quality |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
842 total_rows = len(target_labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
843 missing_labels = target_labels['known_label'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
844 missing_probs = target_labels['probability'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
845 unique_samples = target_labels['sample_id'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
846 unique_classes = target_labels['class_label'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
847 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
848 print(f" Data summary: {total_rows} total rows, {unique_samples} unique samples, {unique_classes} unique classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
849 print(f" Missing data: {missing_labels} missing known_label, {missing_probs} missing probability") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
850 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
851 if missing_labels > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
852 print(f" Warning: Found {missing_labels} missing known_label values") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
853 missing_samples = target_labels[target_labels['known_label'].isna()]['sample_id'].unique()[:5] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
854 print(f" Sample IDs with missing known_label: {list(missing_samples)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
855 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
856 # Remove rows with missing known_label |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
857 target_labels = target_labels.dropna(subset=['known_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
858 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
859 print(f" Error: No valid known_label data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
860 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
861 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
862 # 1. Pivot to wide format |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
863 prob_df = target_labels.pivot(index='sample_id', columns='class_label', values='probability') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
864 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
865 print(f" After pivot: {prob_df.shape[0]} samples x {prob_df.shape[1]} classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
866 print(f" Class columns: {list(prob_df.columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
867 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
868 # Check for NaN values in probability data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
869 nan_counts = prob_df.isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
870 if nan_counts.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
871 print(f" NaN counts per class: {dict(nan_counts)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
872 print(f" Samples with any NaN: {prob_df.isna().any(axis=1).sum()}/{len(prob_df)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
873 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
874 # Drop only rows where ALL probabilities are NaN |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
875 all_nan_rows = prob_df.isna().all(axis=1) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
876 if all_nan_rows.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
877 print(f" Dropping {all_nan_rows.sum()} samples with all NaN probabilities") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
878 prob_df = prob_df[~all_nan_rows] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
879 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
880 remaining_nans = prob_df.isna().sum().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
881 if remaining_nans > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
882 print(f" Warning: {remaining_nans} individual NaN values remain - filling with 0") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
883 prob_df = prob_df.fillna(0) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
884 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
885 if prob_df.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
886 print(f" Error: No valid probability data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
887 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
888 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
889 # 2. Get true labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
890 true_labels_df = target_labels.drop_duplicates('sample_id')[['sample_id', 'known_label']].set_index('sample_id') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
891 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
892 # 3. Align indices - only keep samples that exist in both datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
893 common_indices = prob_df.index.intersection(true_labels_df.index) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
894 if len(common_indices) == 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
895 print(f" Error: No common sample_ids between probability and true label data for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
896 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
897 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
898 print(f" Found {len(common_indices)} samples with both probability and true label data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
899 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
900 # Filter both datasets to common indices |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
901 prob_df_aligned = prob_df.loc[common_indices] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
902 y_true = true_labels_df.loc[common_indices]['known_label'] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
903 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
904 # 4. Final check for NaN values |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
905 if y_true.isna().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
906 print(f" Error: True labels still contain NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
907 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
908 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
909 if prob_df_aligned.isna().any().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
910 print(f" Error: Probability data still contains NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
911 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
912 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
913 # 5. Convert categorical labels to integer labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
914 # Create a mapping from class names to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
915 class_names = list(prob_df_aligned.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
916 class_to_int = {class_name: i for i, class_name in enumerate(class_names)} |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
917 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
918 print(f" Class mapping: {class_to_int}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
919 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
920 # Convert true labels to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
921 y_true_np = y_true.map(class_to_int).to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
922 y_probs_np = prob_df_aligned.to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
923 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
924 print(f" Data shape: y_true={y_true_np.shape}, y_probs={y_probs_np.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
925 print(f" Unique true labels (integers): {set(y_true_np)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
926 print(f" Class labels (columns): {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
927 print(f" Label distribution: {dict(zip(*np.unique(y_true_np, return_counts=True)))}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
928 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
929 # Check for any unmapped labels (will be NaN) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
930 if pd.isna(y_true_np).any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
931 print(" Error: Some true labels could not be mapped to class columns") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
932 unmapped_labels = set(y_true[y_true.map(class_to_int).isna()]) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
933 print(f" Unmapped labels: {unmapped_labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
934 print(f" Available classes: {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
935 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
936 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
937 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
938 print(f" Generating ROC curve for '{target_value}'...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
939 fig = plot_roc_curves(y_true_np, y_probs_np) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
940 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
941 # Create output filename with target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
942 safe_target_name = target_value.replace('/', '_').replace('\\', '_').replace(' ', '_') |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
943 output_filename = f"{output_name_base}_{safe_target_name}.{args.format}" |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
944 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
945 output_path = output_dir / output_filename |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
946 print(f" Saving ROC curve to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
947 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
948 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
949 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
950 print(f" Error generating ROC curve for '{target_value}': {str(e)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
951 print(f" Debug info - y_true type: {type(y_true_np)}, contains NaN: {pd.isna(y_true_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
952 print(f" Debug info - y_probs type: {type(y_probs_np)}, contains NaN: {pd.isna(y_probs_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
953 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
954 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
955 print("ROC curves generated successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
956 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
957 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def generate_box_plots(labels, args, output_dir, output_name_base):
    """Generate box plots of predicted class probabilities per known label.

    For every target variable, and for every class label predicted for that
    variable, one figure is written that plots the ``probability`` column
    against the ``known_label`` column.

    Args:
        labels: DataFrame that must contain the columns ``sample_id``,
            ``variable``, ``class_label``, ``probability``, ``known_label``
            and ``predicted_label``.
        args: Parsed arguments; ``target_value`` (optional comma-separated
            string of variables), ``format`` (file extension) and ``dpi``
            are read.
        output_dir: Directory the figures are written to; combined with the
            file name via the ``/`` operator.
        output_name_base: Prefix used for every output file name.

    Raises:
        ValueError: If any of the required columns is missing from ``labels``.
    """
    # Every column below is read further down, so fail early when one is absent.
    required_cols = ['sample_id', 'variable', 'class_label', 'probability', 'known_label', 'predicted_label']
    if not all(col in labels.columns for col in required_cols):
        raise ValueError(
            "Labels are not in flexynesis format (Custom labels). "
            "Please provide a valid label file with the required columns, "
            f"{required_cols}."
        )

    print("Generating box plots...")

    # Explicit comma-separated targets win; otherwise plot every variable.
    if args.target_value:
        target_values = [val.strip() for val in args.target_value.split(',')]
    else:
        target_values = labels['variable'].unique().tolist()

    print(f"Processing target values: {target_values}")

    for target_value in target_values:
        print(f"\nProcessing target value: '{target_value}'")

        target_labels = labels[labels['variable'] == target_value]
        if target_labels.empty:
            print(f" Warning: No data found for target value '{target_value}' - skipping")
            continue

        prob_columns = target_labels['class_label'].unique()
        non_na_probs = target_labels['probability'].notna().sum()

        print(f" Class labels found: {list(prob_columns)}")
        print(f" Non-NaN probabilities: {non_na_probs}/{len(target_labels)}")

        # Fewer than 10% usable probabilities is treated as a regression
        # target, for which per-class probability box plots make no sense.
        if non_na_probs < len(target_labels) * 0.1:
            print(" Detected regression problem - box plots not applicable")
            print(f" Skipping '{target_value}' (use regression evaluation metrics instead)")
            continue

        # Data-quality summary, printed before any rows are dropped.
        total_rows = len(target_labels)
        missing_labels = target_labels['known_label'].isna().sum()
        missing_probs = target_labels['probability'].isna().sum()
        unique_samples = target_labels['sample_id'].nunique()
        unique_classes = target_labels['class_label'].nunique()

        print(f" Data summary: {total_rows} total rows, {unique_samples} unique samples, {unique_classes} unique classes")
        print(f" Missing data: {missing_labels} missing known_label, {missing_probs} missing probability")

        if missing_labels > 0:
            print(f" Warning: Found {missing_labels} missing known_label values")
            # Only the first five offending sample ids are reported.
            missing_samples = target_labels[target_labels['known_label'].isna()]['sample_id'].unique()[:5]
            print(f" Sample IDs with missing known_label: {list(missing_samples)}")

            target_labels = target_labels.dropna(subset=['known_label'])
            if target_labels.empty:
                print(f" Error: No valid known_label data remaining for '{target_value}' - skipping")
                continue

        # Both plotted columns must be present for a row to be usable.
        clean_data = target_labels.dropna(subset=['known_label', 'probability'])
        if clean_data.empty:
            print(" No valid data after cleaning - skipping")
            continue

        # Targets taken from the 'variable' column are not guaranteed to be
        # strings, so coerce before sanitising (mirrors the class-label
        # handling). The value does not change per class, hence computed once.
        safe_target_name = str(target_value).replace('/', '_').replace('\\', '_').replace(' ', '_')

        for class_label in clean_data['class_label'].unique():
            print(f" Generating box plot for class: {class_label}")

            class_data = clean_data[clean_data['class_label'] == class_label]

            try:
                fig = plot_boxplot(
                    categorical_x=class_data['known_label'],
                    numerical_y=class_data['probability'],
                    title_x='True Label',
                    title_y=f'Predicted Probability ({class_label})',
                )
                try:
                    safe_class_name = str(class_label).replace('/', '_').replace('\\', '_').replace(' ', '_').replace(':', '_')
                    output_filename = f"{output_name_base}_{safe_target_name}_{safe_class_name}.{args.format}"
                    output_path = output_dir / output_filename

                    print(f" Saving box plot to: {output_path.absolute()}")
                    fig.savefig(output_path, dpi=args.dpi, bbox_inches='tight')
                finally:
                    # Release the figure even when saving fails, so a run
                    # with many classes does not accumulate open figures.
                    plt.close(fig)
            except Exception as e:
                # Best effort: report the failure and move on to the next class.
                print(f" Error generating box plot for class '{class_label}': {str(e)}")
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1061 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1062 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1063 def main(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1064 """Main function to parse arguments and generate plots""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1065 parser = argparse.ArgumentParser(description="Generate plots using flexynesis") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1066 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1067 # Labels file (optional for the parser; required only by some plot types, validated below) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1068 parser.add_argument("--labels", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1069 help="Path to labels file generated by flexynesis") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1070 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1071 # Plot type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1072 parser.add_argument("--plot_type", type=str, required=True, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1073 choices=['dimred', 'kaplan_meier', 'cox', 'scatter', 'concordance_heatmap', 'pr_curve', 'roc_curve', 'box_plot'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1074 help="Type of plot to generate: 'dimred' for dimensionality reduction, 'kaplan_meier' for survival analysis, 'cox' for Cox proportional hazards analysis, 'scatter' for scatter plots, 'concordance_heatmap' for label concordance heatmaps, 'pr_curve' for precision-recall curves, 'roc_curve' for ROC curves, or 'box_plot' for box plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1075 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1076 # Arguments for dimensionality reduction |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1077 parser.add_argument("--embeddings", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1078 help="Path to input data embeddings file (CSV or tabular format). Required for dimred plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1079 parser.add_argument("--method", type=str, default='pca', choices=['pca', 'umap'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1080 help="Transformation method ('pca' or 'umap'). Default is 'pca'. Used for dimred plots.") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1081 parser.add_argument("--color", type=str, default=None, |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1082 help="User-defined color for the plot.") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1083 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1084 # Arguments for Kaplan-Meier |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1085 parser.add_argument("--survival_data", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1086 help="Path to survival data file with columns: duration and event. Required for kaplan_meier plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1087 parser.add_argument("--surv_time_var", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1088 help="Column name for survival time") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1089 parser.add_argument("--surv_event_var", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1090 help="Column name for survival event") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1091 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1092 # Arguments for Cox analysis |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1093 parser.add_argument("--important_features", type=str, |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1094 help="Path to calculated feature importance file. Required for cox plots.") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1095 parser.add_argument("--clinical_train", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1096 help="Path to training dataset (pickle file). Required for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1097 parser.add_argument("--clinical_test", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1098 help="Path to test dataset (pickle file). Required for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1099 parser.add_argument("--omics_train", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1100 help="Path to training omics dataset. Optional for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1101 parser.add_argument("--omics_test", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1102 help="Path to test omics dataset. Optional for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1103 parser.add_argument("--clinical_variables", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1104 help="Comma-separated list of clinical variables to include in Cox model (e.g., 'AGE,SEX,HISTOLOGICAL_DIAGNOSIS,STUDY')") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1105 parser.add_argument("--top_features", type=int, default=20, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1106 help="Number of top important features to include in Cox model. Default is 5") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1107 parser.add_argument("--crossval", action='store_true', |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1108 help="If True, performs K-fold cross-validation and returns average C-index. Default is False") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1109 parser.add_argument("--n_splits", type=int, default=5, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1110 help="Number of folds for cross-validation. Default is 5") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1111 parser.add_argument("--random_state", type=int, default=42, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1112 help="Random seed for reproducibility. Default is 42") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1113 parser.add_argument("--layer", type=str, default=None, |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1114 help="Class label for filtering important features.") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1115 |
5
466b593fd87e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
bgruening
parents:
3
diff
changeset
|
1116 # Arguments for dimred, scatter plot, heatmap, PR curves, ROC curves, and box plots |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1117 parser.add_argument("--target_value", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1118 help="Target value for scatter plot.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1119 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1120 # Arguments for scatter plots and concordance heatmaps |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1121 parser.add_argument("--true_label", type=str, default=None, |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1122 help="Column name for true labels in scatter plots and concordance heatmaps.") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1123 parser.add_argument("--predicted_label", type=str, default=None, |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1124 help="Column name for predicted labels in scatter plots and concordance heatmaps.") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1125 # Common arguments |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1126 parser.add_argument("--output_dir", type=str, default='output', |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1127 help="Output directory. Default is 'output'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1128 parser.add_argument("--output_name", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1129 help="Output filename base") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1130 parser.add_argument("--format", type=str, default='jpg', choices=['png', 'pdf', 'svg', 'jpg'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1131 help="Output format for the plot. Default is 'jpg'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1132 parser.add_argument("--dpi", type=int, default=300, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1133 help="DPI for the output image. Default is 300") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1134 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1135 args = parser.parse_args() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1136 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1137 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1138 # validate plot type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1139 if not args.plot_type: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1140 raise ValueError("Please specify a plot type using --plot_type") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1141 if args.plot_type not in ['dimred', 'kaplan_meier', 'cox', 'scatter', 'concordance_heatmap', 'pr_curve', 'roc_curve', 'box_plot']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1142 raise ValueError(f"Invalid plot type: {args.plot_type}. Must be one of: 'dimred', 'kaplan_meier', 'cox', 'scatter', 'concordance_heatmap', 'pr_curve', 'roc_curve', 'box_plot'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1143 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1144 # Validate plot type requirements |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1145 if args.plot_type in ['dimred']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1146 if not args.embeddings: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1147 raise ValueError("--embeddings is required when plot_type is 'dimred'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1148 if not os.path.isfile(args.embeddings): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1149 raise FileNotFoundError(f"embeddings file not found: {args.embeddings}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1150 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1151 raise ValueError("--labels is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1152 if not args.method: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1153 raise ValueError("--method is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1154 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1155 if args.plot_type in ['kaplan_meier']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1156 if not args.survival_data: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1157 raise ValueError("--survival_data is required when plot_type is 'kaplan_meier'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1158 if not os.path.isfile(args.survival_data): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1159 raise FileNotFoundError(f"Survival data file not found: {args.survival_data}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1160 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1161 raise ValueError("--labels is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1162 if not args.method: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1163 raise ValueError("--method is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1164 if not args.surv_time_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1165 raise ValueError("--surv_time_var is required for Kaplan-Meier plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1166 if not args.surv_event_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1167 raise ValueError("--surv_event_var is required for Kaplan-Meier plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1168 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1169 if args.plot_type in ['cox']: |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1170 if not args.important_features: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1171 raise ValueError("--important_features is required when plot_type is 'cox'") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1172 if not os.path.isfile(args.important_features): |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1173 raise FileNotFoundError(f"Important features file not found: {args.important_features}") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1174 if not args.clinical_train: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1175 raise ValueError("--clinical_train is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1176 if not os.path.isfile(args.clinical_train): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1177 raise FileNotFoundError(f"Training dataset file not found: {args.clinical_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1178 if not args.clinical_test: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1179 raise ValueError("--clinical_test is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1180 if not os.path.isfile(args.clinical_test): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1181 raise FileNotFoundError(f"Test dataset file not found: {args.clinical_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1182 if not args.omics_train: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1183 raise ValueError("--omics_train is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1184 if not os.path.isfile(args.omics_train): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1185 raise FileNotFoundError(f"Training omics dataset file not found: {args.omics_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1186 if not args.omics_test: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1187 raise ValueError("--omics_test is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1188 if not os.path.isfile(args.omics_test): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1189 raise FileNotFoundError(f"Test omics dataset file not found: {args.omics_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1190 if not args.surv_time_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1191 raise ValueError("--surv_time_var is required for Cox plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1192 if not args.surv_event_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1193 raise ValueError("--surv_event_var is required for Cox plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1194 if not args.clinical_variables: |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1195 print("--clinical_variables is not set for Cox plots") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1196 if not isinstance(args.top_features, int) or args.top_features <= 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1197 raise ValueError("--top_features must be a positive integer") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1198 if not args.crossval: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1199 args.crossval = False |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1200 if not isinstance(args.n_splits, int) or args.n_splits <= 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1201 raise ValueError("--n_splits must be a positive integer") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1202 if not isinstance(args.random_state, int): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1203 raise ValueError("--random_state must be an integer") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1204 if not args.layer: |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1205 print("--layer is not specified, using all classes from labels") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1206 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1207 if args.plot_type in ['scatter']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1208 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1209 raise ValueError("--labels is required for scatter plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1210 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1211 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1212 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1213 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1214 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1215 if args.plot_type in ['concordance_heatmap']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1216 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1217 raise ValueError("--labels is required for concordance heatmap") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1218 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1219 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1220 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1221 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1222 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1223 if args.plot_type in ['pr_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1224 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1225 raise ValueError("--labels is required for precision-recall curves") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1226 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1227 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1228 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1229 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1230 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1231 if args.plot_type in ['roc_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1232 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1233 raise ValueError("--labels is required for ROC curves") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1234 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1235 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1236 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1237 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1238 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1239 if args.plot_type in ['box_plot']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1240 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1241 raise ValueError("--labels is required for box plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1242 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1243 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1244 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1245 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1246 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1247 # Validate other arguments |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1248 if args.method not in ['pca', 'umap']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1249 raise ValueError("Method must be 'pca' or 'umap'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1250 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1251 # Create output directory |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1252 output_dir = Path(args.output_dir) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1253 output_dir.mkdir(parents=True, exist_ok=True) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1254 print(f"Output directory: {output_dir.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1255 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1256 # Generate output filename base |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1257 if args.output_name: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1258 output_name_base = args.output_name |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1259 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1260 if args.plot_type == 'dimred': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1261 embeddings_name = Path(args.embeddings).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1262 output_name_base = f"{embeddings_name}_{args.method}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1263 elif args.plot_type == 'kaplan_meier': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1264 survival_name = Path(args.survival_data).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1265 output_name_base = f"{survival_name}_km" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1266 elif args.plot_type == 'cox': |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1267 model_name = Path(args.important_features).stem |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1268 output_name_base = f"{model_name}_cox" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1269 elif args.plot_type == 'scatter': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1270 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1271 output_name_base = f"{labels_name}_scatter" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1272 elif args.plot_type == 'concordance_heatmap': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1273 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1274 output_name_base = f"{labels_name}_concordance" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1275 elif args.plot_type == 'pr_curve': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1276 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1277 output_name_base = f"{labels_name}_pr_curves" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1278 elif args.plot_type == 'roc_curve': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1279 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1280 output_name_base = f"{labels_name}_roc_curves" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1281 elif args.plot_type == 'box_plot': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1282 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1283 output_name_base = f"{labels_name}_box_plot" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1284 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1285 # Generate plots based on type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1286 if args.plot_type in ['dimred']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1287 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1288 print(f"Loading labels from: {args.labels}") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1289 labels = load_labels(args.labels) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1290 # Load embeddings data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1291 print(f"Loading embeddings from: {args.embeddings}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1292 embeddings, sample_names = load_embeddings(args.embeddings) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1293 print(f"embeddings shape: {embeddings.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1294 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1295 # Match samples to embeddings |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1296 matched_labels = match_samples_to_embeddings(sample_names, labels) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1297 print(f"Successfully matched {len(matched_labels)} samples for dimensionality reduction") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1298 print(f"Matched labels shape: {matched_labels.shape}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1299 print(f"Columns in matched labels: {matched_labels.columns.tolist()}") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1300 generate_dimred_plots(embeddings, matched_labels, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1301 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1302 elif args.plot_type in ['kaplan_meier']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1303 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1304 print(f"Loading labels from: {args.labels}") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1305 labels = load_labels(args.labels) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1306 # Load survival data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1307 print(f"Loading survival data from: {args.survival_data}") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1308 survival_data = load_labels(args.survival_data) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1309 print(f"Survival data shape: {survival_data.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1310 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1311 generate_km_plots(survival_data, labels, args, output_dir, output_name_base) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1312 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1313 elif args.plot_type in ['cox']: |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1314 # Load important_features and datasets |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1315 print(f"Loading important features from: {args.important_features}") |
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1316 important_features = load_labels(args.important_features) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1317 print(f"Loading training dataset from: {args.clinical_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1318 clinical_train = load_omics(args.clinical_train) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1319 print(f"Loading test dataset from: {args.clinical_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1320 clinical_test = load_omics(args.clinical_test) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1321 print(f"Loading training omics dataset from: {args.omics_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1322 omics_train = load_omics(args.omics_train) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1323 print(f"Loading test omics dataset from: {args.omics_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1324 omics_test = load_omics(args.omics_test) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1325 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1326 generate_cox_plots(important_features, clinical_train, clinical_test, omics_test, omics_train, args, output_dir, output_name_base) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1327 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1328 elif args.plot_type in ['scatter']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1329 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1330 print(f"Loading labels from: {args.labels}") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1331 labels = load_labels(args.labels) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1332 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1333 generate_plot_scatter(labels, args, output_dir, output_name_base) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1334 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1335 elif args.plot_type in ['concordance_heatmap']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1336 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1337 print(f"Loading labels from: {args.labels}") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1338 labels = load_labels(args.labels) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1339 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1340 generate_label_concordance_heatmap(labels, args, output_dir, output_name_base) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1341 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1342 elif args.plot_type in ['pr_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1343 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1344 print(f"Loading labels from: {args.labels}") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1345 labels = load_labels(args.labels) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1346 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1347 generate_pr_curves(labels, args, output_dir, output_name_base) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1348 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1349 elif args.plot_type in ['roc_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1350 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1351 print(f"Loading labels from: {args.labels}") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1352 labels = load_labels(args.labels) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1353 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1354 generate_roc_curves(labels, args, output_dir, output_name_base) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1355 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1356 elif args.plot_type in ['box_plot']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1357 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1358 print(f"Loading labels from: {args.labels}") |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1359 labels = load_labels(args.labels) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1360 |
6
33816f44fc7d
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 6b520305ec30e6dc37eba92c67a5368cea0fc5ad
bgruening
parents:
5
diff
changeset
|
1361 generate_box_plots(labels, args, output_dir, output_name_base) |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1362 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1363 print("All plots generated successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1364 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1365 except (FileNotFoundError, ValueError, pd.errors.ParserError) as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1366 print(f"Error: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1367 return 1 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1368 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1369 return 0 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1370 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1371 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1372 if __name__ == "__main__": |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1373 exit(main()) |