Mercurial > repos > bgruening > flexynesis
annotate flexynesis_plot.py @ 5:466b593fd87e draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
author | bgruening |
---|---|
date | Fri, 04 Jul 2025 14:57:40 +0000 |
parents | 525c661a7fdc |
children |
rev | line source |
---|---|
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
2 """Generate plots using flexynesis |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
3 This script generates dimensionality reduction plots, Kaplan-Meier survival curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
4 and Cox proportional hazards models from data processed by flexynesis.""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
5 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
6 import argparse |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
7 import os |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
8 from pathlib import Path |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
9 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
10 import matplotlib.pyplot as plt |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
11 import numpy as np |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
12 import pandas as pd |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
13 import seaborn as sns |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
14 import torch |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
15 from flexynesis import ( |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
16 build_cox_model, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
17 get_important_features, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
18 plot_dim_reduced, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
19 plot_hazard_ratios, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
20 plot_kaplan_meier_curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
21 plot_pr_curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
22 plot_roc_curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
23 plot_scatter |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
24 ) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
25 from scipy.stats import kruskal, mannwhitneyu |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
26 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
27 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_embeddings(embeddings_path):
    """Load an embeddings table from a delimited text file.

    The delimiter is chosen from the file extension (case-insensitive):
    ``.csv`` is read comma-separated; ``.tsv``, ``.txt``, ``.tab`` and
    ``.tabular`` are read tab-separated. The first column becomes the
    DataFrame index.

    Args:
        embeddings_path: Path to the embeddings file.

    Returns:
        A tuple ``(df, index_values)`` where ``df`` is the loaded DataFrame
        and ``index_values`` is ``df.index`` as a plain list (presumably the
        sample names -- confirm against the caller).

    Raises:
        ValueError: On any failure, including an unsupported extension or a
            read/parse error. The original exception is chained as the cause.
    """
    try:
        # Determine file extension
        file_ext = Path(embeddings_path).suffix.lower()

        if file_ext == '.csv':
            df = pd.read_csv(embeddings_path, index_col=0)
        elif file_ext in ['.tsv', '.txt', '.tab', '.tabular']:
            df = pd.read_csv(embeddings_path, sep='\t', index_col=0)
        else:
            raise ValueError(f"Unsupported file extension: {file_ext}")

        return df, df.index.tolist()

    except Exception as e:
        # Deliberately broad: every failure is reported as one ValueError
        # that names the offending file.
        raise ValueError(f"Error loading embeddings from {embeddings_path}: {e}") from e
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
45 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
46 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_labels(labels_input):
    """Load predicted labels from flexynesis.

    The delimiter is chosen from the file extension (case-insensitive):
    ``.csv`` is read comma-separated; ``.tsv``, ``.txt``, ``.tab`` and
    ``.tabular`` are read tab-separated. No column is used as the index.

    Args:
        labels_input: Path to the labels file.

    Returns:
        The loaded DataFrame, guaranteed to contain the columns
        ``sample_id``, ``variable``, ``class_label``, ``probability``,
        ``known_label`` and ``predicted_label``.

    Raises:
        ValueError: On any failure, including an unsupported extension, a
            read/parse error or a missing required column. The original
            exception is chained as the cause.
    """
    try:
        # Determine file extension
        file_ext = Path(labels_input).suffix.lower()

        if file_ext == '.csv':
            df = pd.read_csv(labels_input)
        elif file_ext in ['.tsv', '.txt', '.tab', '.tabular']:
            df = pd.read_csv(labels_input, sep='\t')
        else:
            # Without this branch `df` stays unbound and the caller sees an
            # unrelated "local variable" error instead of the real cause.
            raise ValueError(f"Unsupported file extension: {file_ext}")

        # Check that the file has the expected flexynesis label columns
        required_cols = ['sample_id', 'variable', 'class_label', 'probability', 'known_label', 'predicted_label']
        if all(col in df.columns for col in required_cols):
            return df
        else:
            raise ValueError(f"Labels file {labels_input} does not contain required columns: {required_cols}")

    except Exception as e:
        # Deliberately broad: every failure is reported as one ValueError
        # that names the offending file.
        raise ValueError(f"Error loading labels from {labels_input}: {e}") from e
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
67 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
68 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_survival_data(survival_path):
    """Load survival data from a file. First column should be sample_id.

    The delimiter is chosen from the file extension (case-insensitive):
    ``.csv`` is read comma-separated; ``.tsv``, ``.txt``, ``.tab`` and
    ``.tabular`` are read tab-separated. The first column becomes the
    DataFrame index.

    Args:
        survival_path: Path to the survival data file.

    Returns:
        The loaded DataFrame, indexed by its first column.

    Raises:
        ValueError: On any failure, including an unsupported extension or a
            read/parse error. The original exception is chained as the cause.
    """
    try:
        # Determine file extension
        file_ext = Path(survival_path).suffix.lower()

        if file_ext == '.csv':
            df = pd.read_csv(survival_path, index_col=0)
        elif file_ext in ['.tsv', '.txt', '.tab', '.tabular']:
            df = pd.read_csv(survival_path, sep='\t', index_col=0)
        else:
            raise ValueError(f"Unsupported file extension: {file_ext}")
        return df

    except Exception as e:
        # Deliberately broad: every failure is reported as one ValueError
        # that names the offending file.
        raise ValueError(f"Error loading survival data from {survival_path}: {e}") from e
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
85 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
86 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_omics(omics_path):
    """Load omics data from a file. First column should be features.

    The delimiter is chosen from the file extension (case-insensitive):
    ``.csv`` is read comma-separated; ``.tsv``, ``.txt``, ``.tab`` and
    ``.tabular`` are read tab-separated. The first column becomes the
    DataFrame index.

    Args:
        omics_path: Path to the omics matrix file.

    Returns:
        The loaded DataFrame, indexed by its first column.

    Raises:
        ValueError: On any failure, including an unsupported extension or a
            read/parse error. The original exception is chained as the cause.
    """
    try:
        # Determine file extension
        file_ext = Path(omics_path).suffix.lower()

        if file_ext == '.csv':
            df = pd.read_csv(omics_path, index_col=0)
        elif file_ext in ['.tsv', '.txt', '.tab', '.tabular']:
            df = pd.read_csv(omics_path, sep='\t', index_col=0)
        else:
            raise ValueError(f"Unsupported file extension: {file_ext}")
        return df

    except Exception as e:
        # Deliberately broad: every failure is reported as one ValueError
        # that names the offending file.
        raise ValueError(f"Error loading omics data from {omics_path}: {e}") from e
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
103 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
104 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_model(model_path):
    """Load flexynesis model from pickle file.

    Args:
        model_path: Path to a file previously written with ``torch.save``.

    Returns:
        Whatever object ``torch.load`` deserializes from the file.

    Raises:
        ValueError: On any failure to open or deserialize the file. The
            original exception is chained as the cause.
    """
    try:
        with open(model_path, 'rb') as f:
            # NOTE(review): weights_only=False performs full unpickling and
            # can execute arbitrary code embedded in the file. Only load
            # model files from a trusted source.
            model = torch.load(f, weights_only=False)
        return model
    except Exception as e:
        raise ValueError(f"Error loading model from {model_path}: {e}") from e
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
113 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
114 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def match_samples_to_embeddings(sample_names, label_data):
    """Filter label data to match sample names in the embeddings.

    Args:
        sample_names: List-like of sample identifiers to keep.
        label_data: DataFrame with a ``sample_id`` column.

    Returns:
        The rows of ``label_data`` whose ``sample_id`` is in
        ``sample_names``. Rows keep their order and index labels from
        ``label_data``, and every row of a repeated ``sample_id`` is kept.
    """
    # Boolean mask: True where the row's sample_id is one of sample_names
    keep = label_data['sample_id'].isin(sample_names)
    return label_data[keep]
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
119 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
120 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
121 def detect_color_type(labels_series): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
122 """Auto-detect whether target variables should be treated as categorical or numerical""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
123 # Remove NaN |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
124 clean_labels = labels_series.dropna() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
125 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
126 if clean_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
127 return 'categorical' # default output if no labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
128 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
129 # Check if all values can be converted to numbers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
130 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
131 numeric_labels = pd.to_numeric(clean_labels, errors='coerce') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
132 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
133 # If conversion failed -> categorical |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
134 if numeric_labels.isna().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
135 return 'categorical' |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
136 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
137 # Check number of unique values |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
138 unique_count = len(clean_labels.unique()) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
139 total_count = len(clean_labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
140 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
141 # If few unique values relative to total -> categorical |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
142 # Threshold: if unique values < 10 OR unique/total < 0.1 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
143 if unique_count < 10 or (unique_count / total_count) < 0.1: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
144 return 'categorical' |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
145 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
146 return 'numerical' |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
147 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
148 except Exception: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
149 return 'categorical' |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
150 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
151 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def plot_label_concordance_heatmap(labels1, labels2, figsize=(12, 10)):
    """
    Draw a heatmap showing how two sets of labels agree with each other.

    The two label sets are cross-tabulated with pandas crosstab and every row
    of the resulting count table is divided by its row total, so each cell
    holds the fraction of a 'Labels Set 1' group that falls into the
    corresponding 'Labels Set 2' group.

    Parameters:
    - labels1: The first set of labels (heatmap rows).
    - labels2: The second set of labels (heatmap columns).
    - figsize: Size of the matplotlib figure as (width, height).

    Returns:
    - The current matplotlib figure containing the heatmap.
    """
    # Count co-occurrences of the two label sets
    first_set = pd.Series(labels1, name='Labels Set 1')
    second_set = pd.Series(labels2, name='Labels Set 2')
    counts = pd.crosstab(first_set, second_set)

    # Row-wise normalization: divide each row by its own total
    row_totals = counts.sum(axis=1)
    fractions = counts.div(row_totals, axis=0)

    # Render the normalized table as an annotated heatmap
    plt.figure(figsize=figsize)
    sns.heatmap(fractions, annot=True, cmap='viridis', linewidths=.5)
    plt.title('Concordance between label groups')

    return plt.gcf()
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
171 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
172 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def plot_boxplot(categorical_x, numerical_y, title_x='Categories', title_y='Values', figsize=(10, 6), jittersize=4):
    """
    Create a boxplot with jittered points showing a numerical variable per category.

    The x axis holds the categories and the y axis the numerical values. A
    rank-based test across the categories is computed and its p-value is
    written in a text box at the top-left of the axes: two-sided
    Mann-Whitney U for exactly two categories, Kruskal-Wallis for three or
    more. With fewer than two categories no test can be run, so the plot is
    still produced and annotated to say so.

    Parameters:
    - categorical_x: Category label of each observation.
    - numerical_y: Numerical value of each observation.
    - title_x: Column name and x-axis label used for the categories.
    - title_y: Column name and y-axis label used for the values.
    - figsize: Size of the matplotlib figure as (width, height).
    - jittersize: Marker size of the jittered points.

    Returns:
    - The current matplotlib figure containing the plot.
    """
    df = pd.DataFrame({title_x: categorical_x, title_y: numerical_y})

    # Compute p-value.
    # Missing category labels are excluded: `df[title_x] == NaN` never matches,
    # so a NaN "group" would only contribute an empty sample to the test.
    groups = df[title_x].dropna().unique()
    group_data = [df[df[title_x] == group][title_y] for group in groups]

    if len(group_data) < 2:
        # kruskal/mannwhitneyu need at least two samples; keep the plot anyway
        annotation = 'p-value not computed (fewer than two groups)'
    else:
        if len(group_data) == 2:
            _, p = mannwhitneyu(group_data[0], group_data[1], alternative='two-sided')
            test_name = "Mann-Whitney U"
        else:
            _, p = kruskal(*group_data)
            test_name = "Kruskal-Wallis"
        annotation = f'{test_name} p = {p:.3e}'

    # Create a boxplot with jittered points
    plt.figure(figsize=figsize)
    sns.boxplot(x=title_x, y=title_y, hue=title_x, data=df, palette='Set2', legend=False, fill=False)
    sns.stripplot(x=title_x, y=title_y, data=df, color='black', size=jittersize, jitter=True, dodge=True, alpha=0.4)

    # Labels and p-value annotation (anchored at the upper y limit, left of the first box)
    plt.xlabel(title_x)
    plt.ylabel(title_y)
    plt.text(
        x=-0.4,
        y=plt.ylim()[1],
        s=annotation,
        verticalalignment='top',
        horizontalalignment='left',
        fontsize=12,
        bbox=dict(boxstyle='round,pad=0.3', facecolor='white', edgecolor='gray')
    )

    plt.tight_layout()
    return plt.gcf()
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
212 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
213 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
214 def generate_dimred_plots(embeddings, matched_labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
215 """Generate dimensionality reduction plots""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
216 |
5
466b593fd87e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
bgruening
parents:
3
diff
changeset
|
217 # Parse target values from comma-separated string |
466b593fd87e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
bgruening
parents:
3
diff
changeset
|
218 if args.target_value: |
466b593fd87e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
bgruening
parents:
3
diff
changeset
|
219 target_values = [val.strip() for val in args.target_value.split(',')] |
466b593fd87e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
bgruening
parents:
3
diff
changeset
|
220 else: |
466b593fd87e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
bgruening
parents:
3
diff
changeset
|
221 # If no target values specified, use all unique variables |
466b593fd87e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
bgruening
parents:
3
diff
changeset
|
222 target_values = matched_labels['variable'].unique().tolist() |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
223 |
5
466b593fd87e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
bgruening
parents:
3
diff
changeset
|
224 print(f"Generating {args.method.upper()} plots for {len(target_values)} target variable(s): {', '.join(target_values)}") |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
225 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
226 # Check variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
227 available_vars = matched_labels['variable'].unique() |
5
466b593fd87e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
bgruening
parents:
3
diff
changeset
|
228 missing_vars = [var for var in target_values if var not in available_vars] |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
229 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
230 if missing_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
231 print(f"Warning: The following target variables were not found in the data: {', '.join(missing_vars)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
232 print(f"Available variables: {', '.join(available_vars)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
233 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
234 # Filter to only process available variables |
5
466b593fd87e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
bgruening
parents:
3
diff
changeset
|
235 valid_vars = [var for var in target_values if var in available_vars] |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
236 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
237 if not valid_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
238 raise ValueError(f"None of the specified target variables were found in the data. Available: {', '.join(available_vars)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
239 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
240 # Generate plots for each valid target variable |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
241 for var in valid_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
242 print(f"\nPlotting variable: {var}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
243 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
244 # Filter matched labels for current variable |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
245 var_labels = matched_labels[matched_labels['variable'] == var].copy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
246 var_labels = var_labels.drop_duplicates(subset='sample_id') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
247 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
248 if var_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
249 print(f"Warning: No data found for variable '{var}', skipping...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
250 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
251 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
252 # Auto-detect color type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
253 known_color_type = detect_color_type(var_labels['known_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
254 predicted_color_type = detect_color_type(var_labels['predicted_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
255 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
256 print(f" Auto-detected color types - Known: {known_color_type}, Predicted: {predicted_color_type}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
257 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
258 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
259 # Plot 1: Known labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
260 print(f" Creating known labels plot for {var}...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
261 fig_known = plot_dim_reduced( |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
262 matrix=embeddings, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
263 labels=var_labels['known_label'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
264 method=args.method, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
265 color_type=known_color_type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
266 ) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
267 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
268 output_path_known = output_dir / f"{output_name_base}_{var}_known.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
269 print(f" Saving known labels plot to: {output_path_known.name}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
270 fig_known.save(output_path_known, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
271 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
272 # Plot 2: Predicted labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
273 print(f" Creating predicted labels plot for {var}...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
274 fig_predicted = plot_dim_reduced( |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
275 matrix=embeddings, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
276 labels=var_labels['predicted_label'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
277 method=args.method, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
278 color_type=predicted_color_type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
279 ) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
280 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
281 output_path_predicted = output_dir / f"{output_name_base}_{var}_predicted.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
282 print(f" Saving predicted labels plot to: {output_path_predicted.name}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
283 fig_predicted.save(output_path_predicted, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
284 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
285 print(f" ✓ Successfully created plots for variable '{var}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
286 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
287 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
288 print(f" ✗ Error creating plots for variable '{var}': {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
289 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
290 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
291 print(f"\nDimensionality reduction plots completed for {len(valid_vars)} variable(s)!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
292 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
293 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def generate_km_plots(survival_data, label_data, args, output_dir, output_name_base):
    """Plot Kaplan-Meier curves for low/high risk groups and save the figure.

    Samples are split into ``low_risk`` / ``high_risk`` at the median of the
    predicted probability of the event class, and the resulting figure is
    written to ``<output_dir>/<output_name_base>_km_risk_subtypes.<format>``.

    Args:
        survival_data: DataFrame whose index holds the sample identifiers and
            which contains the ``args.surv_time_var`` and
            ``args.surv_event_var`` columns.
        label_data: DataFrame with at least the columns ``sample_id``,
            ``variable``, ``class_label`` and ``probability``. It is only
            read; the caller's frame is left untouched.
        args: Namespace providing ``surv_event_var``, ``surv_time_var``,
            ``event_value``, ``format`` and ``dpi``.
        output_dir: Directory path object supporting the ``/`` operator.
        output_name_base: Prefix used for the output file name.

    Raises:
        ValueError: If a survival column is missing, or if no sample is left
            after joining the survival data with the filtered labels.
    """
    print("Generating Kaplan-Meier curves of risk subtypes...")

    # Move the index into a regular column and make sure it is called
    # 'sample_id' so it can serve as the merge key below.
    survival_data = survival_data.reset_index()
    index_column = survival_data.columns[0]
    if index_column != 'sample_id':
        survival_data = survival_data.rename(columns={index_column: 'sample_id'})

    # Validate both survival columns up front, before either one is used.
    for col in (args.surv_event_var, args.surv_time_var):
        if col not in survival_data.columns:
            raise ValueError(f"Column '{col}' not found in survival data")

    # Compare as strings so mixed-type event columns (e.g. 1 vs "1") match.
    event_value_str = str(args.event_value)
    survival_data[args.surv_event_var] = survival_data[args.surv_event_var].astype(str)

    # Binary event indicator: 1 where the value equals event_value, else 0.
    binary_column = f'{args.surv_event_var}_binary'
    survival_data[binary_column] = (
        survival_data[args.surv_event_var] == event_value_str
    ).astype(int)

    # Keep only the label rows for the survival variable and the event class.
    # The string conversion is done on a temporary Series, not assigned back,
    # so the caller's label_data keeps its original dtype.
    is_event_variable = label_data['variable'] == args.surv_event_var
    is_event_class = label_data['class_label'].astype(str) == event_value_str
    event_labels = label_data[is_event_variable & is_event_class]

    # Merge survival data with labels
    df_deceased = pd.merge(survival_data, event_labels, on='sample_id', how='inner')

    if df_deceased.empty:
        raise ValueError("No matching samples found after merging survival and label data.")

    # Predicted probability of the event class is used as the risk score.
    risk_scores = df_deceased['probability'].values

    # Median split: scores below the median fall into bin 0 (low risk),
    # scores at or above the median fall into bin 1 (high risk).
    quantiles = np.quantile(risk_scores, [0.5])
    groups = np.digitize(risk_scores, quantiles)
    group_labels = ['low_risk' if g == 0 else 'high_risk' for g in groups]

    fig_known = plot_kaplan_meier_curves(
        durations=df_deceased[args.surv_time_var],
        events=df_deceased[binary_column],
        categorical_variable=group_labels
    )

    output_path_known = output_dir / f"{output_name_base}_km_risk_subtypes.{args.format}"
    print(f"Saving Kaplan-Meier plot to: {output_path_known.absolute()}")
    fig_known.save(output_path_known, dpi=args.dpi, bbox_inches='tight')

    print("Kaplan-Meier plot saved successfully!")
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
352 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
353 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
354 def generate_cox_plots(model, clinical_train, clinical_test, omics_train, omics_test, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
355 """Generate Cox proportional hazards plots""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
356 print("Generating Cox proportional hazards analysis...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
357 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
358 # Parse clinical variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
359 clinical_vars = [var.strip() for var in args.clinical_variables.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
360 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
361 # Validate that survival variables are included |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
362 required_vars = [args.surv_time_var, args.surv_event_var] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
363 for var in required_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
364 if var not in clinical_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
365 clinical_vars.append(var) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
366 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
367 print(f"Using clinical variables: {', '.join(clinical_vars)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
368 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
369 # filter datasets for clinical variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
370 if all(var in clinical_train.columns and var in clinical_test.columns for var in clinical_vars): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
371 df_clin_train = clinical_train[clinical_vars] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
372 df_clin_test = clinical_test[clinical_vars] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
373 # Drop rows with NaN in clinical variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
374 df_clin_train = df_clin_train.dropna(subset=clinical_vars) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
375 df_clin_test = df_clin_test.dropna(subset=clinical_vars) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
376 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
377 raise ValueError(f"Not all clinical variables found in datasets. Available in train dataset: {clinical_train.columns.tolist()}, Available in test dataset: {clinical_test.columns.tolist()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
378 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
379 # Combine |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
380 df_clin = pd.concat([df_clin_train, df_clin_test], axis=0) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
381 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
382 # Get top survival markers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
383 print(f"Extracting top {args.top_features} important features for {args.surv_event_var}...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
384 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
385 imp = get_important_features(model, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
386 var=args.surv_event_var, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
387 top=args.top_features |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
388 )['name'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
389 print(f"Top features: {', '.join(imp)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
390 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
391 raise ValueError(f"Error getting important features: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
392 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
393 # Extract feature data from omics datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
394 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
395 omics_test = omics_test.loc[omics_test.index.isin(imp)] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
396 omics_train = omics_train.loc[omics_train.index.isin(imp)] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
397 # Drop rows with NaN in omics datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
398 omics_test = omics_test.dropna(subset=omics_test.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
399 omics_train = omics_train.dropna(subset=omics_train.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
400 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
401 df_imp = pd.concat([omics_train, omics_test], axis=1) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
402 df_imp = df_imp.T # Transpose to have samples as rows |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
403 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
404 print(f"Feature data shape: {df_imp.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
405 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
406 raise ValueError(f"Error extracting feature subset: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
407 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
408 # Combine markers with clinical variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
409 df = pd.merge(df_imp, df_clin, left_index=True, right_index=True) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
410 print(f"Combined data shape: {df.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
411 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
412 # Remove samples without survival endpoints |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
413 initial_samples = len(df) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
414 df = df[df[args.surv_event_var].notna()] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
415 final_samples = len(df) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
416 print(f"Removed {initial_samples - final_samples} samples without survival data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
417 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
418 if df.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
419 raise ValueError("No samples remain after filtering for survival data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
420 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
421 # Convert survival event column to binary (0/1) based on event_value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
422 # Convert to string for comparison to handle mixed types |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
423 df[args.surv_event_var] = df[args.surv_event_var].astype(str) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
424 event_value_str = str(args.event_value) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
425 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
426 df[f'{args.surv_event_var}'] = ( |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
427 df[args.surv_event_var] == event_value_str |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
428 ).astype(int) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
429 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
430 # Build Cox model |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
431 print(f"Building Cox model with time variable: {args.surv_time_var}, event variable: {args.surv_event_var}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
432 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
433 coxm = build_cox_model(df, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
434 duration_col=args.surv_time_var, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
435 event_col=args.surv_event_var, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
436 crossval=args.crossval, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
437 n_splits=args.n_splits, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
438 random_state=args.random_state) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
439 print("Cox model built successfully") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
440 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
441 raise ValueError(f"Error building Cox model: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
442 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
443 # Generate hazard ratios plot |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
444 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
445 print("Generating hazard ratios plot...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
446 fig = plot_hazard_ratios(coxm) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
447 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
448 output_path = output_dir / f"{output_name_base}_hazard_ratios.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
449 print(f"Saving hazard ratios plot to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
450 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
451 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
452 print("Cox proportional hazards analysis completed successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
453 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
454 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
455 raise ValueError(f"Error generating hazard ratios plot: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
456 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
457 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def generate_plot_scatter(labels, args, output_dir, output_name_base):
    """Generate scatter plots of known vs predicted labels, one per target.

    Args:
        labels: DataFrame with the columns 'variable', 'known_label' and
            'predicted_label'. Rows are grouped by the value of 'variable'.
        args: Namespace-like object. Reads ``target_value`` (comma-separated
            target names; falsy means "use every unique value of
            labels['variable']"), ``format`` (output file extension) and
            ``dpi`` (forwarded to the figure's ``save``).
        output_dir: Directory for the generated files. It is combined with
            the file name via ``/`` and ``.absolute()`` is called on the
            result, so presumably a ``pathlib.Path`` -- confirm with caller.
        output_name_base: File name stem. When more than one target is
            processed, ``_<target>`` is appended to keep the files apart.

    Raises:
        ValueError: If no scatter plot could be generated for any target.
    """
    print("Generating scatter plots of known vs predicted labels...")

    # Parse target values from comma-separated string
    if args.target_value:
        target_values = [val.strip() for val in args.target_value.split(',')]
    else:
        # If no target values specified, use all unique variables
        target_values = labels['variable'].unique().tolist()

    print(f"Processing target values: {target_values}")

    # The naming scheme depends on how many targets were requested, not on
    # how many of them end up being plotted.
    multiple_targets = len(target_values) > 1

    successful_plots = 0
    skipped_plots = 0

    for target_value in target_values:
        print(f"\nProcessing target value: '{target_value}'")

        # Filter labels for the current target value
        target_labels = labels[labels['variable'] == target_value]

        if target_labels.empty:
            print(f"  Warning: No data found for target value '{target_value}' - skipping")
            skipped_plots += 1
            continue

        # Coerce to numeric; anything non-numeric becomes NaN
        true_values = pd.to_numeric(target_labels['known_label'], errors='coerce')
        predicted_values = pd.to_numeric(target_labels['predicted_label'], errors='coerce')

        # A scatter plot needs at least some numeric data on both axes
        if true_values.isna().all() or predicted_values.isna().all():
            print(f"No valid numeric values found for known or predicted labels in '{target_value}'")
            skipped_plots += 1
            continue

        # Best effort per target: a failure for one target is reported and
        # counted as skipped so the remaining targets are still processed.
        try:
            print(f"  Generating scatter plot for '{target_value}'...")
            fig = plot_scatter(true_values, predicted_values)

            # Create output filename with target value. Targets taken from
            # labels['variable'] are not guaranteed to be strings (e.g.
            # integers parsed from a CSV), so coerce before sanitizing;
            # calling .replace() on a non-string would raise and the target
            # would be skipped for no good reason.
            safe_target_name = (
                str(target_value)
                .replace('/', '_')
                .replace('\\', '_')
                .replace(' ', '_')
            )
            if multiple_targets:
                output_filename = f"{output_name_base}_{safe_target_name}.{args.format}"
            else:
                output_filename = f"{output_name_base}.{args.format}"

            output_path = output_dir / output_filename
            print(f"  Saving scatter plot to: {output_path.absolute()}")
            fig.save(output_path, dpi=args.dpi, bbox_inches='tight')

            successful_plots += 1
            print(f"  Scatter plot for '{target_value}' generated successfully!")

        except Exception as e:
            print(f"  Error generating plot for '{target_value}': {str(e)}")
            skipped_plots += 1

    # Summary
    print("  Summary:")
    print(f"  Successfully generated: {successful_plots} plots")
    print(f"  Skipped: {skipped_plots} plots")

    if successful_plots == 0:
        raise ValueError("No scatter plots could be generated. Check your data and target values.")

    print("Scatter plot generation completed!")
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
525 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
526 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
527 def generate_label_concordance_heatmap(labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
528 """Generate label concordance heatmap""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
529 print("Generating label concordance heatmaps...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
530 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
531 # Parse target values from comma-separated string |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
532 if args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
533 target_values = [val.strip() for val in args.target_value.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
534 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
535 # If no target values specified, use all unique variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
536 target_values = labels['variable'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
537 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
538 print(f"Processing target values: {target_values}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
539 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
540 for target_value in target_values: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
541 print(f"\nProcessing target value: '{target_value}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
542 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
543 # Filter labels for the current target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
544 target_labels = labels[labels['variable'] == target_value] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
545 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
546 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
547 print(f" Warning: No data found for target value '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
548 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
549 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
550 true_values = target_labels['known_label'].tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
551 predicted_values = target_labels['predicted_label'].tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
552 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
553 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
554 print(f" Generating heatmap for '{target_value}'...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
555 fig = plot_label_concordance_heatmap(true_values, predicted_values) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
556 plt.close(fig) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
557 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
558 # Create output filename with target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
559 safe_target_name = target_value.replace('/', '_').replace('\\', '_').replace(' ', '_') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
560 if len(target_values) > 1: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
561 output_filename = f"{output_name_base}_{safe_target_name}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
562 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
563 output_filename = f"{output_name_base}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
564 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
565 output_path = output_dir / output_filename |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
566 print(f" Saving heatmap to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
567 fig.savefig(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
568 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
569 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
570 print(f" Error generating heatmap for '{target_value}': {str(e)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
571 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
572 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
573 print("Label concordance heatmap generated successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
574 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
575 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
576 def generate_pr_curves(labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
577 """Generate precision-recall curves""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
578 print("Generating precision-recall curves...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
579 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
580 # Parse target values from comma-separated string |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
581 if args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
582 target_values = [val.strip() for val in args.target_value.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
583 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
584 # If no target values specified, use all unique variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
585 target_values = labels['variable'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
586 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
587 print(f"Processing target values: {target_values}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
588 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
589 for target_value in target_values: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
590 print(f"\nProcessing target value: '{target_value}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
591 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
592 # Filter labels for the current target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
593 target_labels = labels[labels['variable'] == target_value] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
594 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
595 # Check if this is a regression problem (no class probabilities) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
596 prob_columns = target_labels['class_label'].unique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
597 non_na_probs = target_labels['probability'].notna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
598 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
599 print(f" Class labels found: {list(prob_columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
600 print(f" Non-NaN probabilities: {non_na_probs}/{len(target_labels)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
601 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
602 # If most probabilities are NaN, this is likely a regression problem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
603 if non_na_probs < len(target_labels) * 0.1: # Less than 10% valid probabilities |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
604 print(" Detected regression problem - precision-recall curves not applicable") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
605 print(f" Skipping '{target_value}' (use regression evaluation metrics instead)") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
606 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
607 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
608 # Debug: Check data quality |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
609 total_rows = len(target_labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
610 missing_labels = target_labels['known_label'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
611 missing_probs = target_labels['probability'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
612 unique_samples = target_labels['sample_id'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
613 unique_classes = target_labels['class_label'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
614 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
615 print(f" Data summary: {total_rows} total rows, {unique_samples} unique samples, {unique_classes} unique classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
616 print(f" Missing data: {missing_labels} missing known_label, {missing_probs} missing probability") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
617 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
618 if missing_labels > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
619 print(f" Warning: Found {missing_labels} missing known_label values") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
620 missing_samples = target_labels[target_labels['known_label'].isna()]['sample_id'].unique()[:5] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
621 print(f" Sample IDs with missing known_label: {list(missing_samples)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
622 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
623 # Remove rows with missing known_label |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
624 target_labels = target_labels.dropna(subset=['known_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
625 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
626 print(f" Error: No valid known_label data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
627 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
628 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
629 # 1. Pivot to wide format |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
630 prob_df = target_labels.pivot(index='sample_id', columns='class_label', values='probability') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
631 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
632 print(f" After pivot: {prob_df.shape[0]} samples x {prob_df.shape[1]} classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
633 print(f" Class columns: {list(prob_df.columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
634 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
635 # Check for NaN values in probability data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
636 nan_counts = prob_df.isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
637 if nan_counts.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
638 print(f" NaN counts per class: {dict(nan_counts)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
639 print(f" Samples with any NaN: {prob_df.isna().any(axis=1).sum()}/{len(prob_df)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
640 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
641 # Drop only rows where ALL probabilities are NaN |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
642 all_nan_rows = prob_df.isna().all(axis=1) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
643 if all_nan_rows.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
644 print(f" Dropping {all_nan_rows.sum()} samples with all NaN probabilities") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
645 prob_df = prob_df[~all_nan_rows] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
646 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
647 remaining_nans = prob_df.isna().sum().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
648 if remaining_nans > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
649 print(f" Warning: {remaining_nans} individual NaN values remain - filling with 0") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
650 prob_df = prob_df.fillna(0) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
651 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
652 if prob_df.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
653 print(f" Error: No valid probability data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
654 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
655 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
656 # 2. Get true labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
657 true_labels_df = target_labels.drop_duplicates('sample_id')[['sample_id', 'known_label']].set_index('sample_id') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
658 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
659 # 3. Align indices - only keep samples that exist in both datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
660 common_indices = prob_df.index.intersection(true_labels_df.index) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
661 if len(common_indices) == 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
662 print(f" Error: No common sample_ids between probability and true label data for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
663 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
664 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
665 print(f" Found {len(common_indices)} samples with both probability and true label data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
666 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
667 # Filter both datasets to common indices |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
668 prob_df_aligned = prob_df.loc[common_indices] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
669 y_true = true_labels_df.loc[common_indices]['known_label'] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
670 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
671 # 4. Final check for NaN values |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
672 if y_true.isna().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
673 print(f" Error: True labels still contain NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
674 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
675 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
676 if prob_df_aligned.isna().any().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
677 print(f" Error: Probability data still contains NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
678 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
679 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
680 # 5. Convert categorical labels to integer labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
681 # Create a mapping from class names to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
682 class_names = list(prob_df_aligned.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
683 class_to_int = {class_name: i for i, class_name in enumerate(class_names)} |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
684 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
685 print(f" Class mapping: {class_to_int}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
686 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
687 # Convert true labels to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
688 y_true_np = y_true.map(class_to_int).to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
689 y_probs_np = prob_df_aligned.to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
690 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
691 print(f" Data shape: y_true={y_true_np.shape}, y_probs={y_probs_np.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
692 print(f" Unique true labels (integers): {set(y_true_np)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
693 print(f" Class labels (columns): {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
694 print(f" Label distribution: {dict(zip(*np.unique(y_true_np, return_counts=True)))}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
695 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
696 # Check for any unmapped labels (will be NaN) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
697 if pd.isna(y_true_np).any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
698 print(" Error: Some true labels could not be mapped to class columns") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
699 unmapped_labels = set(y_true[y_true.map(class_to_int).isna()]) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
700 print(f" Unmapped labels: {unmapped_labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
701 print(f" Available classes: {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
702 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
703 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
704 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
705 print(f" Generating precision-recall curve for '{target_value}'...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
706 fig = plot_pr_curves(y_true_np, y_probs_np) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
707 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
708 # Create output filename with target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
709 safe_target_name = target_value.replace('/', '_').replace('\\', '_').replace(' ', '_') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
710 if len(target_values) > 1: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
711 output_filename = f"{output_name_base}_{safe_target_name}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
712 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
713 output_filename = f"{output_name_base}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
714 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
715 output_path = output_dir / output_filename |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
716 print(f" Saving precision-recall curve to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
717 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
718 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
719 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
720 print(f" Error generating precision-recall curve for '{target_value}': {str(e)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
721 print(f" Debug info - y_true type: {type(y_true_np)}, contains NaN: {pd.isna(y_true_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
722 print(f" Debug info - y_probs type: {type(y_probs_np)}, contains NaN: {pd.isna(y_probs_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
723 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
724 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
725 print("Precision-recall curves generated successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
726 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
727 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
728 def generate_roc_curves(labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
729 """Generate ROC curves""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
730 print("Generating ROC curves...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
731 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
732 # Parse target values from comma-separated string |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
733 if args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
734 target_values = [val.strip() for val in args.target_value.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
735 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
736 # If no target values specified, use all unique variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
737 target_values = labels['variable'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
738 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
739 print(f"Processing target values: {target_values}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
740 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
741 for target_value in target_values: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
742 print(f"\nProcessing target value: '{target_value}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
743 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
744 # Filter labels for the current target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
745 target_labels = labels[labels['variable'] == target_value] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
746 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
747 # Check if this is a regression problem (no class probabilities) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
748 prob_columns = target_labels['class_label'].unique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
749 non_na_probs = target_labels['probability'].notna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
750 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
751 print(f" Class labels found: {list(prob_columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
752 print(f" Non-NaN probabilities: {non_na_probs}/{len(target_labels)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
753 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
754 # If most probabilities are NaN, this is likely a regression problem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
755 if non_na_probs < len(target_labels) * 0.1: # Less than 10% valid probabilities |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
756 print(" Detected regression problem - ROC curves not applicable") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
757 print(f" Skipping '{target_value}' (use regression evaluation metrics instead)") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
758 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
759 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
760 # Debug: Check data quality |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
761 total_rows = len(target_labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
762 missing_labels = target_labels['known_label'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
763 missing_probs = target_labels['probability'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
764 unique_samples = target_labels['sample_id'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
765 unique_classes = target_labels['class_label'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
766 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
767 print(f" Data summary: {total_rows} total rows, {unique_samples} unique samples, {unique_classes} unique classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
768 print(f" Missing data: {missing_labels} missing known_label, {missing_probs} missing probability") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
769 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
770 if missing_labels > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
771 print(f" Warning: Found {missing_labels} missing known_label values") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
772 missing_samples = target_labels[target_labels['known_label'].isna()]['sample_id'].unique()[:5] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
773 print(f" Sample IDs with missing known_label: {list(missing_samples)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
774 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
775 # Remove rows with missing known_label |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
776 target_labels = target_labels.dropna(subset=['known_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
777 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
778 print(f" Error: No valid known_label data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
779 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
780 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
781 # 1. Pivot to wide format |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
782 prob_df = target_labels.pivot(index='sample_id', columns='class_label', values='probability') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
783 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
784 print(f" After pivot: {prob_df.shape[0]} samples x {prob_df.shape[1]} classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
785 print(f" Class columns: {list(prob_df.columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
786 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
787 # Check for NaN values in probability data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
788 nan_counts = prob_df.isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
789 if nan_counts.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
790 print(f" NaN counts per class: {dict(nan_counts)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
791 print(f" Samples with any NaN: {prob_df.isna().any(axis=1).sum()}/{len(prob_df)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
792 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
793 # Drop only rows where ALL probabilities are NaN |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
794 all_nan_rows = prob_df.isna().all(axis=1) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
795 if all_nan_rows.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
796 print(f" Dropping {all_nan_rows.sum()} samples with all NaN probabilities") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
797 prob_df = prob_df[~all_nan_rows] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
798 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
799 remaining_nans = prob_df.isna().sum().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
800 if remaining_nans > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
801 print(f" Warning: {remaining_nans} individual NaN values remain - filling with 0") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
802 prob_df = prob_df.fillna(0) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
803 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
804 if prob_df.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
805 print(f" Error: No valid probability data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
806 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
807 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
808 # 2. Get true labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
809 true_labels_df = target_labels.drop_duplicates('sample_id')[['sample_id', 'known_label']].set_index('sample_id') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
810 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
811 # 3. Align indices - only keep samples that exist in both datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
812 common_indices = prob_df.index.intersection(true_labels_df.index) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
813 if len(common_indices) == 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
814 print(f" Error: No common sample_ids between probability and true label data for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
815 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
816 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
817 print(f" Found {len(common_indices)} samples with both probability and true label data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
818 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
819 # Filter both datasets to common indices |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
820 prob_df_aligned = prob_df.loc[common_indices] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
821 y_true = true_labels_df.loc[common_indices]['known_label'] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
822 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
823 # 4. Final check for NaN values |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
824 if y_true.isna().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
825 print(f" Error: True labels still contain NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
826 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
827 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
828 if prob_df_aligned.isna().any().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
829 print(f" Error: Probability data still contains NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
830 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
831 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
832 # 5. Convert categorical labels to integer labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
833 # Create a mapping from class names to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
834 class_names = list(prob_df_aligned.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
835 class_to_int = {class_name: i for i, class_name in enumerate(class_names)} |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
836 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
837 print(f" Class mapping: {class_to_int}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
838 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
839 # Convert true labels to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
840 y_true_np = y_true.map(class_to_int).to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
841 y_probs_np = prob_df_aligned.to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
842 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
843 print(f" Data shape: y_true={y_true_np.shape}, y_probs={y_probs_np.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
844 print(f" Unique true labels (integers): {set(y_true_np)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
845 print(f" Class labels (columns): {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
846 print(f" Label distribution: {dict(zip(*np.unique(y_true_np, return_counts=True)))}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
847 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
848 # Check for any unmapped labels (will be NaN) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
849 if pd.isna(y_true_np).any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
850 print(" Error: Some true labels could not be mapped to class columns") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
851 unmapped_labels = set(y_true[y_true.map(class_to_int).isna()]) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
852 print(f" Unmapped labels: {unmapped_labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
853 print(f" Available classes: {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
854 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
855 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
856 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
857 print(f" Generating ROC curve for '{target_value}'...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
858 fig = plot_roc_curves(y_true_np, y_probs_np) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
859 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
860 # Create output filename with target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
861 safe_target_name = target_value.replace('/', '_').replace('\\', '_').replace(' ', '_') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
862 if len(target_values) > 1: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
863 output_filename = f"{output_name_base}_{safe_target_name}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
864 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
865 output_filename = f"{output_name_base}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
866 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
867 output_path = output_dir / output_filename |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
868 print(f" Saving ROC curve to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
869 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
870 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
871 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
872 print(f" Error generating ROC curve for '{target_value}': {str(e)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
873 print(f" Debug info - y_true type: {type(y_true_np)}, contains NaN: {pd.isna(y_true_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
874 print(f" Debug info - y_probs type: {type(y_probs_np)}, contains NaN: {pd.isna(y_probs_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
875 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
876 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
877 print("ROC curves generated successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
878 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
879 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
880 def generate_box_plots(labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
881 """Generate box plots for model predictions""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
882 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
883 print("Generating box plots...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
884 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
885 # Parse target values from comma-separated string |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
886 if args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
887 target_values = [val.strip() for val in args.target_value.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
888 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
889 # If no target values specified, use all unique variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
890 target_values = labels['variable'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
891 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
892 print(f"Processing target values: {target_values}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
893 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
894 for target_value in target_values: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
895 print(f"\nProcessing target value: '{target_value}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
896 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
897 # Filter labels for the current target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
898 target_labels = labels[labels['variable'] == target_value] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
899 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
900 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
901 print(f" Warning: No data found for target value '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
902 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
903 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
904 # Check if this is a classification problem (has probabilities) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
905 prob_columns = target_labels['class_label'].unique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
906 non_na_probs = target_labels['probability'].notna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
907 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
908 print(f" Class labels found: {list(prob_columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
909 print(f" Non-NaN probabilities: {non_na_probs}/{len(target_labels)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
910 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
911 # If most probabilities are NaN, this is likely a regression problem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
912 if non_na_probs < len(target_labels) * 0.1: # Less than 10% valid probabilities |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
913 print(" Detected regression problem - precision-recall curves not applicable") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
914 print(f" Skipping '{target_value}' (use regression evaluation metrics instead)") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
915 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
916 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
917 # Debug: Check data quality |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
918 total_rows = len(target_labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
919 missing_labels = target_labels['known_label'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
920 missing_probs = target_labels['probability'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
921 unique_samples = target_labels['sample_id'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
922 unique_classes = target_labels['class_label'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
923 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
924 print(f" Data summary: {total_rows} total rows, {unique_samples} unique samples, {unique_classes} unique classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
925 print(f" Missing data: {missing_labels} missing known_label, {missing_probs} missing probability") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
926 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
927 if missing_labels > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
928 print(f" Warning: Found {missing_labels} missing known_label values") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
929 missing_samples = target_labels[target_labels['known_label'].isna()]['sample_id'].unique()[:5] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
930 print(f" Sample IDs with missing known_label: {list(missing_samples)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
931 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
932 # Remove rows with missing known_label |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
933 target_labels = target_labels.dropna(subset=['known_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
934 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
935 print(f" Error: No valid known_label data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
936 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
937 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
938 # Remove rows with missing data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
939 clean_data = target_labels.dropna(subset=['known_label', 'probability']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
940 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
941 if clean_data.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
942 print(" No valid data after cleaning - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
943 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
944 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
945 # Get unique classes |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
946 classes = clean_data['class_label'].unique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
947 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
948 for class_label in classes: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
949 print(f" Generating box plot for class: {class_label}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
950 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
951 # Filter for current class |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
952 class_data = clean_data[clean_data['class_label'] == class_label] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
953 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
954 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
955 # Create the box plot |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
956 fig = plot_boxplot( |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
957 categorical_x=class_data['known_label'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
958 numerical_y=class_data['probability'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
959 title_x='True Label', |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
960 title_y=f'Predicted Probability ({class_label})', |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
961 ) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
962 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
963 # Save the plot |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
964 safe_class_name = str(class_label).replace('/', '_').replace('\\', '_').replace(' ', '_').replace(':', '_') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
965 safe_target_name = target_value.replace('/', '_').replace('\\', '_').replace(' ', '_') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
966 output_filename = f"{output_name_base}_{safe_target_name}_{safe_class_name}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
967 output_path = output_dir / output_filename |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
968 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
969 print(f" Saving box plot to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
970 fig.savefig(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
971 plt.close(fig) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
972 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
973 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
974 print(f" Error generating box plot for class '{class_label}': {str(e)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
975 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
976 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
977 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
978 def main(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
979 """Main function to parse arguments and generate plots""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
980 parser = argparse.ArgumentParser(description="Generate plots using flexynesis") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
981 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
982 # Labels file (not required by argparse; validated per plot type below) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
983 parser.add_argument("--labels", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
984 help="Path to labels file generated by flexynesis") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
985 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
986 # Plot type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
987 parser.add_argument("--plot_type", type=str, required=True, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
988 choices=['dimred', 'kaplan_meier', 'cox', 'scatter', 'concordance_heatmap', 'pr_curve', 'roc_curve', 'box_plot'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
989 help="Type of plot to generate: 'dimred' for dimensionality reduction, 'kaplan_meier' for survival analysis, 'cox' for Cox proportional hazards analysis, 'scatter' for scatter plots, 'concordance_heatmap' for label concordance heatmaps, 'pr_curve' for precision-recall curves, 'roc_curve' for ROC curves, or 'box_plot' for box plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
990 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
991 # Arguments for dimensionality reduction |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
992 parser.add_argument("--embeddings", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
993 help="Path to input data embeddings file (CSV or tabular format). Required for dimred plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
994 parser.add_argument("--method", type=str, default='pca', choices=['pca', 'umap'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
995 help="Transformation method ('pca' or 'umap'). Default is 'pca'. Used for dimred plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
996 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
997 # Arguments for Kaplan-Meier |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
998 parser.add_argument("--survival_data", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
999 help="Path to survival data file with columns: duration and event. Required for kaplan_meier plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1000 parser.add_argument("--surv_time_var", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1001 help="Column name for survival time") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1002 parser.add_argument("--surv_event_var", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1003 help="Column name for survival event") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1004 parser.add_argument("--event_value", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1005 help="Value in event column that represents an event (e.g., 'DECEASED')") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1006 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1007 # Arguments for Cox analysis |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1008 parser.add_argument("--model", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1009 help="Path to trained flexynesis model (pickle file). Required for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1010 parser.add_argument("--clinical_train", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1011 help="Path to training dataset (pickle file). Required for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1012 parser.add_argument("--clinical_test", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1013 help="Path to test dataset (pickle file). Required for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1014 parser.add_argument("--omics_train", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1015 help="Path to training omics dataset. Optional for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1016 parser.add_argument("--omics_test", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1017 help="Path to test omics dataset. Optional for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1018 parser.add_argument("--clinical_variables", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1019 help="Comma-separated list of clinical variables to include in Cox model (e.g., 'AGE,SEX,HISTOLOGICAL_DIAGNOSIS,STUDY')") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1020 parser.add_argument("--top_features", type=int, default=20, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1021 help="Number of top important features to include in Cox model. Default is 5") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1022 parser.add_argument("--crossval", action='store_true', |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1023 help="If True, performs K-fold cross-validation and returns average C-index. Default is False") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1024 parser.add_argument("--n_splits", type=int, default=5, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1025 help="Number of folds for cross-validation. Default is 5") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1026 parser.add_argument("--random_state", type=int, default=42, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1027 help="Random seed for reproducibility. Default is 42") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1028 |
5
466b593fd87e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
bgruening
parents:
3
diff
changeset
|
1029 # Arguments for dimred, scatter plot, heatmap, PR curves, ROC curves, and box plots |
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1030 parser.add_argument("--target_value", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1031 help="Target value for scatter plot.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1032 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1033 # Common arguments |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1034 parser.add_argument("--output_dir", type=str, default='output', |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1035 help="Output directory. Default is 'output'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1036 parser.add_argument("--output_name", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1037 help="Output filename base") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1038 parser.add_argument("--format", type=str, default='jpg', choices=['png', 'pdf', 'svg', 'jpg'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1039 help="Output format for the plot. Default is 'jpg'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1040 parser.add_argument("--dpi", type=int, default=300, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1041 help="DPI for the output image. Default is 300") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1042 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1043 args = parser.parse_args() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1044 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1045 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1046 # validate plot type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1047 if not args.plot_type: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1048 raise ValueError("Please specify a plot type using --plot_type") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1049 if args.plot_type not in ['dimred', 'kaplan_meier', 'cox', 'scatter', 'concordance_heatmap', 'pr_curve', 'roc_curve', 'box_plot']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1050 raise ValueError(f"Invalid plot type: {args.plot_type}. Must be one of: 'dimred', 'kaplan_meier', 'cox', 'scatter', 'concordance_heatmap', 'pr_curve', 'roc_curve', 'box_plot'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1051 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1052 # Validate plot type requirements |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1053 if args.plot_type in ['dimred']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1054 if not args.embeddings: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1055 raise ValueError("--embeddings is required when plot_type is 'dimred'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1056 if not os.path.isfile(args.embeddings): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1057 raise FileNotFoundError(f"embeddings file not found: {args.embeddings}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1058 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1059 raise ValueError("--labels is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1060 if not args.method: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1061 raise ValueError("--method is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1062 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1063 if args.plot_type in ['kaplan_meier']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1064 if not args.survival_data: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1065 raise ValueError("--survival_data is required when plot_type is 'kaplan_meier'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1066 if not os.path.isfile(args.survival_data): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1067 raise FileNotFoundError(f"Survival data file not found: {args.survival_data}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1068 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1069 raise ValueError("--labels is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1070 if not args.method: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1071 raise ValueError("--method is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1072 if not args.surv_time_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1073 raise ValueError("--surv_time_var is required for Kaplan-Meier plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1074 if not args.surv_event_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1075 raise ValueError("--surv_event_var is required for Kaplan-Meier plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1076 if not args.event_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1077 raise ValueError("--event_value is required for Kaplan-Meier plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1078 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1079 if args.plot_type in ['cox']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1080 if not args.model: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1081 raise ValueError("--model is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1082 if not os.path.isfile(args.model): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1083 raise FileNotFoundError(f"Model file not found: {args.model}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1084 if not args.clinical_train: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1085 raise ValueError("--clinical_train is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1086 if not os.path.isfile(args.clinical_train): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1087 raise FileNotFoundError(f"Training dataset file not found: {args.clinical_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1088 if not args.clinical_test: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1089 raise ValueError("--clinical_test is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1090 if not os.path.isfile(args.clinical_test): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1091 raise FileNotFoundError(f"Test dataset file not found: {args.clinical_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1092 if not args.omics_train: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1093 raise ValueError("--omics_train is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1094 if not os.path.isfile(args.omics_train): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1095 raise FileNotFoundError(f"Training omics dataset file not found: {args.omics_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1096 if not args.omics_test: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1097 raise ValueError("--omics_test is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1098 if not os.path.isfile(args.omics_test): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1099 raise FileNotFoundError(f"Test omics dataset file not found: {args.omics_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1100 if not args.surv_time_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1101 raise ValueError("--surv_time_var is required for Cox plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1102 if not args.surv_event_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1103 raise ValueError("--surv_event_var is required for Cox plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1104 if not args.clinical_variables: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1105 raise ValueError("--clinical_variables is required for Cox plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1106 if not isinstance(args.top_features, int) or args.top_features <= 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1107 raise ValueError("--top_features must be a positive integer") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1108 if not args.event_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1109 raise ValueError("--event_value is required for Kaplan-Meier plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1110 if not args.crossval: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1111 args.crossval = False |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1112 if not isinstance(args.n_splits, int) or args.n_splits <= 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1113 raise ValueError("--n_splits must be a positive integer") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1114 if not isinstance(args.random_state, int): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1115 raise ValueError("--random_state must be an integer") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1116 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1117 if args.plot_type in ['scatter']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1118 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1119 raise ValueError("--labels is required for scatter plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1120 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1121 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1122 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1123 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1124 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1125 if args.plot_type in ['concordance_heatmap']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1126 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1127 raise ValueError("--labels is required for concordance heatmap") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1128 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1129 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1130 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1131 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1132 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1133 if args.plot_type in ['pr_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1134 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1135 raise ValueError("--labels is required for precision-recall curves") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1136 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1137 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1138 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1139 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1140 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1141 if args.plot_type in ['roc_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1142 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1143 raise ValueError("--labels is required for ROC curves") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1144 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1145 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1146 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1147 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1148 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1149 if args.plot_type in ['box_plot']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1150 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1151 raise ValueError("--labels is required for box plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1152 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1153 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1154 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1155 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1156 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1157 # Validate other arguments |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1158 if args.method not in ['pca', 'umap']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1159 raise ValueError("Method must be 'pca' or 'umap'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1160 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1161 # Create output directory |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1162 output_dir = Path(args.output_dir) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1163 output_dir.mkdir(parents=True, exist_ok=True) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1164 print(f"Output directory: {output_dir.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1165 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1166 # Generate output filename base |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1167 if args.output_name: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1168 output_name_base = args.output_name |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1169 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1170 if args.plot_type == 'dimred': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1171 embeddings_name = Path(args.embeddings).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1172 output_name_base = f"{embeddings_name}_{args.method}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1173 elif args.plot_type == 'kaplan_meier': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1174 survival_name = Path(args.survival_data).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1175 output_name_base = f"{survival_name}_km" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1176 elif args.plot_type == 'cox': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1177 model_name = Path(args.model).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1178 output_name_base = f"{model_name}_cox" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1179 elif args.plot_type == 'scatter': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1180 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1181 output_name_base = f"{labels_name}_scatter" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1182 elif args.plot_type == 'concordance_heatmap': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1183 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1184 output_name_base = f"{labels_name}_concordance" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1185 elif args.plot_type == 'pr_curve': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1186 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1187 output_name_base = f"{labels_name}_pr_curves" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1188 elif args.plot_type == 'roc_curve': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1189 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1190 output_name_base = f"{labels_name}_roc_curves" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1191 elif args.plot_type == 'box_plot': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1192 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1193 output_name_base = f"{labels_name}_box_plot" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1194 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1195 # Generate plots based on type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1196 if args.plot_type in ['dimred']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1197 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1198 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1199 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1200 # Load embeddings data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1201 print(f"Loading embeddings from: {args.embeddings}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1202 embeddings, sample_names = load_embeddings(args.embeddings) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1203 print(f"embeddings shape: {embeddings.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1204 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1205 # Match samples to embeddings |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1206 matched_labels = match_samples_to_embeddings(sample_names, label_data) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1207 print(f"Successfully matched {len(matched_labels)} samples for dimensionality reduction") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1208 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1209 generate_dimred_plots(embeddings, matched_labels, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1210 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1211 elif args.plot_type in ['kaplan_meier']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1212 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1213 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1214 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1215 # Load survival data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1216 print(f"Loading survival data from: {args.survival_data}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1217 survival_data = load_survival_data(args.survival_data) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1218 print(f"Survival data shape: {survival_data.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1219 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1220 generate_km_plots(survival_data, label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1221 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1222 elif args.plot_type in ['cox']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1223 # Load model and datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1224 print(f"Loading model from: {args.model}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1225 model = load_model(args.model) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1226 print(f"Loading training dataset from: {args.clinical_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1227 clinical_train = load_omics(args.clinical_train) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1228 print(f"Loading test dataset from: {args.clinical_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1229 clinical_test = load_omics(args.clinical_test) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1230 print(f"Loading training omics dataset from: {args.omics_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1231 omics_train = load_omics(args.omics_train) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1232 print(f"Loading test omics dataset from: {args.omics_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1233 omics_test = load_omics(args.omics_test) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1234 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1235 generate_cox_plots(model, clinical_train, clinical_test, omics_test, omics_train, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1236 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1237 elif args.plot_type in ['scatter']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1238 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1239 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1240 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1241 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1242 generate_plot_scatter(label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1243 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1244 elif args.plot_type in ['concordance_heatmap']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1245 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1246 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1247 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1248 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1249 generate_label_concordance_heatmap(label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1250 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1251 elif args.plot_type in ['pr_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1252 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1253 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1254 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1255 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1256 generate_pr_curves(label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1257 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1258 elif args.plot_type in ['roc_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1259 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1260 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1261 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1262 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1263 generate_roc_curves(label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1264 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1265 elif args.plot_type in ['box_plot']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1266 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1267 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1268 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1269 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1270 generate_box_plots(label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1271 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1272 print("All plots generated successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1273 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1274 except (FileNotFoundError, ValueError, pd.errors.ParserError) as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1275 print(f"Error: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1276 return 1 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1277 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1278 return 0 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1279 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1280 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
# Script entry point: run main() and use its integer return value (0 on
# success, 1 on a handled error) as the process exit status.
if __name__ == "__main__":
    # Raise SystemExit directly rather than calling the `exit` helper: that
    # name is injected by the `site` module for interactive sessions and is
    # not defined under `python -S` or in frozen/embedded interpreters.
    raise SystemExit(main())