Mercurial > repos > bgruening > flexynesis
annotate flexynesis_plot.py @ 3:525c661a7fdc draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
author | bgruening |
---|---|
date | Tue, 24 Jun 2025 05:55:40 +0000 |
parents | |
children |
rev | line source |
---|---|
3
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
2 """Generate plots using flexynesis |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
3 This script generates dimensionality reduction plots, Kaplan-Meier survival curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
4 and Cox proportional hazards models from data processed by flexynesis.""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
5 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
6 import argparse |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
7 import os |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
8 from pathlib import Path |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
9 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
10 import matplotlib.pyplot as plt |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
11 import numpy as np |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
12 import pandas as pd |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
13 import seaborn as sns |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
14 import torch |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
15 from flexynesis import ( |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
16 build_cox_model, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
17 get_important_features, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
18 plot_dim_reduced, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
19 plot_hazard_ratios, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
20 plot_kaplan_meier_curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
21 plot_pr_curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
22 plot_roc_curves, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
23 plot_scatter |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
24 ) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
25 from scipy.stats import kruskal, mannwhitneyu |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
26 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
27 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_embeddings(embeddings_path):
    """Read an embeddings table and return it together with its row labels.

    The parser is selected from the file extension: ``.csv`` is read as
    comma-separated, ``.tsv``/``.txt``/``.tab``/``.tabular`` as tab-separated.
    The first column of the file becomes the DataFrame index.

    Args:
        embeddings_path: Path to the embeddings file.

    Returns:
        A ``(dataframe, index_labels)`` tuple, where ``index_labels`` is the
        DataFrame index converted to a plain list.

    Raises:
        ValueError: If the extension is not supported or reading fails; the
            underlying exception is chained as the cause.
    """
    tab_suffixes = ('.tsv', '.txt', '.tab', '.tabular')
    try:
        suffix = Path(embeddings_path).suffix.lower()

        # Translate the extension into the extra keyword arguments for pandas.
        if suffix == '.csv':
            reader_options = {}
        elif suffix in tab_suffixes:
            reader_options = {'sep': '\t'}
        else:
            raise ValueError(f"Unsupported file extension: {suffix}")

        embeddings = pd.read_csv(embeddings_path, index_col=0, **reader_options)
        return embeddings, embeddings.index.tolist()

    except Exception as err:
        raise ValueError(f"Error loading embeddings from {embeddings_path}: {err}") from err
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
45 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
46 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_labels(labels_input):
    """Load the predicted-labels table produced by flexynesis.

    The parser is selected from the file extension: ``.csv`` is read as
    comma-separated, ``.tsv``/``.txt``/``.tab``/``.tabular`` as tab-separated.
    No column is used as index.

    Args:
        labels_input: Path to the labels file.

    Returns:
        The parsed DataFrame. It contains at least the columns ``sample_id``,
        ``variable``, ``class_label``, ``probability``, ``known_label`` and
        ``predicted_label``.

    Raises:
        ValueError: If the extension is not supported, a required column is
            missing, or reading fails; the underlying exception is chained
            as the cause.
    """
    try:
        # Determine file extension
        file_ext = Path(labels_input).suffix.lower()

        if file_ext == '.csv':
            df = pd.read_csv(labels_input)
        elif file_ext in ['.tsv', '.txt', '.tab', '.tabular']:
            df = pd.read_csv(labels_input, sep='\t')
        else:
            # Without this branch `df` would stay unbound and the caller would
            # see an opaque "cannot access local variable" message instead of
            # the real cause. Same wording as the other loaders in this file.
            raise ValueError(f"Unsupported file extension: {file_ext}")

        # Only the long-format flexynesis prediction table is accepted.
        required_cols = ['sample_id', 'variable', 'class_label', 'probability', 'known_label', 'predicted_label']
        if all(col in df.columns for col in required_cols):
            return df
        raise ValueError(f"Labels file {labels_input} does not contain required columns: {required_cols}")

    except Exception as e:
        raise ValueError(f"Error loading labels from {labels_input}: {e}") from e
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
67 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
68 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_survival_data(survival_path):
    """Read a survival table; the first column of the file becomes the index.

    The first column should hold the sample ids. ``.csv`` files are read as
    comma-separated, ``.tsv``/``.txt``/``.tab``/``.tabular`` files as
    tab-separated.

    Args:
        survival_path: Path to the survival data file.

    Returns:
        The parsed DataFrame.

    Raises:
        ValueError: If the extension is not supported or reading fails; the
            underlying exception is chained as the cause.
    """
    try:
        extension = Path(survival_path).suffix.lower()
        comma_separated = extension == '.csv'
        tab_separated = extension in ('.tsv', '.txt', '.tab', '.tabular')

        # Reject unknown formats before touching the file.
        if not (comma_separated or tab_separated):
            raise ValueError(f"Unsupported file extension: {extension}")

        if comma_separated:
            return pd.read_csv(survival_path, index_col=0)
        return pd.read_csv(survival_path, sep='\t', index_col=0)

    except Exception as err:
        raise ValueError(f"Error loading survival data from {survival_path}: {err}") from err
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
85 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
86 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_omics(omics_path):
    """Read an omics matrix; the first column of the file becomes the index.

    The first column should hold the feature names. ``.csv`` files are read
    as comma-separated, ``.tsv``/``.txt``/``.tab``/``.tabular`` files as
    tab-separated.

    Args:
        omics_path: Path to the omics data file.

    Returns:
        The parsed DataFrame.

    Raises:
        ValueError: If the extension is not supported or reading fails; the
            underlying exception is chained as the cause.
    """
    # Field separator for every accepted extension.
    separator_by_suffix = {
        '.csv': ',',
        '.tsv': '\t',
        '.txt': '\t',
        '.tab': '\t',
        '.tabular': '\t',
    }
    try:
        suffix = Path(omics_path).suffix.lower()
        separator = separator_by_suffix.get(suffix)
        if separator is None:
            raise ValueError(f"Unsupported file extension: {suffix}")

        omics = pd.read_csv(omics_path, sep=separator, index_col=0)
        return omics

    except Exception as err:
        raise ValueError(f"Error loading omics data from {omics_path}: {err}") from err
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
103 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
104 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def load_model(model_path):
    """Deserialize a flexynesis model that was saved as a pickle file.

    Args:
        model_path: Path to the serialized model.

    Returns:
        Whatever object ``torch.load`` reconstructs from the file.

    Raises:
        ValueError: If the file cannot be opened or deserialized; the
            underlying exception is chained as the cause.
    """
    try:
        with open(model_path, 'rb') as handle:
            # NOTE(security): weights_only=False performs full unpickling, which
            # can execute arbitrary code. Only load model files from a trusted
            # source.
            return torch.load(handle, weights_only=False)
    except Exception as err:
        raise ValueError(f"Error loading model from {model_path}: {err}") from err
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
113 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
114 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def match_samples_to_embeddings(sample_names, label_data):
    """Keep only the label rows whose ``sample_id`` is among ``sample_names``.

    Args:
        sample_names: Collection of sample identifiers to keep.
        label_data: DataFrame with a ``sample_id`` column.

    Returns:
        The subset of ``label_data`` rows with a matching ``sample_id``, in
        their original order and with their original index labels.
    """
    in_embeddings = label_data['sample_id'].isin(sample_names)
    return label_data.loc[in_embeddings]
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
119 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
120 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
121 def detect_color_type(labels_series): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
122 """Auto-detect whether target variables should be treated as categorical or numerical""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
123 # Remove NaN |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
124 clean_labels = labels_series.dropna() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
125 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
126 if clean_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
127 return 'categorical' # default output if no labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
128 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
129 # Check if all values can be converted to numbers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
130 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
131 numeric_labels = pd.to_numeric(clean_labels, errors='coerce') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
132 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
133 # If conversion failed -> categorical |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
134 if numeric_labels.isna().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
135 return 'categorical' |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
136 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
137 # Check number of unique values |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
138 unique_count = len(clean_labels.unique()) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
139 total_count = len(clean_labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
140 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
141 # If few unique values relative to total -> categorical |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
142 # Threshold: if unique values < 10 OR unique/total < 0.1 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
143 if unique_count < 10 or (unique_count / total_count) < 0.1: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
144 return 'categorical' |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
145 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
146 return 'numerical' |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
147 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
148 except Exception: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
149 return 'categorical' |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
150 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
151 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def plot_label_concordance_heatmap(labels1, labels2, figsize=(12, 10)):
    """
    Draw a heatmap showing how two label assignments agree with each other.

    The two label sets are cross-tabulated with pandas; every row of the
    resulting count table is then divided by its own row total, so each cell
    holds the fraction of a 'Labels Set 1' group that falls into the
    corresponding 'Labels Set 2' group.

    Parameters:
    - labels1: The first set of labels (rows of the heatmap).
    - labels2: The second set of labels (columns of the heatmap).
    - figsize: Figure size passed to matplotlib.

    Returns:
    - The current matplotlib figure containing the heatmap.
    """
    # Count co-occurrences of the two labelings
    first = pd.Series(labels1, name='Labels Set 1')
    second = pd.Series(labels2, name='Labels Set 2')
    counts = pd.crosstab(first, second)

    # Row-wise normalization: each row is divided by its row sum
    row_totals = counts.sum(axis=1)
    fractions = counts.div(row_totals, axis=0)

    # Render the normalized table
    plt.figure(figsize=figsize)
    sns.heatmap(fractions, annot=True, cmap='viridis', linewidths=.5)
    plt.title('Concordance between label groups')

    return plt.gcf()
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
171 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
172 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def plot_boxplot(categorical_x, numerical_y, title_x='Categories', title_y='Values', figsize=(10, 6), jittersize=4):
    """
    Create a boxplot with jittered points showing a numerical variable per category.

    The x axis shows the categories and the y axis the numerical values. When
    at least two categories have data, a p-value is annotated in the top-left
    corner: Mann-Whitney U (two-sided) for exactly two groups, Kruskal-Wallis
    for more than two. The test is computed on rows where both the category
    and the value are present. If the test cannot be computed (fewer than two
    groups, or scipy rejects the input), a warning is printed and the plot is
    returned without the annotation instead of raising.

    Parameters:
    - categorical_x: Category of each observation.
    - numerical_y: Numerical value of each observation.
    - title_x: x-axis label; also used as the category column name.
    - title_y: y-axis label; also used as the value column name.
    - figsize: Figure size passed to matplotlib.
    - jittersize: Marker size of the jittered points.

    Returns:
    - The current matplotlib figure.
    """
    df = pd.DataFrame({title_x: categorical_x, title_y: numerical_y})

    # Compute p-value on complete observations only: a missing category never
    # matches any row (it would form an empty group) and a missing value
    # would propagate to a NaN p-value.
    complete = df.dropna(subset=[title_x, title_y])
    group_data = [
        values
        for _, values in complete.groupby(title_x, sort=False, observed=True)[title_y]
    ]

    test_name = None
    p = None
    try:
        if len(group_data) == 2:
            _, p = mannwhitneyu(group_data[0], group_data[1], alternative='two-sided')
            test_name = "Mann-Whitney U"
        elif len(group_data) > 2:
            _, p = kruskal(*group_data)
            test_name = "Kruskal-Wallis"
        else:
            # Both tests need at least two groups; still draw the plot.
            print(f"Warning: fewer than two groups found for '{title_x}', skipping statistical test")
    except ValueError as err:
        # scipy refuses some inputs; the plot itself is still valid.
        print(f"Warning: could not compute p-value for '{title_x}': {err}")

    # Create a boxplot with jittered points
    plt.figure(figsize=figsize)
    sns.boxplot(x=title_x, y=title_y, hue=title_x, data=df, palette='Set2', legend=False, fill=False)
    sns.stripplot(x=title_x, y=title_y, data=df, color='black', size=jittersize, jitter=True, dodge=True, alpha=0.4)

    # Labels and p-value annotation
    plt.xlabel(title_x)
    plt.ylabel(title_y)
    if test_name is not None:
        plt.text(
            x=-0.4,
            y=plt.ylim()[1],
            s=f'{test_name} p = {p:.3e}',
            verticalalignment='top',
            horizontalalignment='left',
            fontsize=12,
            bbox=dict(boxstyle='round,pad=0.3', facecolor='white', edgecolor='gray')
        )

    plt.tight_layout()
    return plt.gcf()
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
212 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
213 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
214 def generate_dimred_plots(embeddings, matched_labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
215 """Generate dimensionality reduction plots""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
216 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
217 # Parse target variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
218 target_vars = [var.strip() for var in args.target_variables.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
219 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
220 print(f"Generating {args.method.upper()} plots for {len(target_vars)} target variable(s): {', '.join(target_vars)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
221 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
222 # Check variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
223 available_vars = matched_labels['variable'].unique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
224 missing_vars = [var for var in target_vars if var not in available_vars] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
225 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
226 if missing_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
227 print(f"Warning: The following target variables were not found in the data: {', '.join(missing_vars)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
228 print(f"Available variables: {', '.join(available_vars)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
229 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
230 # Filter to only process available variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
231 valid_vars = [var for var in target_vars if var in available_vars] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
232 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
233 if not valid_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
234 raise ValueError(f"None of the specified target variables were found in the data. Available: {', '.join(available_vars)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
235 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
236 # Generate plots for each valid target variable |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
237 for var in valid_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
238 print(f"\nPlotting variable: {var}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
239 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
240 # Filter matched labels for current variable |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
241 var_labels = matched_labels[matched_labels['variable'] == var].copy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
242 var_labels = var_labels.drop_duplicates(subset='sample_id') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
243 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
244 if var_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
245 print(f"Warning: No data found for variable '{var}', skipping...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
246 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
247 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
248 # Auto-detect color type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
249 known_color_type = detect_color_type(var_labels['known_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
250 predicted_color_type = detect_color_type(var_labels['predicted_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
251 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
252 print(f" Auto-detected color types - Known: {known_color_type}, Predicted: {predicted_color_type}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
253 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
254 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
255 # Plot 1: Known labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
256 print(f" Creating known labels plot for {var}...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
257 fig_known = plot_dim_reduced( |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
258 matrix=embeddings, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
259 labels=var_labels['known_label'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
260 method=args.method, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
261 color_type=known_color_type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
262 ) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
263 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
264 output_path_known = output_dir / f"{output_name_base}_{var}_known.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
265 print(f" Saving known labels plot to: {output_path_known.name}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
266 fig_known.save(output_path_known, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
267 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
268 # Plot 2: Predicted labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
269 print(f" Creating predicted labels plot for {var}...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
270 fig_predicted = plot_dim_reduced( |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
271 matrix=embeddings, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
272 labels=var_labels['predicted_label'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
273 method=args.method, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
274 color_type=predicted_color_type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
275 ) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
276 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
277 output_path_predicted = output_dir / f"{output_name_base}_{var}_predicted.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
278 print(f" Saving predicted labels plot to: {output_path_predicted.name}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
279 fig_predicted.save(output_path_predicted, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
280 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
281 print(f" ✓ Successfully created plots for variable '{var}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
282 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
283 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
284 print(f" ✗ Error creating plots for variable '{var}': {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
285 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
286 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
287 print(f"\nDimensionality reduction plots completed for {len(valid_vars)} variable(s)!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
288 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
289 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def generate_km_plots(survival_data, label_data, args, output_dir, output_name_base):
    """Generate a Kaplan-Meier plot comparing low-risk and high-risk samples.

    Samples are split into two groups at the median of the ``probability``
    column of ``label_data`` (restricted to rows describing the event class),
    and the survival curves of both groups are drawn with
    ``plot_kaplan_meier_curves`` and written to ``output_dir``.

    Args:
        survival_data: DataFrame whose index holds the sample identifiers and
            whose columns include ``args.surv_time_var`` and
            ``args.surv_event_var``. Not modified; a re-indexed copy is used.
        label_data: DataFrame with the columns ``sample_id``, ``variable``,
            ``class_label`` and ``probability``. Not modified.
        args: Namespace providing ``surv_event_var``, ``surv_time_var``,
            ``event_value``, ``format`` and ``dpi``.
        output_dir: Directory for the plot; must support the ``/`` operator
            (presumably a ``pathlib.Path`` -- confirm against the caller).
        output_name_base: File-name prefix of the generated plot.

    Raises:
        ValueError: If a survival column is missing from ``survival_data`` or
            if no sample is left after merging survival and label data.
    """
    print("Generating Kaplan-Meier curves of risk subtypes...")

    # Turn the index into a regular column and make sure it is called
    # 'sample_id', because that is the key used for the merge below.
    survival_data = survival_data.reset_index()
    first_column = survival_data.columns[0]
    if first_column != 'sample_id':
        survival_data = survival_data.rename(columns={first_column: 'sample_id'})

    # Validate both survival columns up front, before any processing.
    for col in (args.surv_event_var, args.surv_time_var):
        if col not in survival_data.columns:
            raise ValueError(f"Column '{col}' not found in survival data")

    # Compare as strings so that mixed-type event columns (e.g. 1 vs "1")
    # are matched consistently against the requested event value.
    event_value_str = str(args.event_value)
    survival_data[args.surv_event_var] = survival_data[args.surv_event_var].astype(str)

    # Binary event indicator: 1 if the row matches event_value, 0 otherwise.
    binary_event_col = f'{args.surv_event_var}_binary'
    survival_data[binary_event_col] = (
        survival_data[args.surv_event_var] == event_value_str
    ).astype(int)

    # Work on a copy (assign) so the caller's label_data keeps its dtypes.
    label_data = label_data.assign(class_label=label_data['class_label'].astype(str))

    # Keep only the rows of the survival variable that describe the event class.
    is_event_class = (
        (label_data['variable'] == args.surv_event_var)
        & (label_data['class_label'] == event_value_str)
    )
    label_data = label_data[is_event_class]

    # Merge survival data with labels
    df_deceased = pd.merge(survival_data, label_data, on='sample_id', how='inner')

    if df_deceased.empty:
        raise ValueError("No matching samples found after merging survival and label data.")

    # Median split of the risk scores: np.digitize returns 0 for scores below
    # the median and 1 for scores at or above it.
    risk_scores = df_deceased['probability'].values
    median_cutoff = np.quantile(risk_scores, [0.5])
    groups = np.digitize(risk_scores, median_cutoff)
    group_labels = ['low_risk' if g == 0 else 'high_risk' for g in groups]

    fig_known = plot_kaplan_meier_curves(
        durations=df_deceased[args.surv_time_var],
        events=df_deceased[binary_event_col],
        categorical_variable=group_labels
    )

    output_path_known = output_dir / f"{output_name_base}_km_risk_subtypes.{args.format}"
    print(f"Saving Kaplan-Meier plot to: {output_path_known.absolute()}")
    fig_known.save(output_path_known, dpi=args.dpi, bbox_inches='tight')

    print("Kaplan-Meier plot saved successfully!")
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
348 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
349 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
350 def generate_cox_plots(model, clinical_train, clinical_test, omics_train, omics_test, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
351 """Generate Cox proportional hazards plots""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
352 print("Generating Cox proportional hazards analysis...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
353 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
354 # Parse clinical variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
355 clinical_vars = [var.strip() for var in args.clinical_variables.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
356 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
357 # Validate that survival variables are included |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
358 required_vars = [args.surv_time_var, args.surv_event_var] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
359 for var in required_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
360 if var not in clinical_vars: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
361 clinical_vars.append(var) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
362 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
363 print(f"Using clinical variables: {', '.join(clinical_vars)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
364 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
365 # filter datasets for clinical variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
366 if all(var in clinical_train.columns and var in clinical_test.columns for var in clinical_vars): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
367 df_clin_train = clinical_train[clinical_vars] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
368 df_clin_test = clinical_test[clinical_vars] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
369 # Drop rows with NaN in clinical variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
370 df_clin_train = df_clin_train.dropna(subset=clinical_vars) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
371 df_clin_test = df_clin_test.dropna(subset=clinical_vars) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
372 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
373 raise ValueError(f"Not all clinical variables found in datasets. Available in train dataset: {clinical_train.columns.tolist()}, Available in test dataset: {clinical_test.columns.tolist()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
374 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
375 # Combine |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
376 df_clin = pd.concat([df_clin_train, df_clin_test], axis=0) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
377 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
378 # Get top survival markers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
379 print(f"Extracting top {args.top_features} important features for {args.surv_event_var}...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
380 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
381 imp = get_important_features(model, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
382 var=args.surv_event_var, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
383 top=args.top_features |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
384 )['name'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
385 print(f"Top features: {', '.join(imp)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
386 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
387 raise ValueError(f"Error getting important features: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
388 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
389 # Extract feature data from omics datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
390 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
391 omics_test = omics_test.loc[omics_test.index.isin(imp)] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
392 omics_train = omics_train.loc[omics_train.index.isin(imp)] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
393 # Drop rows with NaN in omics datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
394 omics_test = omics_test.dropna(subset=omics_test.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
395 omics_train = omics_train.dropna(subset=omics_train.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
396 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
397 df_imp = pd.concat([omics_train, omics_test], axis=1) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
398 df_imp = df_imp.T # Transpose to have samples as rows |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
399 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
400 print(f"Feature data shape: {df_imp.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
401 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
402 raise ValueError(f"Error extracting feature subset: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
403 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
404 # Combine markers with clinical variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
405 df = pd.merge(df_imp, df_clin, left_index=True, right_index=True) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
406 print(f"Combined data shape: {df.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
407 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
408 # Remove samples without survival endpoints |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
409 initial_samples = len(df) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
410 df = df[df[args.surv_event_var].notna()] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
411 final_samples = len(df) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
412 print(f"Removed {initial_samples - final_samples} samples without survival data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
413 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
414 if df.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
415 raise ValueError("No samples remain after filtering for survival data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
416 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
417 # Convert survival event column to binary (0/1) based on event_value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
418 # Convert to string for comparison to handle mixed types |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
419 df[args.surv_event_var] = df[args.surv_event_var].astype(str) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
420 event_value_str = str(args.event_value) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
421 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
422 df[f'{args.surv_event_var}'] = ( |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
423 df[args.surv_event_var] == event_value_str |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
424 ).astype(int) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
425 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
426 # Build Cox model |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
427 print(f"Building Cox model with time variable: {args.surv_time_var}, event variable: {args.surv_event_var}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
428 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
429 coxm = build_cox_model(df, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
430 duration_col=args.surv_time_var, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
431 event_col=args.surv_event_var, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
432 crossval=args.crossval, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
433 n_splits=args.n_splits, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
434 random_state=args.random_state) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
435 print("Cox model built successfully") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
436 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
437 raise ValueError(f"Error building Cox model: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
438 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
439 # Generate hazard ratios plot |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
440 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
441 print("Generating hazard ratios plot...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
442 fig = plot_hazard_ratios(coxm) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
443 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
444 output_path = output_dir / f"{output_name_base}_hazard_ratios.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
445 print(f"Saving hazard ratios plot to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
446 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
447 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
448 print("Cox proportional hazards analysis completed successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
449 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
450 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
451 raise ValueError(f"Error generating hazard ratios plot: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
452 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
453 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
def generate_plot_scatter(labels, args, output_dir, output_name_base):
    """Generate one known-vs-predicted scatter plot per target variable.

    Args:
        labels: DataFrame indexed by column name with at least the columns
            'variable', 'known_label' and 'predicted_label'.
        args: Namespace-like object providing ``target_value`` (comma-separated
            variable names, or a falsy value to use every variable found in
            ``labels``), ``format`` (output file extension) and ``dpi``.
        output_dir: Path-like object supporting the ``/`` operator; plots are
            written below it.
        output_name_base: File name stem. When more than one target is
            processed, the sanitized target name is appended to it.

    Raises:
        ValueError: If no scatter plot could be generated for any target.
    """
    print("Generating scatter plots of known vs predicted labels...")

    # Parse target values from comma-separated string
    if args.target_value:
        # Drop empty tokens ("a,,b" or a trailing comma) so they are neither
        # treated as a target named '' nor counted when deciding whether the
        # output file name needs a per-target suffix.
        target_values = [
            token.strip()
            for token in args.target_value.split(',')
            if token.strip()
        ]
    else:
        # If no target values specified, use all unique variables
        target_values = labels['variable'].unique().tolist()

    print(f"Processing target values: {target_values}")

    successful_plots = 0
    skipped_plots = 0

    for target_value in target_values:
        print(f"\nProcessing target value: '{target_value}'")

        # Filter labels for the current target value
        target_labels = labels[labels['variable'] == target_value]

        if target_labels.empty:
            print(f" Warning: No data found for target value '{target_value}' - skipping")
            skipped_plots += 1
            continue

        # Check if labels are numeric and convert; non-numeric entries become NaN
        true_values = pd.to_numeric(target_labels['known_label'], errors='coerce')
        predicted_values = pd.to_numeric(target_labels['predicted_label'], errors='coerce')

        if true_values.isna().all() or predicted_values.isna().all():
            print(f"No valid numeric values found for known or predicted labels in '{target_value}'")
            skipped_plots += 1
            continue

        # Best effort per target: a failure for one variable must not prevent
        # the remaining variables from being plotted.
        try:
            print(f" Generating scatter plot for '{target_value}'...")
            fig = plot_scatter(true_values, predicted_values)

            # Create output filename with target value. str() guards against
            # non-string variable names (e.g. numeric values read from the
            # labels table), which have no .replace() method.
            safe_target_name = (
                str(target_value)
                .replace('/', '_')
                .replace('\\', '_')
                .replace(' ', '_')
            )
            if len(target_values) > 1:
                output_filename = f"{output_name_base}_{safe_target_name}.{args.format}"
            else:
                output_filename = f"{output_name_base}.{args.format}"

            output_path = output_dir / output_filename
            print(f" Saving scatter plot to: {output_path.absolute()}")
            fig.save(output_path, dpi=args.dpi, bbox_inches='tight')

            successful_plots += 1
            print(f" Scatter plot for '{target_value}' generated successfully!")

        except Exception as e:
            print(f" Error generating plot for '{target_value}': {str(e)}")
            skipped_plots += 1

    # Summary
    print(" Summary:")
    print(f" Successfully generated: {successful_plots} plots")
    print(f" Skipped: {skipped_plots} plots")

    if successful_plots == 0:
        raise ValueError("No scatter plots could be generated. Check your data and target values.")

    print("Scatter plot generation completed!")
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
521 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
522 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
523 def generate_label_concordance_heatmap(labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
524 """Generate label concordance heatmap""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
525 print("Generating label concordance heatmaps...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
526 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
527 # Parse target values from comma-separated string |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
528 if args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
529 target_values = [val.strip() for val in args.target_value.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
530 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
531 # If no target values specified, use all unique variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
532 target_values = labels['variable'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
533 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
534 print(f"Processing target values: {target_values}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
535 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
536 for target_value in target_values: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
537 print(f"\nProcessing target value: '{target_value}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
538 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
539 # Filter labels for the current target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
540 target_labels = labels[labels['variable'] == target_value] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
541 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
542 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
543 print(f" Warning: No data found for target value '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
544 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
545 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
546 true_values = target_labels['known_label'].tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
547 predicted_values = target_labels['predicted_label'].tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
548 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
549 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
550 print(f" Generating heatmap for '{target_value}'...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
551 fig = plot_label_concordance_heatmap(true_values, predicted_values) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
552 plt.close(fig) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
553 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
554 # Create output filename with target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
555 safe_target_name = target_value.replace('/', '_').replace('\\', '_').replace(' ', '_') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
556 if len(target_values) > 1: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
557 output_filename = f"{output_name_base}_{safe_target_name}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
558 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
559 output_filename = f"{output_name_base}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
560 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
561 output_path = output_dir / output_filename |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
562 print(f" Saving heatmap to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
563 fig.savefig(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
564 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
565 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
566 print(f" Error generating heatmap for '{target_value}': {str(e)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
567 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
568 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
569 print("Label concordance heatmap generated successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
570 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
571 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
572 def generate_pr_curves(labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
573 """Generate precision-recall curves""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
574 print("Generating precision-recall curves...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
575 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
576 # Parse target values from comma-separated string |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
577 if args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
578 target_values = [val.strip() for val in args.target_value.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
579 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
580 # If no target values specified, use all unique variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
581 target_values = labels['variable'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
582 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
583 print(f"Processing target values: {target_values}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
584 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
585 for target_value in target_values: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
586 print(f"\nProcessing target value: '{target_value}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
587 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
588 # Filter labels for the current target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
589 target_labels = labels[labels['variable'] == target_value] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
590 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
591 # Check if this is a regression problem (no class probabilities) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
592 prob_columns = target_labels['class_label'].unique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
593 non_na_probs = target_labels['probability'].notna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
594 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
595 print(f" Class labels found: {list(prob_columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
596 print(f" Non-NaN probabilities: {non_na_probs}/{len(target_labels)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
597 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
598 # If most probabilities are NaN, this is likely a regression problem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
599 if non_na_probs < len(target_labels) * 0.1: # Less than 10% valid probabilities |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
600 print(" Detected regression problem - precision-recall curves not applicable") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
601 print(f" Skipping '{target_value}' (use regression evaluation metrics instead)") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
602 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
603 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
604 # Debug: Check data quality |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
605 total_rows = len(target_labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
606 missing_labels = target_labels['known_label'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
607 missing_probs = target_labels['probability'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
608 unique_samples = target_labels['sample_id'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
609 unique_classes = target_labels['class_label'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
610 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
611 print(f" Data summary: {total_rows} total rows, {unique_samples} unique samples, {unique_classes} unique classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
612 print(f" Missing data: {missing_labels} missing known_label, {missing_probs} missing probability") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
613 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
614 if missing_labels > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
615 print(f" Warning: Found {missing_labels} missing known_label values") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
616 missing_samples = target_labels[target_labels['known_label'].isna()]['sample_id'].unique()[:5] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
617 print(f" Sample IDs with missing known_label: {list(missing_samples)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
618 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
619 # Remove rows with missing known_label |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
620 target_labels = target_labels.dropna(subset=['known_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
621 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
622 print(f" Error: No valid known_label data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
623 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
624 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
625 # 1. Pivot to wide format |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
626 prob_df = target_labels.pivot(index='sample_id', columns='class_label', values='probability') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
627 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
628 print(f" After pivot: {prob_df.shape[0]} samples x {prob_df.shape[1]} classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
629 print(f" Class columns: {list(prob_df.columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
630 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
631 # Check for NaN values in probability data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
632 nan_counts = prob_df.isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
633 if nan_counts.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
634 print(f" NaN counts per class: {dict(nan_counts)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
635 print(f" Samples with any NaN: {prob_df.isna().any(axis=1).sum()}/{len(prob_df)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
636 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
637 # Drop only rows where ALL probabilities are NaN |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
638 all_nan_rows = prob_df.isna().all(axis=1) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
639 if all_nan_rows.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
640 print(f" Dropping {all_nan_rows.sum()} samples with all NaN probabilities") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
641 prob_df = prob_df[~all_nan_rows] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
642 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
643 remaining_nans = prob_df.isna().sum().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
644 if remaining_nans > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
645 print(f" Warning: {remaining_nans} individual NaN values remain - filling with 0") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
646 prob_df = prob_df.fillna(0) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
647 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
648 if prob_df.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
649 print(f" Error: No valid probability data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
650 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
651 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
652 # 2. Get true labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
653 true_labels_df = target_labels.drop_duplicates('sample_id')[['sample_id', 'known_label']].set_index('sample_id') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
654 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
655 # 3. Align indices - only keep samples that exist in both datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
656 common_indices = prob_df.index.intersection(true_labels_df.index) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
657 if len(common_indices) == 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
658 print(f" Error: No common sample_ids between probability and true label data for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
659 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
660 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
661 print(f" Found {len(common_indices)} samples with both probability and true label data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
662 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
663 # Filter both datasets to common indices |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
664 prob_df_aligned = prob_df.loc[common_indices] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
665 y_true = true_labels_df.loc[common_indices]['known_label'] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
666 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
667 # 4. Final check for NaN values |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
668 if y_true.isna().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
669 print(f" Error: True labels still contain NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
670 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
671 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
672 if prob_df_aligned.isna().any().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
673 print(f" Error: Probability data still contains NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
674 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
675 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
676 # 5. Convert categorical labels to integer labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
677 # Create a mapping from class names to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
678 class_names = list(prob_df_aligned.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
679 class_to_int = {class_name: i for i, class_name in enumerate(class_names)} |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
680 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
681 print(f" Class mapping: {class_to_int}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
682 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
683 # Convert true labels to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
684 y_true_np = y_true.map(class_to_int).to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
685 y_probs_np = prob_df_aligned.to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
686 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
687 print(f" Data shape: y_true={y_true_np.shape}, y_probs={y_probs_np.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
688 print(f" Unique true labels (integers): {set(y_true_np)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
689 print(f" Class labels (columns): {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
690 print(f" Label distribution: {dict(zip(*np.unique(y_true_np, return_counts=True)))}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
691 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
692 # Check for any unmapped labels (will be NaN) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
693 if pd.isna(y_true_np).any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
694 print(" Error: Some true labels could not be mapped to class columns") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
695 unmapped_labels = set(y_true[y_true.map(class_to_int).isna()]) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
696 print(f" Unmapped labels: {unmapped_labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
697 print(f" Available classes: {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
698 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
699 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
700 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
701 print(f" Generating precision-recall curve for '{target_value}'...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
702 fig = plot_pr_curves(y_true_np, y_probs_np) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
703 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
704 # Create output filename with target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
705 safe_target_name = target_value.replace('/', '_').replace('\\', '_').replace(' ', '_') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
706 if len(target_values) > 1: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
707 output_filename = f"{output_name_base}_{safe_target_name}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
708 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
709 output_filename = f"{output_name_base}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
710 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
711 output_path = output_dir / output_filename |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
712 print(f" Saving precision-recall curve to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
713 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
714 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
715 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
716 print(f" Error generating precision-recall curve for '{target_value}': {str(e)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
717 print(f" Debug info - y_true type: {type(y_true_np)}, contains NaN: {pd.isna(y_true_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
718 print(f" Debug info - y_probs type: {type(y_probs_np)}, contains NaN: {pd.isna(y_probs_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
719 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
720 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
721 print("Precision-recall curves generated successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
722 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
723 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
724 def generate_roc_curves(labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
725 """Generate ROC curves""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
726 print("Generating ROC curves...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
727 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
728 # Parse target values from comma-separated string |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
729 if args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
730 target_values = [val.strip() for val in args.target_value.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
731 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
732 # If no target values specified, use all unique variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
733 target_values = labels['variable'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
734 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
735 print(f"Processing target values: {target_values}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
736 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
737 for target_value in target_values: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
738 print(f"\nProcessing target value: '{target_value}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
739 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
740 # Filter labels for the current target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
741 target_labels = labels[labels['variable'] == target_value] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
742 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
743 # Check if this is a regression problem (no class probabilities) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
744 prob_columns = target_labels['class_label'].unique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
745 non_na_probs = target_labels['probability'].notna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
746 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
747 print(f" Class labels found: {list(prob_columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
748 print(f" Non-NaN probabilities: {non_na_probs}/{len(target_labels)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
749 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
750 # If most probabilities are NaN, this is likely a regression problem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
751 if non_na_probs < len(target_labels) * 0.1: # Less than 10% valid probabilities |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
752 print(" Detected regression problem - ROC curves not applicable") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
753 print(f" Skipping '{target_value}' (use regression evaluation metrics instead)") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
754 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
755 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
756 # Debug: Check data quality |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
757 total_rows = len(target_labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
758 missing_labels = target_labels['known_label'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
759 missing_probs = target_labels['probability'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
760 unique_samples = target_labels['sample_id'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
761 unique_classes = target_labels['class_label'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
762 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
763 print(f" Data summary: {total_rows} total rows, {unique_samples} unique samples, {unique_classes} unique classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
764 print(f" Missing data: {missing_labels} missing known_label, {missing_probs} missing probability") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
765 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
766 if missing_labels > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
767 print(f" Warning: Found {missing_labels} missing known_label values") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
768 missing_samples = target_labels[target_labels['known_label'].isna()]['sample_id'].unique()[:5] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
769 print(f" Sample IDs with missing known_label: {list(missing_samples)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
770 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
771 # Remove rows with missing known_label |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
772 target_labels = target_labels.dropna(subset=['known_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
773 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
774 print(f" Error: No valid known_label data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
775 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
776 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
777 # 1. Pivot to wide format |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
778 prob_df = target_labels.pivot(index='sample_id', columns='class_label', values='probability') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
779 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
780 print(f" After pivot: {prob_df.shape[0]} samples x {prob_df.shape[1]} classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
781 print(f" Class columns: {list(prob_df.columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
782 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
783 # Check for NaN values in probability data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
784 nan_counts = prob_df.isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
785 if nan_counts.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
786 print(f" NaN counts per class: {dict(nan_counts)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
787 print(f" Samples with any NaN: {prob_df.isna().any(axis=1).sum()}/{len(prob_df)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
788 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
789 # Drop only rows where ALL probabilities are NaN |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
790 all_nan_rows = prob_df.isna().all(axis=1) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
791 if all_nan_rows.any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
792 print(f" Dropping {all_nan_rows.sum()} samples with all NaN probabilities") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
793 prob_df = prob_df[~all_nan_rows] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
794 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
795 remaining_nans = prob_df.isna().sum().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
796 if remaining_nans > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
797 print(f" Warning: {remaining_nans} individual NaN values remain - filling with 0") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
798 prob_df = prob_df.fillna(0) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
799 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
800 if prob_df.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
801 print(f" Error: No valid probability data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
802 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
803 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
804 # 2. Get true labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
805 true_labels_df = target_labels.drop_duplicates('sample_id')[['sample_id', 'known_label']].set_index('sample_id') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
806 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
807 # 3. Align indices - only keep samples that exist in both datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
808 common_indices = prob_df.index.intersection(true_labels_df.index) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
809 if len(common_indices) == 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
810 print(f" Error: No common sample_ids between probability and true label data for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
811 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
812 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
813 print(f" Found {len(common_indices)} samples with both probability and true label data") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
814 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
815 # Filter both datasets to common indices |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
816 prob_df_aligned = prob_df.loc[common_indices] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
817 y_true = true_labels_df.loc[common_indices]['known_label'] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
818 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
819 # 4. Final check for NaN values |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
820 if y_true.isna().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
821 print(f" Error: True labels still contain NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
822 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
823 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
824 if prob_df_aligned.isna().any().any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
825 print(f" Error: Probability data still contains NaN after alignment for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
826 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
827 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
828 # 5. Convert categorical labels to integer labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
829 # Create a mapping from class names to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
830 class_names = list(prob_df_aligned.columns) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
831 class_to_int = {class_name: i for i, class_name in enumerate(class_names)} |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
832 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
833 print(f" Class mapping: {class_to_int}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
834 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
835 # Convert true labels to integers |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
836 y_true_np = y_true.map(class_to_int).to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
837 y_probs_np = prob_df_aligned.to_numpy() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
838 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
839 print(f" Data shape: y_true={y_true_np.shape}, y_probs={y_probs_np.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
840 print(f" Unique true labels (integers): {set(y_true_np)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
841 print(f" Class labels (columns): {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
842 print(f" Label distribution: {dict(zip(*np.unique(y_true_np, return_counts=True)))}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
843 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
844 # Check for any unmapped labels (will be NaN) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
845 if pd.isna(y_true_np).any(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
846 print(" Error: Some true labels could not be mapped to class columns") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
847 unmapped_labels = set(y_true[y_true.map(class_to_int).isna()]) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
848 print(f" Unmapped labels: {unmapped_labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
849 print(f" Available classes: {class_names}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
850 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
851 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
852 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
853 print(f" Generating ROC curve for '{target_value}'...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
854 fig = plot_roc_curves(y_true_np, y_probs_np) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
855 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
856 # Create output filename with target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
857 safe_target_name = target_value.replace('/', '_').replace('\\', '_').replace(' ', '_') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
858 if len(target_values) > 1: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
859 output_filename = f"{output_name_base}_{safe_target_name}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
860 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
861 output_filename = f"{output_name_base}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
862 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
863 output_path = output_dir / output_filename |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
864 print(f" Saving ROC curve to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
865 fig.save(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
866 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
867 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
868 print(f" Error generating ROC curve for '{target_value}': {str(e)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
869 print(f" Debug info - y_true type: {type(y_true_np)}, contains NaN: {pd.isna(y_true_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
870 print(f" Debug info - y_probs type: {type(y_probs_np)}, contains NaN: {pd.isna(y_probs_np).any()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
871 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
872 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
873 print("ROC curves generated successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
874 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
875 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
876 def generate_box_plots(labels, args, output_dir, output_name_base): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
877 """Generate box plots for model predictions""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
878 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
879 print("Generating box plots...") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
880 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
881 # Parse target values from comma-separated string |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
882 if args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
883 target_values = [val.strip() for val in args.target_value.split(',')] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
884 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
885 # If no target values specified, use all unique variables |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
886 target_values = labels['variable'].unique().tolist() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
887 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
888 print(f"Processing target values: {target_values}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
889 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
890 for target_value in target_values: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
891 print(f"\nProcessing target value: '{target_value}'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
892 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
893 # Filter labels for the current target value |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
894 target_labels = labels[labels['variable'] == target_value] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
895 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
896 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
897 print(f" Warning: No data found for target value '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
898 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
899 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
900 # Check if this is a classification problem (has probabilities) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
901 prob_columns = target_labels['class_label'].unique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
902 non_na_probs = target_labels['probability'].notna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
903 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
904 print(f" Class labels found: {list(prob_columns)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
905 print(f" Non-NaN probabilities: {non_na_probs}/{len(target_labels)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
906 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
907 # If most probabilities are NaN, this is likely a regression problem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
908 if non_na_probs < len(target_labels) * 0.1: # Less than 10% valid probabilities |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
909 print(" Detected regression problem - precision-recall curves not applicable") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
910 print(f" Skipping '{target_value}' (use regression evaluation metrics instead)") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
911 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
912 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
913 # Debug: Check data quality |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
914 total_rows = len(target_labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
915 missing_labels = target_labels['known_label'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
916 missing_probs = target_labels['probability'].isna().sum() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
917 unique_samples = target_labels['sample_id'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
918 unique_classes = target_labels['class_label'].nunique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
919 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
920 print(f" Data summary: {total_rows} total rows, {unique_samples} unique samples, {unique_classes} unique classes") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
921 print(f" Missing data: {missing_labels} missing known_label, {missing_probs} missing probability") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
922 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
923 if missing_labels > 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
924 print(f" Warning: Found {missing_labels} missing known_label values") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
925 missing_samples = target_labels[target_labels['known_label'].isna()]['sample_id'].unique()[:5] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
926 print(f" Sample IDs with missing known_label: {list(missing_samples)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
927 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
928 # Remove rows with missing known_label |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
929 target_labels = target_labels.dropna(subset=['known_label']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
930 if target_labels.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
931 print(f" Error: No valid known_label data remaining for '{target_value}' - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
932 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
933 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
934 # Remove rows with missing data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
935 clean_data = target_labels.dropna(subset=['known_label', 'probability']) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
936 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
937 if clean_data.empty: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
938 print(" No valid data after cleaning - skipping") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
939 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
940 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
941 # Get unique classes |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
942 classes = clean_data['class_label'].unique() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
943 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
944 for class_label in classes: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
945 print(f" Generating box plot for class: {class_label}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
946 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
947 # Filter for current class |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
948 class_data = clean_data[clean_data['class_label'] == class_label] |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
949 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
950 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
951 # Create the box plot |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
952 fig = plot_boxplot( |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
953 categorical_x=class_data['known_label'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
954 numerical_y=class_data['probability'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
955 title_x='True Label', |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
956 title_y=f'Predicted Probability ({class_label})', |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
957 ) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
958 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
959 # Save the plot |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
960 safe_class_name = str(class_label).replace('/', '_').replace('\\', '_').replace(' ', '_').replace(':', '_') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
961 safe_target_name = target_value.replace('/', '_').replace('\\', '_').replace(' ', '_') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
962 output_filename = f"{output_name_base}_{safe_target_name}_{safe_class_name}.{args.format}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
963 output_path = output_dir / output_filename |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
964 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
965 print(f" Saving box plot to: {output_path.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
966 fig.savefig(output_path, dpi=args.dpi, bbox_inches='tight') |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
967 plt.close(fig) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
968 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
969 except Exception as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
970 print(f" Error generating box plot for class '{class_label}': {str(e)}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
971 continue |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
972 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
973 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
974 def main(): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
975 """Main function to parse arguments and generate plots""" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
976 parser = argparse.ArgumentParser(description="Generate plots using flexynesis") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
977 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
978 # Required arguments |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
979 parser.add_argument("--labels", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
980 help="Path to labels file generated by flexynesis") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
981 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
982 # Plot type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
983 parser.add_argument("--plot_type", type=str, required=True, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
984 choices=['dimred', 'kaplan_meier', 'cox', 'scatter', 'concordance_heatmap', 'pr_curve', 'roc_curve', 'box_plot'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
985 help="Type of plot to generate: 'dimred' for dimensionality reduction, 'kaplan_meier' for survival analysis, 'cox' for Cox proportional hazards analysis, 'scatter' for scatter plots, 'concordance_heatmap' for label concordance heatmaps, 'pr_curve' for precision-recall curves, 'roc_curve' for ROC curves, or 'box_plot' for box plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
986 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
987 # Arguments for dimensionality reduction |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
988 parser.add_argument("--embeddings", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
989 help="Path to input data embeddings file (CSV or tabular format). Required for dimred plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
990 parser.add_argument("--method", type=str, default='pca', choices=['pca', 'umap'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
991 help="Transformation method ('pca' or 'umap'). Default is 'pca'. Used for dimred plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
992 parser.add_argument("--target_variables", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
993 help="Comma-separated list of target variables to plot.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
994 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
995 # Arguments for Kaplan-Meier |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
996 parser.add_argument("--survival_data", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
997 help="Path to survival data file with columns: duration and event. Required for kaplan_meier plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
998 parser.add_argument("--surv_time_var", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
999 help="Column name for survival time") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1000 parser.add_argument("--surv_event_var", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1001 help="Column name for survival event") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1002 parser.add_argument("--event_value", type=str, required=False, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1003 help="Value in event column that represents an event (e.g., 'DECEASED')") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1004 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1005 # Arguments for Cox analysis |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1006 parser.add_argument("--model", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1007 help="Path to trained flexynesis model (pickle file). Required for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1008 parser.add_argument("--clinical_train", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1009 help="Path to training dataset (pickle file). Required for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1010 parser.add_argument("--clinical_test", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1011 help="Path to test dataset (pickle file). Required for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1012 parser.add_argument("--omics_train", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1013 help="Path to training omics dataset. Optional for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1014 parser.add_argument("--omics_test", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1015 help="Path to test omics dataset. Optional for cox plots.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1016 parser.add_argument("--clinical_variables", type=str, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1017 help="Comma-separated list of clinical variables to include in Cox model (e.g., 'AGE,SEX,HISTOLOGICAL_DIAGNOSIS,STUDY')") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1018 parser.add_argument("--top_features", type=int, default=20, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1019 help="Number of top important features to include in Cox model. Default is 5") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1020 parser.add_argument("--crossval", action='store_true', |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1021 help="If True, performs K-fold cross-validation and returns average C-index. Default is False") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1022 parser.add_argument("--n_splits", type=int, default=5, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1023 help="Number of folds for cross-validation. Default is 5") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1024 parser.add_argument("--random_state", type=int, default=42, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1025 help="Random seed for reproducibility. Default is 42") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1026 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1027 # Arguments for scatter plot, heatmap, PR curves, ROC curves, and box plots |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1028 parser.add_argument("--target_value", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1029 help="Target value for scatter plot.") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1030 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1031 # Common arguments |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1032 parser.add_argument("--output_dir", type=str, default='output', |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1033 help="Output directory. Default is 'output'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1034 parser.add_argument("--output_name", type=str, default=None, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1035 help="Output filename base") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1036 parser.add_argument("--format", type=str, default='jpg', choices=['png', 'pdf', 'svg', 'jpg'], |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1037 help="Output format for the plot. Default is 'jpg'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1038 parser.add_argument("--dpi", type=int, default=300, |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1039 help="DPI for the output image. Default is 300") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1040 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1041 args = parser.parse_args() |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1042 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1043 try: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1044 # validate plot type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1045 if not args.plot_type: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1046 raise ValueError("Please specify a plot type using --plot_type") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1047 if args.plot_type not in ['dimred', 'kaplan_meier', 'cox', 'scatter', 'concordance_heatmap', 'pr_curve', 'roc_curve', 'box_plot']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1048 raise ValueError(f"Invalid plot type: {args.plot_type}. Must be one of: 'dimred', 'kaplan_meier', 'cox', 'scatter', 'concordance_heatmap', 'pr_curve', 'roc_curve', 'box_plot'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1049 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1050 # Validate plot type requirements |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1051 if args.plot_type in ['dimred']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1052 if not args.embeddings: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1053 raise ValueError("--embeddings is required when plot_type is 'dimred'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1054 if not os.path.isfile(args.embeddings): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1055 raise FileNotFoundError(f"embeddings file not found: {args.embeddings}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1056 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1057 raise ValueError("--labels is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1058 if not args.method: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1059 raise ValueError("--method is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1060 if not args.target_variables: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1061 raise ValueError("--target_variables is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1062 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1063 if args.plot_type in ['kaplan_meier']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1064 if not args.survival_data: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1065 raise ValueError("--survival_data is required when plot_type is 'kaplan_meier'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1066 if not os.path.isfile(args.survival_data): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1067 raise FileNotFoundError(f"Survival data file not found: {args.survival_data}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1068 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1069 raise ValueError("--labels is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1070 if not args.method: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1071 raise ValueError("--method is required for dimensionality reduction plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1072 if not args.surv_time_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1073 raise ValueError("--surv_time_var is required for Kaplan-Meier plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1074 if not args.surv_event_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1075 raise ValueError("--surv_event_var is required for Kaplan-Meier plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1076 if not args.event_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1077 raise ValueError("--event_value is required for Kaplan-Meier plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1078 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1079 if args.plot_type in ['cox']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1080 if not args.model: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1081 raise ValueError("--model is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1082 if not os.path.isfile(args.model): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1083 raise FileNotFoundError(f"Model file not found: {args.model}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1084 if not args.clinical_train: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1085 raise ValueError("--clinical_train is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1086 if not os.path.isfile(args.clinical_train): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1087 raise FileNotFoundError(f"Training dataset file not found: {args.clinical_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1088 if not args.clinical_test: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1089 raise ValueError("--clinical_test is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1090 if not os.path.isfile(args.clinical_test): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1091 raise FileNotFoundError(f"Test dataset file not found: {args.clinical_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1092 if not args.omics_train: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1093 raise ValueError("--omics_train is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1094 if not os.path.isfile(args.omics_train): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1095 raise FileNotFoundError(f"Training omics dataset file not found: {args.omics_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1096 if not args.omics_test: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1097 raise ValueError("--omics_test is required when plot_type is 'cox'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1098 if not os.path.isfile(args.omics_test): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1099 raise FileNotFoundError(f"Test omics dataset file not found: {args.omics_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1100 if not args.surv_time_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1101 raise ValueError("--surv_time_var is required for Cox plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1102 if not args.surv_event_var: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1103 raise ValueError("--surv_event_var is required for Cox plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1104 if not args.clinical_variables: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1105 raise ValueError("--clinical_variables is required for Cox plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1106 if not isinstance(args.top_features, int) or args.top_features <= 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1107 raise ValueError("--top_features must be a positive integer") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1108 if not args.event_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1109 raise ValueError("--event_value is required for Kaplan-Meier plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1110 if not args.crossval: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1111 args.crossval = False |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1112 if not isinstance(args.n_splits, int) or args.n_splits <= 0: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1113 raise ValueError("--n_splits must be a positive integer") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1114 if not isinstance(args.random_state, int): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1115 raise ValueError("--random_state must be an integer") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1116 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1117 if args.plot_type in ['scatter']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1118 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1119 raise ValueError("--labels is required for scatter plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1120 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1121 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1122 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1123 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1124 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1125 if args.plot_type in ['concordance_heatmap']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1126 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1127 raise ValueError("--labels is required for concordance heatmap") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1128 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1129 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1130 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1131 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1132 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1133 if args.plot_type in ['pr_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1134 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1135 raise ValueError("--labels is required for precision-recall curves") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1136 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1137 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1138 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1139 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1140 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1141 if args.plot_type in ['roc_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1142 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1143 raise ValueError("--labels is required for ROC curves") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1144 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1145 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1146 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1147 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1148 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1149 if args.plot_type in ['box_plot']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1150 if not args.labels: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1151 raise ValueError("--labels is required for box plots") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1152 if not args.target_value: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1153 print("--target_value is not specified, using all unique variables from labels") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1154 if not os.path.isfile(args.labels): |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1155 raise FileNotFoundError(f"Labels file not found: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1156 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1157 # Validate other arguments |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1158 if args.method not in ['pca', 'umap']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1159 raise ValueError("Method must be 'pca' or 'umap'") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1160 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1161 # Create output directory |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1162 output_dir = Path(args.output_dir) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1163 output_dir.mkdir(parents=True, exist_ok=True) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1164 print(f"Output directory: {output_dir.absolute()}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1165 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1166 # Generate output filename base |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1167 if args.output_name: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1168 output_name_base = args.output_name |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1169 else: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1170 if args.plot_type == 'dimred': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1171 embeddings_name = Path(args.embeddings).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1172 output_name_base = f"{embeddings_name}_{args.method}" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1173 elif args.plot_type == 'kaplan_meier': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1174 survival_name = Path(args.survival_data).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1175 output_name_base = f"{survival_name}_km" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1176 elif args.plot_type == 'cox': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1177 model_name = Path(args.model).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1178 output_name_base = f"{model_name}_cox" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1179 elif args.plot_type == 'scatter': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1180 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1181 output_name_base = f"{labels_name}_scatter" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1182 elif args.plot_type == 'concordance_heatmap': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1183 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1184 output_name_base = f"{labels_name}_concordance" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1185 elif args.plot_type == 'pr_curve': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1186 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1187 output_name_base = f"{labels_name}_pr_curves" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1188 elif args.plot_type == 'roc_curve': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1189 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1190 output_name_base = f"{labels_name}_roc_curves" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1191 elif args.plot_type == 'box_plot': |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1192 labels_name = Path(args.labels).stem |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1193 output_name_base = f"{labels_name}_box_plot" |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1194 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1195 # Generate plots based on type |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1196 if args.plot_type in ['dimred']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1197 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1198 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1199 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1200 # Load embeddings data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1201 print(f"Loading embeddings from: {args.embeddings}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1202 embeddings, sample_names = load_embeddings(args.embeddings) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1203 print(f"embeddings shape: {embeddings.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1204 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1205 # Match samples to embeddings |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1206 matched_labels = match_samples_to_embeddings(sample_names, label_data) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1207 print(f"Successfully matched {len(matched_labels)} samples for dimensionality reduction") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1208 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1209 generate_dimred_plots(embeddings, matched_labels, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1210 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1211 elif args.plot_type in ['kaplan_meier']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1212 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1213 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1214 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1215 # Load survival data |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1216 print(f"Loading survival data from: {args.survival_data}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1217 survival_data = load_survival_data(args.survival_data) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1218 print(f"Survival data shape: {survival_data.shape}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1219 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1220 generate_km_plots(survival_data, label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1221 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1222 elif args.plot_type in ['cox']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1223 # Load model and datasets |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1224 print(f"Loading model from: {args.model}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1225 model = load_model(args.model) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1226 print(f"Loading training dataset from: {args.clinical_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1227 clinical_train = load_omics(args.clinical_train) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1228 print(f"Loading test dataset from: {args.clinical_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1229 clinical_test = load_omics(args.clinical_test) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1230 print(f"Loading training omics dataset from: {args.omics_train}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1231 omics_train = load_omics(args.omics_train) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1232 print(f"Loading test omics dataset from: {args.omics_test}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1233 omics_test = load_omics(args.omics_test) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1234 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1235 generate_cox_plots(model, clinical_train, clinical_test, omics_test, omics_train, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1236 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1237 elif args.plot_type in ['scatter']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1238 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1239 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1240 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1241 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1242 generate_plot_scatter(label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1243 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1244 elif args.plot_type in ['concordance_heatmap']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1245 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1246 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1247 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1248 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1249 generate_label_concordance_heatmap(label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1250 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1251 elif args.plot_type in ['pr_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1252 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1253 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1254 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1255 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1256 generate_pr_curves(label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1257 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1258 elif args.plot_type in ['roc_curve']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1259 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1260 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1261 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1262 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1263 generate_roc_curves(label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1264 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1265 elif args.plot_type in ['box_plot']: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1266 # Load labels |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1267 print(f"Loading labels from: {args.labels}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1268 label_data = load_labels(args.labels) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1269 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1270 generate_box_plots(label_data, args, output_dir, output_name_base) |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1271 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1272 print("All plots generated successfully!") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1273 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1274 except (FileNotFoundError, ValueError, pd.errors.ParserError) as e: |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1275 print(f"Error: {e}") |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1276 return 1 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1277 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1278 return 0 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1279 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
1280 |
525c661a7fdc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
bgruening
parents:
diff
changeset
|
# Script entry point: run main() and hand its integer return value
# (0 on success, 1 on a handled error) to the interpreter as the exit status.
# SystemExit is raised directly instead of calling the bare exit() helper,
# because exit() is only installed by the `site` module for interactive use
# and is missing when Python runs with -S or in some embedded interpreters.
if __name__ == "__main__":
    raise SystemExit(main())