comparison flexynesis_plot.xml @ 0:bb91bf19eb40 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit b2463fb68d0ae54864d87718ee72f5e063aa4587
author bgruening
date Tue, 24 Jun 2025 05:55:50 +0000
parents
children 3c5d82bf6e8a
comparison
equal deleted inserted replaced
-1:000000000000 0:bb91bf19eb40
1 <tool id="flexynesis_plot" name="Flexynesis plot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>tool for visualizing flexynesis results</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <required_files>
8 <include path="flexynesis_plot.py" />
9 </required_files>
10 <command detect_errors="exit_code"><![CDATA[
## Stages the selected datasets under inputs/ and runs the generated plotting script.
## The first line is a macro token defined outside this file; presumably it aborts
## unless non-commercial use was confirmed (TODO confirm in macros.xml).
11 @CHECK_NON_COMMERCIAL_USE@
## inputs/ holds the symlinked datasets, plots/ receives the figures written by the script.
12 mkdir -p inputs/ plots/ &&
## The labels dataset is linked as inputs/<element_identifier>.<ext>.
13 ln -s '$plot_conditional.labels' 'inputs/$plot_conditional.labels.element_identifier.$plot_conditional.labels.ext' &&
## The embeddings dataset only exists for the dimred plot type, so it is linked conditionally.
14 #if $plot_conditional.plot_type == "dimred":
15 ln -s '$plot_conditional.embeddings' 'inputs/$plot_conditional.embeddings.element_identifier.$plot_conditional.embeddings.ext' &&
16 #end if
## Echo the generated script to stdout, then execute it.
17 cat '$flexynesis_plot_config' &&
18 python '$flexynesis_plot_config'
19 ]]></command>
20 <configfiles>
21 <configfile name="flexynesis_plot_config"><![CDATA[
22 import sys
23 sys.path.append('$__tool_directory__/')
24
25 import matplotlib.pyplot as plt
26 import numpy as np
27 import pandas as pd
28 import seaborn as sns
29 import torch
30 from flexynesis import (
31 get_important_features,
32 plot_dim_reduced,
33 plot_hazard_ratios,
34 plot_kaplan_meier_curves,
35 plot_pr_curves,
36 plot_roc_curves,
37 plot_scatter
38 )
39 from scipy.stats import kruskal, mannwhitneyu
40 from flexynesis_plot import (
41 plot_label_concordance_heatmap,
42 plot_boxplot,
43 detect_color_type,
44 load_labels,
45 load_embeddings,
46 match_samples_to_embeddings
47 )
48 #if $plot_conditional.plot_type == "dimred":
# Dimensionality-reduction plot: project the embeddings and colour the points by one labels column.
# label_data is not defined in this template; presumably it is created by the macro token on the next line.
49 @PLOT_COMMON_CONFIG@
50 embeddings, sample_names = load_embeddings('inputs/$plot_conditional.embeddings.element_identifier.$plot_conditional.embeddings.ext')
51 matched_labels = match_samples_to_embeddings(sample_names, label_data)
52
# The selected column number is shifted by one to index the DataFrame columns.
53 label = matched_labels.columns[$plot_conditional.label-1]
54 color_type = detect_color_type(matched_labels[label])
55
56 fig = plot_dim_reduced(
57     matrix=embeddings,
58     labels=matched_labels[label],
59     method='$plot_conditional.method',
60     color_type=color_type
61 )
# Column names may contain characters that are unsafe in a file name; apply the same
# replacements the box-plot branch uses for class names.
safe_label = str(label).replace('/', '_').replace('\\', '_').replace(' ', '_').replace(':', '_')
# f-string so the column name is actually interpolated (it used to be written literally as "{label}").
62 output_path = f"plots/{safe_label}_${plot_conditional.method}.${plot_conditional.format}"
63 fig.save(output_path, dpi=$plot_conditional.dpi, bbox_inches='tight')
64 #else if $plot_conditional.plot_type == "scatter":
# Scatter plot of two user-selected columns of the labels table (true vs predicted values).
# label_data is not defined in this template; presumably it is created by the macro token on the next line.
65 @PLOT_COMMON_CONFIG@
66
# The selected column numbers are shifted by one to index the DataFrame columns.
67 true_label = label_data.columns[$plot_conditional.true_label-1]
68 predicted_label = label_data.columns[$plot_conditional.predicted_label-1]
# errors='coerce' turns every non-numeric entry into NaN instead of raising.
69 true_values = pd.to_numeric(label_data[true_label], errors='coerce')
70 predicted_values = pd.to_numeric(label_data[predicted_label], errors='coerce')
71
# Abort only when an entire column failed to parse; partially numeric columns are passed on as they are.
72 if true_values.isna().all() or predicted_values.isna().all():
73 raise ValueError("No valid numeric values found for known or predicted labels")
74
75 fig = plot_scatter(true_values, predicted_values)
76
# The file name is built from the selected column numbers, not from the column names.
77 output_path = "plots/${plot_conditional.true_label}_${plot_conditional.predicted_label}_scatter.${plot_conditional.format}"
78 fig.save(output_path, dpi=$plot_conditional.dpi, bbox_inches='tight')
79
80 #else if $plot_conditional.plot_type == "concordance_heatmap":
# Concordance heatmap between two user-selected columns of the labels table.
# label_data is not defined in this template; presumably it is created by the macro token on the next line.
81 @PLOT_COMMON_CONFIG@
82
# The selected column numbers are shifted by one to index the DataFrame columns.
83 true_label = label_data.columns[$plot_conditional.true_label-1]
84 predicted_label = label_data.columns[$plot_conditional.predicted_label-1]
85
86 true_values = label_data[true_label].tolist()
87 predicted_values = label_data[predicted_label].tolist()
88 fig = plot_label_concordance_heatmap(true_values, predicted_values)
# Column names may contain characters that are unsafe in a file name; apply the same
# replacements the box-plot branch uses for class names.
safe_true_label = str(true_label).replace('/', '_').replace('\\', '_').replace(' ', '_').replace(':', '_')
safe_predicted_label = str(predicted_label).replace('/', '_').replace('\\', '_').replace(' ', '_').replace(':', '_')
# f-string so the column names are actually interpolated (they used to be written literally in braces).
89 output_path = f"plots/{safe_true_label}_{safe_predicted_label}_concordance_heatmap.${plot_conditional.format}"
90 fig.savefig(output_path, dpi=$plot_conditional.dpi, bbox_inches='tight')
# Release the figure only after it has been written to disk.
91 plt.close(fig)
92
93 #else if $plot_conditional.plot_type == "pr_curve":
# Precision-recall curves. y_true_np and y_probs_np are not defined in this template;
# presumably they are prepared by the macro token on the next line (TODO confirm in macros.xml).
94 @PR_ROC_CONFIG@
95
96 fig = plot_pr_curves(y_true_np, y_probs_np)
97
# Fixed file name: this branch writes exactly one figure.
98 output_path = "plots/pr_curves.${plot_conditional.format}"
99 fig.save(output_path, dpi=$plot_conditional.dpi, bbox_inches='tight')
100
101 #else if $plot_conditional.plot_type == "roc_curve":
# ROC curves. y_true_np and y_probs_np are not defined in this template;
# presumably they are prepared by the macro token on the next line (TODO confirm in macros.xml).
102 @PR_ROC_CONFIG@
103 fig = plot_roc_curves(y_true_np, y_probs_np)
# Fixed file name: this branch writes exactly one figure.
104 output_path = "plots/roc_curves.${plot_conditional.format}"
105 fig.save(output_path, dpi=$plot_conditional.dpi, bbox_inches='tight')
106
107 #else if $plot_conditional.plot_type == "box_plot":
# Box plots: one figure per value of class_label, showing probability grouped by known_label.
# label_data is not defined in this template; presumably it is created by the macro token on the next line.
108 @PR_ROC_BOX_CONFIG@
109 # Remove rows with missing data (class_label included, because it drives the grouping below)
110 clean_data = label_data.dropna(subset=['class_label', 'known_label', 'probability'])
111
112 if clean_data.empty:
113     raise ValueError(" No valid data after cleaning")
114
115 # Get unique classes
116 classes = clean_data['class_label'].unique()
117
118 for class_label in classes:
119     print(f" Generating box plot for class: {class_label}")
120
121     # Filter for current class
122     class_data = clean_data[clean_data['class_label'] == class_label]
123
124     # Create the box plot
125     fig = plot_boxplot(
126         categorical_x=class_data['known_label'],
127         numerical_y=class_data['probability'],
128         title_x='True Label',
129         title_y=f'Predicted Probability ({class_label})',
130     )
131
132     # Save the plot under a file-system-safe name derived from the class
133     safe_class_name = str(class_label).replace('/', '_').replace('\\', '_').replace(' ', '_').replace(':', '_')
134     output_path = f"plots/box_plot_{safe_class_name}.${plot_conditional.format}"
135     fig.savefig(output_path, dpi=$plot_conditional.dpi, bbox_inches='tight')
136     # Release the figure only after it has been written to disk
137     plt.close(fig)
138 #end if
139 ]]></configfile>
140 </configfiles>
141 <inputs>
<!-- Parameters referenced through expand are defined outside this file (macros.xml is imported above); only plot-specific parameters are declared inline. -->
142 <expand macro="commercial_use_param"/>
<!-- plot_type decides which parameters are shown and which branch of the generated plotting script is emitted. -->
143 <conditional name="plot_conditional">
144 <param name="plot_type" type="select" label="Flexynesis plot">
145 <option value="dimred">Dimensionality reduction</option>
146 <option value="scatter">Scatter plot of known vs predicted labels</option>
147 <option value="concordance_heatmap">Label concordance heatmap</option>
148 <option value="pr_curve">Precision-recall curves</option>
149 <option value="roc_curve">ROC curves</option>
150 <option value="box_plot">Box plot</option>
151 </param>
<!-- Every branch nests plots_common_input inside plots_common_param. The tests set labels, format and dpi, which are not declared here, so they presumably come from these two macros (TODO confirm in macros.xml). -->
152 <when value="dimred">
153 <expand macro="plots_common_param">
154 <expand macro="plots_common_input"/>
<!-- dimred additionally needs the embeddings table, the labels column used for colouring, and the projection method. -->
155 <param argument="--embeddings" type="data" format="tabular,csv" label="Embeddings" help="Generated by flexynesis"/>
156 <param argument="--label" type="data_column" data_ref="labels" label="Column in the labels file to use for coloring the points in the plot"/>
157 <param name="method" type="select" label="Transformation method">
158 <option value="pca" selected="true">PCA</option>
159 <option value="umap">UMAP</option>
160 </param>
161 </expand>
162 </when>
163 <when value="scatter">
164 <expand macro="plots_common_param">
165 <expand macro="plots_common_input"/>
<!-- Both columns are picked from the labels dataset (data_ref="labels"). -->
166 <param name="true_label" type="data_column" data_ref="labels" label="Column name in the labels file to use for the true labels"/>
167 <param name="predicted_label" type="data_column" data_ref="labels" label="Column name in the labels file to use for the predicted labels"/>
168 </expand>
169 </when>
170 <when value="concordance_heatmap">
171 <expand macro="plots_common_param">
172 <expand macro="plots_common_input"/>
<!-- Same two column selectors as the scatter branch. -->
173 <param name="true_label" type="data_column" data_ref="labels" label="Column name in the labels file to use for the true labels"/>
174 <param name="predicted_label" type="data_column" data_ref="labels" label="Column name in the labels file to use for the predicted labels"/>
175 </expand>
176 </when>
<!-- pr_curve, roc_curve and box_plot declare no parameters beyond the shared macros. -->
177 <when value="pr_curve">
178 <expand macro="plots_common_param">
179 <expand macro="plots_common_input"/>
180 </expand>
181 </when>
182 <when value="roc_curve">
183 <expand macro="plots_common_param">
184 <expand macro="plots_common_input"/>
185 </expand>
186 </when>
187 <when value="box_plot">
188 <expand macro="plots_common_param">
189 <expand macro="plots_common_input"/>
190 </expand>
191 </when>
192 </conditional>
193 </inputs>
194 <outputs>
<!-- Single image for every plot type except box_plot; it is taken from the plots/ working directory and its datatype is detected automatically. -->
195 <data name="plot_out" auto_format="true" from_work_dir="plots/*" label="${tool.name} on ${on_string}: ${plot_conditional.plot_type}">
196 <filter>plot_conditional['plot_type'] != "box_plot"</filter>
197 </data>
<!-- box_plot writes several images; every file found in plots/ becomes one list element (the tests expect element names equal to the file names without extension). -->
198 <collection name="boxplot_out" type="list" label="${tool.name} on ${on_string}: box_plot">
199 <discover_datasets pattern="__name_and_ext__" directory="plots/"/>
200 <filter>plot_conditional['plot_type'] == "box_plot"</filter>
201 </collection>
202 </outputs>
203 <tests>
204 <!-- test 1: dimred. PCA of embeddings.csv coloured by column 6 of labels.csv, saved as a 300 dpi jpg; only image geometry (size, channels, centre of mass) is asserted, within the stated tolerances -->
205 <test expect_num_outputs="1">
206 <param name="non_commercial_use" value="True"/>
207 <conditional name="plot_conditional">
208 <param name="plot_type" value="dimred"/>
209 <param name="embeddings" value="embeddings.csv"/>
210 <param name="label" value="6"/>
211 <param name="method" value="pca"/>
212 <param name="labels" value="labels.csv"/>
213 <param name="format" value="jpg"/>
214 <param name="dpi" value="300"/>
215 </conditional>
216 <output name="plot_out">
217 <assert_contents>
218 <has_image_center_of_mass center_of_mass="970,733" eps="50"/>
219 <has_image_channels channels="3"/>
220 <has_image_height height="1461" delta="50"/>
221 <has_image_width width="1941" delta="50"/>
222 </assert_contents>
223 </output>
224 </test>
225 <!-- test 2: scatter. Columns 5 (true) and 6 (predicted) of labels_scatter.csv, saved as a 300 dpi jpg; only image geometry is asserted -->
226 <test expect_num_outputs="1">
227 <param name="non_commercial_use" value="True"/>
228 <conditional name="plot_conditional">
229 <param name="plot_type" value="scatter"/>
230 <param name="labels" value="labels_scatter.csv"/>
231 <param name="true_label" value="5"/>
232 <param name="predicted_label" value="6"/>
233 <param name="format" value="jpg"/>
234 <param name="dpi" value="300"/>
235 </conditional>
236 <output name="plot_out">
237 <assert_contents>
238 <has_image_center_of_mass center_of_mass="970,733" eps="50"/>
239 <has_image_channels channels="3"/>
240 <has_image_height height="1461" delta="50"/>
241 <has_image_width width="1941" delta="50"/>
242 </assert_contents>
243 </output>
244 </test>
245 <!-- test 3: concordance_heatmap. Columns 5 (true) and 6 (predicted) of labels.csv, saved as a 300 dpi jpg; the expected image is larger than in the other single-plot tests (2770 x 2558) -->
246 <test expect_num_outputs="1">
247 <param name="non_commercial_use" value="True"/>
248 <conditional name="plot_conditional">
249 <param name="plot_type" value="concordance_heatmap"/>
250 <param name="labels" value="labels.csv"/>
251 <param name="true_label" value="5"/>
252 <param name="predicted_label" value="6"/>
253 <param name="format" value="jpg"/>
254 <param name="dpi" value="300"/>
255 </conditional>
256 <output name="plot_out">
257 <assert_contents>
258 <has_image_center_of_mass center_of_mass="1450,1310" eps="50"/>
259 <has_image_channels channels="3"/>
260 <has_image_height height="2558" delta="50"/>
261 <has_image_width width="2770" delta="50"/>
262 </assert_contents>
263 </output>
264 </test>
265 <!-- test 4: pr_curve. Precision-recall curves from labels_pr.csv, saved as a 300 dpi jpg; no column parameters are needed, only image geometry is asserted -->
266 <test expect_num_outputs="1">
267 <param name="non_commercial_use" value="True"/>
268 <conditional name="plot_conditional">
269 <param name="plot_type" value="pr_curve"/>
270 <param name="labels" value="labels_pr.csv"/>
271 <param name="format" value="jpg"/>
272 <param name="dpi" value="300"/>
273 </conditional>
274 <output name="plot_out">
275 <assert_contents>
276 <has_image_center_of_mass center_of_mass="970,733" eps="50"/>
277 <has_image_channels channels="3"/>
278 <has_image_height height="1461" delta="50"/>
279 <has_image_width width="1941" delta="50"/>
280 </assert_contents>
281 </output>
282 </test>
283 <!-- test 5: roc_curve. ROC curves from the same labels_pr.csv used by the pr_curve test, saved as a 300 dpi jpg; only image geometry is asserted -->
284 <test expect_num_outputs="1">
285 <param name="non_commercial_use" value="True"/>
286 <conditional name="plot_conditional">
287 <param name="plot_type" value="roc_curve"/>
288 <param name="labels" value="labels_pr.csv"/>
289 <param name="format" value="jpg"/>
290 <param name="dpi" value="300"/>
291 </conditional>
292 <output name="plot_out">
293 <assert_contents>
294 <has_image_center_of_mass center_of_mass="970,733" eps="50"/>
295 <has_image_channels channels="3"/>
296 <has_image_height height="1461" delta="50"/>
297 <has_image_width width="1941" delta="50"/>
298 </assert_contents>
299 </output>
300 </test>
301 <!-- test 6: box_plot. labels_pr.csv must produce a list collection of exactly 7 images, one per class; element names follow the box_plot_<class> file names written by the script, and only image geometry is asserted -->
302 <test expect_num_outputs="1">
303 <param name="non_commercial_use" value="True"/>
304 <conditional name="plot_conditional">
305 <param name="plot_type" value="box_plot"/>
306 <param name="labels" value="labels_pr.csv"/>
307 <param name="format" value="jpg"/>
308 <param name="dpi" value="300"/>
309 </conditional>
310 <output_collection name="boxplot_out" type="list" count="7">
311 <element name="box_plot_Basal">
312 <assert_contents>
313 <has_image_center_of_mass center_of_mass="1485,882" eps="20"/>
314 <has_image_channels channels="3"/>
315 <has_image_height height="1783" delta="20"/>
316 <has_image_width width="2967" delta="20"/>
317 </assert_contents>
318 </element>
<!-- Her2 is the only element with a different expected height (1765 instead of 1783). -->
319 <element name="box_plot_Her2">
320 <assert_contents>
321 <has_image_center_of_mass center_of_mass="1485,882" eps="20"/>
322 <has_image_channels channels="3"/>
323 <has_image_height height="1765" delta="20"/>
324 <has_image_width width="2967" delta="20"/>
325 </assert_contents>
326 </element>
327 <element name="box_plot_LumA">
328 <assert_contents>
329 <has_image_center_of_mass center_of_mass="1485,882" eps="20"/>
330 <has_image_channels channels="3"/>
331 <has_image_height height="1783" delta="20"/>
332 <has_image_width width="2967" delta="20"/>
333 </assert_contents>
334 </element>
335 <element name="box_plot_LumB">
336 <assert_contents>
337 <has_image_center_of_mass center_of_mass="1485,882" eps="20"/>
338 <has_image_channels channels="3"/>
339 <has_image_height height="1783" delta="20"/>
340 <has_image_width width="2967" delta="20"/>
341 </assert_contents>
342 </element>
343 <element name="box_plot_NC">
344 <assert_contents>
345 <has_image_center_of_mass center_of_mass="1485,882" eps="20"/>
346 <has_image_channels channels="3"/>
347 <has_image_height height="1783" delta="20"/>
348 <has_image_width width="2967" delta="20"/>
349 </assert_contents>
350 </element>
351 <element name="box_plot_Normal">
352 <assert_contents>
353 <has_image_center_of_mass center_of_mass="1485,882" eps="20"/>
354 <has_image_channels channels="3"/>
355 <has_image_height height="1783" delta="20"/>
356 <has_image_width width="2967" delta="20"/>
357 </assert_contents>
358 </element>
359 <element name="box_plot_claudin-low">
360 <assert_contents>
361 <has_image_center_of_mass center_of_mass="1485,882" eps="20"/>
362 <has_image_channels channels="3"/>
363 <has_image_height height="1783" delta="20"/>
364 <has_image_width width="2967" delta="20"/>
365 </assert_contents>
366 </element>
367 </output_collection>
368 </test>
369 </tests>
370 <help><![CDATA[
371 @COMMON_HELP@
372
373 Flexynesis plot is a comprehensive visualization tool designed to create various types of plots for analyzing machine learning results from the Flexynesis framework. This tool supports multiple visualization types to help researchers understand their data and model performance.
374
375 Available plot types include:
376
377 - **Dimensionality Reduction:** Visualizes high-dimensional data in a lower-dimensional space using methods like PCA or UMAP.
378 - **Scatter Plot:** Compares known and predicted labels to assess model performance.
379 - **Label Concordance Heatmap:** Displays the agreement between true and predicted labels in a heatmap format.
380 - **Precision-Recall Curves:** Plots precision against recall to evaluate the trade-off between these metrics.
381 - **ROC Curves:** Visualizes the true positive rate against the false positive rate to assess model discrimination.
382 - **Box Plot:** Shows the distribution of predicted probabilities across different classes, highlighting medians and quartiles.
383
384
385 **Input Files**
386
387 The main input, common to all plot types, is the labels file, i.e. the predicted-labels file generated by Flexynesis. It should contain the following columns:
388
389 - `sample_id`: Unique identifier for each sample.
390 - `variable`: The target variable used for the analysis.
391 - `class_label`: The class labels for the samples, used in box plots (one plot is produced per class).
392 - `probability`: The predicted probabilities for the labels, shown on the y-axis of box plots.
393 - `known_label`: The true labels for the samples, used in scatter plots and concordance heatmaps, and as the grouping on the x-axis of box plots.
394 - `predicted_label`: The labels predicted by the Flexynesis model, used in scatter plots and concordance heatmaps.
395
396
397 For dimensionality reduction plots, an additional embeddings file is required, which contains the reduced-dimensional representations of the samples. This file is generated by Flexynesis.
398
399
400 .. class:: warningmark
401
402 PR and ROC curves can only be applied to classification tasks!
403
404
405 .. _Documentation: https://bimsbstatic.mdc-berlin.de/akalin/buyar/flexynesis/site/
406 .. _copyright holders: https://github.com/BIMSBbioinfo/flexynesis
407 ]]></help>
408 <expand macro="creator"/>
409 <expand macro="citations"/>
410 </tool>