comparison dimet_bivariate_analysis.xml @ 0:eeb106c7bd04 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit 3dba8748fbc8cc8e89ffc08e5febe0a0527a96a5
author iuc
date Fri, 21 Jun 2024 18:43:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:eeb106c7bd04
1 <tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
2 <description>
3 Computation of the correlation of MDV profiles, or the metabolite time course profiles (by DIMet)
4 </description>
5 <!-- Per-tool tokens: TOOL_LABEL is substituted into the tool name and EXECUTABLE into the tool id (and the help text); the remaining shared macros and tokens are imported from macros.xml. --> <macros>
6 <token name="@TOOL_LABEL@">bivariate analysis</token>
7 <token name="@EXECUTABLE@">bivariate_analysis</token>
8 <import>macros.xml</import>
9 </macros>
10 <expand macro="requirements"/>
11 <!-- Cheetah command template. Runs "python -m dimet" with Hydra "++key=value" overrides, with hydra.run.dir set to the current directory and outputs directed to the "figures" and "tables" paths. The INIT_* and REMOVE_CONFIG tokens are macro placeholders whose expansions are not visible in this file (presumably defined in macros.xml; verify there), as are the variables conditions, impute_values and the *_comparison / *_time_profiles values. The metadata override is emitted only when a metadata file is supplied, and at most one dataset key is set according to data_input_selector: abundances, mean_enrichment, isotopologue_proportions, or, for any other selector value, isotopologues. --> <command detect_errors="exit_code"><![CDATA[
12 @INIT_CONFIG@
13 @INIT_BI_ANALYSIS@
14 @INIT_BIVAR_COMPARISONS@
15 HYDRA_FULL_ERROR=1 python -m dimet
16 '++hydra.run.dir=.'
17 '++figure_path=figures'
18 '++table_path=tables'
19 '++analysis={
20 dataset:{
21 _target_: dimet.data.DatasetConfig,
22 name: "I am a synthetic data example"
23 },
24 method:{
25 _target_: dimet.method.BivariateAnalysisConfig,
26 label: "bivariate analysis",
27 name: "Computation of the correlation of MDV profiles, or the metabolite time course profiles"
28 },
29 label: bivariate-analysis-example2
30 }'
31 '++analysis.conditions=${conditions}'
32 '++analysis.dataset.label='
33 '++analysis.method.correction_method=${correction_method}'
34 '++analysis.method.impute_values=${impute_values}'
35 '++analysis.method.conditions_metabolite_time_profiles=${conditions_metabolite_time_profiles}'
36 '++analysis.method.timepoints_MDV_comparison=${timepoints_MDV_comparison}'
37 '++analysis.method.conditions_MDV_comparison=${conditions_MDV_comparison}'
38 '++analysis.method.output_include_gmean_arr_columns=true'
39 '++analysis.dataset.subfolder='
40 '++analysis.dataset.conditions=${conditions}'
41 #if $metadata_path:
42 '++analysis.dataset.metadata=metadata'
43 #end if
44 #if str( $data_input.data_input_selector ) == "abundance":
45 #if $data_input.abundance_file:
46 '++analysis.dataset.abundances=abundance'
47 #end if
48 #elif str( $data_input.data_input_selector ) == "mean_enrichment":
49 #if $data_input.me_or_frac_contrib_file:
50 '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
51 #end if
52 #elif str( $data_input.data_input_selector ) == "isotop_prop":
53 #if $data_input.isotop_prop_file:
54 '++analysis.dataset.isotopologue_proportions=isotop_prop'
55 #end if
56 #else
57 #if $data_input.isotop_abs_file:
58 '++analysis.dataset.isotopologues=isotop_abs'
59 #end if
60 #end if
61 @REMOVE_CONFIG@
62 ]]></command>
63 <!-- All input parameters are produced by macro expansion; their definitions are not in this file (presumably macros.xml; verify there). --> <inputs>
64 <expand macro="input_parameters_bivar_analysis"/>
65 <expand macro="plot_factor_list"/>
66 <expand macro="correction_method"/>
67 </inputs>
68
69 <!-- Single output: a list collection named "report" that gathers every file discovered in the "tables" directory as tabular; with the __designation__ pattern the whole file name (extension included) becomes the element identifier. --> <outputs>
70 <collection name="report" type="list">
71 <discover_datasets pattern="__designation__" directory="tables" format="tabular"/>
72 </collection>
73 </outputs>
74 <!-- Single functional test: isotopologue proportions plus a metadata file as input, fdr_bh correction and two conditions (Control, L-Cycloserine); the "report" collection must contain exactly 8 tabular elements matching the listed reference files. --> <tests>
75 <test>
76 <param name="data_input_selector" value="isotop_prop" />
77 <param name="isotop_prop_file" ftype="tabular" value="CorrectedIsotopologues_5.csv"/>
78 <param name="metadata_path" ftype="tabular" value="example5_metadata.csv"/>
79 <param name="correction_method" value="fdr_bh"/>
80 <repeat name="plot_factor_list">
81 <param name="condition" value="Control"/>
82 </repeat>
83 <repeat name="plot_factor_list">
84 <param name="condition" value="L-Cycloserine"/>
85 </repeat>
86 <output_collection name="report" type="list" count="8">
87 <element file="isotop_prop--cell--MDV-Control-L-Cycloserine--T0-pearson.tsv" name="isotop_prop--cell--MDV-Control-L-Cycloserine--T0-pearson.tsv" ftype="tabular"/>
88 <element file="isotop_prop--cell--MDV-Control-L-Cycloserine--T2h-pearson.tsv" name="isotop_prop--cell--MDV-Control-L-Cycloserine--T2h-pearson.tsv" ftype="tabular"/>
89 <element file="isotop_prop--cell--MDV-T2h-T0--Control-pearson.tsv" name="isotop_prop--cell--MDV-T2h-T0--Control-pearson.tsv" ftype="tabular"/>
90 <element file="isotop_prop--cell--MDV-T2h-T0--L-Cycloserine-pearson.tsv" name="isotop_prop--cell--MDV-T2h-T0--L-Cycloserine-pearson.tsv" ftype="tabular"/>
91 <element file="isotop_prop--med--MDV-Control-L-Cycloserine--T0-pearson.tsv" name="isotop_prop--med--MDV-Control-L-Cycloserine--T0-pearson.tsv" ftype="tabular"/>
92 <element file="isotop_prop--med--MDV-Control-L-Cycloserine--T2h-pearson.tsv" name="isotop_prop--med--MDV-Control-L-Cycloserine--T2h-pearson.tsv" ftype="tabular"/>
93 <element file="isotop_prop--med--MDV-T2h-T0--Control-pearson.tsv" name="isotop_prop--med--MDV-T2h-T0--Control-pearson.tsv" ftype="tabular"/>
94 <element file="isotop_prop--med--MDV-T2h-T0--L-Cycloserine-pearson.tsv" name="isotop_prop--med--MDV-T2h-T0--L-Cycloserine-pearson.tsv" ftype="tabular"/>
95 </output_collection>
96 </test>
97 </tests>
98 <help><![CDATA[
99
100 This module is part of DIMet: Computation of the correlation of entire MDV profiles, or the metabolite time course profiles (https://pypi.org/project/DIMet/).
101
102 DIMet bi-variate analysis performs the comparison of entire MDV profiles, with the user provided isotopologue proportions data. Moreover, when total abundances and/or mean enrichment are provided, the comparison of the metabolite time-course profiles is also computed. Specifically, three types of bi-variate comparisons are performed automatically:
103
104
105 - MDV profile comparison between two conditions
106
107 - MDV profile comparison between two consecutive time-points
108
109 - Metabolite (total abundances and/or mean enrichment) time course profiles comparison between two conditions
110
111
112 For all three types of bi-variate comparison, the statistical test applied is Pearson's correlation test.
113
114
115 Note that the MDVs (Mass Distribution Vectors) are obtained automatically by the tool from the isotopologue proportions.
116
117
118
119 **Input data files**
120
121 This tool accepts up to 4 tab-delimited .csv files as inputs. There are two types of files:
122
123 - The measures' (or quantifications') files, that can be of 3 types.
124
125 - The metadata, a unique file with the description of the samples in your measures' files. This is compulsory.
126
127 For running DIMet @EXECUTABLE@ you need **at least one file** of measures:
128
129 - The **isotopologue proportions** file
130
131 - The total **abundances** (of the metabolites) file
132
133 - The mean **enrichment** or labelled fractional contributions
134
135
136 and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.
137
138
139 **Measures files**
140
141 The measures files must be organized as matrices:
142
143 - The first column must contain Metabolite IDs that are unique (not repeated) within the file.
144
145 - The rest of the columns correspond to the samples
146
147 - The rows correspond to the metabolites
148
149 - The values must be tab separated, with the first row containing the sample/column labels.
150
151 See the following examples of measures files:
152
153
154 Example - Metabolites **abundances**:
155
156 =============== ================== ================== ================== ================== ================== ==================
157 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
158 =============== ================== ================== ================== ================== ================== ==================
159 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956
160 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051
161 Glc6P 2310 2142 2683 1683 012532068 1252172
162 Gly3P 399298 991656565 525195 6365231 89451625 4952651963
163 IsoCit 0 0 0 84915613 856236 954651610
164 =============== ================== ================== ================== ================== ================== ==================
165
166 Example - mean **enrichment** or labeled fractional contributions:
167
168 =============== ================== ================== ================== ================== ================== ==================
169 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
170 =============== ================== ================== ================== ================== ================== ==================
171 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9
172 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68
173 Glc6P 0.06 0.66 2683 0.06 2068 2172
174 Gly3P 0.06 0.06 0.06 1 5 3
175 IsoCit 0.06 1 0.49 0.36 6 10
176 =============== ================== ================== ================== ================== ================== ==================
177
178
179
180 Example - **Isotopologue proportions**:
181
182 =============== ================== ================== ================== ================== ================== ==================
183 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
184 =============== ================== ================== ================== ================== ================== ==================
185 2_3-PG_m+0 0.023701408 0.026667837 0.003395407 0.05955 0.034383527 0.12
186 2_3-PG_m+1 0.0 0.0 0.0 0.0 0.4 0.12
187 2_3-PG_m+2 0.015379329 0.01506 0.017029723 0.35483229 0.54131313 0.743
188 2_3-PG_m+3 0.960919263 0.958268099 0.97957487 0.581310816 0.017029723 0.017
189 2-OHGLu_m+0 0.972778716 0.960016157 0.238843937 0.234383527 0.9998888 0.015064063
190 2-OHGLu_m+1 0.0 0.0 0.0 0.0 0.0001112 0.960919263
191 =============== ================== ================== ================== ================== ================== ==================
192
193
194
195 **Metadata File Information**
196
197 Provide a tab-separated file that has the names of the samples in the first column and one header row.
198 Column names must be exactly in this order:
199
200 name_to_plot
201 condition
202 timepoint
203 timenum
204 compartment
205 original_name
206
207
208 Example **Metadata File**:
209
210
211 ==================== =============== ============= ============ ================ =================
212 **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name**
213 ==================== =============== ============= ============ ================ =================
214 Control_cell_T0-1 Control T0 0 cell MCF001089_TD01
215 Control_cell_T0-2 Control T0 0 cell MCF001089_TD02
216 Control_cell_T0-3 Control T0 0 cell MCF001089_TD03
217 Tumoral_cell_T0-1 Tumoral T0 0 cell MCF001089_TD04
218 Tumoral_cell_T0-2 Tumoral T0 0 cell MCF001089_TD05
219 Tumoral_cell_T0-3 Tumoral T0 0 cell MCF001089_TD06
220 Tumoral_cell_T24-1 Tumoral T24 24 cell MCF001089_TD07
221 Tumoral_cell_T24-2 Tumoral T24 24 cell MCF001089_TD08
222 Tumoral_cell_T24-3 Tumoral T24 24 cell MCF001090_TD01
223 Control_med_T24-1 Control T24 24 med MCF001090_TD02
224 Control_med_T24-2 Control T24 24 med MCF001090_TD03
225 Tumoral_med_T24-1 Tumoral T24 24 med MCF001090_TD04
226 Tumoral_med_T24-2 Tumoral T24 24 med MCF001090_TD05
227 Control_med_T0-1 Control T0 0 med MCF001090_TD06
228 Tumoral_med_T0-1 Tumoral T0 0 med MCF001090_TD07
229 Tumoral_med_T0-2 Tumoral T0 0 med MCF001090_TD08
230 ==================== =============== ============= ============ ================ =================
231
232
233 The column **original_name** must have the names of the samples as given in your data.
234
235 The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). Setting
236 meaningful names is the better choice, as they are used to display the results.
237
238 The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (that is, without letters such as "T", "t", "s", "h",
239 or any other symbol). Make sure these time numbers are in the same units (but do not write the units here!).
240
241 The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the
242 compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.
243
244
245 **Running the analysis**
246
247 You can specify how you want your analysis to be executed, with the following parameters:
248
249 - **datatypes** : the measures type(s) that you want to run.
250
251 - **conditions**: the two conditions to be compared in the bi-variate analysis. If 3 or more conditions are set, the tool automatically performs all the 1-to-1 condition comparisons. If you only have one condition in your data, select the condition, and see the Note at the end of this section.
252
253 - **correction_method** : one of the methods for multiple testing correction available in statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html).
254
255
256 **Note**: the tool automatically performs the comparison of *MDV profile between two consecutive time-points*. All the time-points are processed.
257
258
259 Hints displayed next to the parameters will guide you through their use.
260
261 For more information about the implemented statistical tests, please visit: https://github.com/cbib/DIMet/wiki/2-Statistical-tests
262
263 The output files are explained in https://github.com/cbib/DIMet/wiki/3-Output
264
265 **Available data for testing**
266
267 You can test our tool with the data from our manuscript https://zenodo.org/records/10579862 (the pertinent
268 files for you are located in the subfolders inside the data folder).
269 You can also use the minimal data examples from https://zenodo.org/records/10579891
270
271 ]]>
272 </help>
273 <expand macro="citations" />
274 </tool>