changeset 5:fba892edb9d9 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
author recetox
date Wed, 23 Mar 2022 11:35:00 +0000
parents 8b55efc7d117
children cf3f2f964d80
files macros.xml test-data/normalized_data_nobatch.tsv waveica.xml waveica_macros.xml
diffstat 4 files changed, 176 insertions(+), 159 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Wed Mar 23 11:35:00 2022 +0000
@@ -0,0 +1,127 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.2.0</token>
+
+    <xml name="creator"><!-- Attribution metadata; pulled into the tool with <expand macro="creator"/>. -->
+        <creator>
+            <person
+                givenName="Maksym"
+                familyName="Skoryk"
+                url="https://github.com/maximskorik"
+                identifier="https://orcid.org/0000-0003-2056-8018" /><!-- ORCID given as a resolvable URI rather than a bare number -->
+            <organization
+                url="https://www.recetox.muni.cz/"
+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+                name="RECETOX MUNI" />
+        </creator>
+    </xml>
+
+    <xml name="general_parameters"><!-- Inputs expanded at the top of the tool's <inputs>, outside the batch-correction conditional. -->
+        <param type="data" name="data" label="Feature table" format="csv" help="intensity-by-feature table with sample metadata columns; the expected layout is described in the tool help"/>
+        <param type="integer" value="20" name="k" label="Number of components to decompose" help="maximal component that ICA decomposes"/>
+        <param type="float" value="0" min="0" max="1" name="alpha" label="Alpha" help="trade-off value between the independence of samples (temporal ICA) and variables (spatial ICA), should be between 0 and 1"/><!-- min/max enforce the range stated in the help text -->
+    </xml>
+    <xml name="batchwise_parameters"><!-- Thresholds expanded under the "batchwise" branch of the batch_correction conditional. -->
+        <param type="float" value="0.05" min="0" max="1" name="t" label="Batch-association threshold"
+               help="threshold to consider a component associated with the batch, should be between 0 and 1"/>
+        <param type="float" value="0.05" min="0" max="1" name="t2" label="Group-association threshold"
+               help="threshold to consider a component associated with the group, should be between 0 and 1"/>
+    </xml>
+    <xml name="singlebatch_parameters"><!-- Expanded under the "single_batch" branch of the batch_correction conditional. -->
+        <param type="float" value="0" min="0" max="1" name="cutoff" label="Cutoff" help="threshold of the variation explained by the injection order for independent components, should be between 0 and 1"/><!-- min/max enforce the range stated in the help text -->
+    </xml>
+    <xml name="exclude_blanks"><!-- Boolean switch; its value is the literal TRUE or FALSE given by truevalue/falsevalue. -->
+        <param type="boolean" name="exclude_blanks" label="Remove blanks" help="Excludes blank samples from the output" checked="false" truevalue="TRUE" falsevalue="FALSE"/>
+    </xml>
+    <xml name="wf"><!-- Wavelet filter selection: each filter family offers only its own list of filter lengths. -->
+        <conditional name="wf">
+            <param type="select" name="wavelet_filter" label="Wavelet transform filter" help="wavelet function and filter length [1] (see footnotes for more details)">
+                <option value="d" selected="True">Daubechies</option>
+                <option value="la">Least Asymmetric</option>
+                <option value="bl">Best Localized</option>
+                <option value="c">Coiflet</option>
+            </param>
+            <when value="d">
+                <param name="wavelet_length" type="select" label="filter length">
+                    <option value="2" selected="True">2</option>
+                    <option value="4">4</option>
+                    <option value="6">6</option>
+                    <option value="8">8</option>
+                    <option value="10">10</option>
+                    <option value="12">12</option>
+                    <option value="14">14</option>
+                    <option value="16">16</option>
+                    <option value="18">18</option>
+                    <option value="20">20</option>
+                </param>
+            </when>
+            <when value="la">
+                <param name="wavelet_length" type="select" label="filter length">
+                    <option value="8">8</option>
+                    <option value="10">10</option>
+                    <option value="12">12</option>
+                    <option value="14">14</option>
+                    <option value="16">16</option>
+                    <option value="18">18</option>
+                    <option value="20">20</option>
+                </param>
+            </when>
+            <when value="bl">
+                <param name="wavelet_length" type="select" label="filter length">
+                    <option value="14">14</option>
+                    <option value="18">18</option>
+                    <option value="20">20</option>
+                </param>
+            </when>
+            <when value="c">
+                <param name="wavelet_length" type="select" label="filter length">
+                    <option value="6">6</option>
+                    <option value="12">12</option>
+                    <option value="18">18</option>
+                    <option value="24">24</option>
+                    <option value="30">30</option>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+    <xml name="outputs"><!-- Declares the tool's single output dataset; the tests refer to it by the name "normalized_data". -->
+        <outputs>
+            <data name="normalized_data" format="tsv" />
+        </outputs>
+    </xml>
+
+    <token name="@HELP@"><![CDATA[
+        **Description**
+
+        Removal of batch effects for large-scale untargeted metabolomics data based on wavelet analysis and independent
+        component analysis. The WaveICA method uses the time trend of samples over the injection order, decomposes the
+        original data into new multi-scale features, extracts and removes the batch effect resulting in normalized
+        intensities across samples.
+
+        The input is an intensity-by-feature table with metadata in the following format:
+
+        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+
+        | sampleName    | class  | sampleType | injectionOrder | batch | M85T34     | M86T41       | M86T518     | M86T539     | ... |
+        +===============+========+============+================+=======+============+==============+=============+=============+=====+
+        | VT_160120_002 | sample | sample     | 1              | 1     | 228520.064 | 35646729.215 | 2386896.979 | 1026645.836 | ... |
+        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+
+        | QC1           | sample | QC         | 2              | 1     | 90217.384  | 35735702.457 | 2456290.696 | 1089246.460 | ... |
+        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+
+        | ...           | ...    | ...        | ...            | ...   | ...        | ...          | ...         | ...         | ... |
+        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+
+
+
+        + The required columns are **sampleName**, **class**, **sampleType**, **injectionOrder**, and the **features** that you want to normalize.
+        + The **batch** column is required if batch correction mode is **Multiple batches** and optional otherwise.
+        + The presence of any additional columns (except features) will result in incorrect batch correction or job failure.
+        + The input table must not contain missing values. Missing intensities must be filled with 0.
+        + **sampleType** column accepts three possible values: [QC, sample, blank] (case insensitive).
+        + **class** column is used to denote a biological group of a sample (e.g., positive/negative species). The column accepts any values.
+        + The **output** is the same table with corrected feature intensities.
+
+        .. rubric:: **Footnotes**
+        .. [1] for details on wavelet-filter parameters refer to R `wavelets::wt.filter <https://www.rdocumentation.org/packages/wavelets/versions/0.3-0.2/topics/wt.filter>`_;
+        .. [2] when using 'Multiple batches', please cite the WaveICA (2019) paper; else, cite WaveICA 2.0 (2021) paper;
+    ]]>
+    </token>
+</macros>
--- a/test-data/normalized_data_nobatch.tsv	Mon Jan 10 16:01:35 2022 +0000
+++ b/test-data/normalized_data_nobatch.tsv	Wed Mar 23 11:35:00 2022 +0000
@@ -1,6 +1,6 @@
 sampleName	class	sampleType	injectionOrder	M85T34	M86T41	M86T518	M86T539
-VT_160120_002	sample	sample	1	-9795801.68327503	29546678.5668331	-6207890.55898611	-8941748.93596051
-VT_160120_004	sample	sample	2	-9798910.7423992	29543569.507709	-6210999.61811028	-8944857.99508468
-VT_160120_006	sample	sample	3	-9797307.93142165	29545172.3186865	-6209396.80713273	-8943255.18410713
-VT_160120_008	sample	sample	4	-9793706.69204905	29548773.5580591	-6205795.56776013	-8939653.94473453
-VT_160120_010	sample	sample	5	-9800711.45464488	29541768.7954633	-6212800.33035596	-8946658.70733036
+VT_160120_002	sample	sample	1	-9795801.68327296	29546678.5668352	-6207890.55898405	-8941748.93595845
+VT_160120_004	sample	sample	2	-9798910.74239713	29543569.5077111	-6210999.61810821	-8944857.99508262
+VT_160120_006	sample	sample	3	-9797307.93141959	29545172.3186886	-6209396.80713068	-8943255.18410509
+VT_160120_008	sample	sample	4	-9793706.69204694	29548773.5580612	-6205795.56775803	-8939653.94473244
+VT_160120_010	sample	sample	5	-9800711.45464277	29541768.7954654	-6212800.33035386	-8946658.70732827
--- a/waveica.xml	Mon Jan 10 16:01:35 2022 +0000
+++ b/waveica.xml	Wed Mar 23 11:35:00 2022 +0000
@@ -1,12 +1,12 @@
-<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy0" python_template_version="3.5">
+<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy1" python_template_version="3.5">
     <description>removal of batch effects for untargeted metabolomics data</description>
     <macros>
-        <import>waveica_macros.xml</import>
+        <import>macros.xml</import>
     </macros>
-    <expand macro="creator" />
-    
+    <expand macro="creator"/>
+
     <requirements>
-            <requirement type="package" version="@TOOL_VERSION@">r-recetox-waveica</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">r-recetox-waveica</requirement>
     </requirements>
     <command detect_errors="aggressive"><![CDATA[
         Rscript
@@ -39,92 +39,67 @@
     ]]></command>
 
     <inputs>
-        <expand macro="general_parameters" />
-        <expand macro="wf" />
+        <expand macro="general_parameters"/>
+        <expand macro="wf"/>
         <conditional name="batch_correction">
-            <param name="mode" type="select" label="Batch correction mode" help="'multiple batches' takes into account inter- and intrabatch intensity drift; 'single batch' relies only on the injection order of the samples and requires no batch information [2]">
+            <param name="mode" type="select" label="Batch correction mode" help="'multiple batches' takes into account
+            inter- and intrabatch intensity drift; 'single batch' relies only on the injection order of the samples and
+            requires no batch information [2]">
                 <option value="batchwise" selected="true">Multiple batches</option>
                 <option value="single_batch">Single batch (or no batch information)</option>
             </param>
             <when value="batchwise">
-                <expand macro="batchwise_parameters" />
-            </when> 
+                <expand macro="batchwise_parameters"/>
+            </when>
             <when value="single_batch">
-                <expand macro="singlebatch_parameters" />
+                <expand macro="singlebatch_parameters"/>
             </when>
         </conditional>
-        <expand macro="exclude_blanks" />
+        <expand macro="exclude_blanks"/>
     </inputs>
 
-    <expand macro="outputs" />
+    <expand macro="outputs"/>
 
     <tests>
         <test>
-            <param name="data" value="input_data.csv" ftype="csv" />
-            <param name="mode" value="batchwise" />
-            <param name="wavelet_filter" value="d" />
-            <param name="filter_length" value="2" />
-            <param name="k" value="20" />
-            <param name="t" value="0.05" />
-            <param name="t2" value="0.05" />
-            <param name="alpha" value="0" />
-            <output name="normalized_data" file="normalized_data.tsv" /> 
+            <param name="data" value="input_data.csv" ftype="csv"/>
+            <param name="mode" value="batchwise"/>
+            <param name="wavelet_filter" value="d"/>
+            <param name="wavelet_length" value="2"/><!-- name must match the select defined inside the "wf" conditional -->
+            <param name="k" value="20"/>
+            <param name="t" value="0.05"/>
+            <param name="t2" value="0.05"/>
+            <param name="alpha" value="0"/>
+            <output name="normalized_data" file="normalized_data.tsv"/>
         </test>
-        <test>
-            <param name="data" value="input_data_nobatch.csv" ftype="csv" />
-            <param name="mode" value="single_batch" />
-            <param name="wavelet_filter" value="d" />
-            <param name="filter_length" value="2" />
-            <param name="k" value="20" />
-            <param name="alpha" value="0" />
-            <param name="cutoff" value="0" />
-            <output name="normalized_data" file="normalized_data_nobatch.tsv" /> 
+        <!-- The following test is disabled because it produces different results on each of the three platforms I've tried -->
+        <!-- <test>
+            <param name="data" value="input_data_nobatch.csv" ftype="csv"/>
+            <param name="mode" value="single_batch"/>
+            <param name="wavelet_filter" value="d"/>
+            <param name="wavelet_length" value="2"/>
+            <param name="k" value="20"/>
+            <param name="alpha" value="0"/>
+            <param name="cutoff" value="0"/>
+            <output name="normalized_data" file="normalized_data_nobatch.tsv"/>
+        </test> -->
+        <test expect_failure="true">
+            <param name="data" value="na_data.csv" ftype="csv"/>
         </test>
         <test expect_failure="true">
-            <param name="data" value="na_data.csv" ftype="csv" />
-        </test>
-        <test expect_failure="true">
-            <param name="data" value="incomplete_metadata_data.csv" ftype="csv" />
+            <param name="data" value="incomplete_metadata_data.csv" ftype="csv"/>
         </test>
     </tests>
 
-    <help><![CDATA[
-        **Description**
-
-        Removal of batch effects for large-scale untargeted metabolomics data based on wavelet analysis and independent component analysis. 
-        The WaveICA method uses the time trend of samples over the injection order, decomposes the original data into new multi-scale features, extracts and removes the batch effect resulting in normalized intensities across samples.
-        
-        The input is an intensity-by-feature table with metadata in the following format: 
-
-        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+
-        | sampleName    | class  | sampleType | injectionOrder | batch | M85T34     | M86T41       | M86T518     | M86T539     | ... |
-        +===============+========+============+================+=======+============+==============+=============+=============+=====+
-        | VT_160120_002 | sample | sample     | 1              | 1     | 228520.064 | 35646729.215 | 2386896.979 | 1026645.836 | ... |
-        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+
-        | QC1           | sample | QC         | 2              | 1     | 90217.384  | 35735702.457 | 2456290.696 | 1089246.460 | ... |
-        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+
-        | ...           | ...    | ...        | ...            | ...   | ...        | ...          | ...         | ...         | ... |
-        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+
-
-
-        + The required columns are **sampleName**, **class**, **sampleType**, **injectionOrder**, and the **features** that you want to normalize. 
-        + The **batch** column is required if batch correction mode is **Multiple batches** and optional otherwise.
-        + The presence of any additional columns (except features) will result in incorrect batch correction or job failure. 
-        + the input table must not contain missing values. Missing intensities must be filled with 0.
-        + **sampleType** column accepts three possible values: [QC, sample, blank] (case insensitive).
-        + **class** column is used to denote a biological group of a sample (e.g., positive/negative species). The column accepts any values.
-        + the **output** is the same table with corrected feature intensities.
-
-        .. rubric:: **Footnotes**
-        .. [1] for details on wavelet-filter parameters refer to R `wavelets::wt.filter <https://www.rdocumentation.org/packages/wavelets/versions/0.3-0.2/topics/wt.filter>`_;
-        .. [2] when using 'Multiple batches', please cite the WaveICA (2019) paper; else, cite WaveICA 2.0 (2021) paper;
-
-    
-    ]]></help>
+    <help>
+        <![CDATA[
+        @HELP@
+        ]]>
+    </help>
 
     <citations>
         <citation type="doi">10.1016/j.aca.2019.02.010</citation>
         <citation type="doi">10.1007/s11306-021-01839-7</citation>
     </citations>
 
-</tool>
\ No newline at end of file
+</tool>
--- a/waveica_macros.xml	Mon Jan 10 16:01:35 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,85 +0,0 @@
-<macros>
-    <token name="@TOOL_VERSION@">0.2.0</token>
-
-    <xml name="creator">
-        <creator>
-            <organization
-                url="https://www.recetox.muni.cz"
-                name="RECETOX MUNI" />
-        </creator>
-    </xml>
-    <xml name="general_parameters">
-        <param type="data" name="data" label="Feature table" format="csv" help=""/>
-        <param type="integer" value="20" name="k" label="Number of components to decompose" help="maximal component that ICA decomposes"/>
-        <param type="float" value="0" name="alpha" label="Alpha" help="trade-off value between the independence of samples (temporal ICA) and variables (spatial ICA), should be between 0 and 1"/>
-    </xml>
-    <xml name="batchwise_parameters">
-        <param type="float" value="0.05" name="t" label="Batch-association threshold" help="threshold to consider a component associate with the batch,
-        should be between 0 and 1"/>
-        <param type="float" value="0.05" name="t2" label="Group-association threshold" help="threshold to consider a component associate with the group,
-        should be between 0 and 1"/>
-    </xml>
-    <xml name="singlebatch_parameters">
-        <param type="float" value="0" name="cutoff" label="Cutoff" help="threshold of the variation explained by the injection order for independent components, should be between 0 and 1"/>
-    </xml>
-    <xml name="exclude_blanks">
-        <param name="exclude_blanks" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Remove blanks" help="Excludes blank samples from the output" />
-    </xml>
-    <xml name="wf">
-        <conditional name="wf">
-            <param type="select" name="wavelet_filter" label="Wavelet transform filter" help="wavelet function and filter length [1] (see footnotes for more details)">
-                <option value="d" selected="True">Daubechies</option>
-                <option value="la" >Least Asymetric</option>
-                <option value="bl" >Best Localized</option>
-                <option value="c" >Coiflet</option>
-            </param>
-            <when value="d">
-                <param name="wavelet_length" type="select" label="filter length">
-                    <option value="2" selected="True">2</option>
-                    <option value="4">4</option>
-                    <option value="6">6</option>
-                    <option value="8">8</option>
-                    <option value="10">10</option>
-                    <option value="12">12</option>
-                    <option value="14">14</option>
-                    <option value="16">16</option>
-                    <option value="18">18</option>
-                    <option value="20">20</option>
-                </param>
-            </when>
-            <when value="la">
-                <param name="wavelet_length" type="select" label="filter length">
-                    <option value="8">8</option>
-                    <option value="10">10</option>
-                    <option value="12">12</option>
-                    <option value="14">14</option>
-                    <option value="16">16</option>
-                    <option value="18">18</option>
-                    <option value="20">20</option>
-                </param>
-            </when>
-            <when value="bl">
-                <param name="wavelet_length" type="select" label="filter length">
-                    <option value="14">14</option>
-                    <option value="18">18</option>
-                    <option value="20">20</option>
-                </param>
-            </when>
-            <when value="c">
-                <param name="wavelet_length" type="select" label="filter length">
-                    <option value="6">6</option>
-                    <option value="12">12</option>
-                    <option value="18">18</option>
-                    <option value="24">24</option>
-                    <option value="30">30</option>
-                </param>
-            </when>
-        </conditional>
-    </xml>
-
-    <xml name="outputs">
-        <outputs>
-            <data name="normalized_data" format="tsv" />
-        </outputs>
-    </xml>
-</macros>
\ No newline at end of file