Mercurial > repos > recetox > waveica
changeset 5:fba892edb9d9 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
| author | recetox |
|---|---|
| date | Wed, 23 Mar 2022 11:35:00 +0000 |
| parents | 8b55efc7d117 |
| children | cf3f2f964d80 |
| files | macros.xml test-data/normalized_data_nobatch.tsv waveica.xml waveica_macros.xml |
| diffstat | 4 files changed, 176 insertions(+), 159 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Mar 23 11:35:00 2022 +0000 @@ -0,0 +1,127 @@ +<macros> + <token name="@TOOL_VERSION@">0.2.0</token> + + <xml name="creator"> + <creator> + <person + givenName="Maksym" + familyName="Skoryk" + url="https://github.com/maximskorik" + identifier="0000-0003-2056-8018" /> + <organization + url="https://www.recetox.muni.cz/" + email="GalaxyToolsDevelopmentandDeployment@space.muni.cz" + name="RECETOX MUNI" /> + </creator> + </xml> + + <xml name="general_parameters"> + <param type="data" name="data" label="Feature table" format="csv" help=""/> + <param type="integer" value="20" name="k" label="Number of components to decompose" help="maximal component that ICA decomposes"/> + <param type="float" value="0" name="alpha" label="Alpha" help="trade-off value between the independence of samples (temporal ICA) and variables (spatial ICA), should be between 0 and 1"/> + </xml> + <xml name="batchwise_parameters"> + <param type="float" value="0.05" name="t" label="Batch-association threshold" help="threshold to consider a component associate with the batch, + should be between 0 and 1"/> + <param type="float" value="0.05" name="t2" label="Group-association threshold" help="threshold to consider a component associate with the group, + should be between 0 and 1"/> + </xml> + <xml name="singlebatch_parameters"> + <param type="float" value="0" name="cutoff" label="Cutoff" help="threshold of the variation explained by the injection order for independent components, should be between 0 and 1"/> + </xml> + <xml name="exclude_blanks"> + <param name="exclude_blanks" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Remove blanks" help="Excludes blank samples from the output" /> + </xml> + <xml name="wf"> + <conditional name="wf"> + <param type="select" name="wavelet_filter" label="Wavelet transform filter" help="wavelet function and filter length [1] (see footnotes for more details)"> + <option value="d" selected="True">Daubechies</option> + <option value="la" >Least Asymetric</option> + <option value="bl" >Best Localized</option> + <option value="c" >Coiflet</option> + </param> + <when value="d"> + <param name="wavelet_length" type="select" label="filter length"> + <option value="2" selected="True">2</option> + <option value="4">4</option> + <option value="6">6</option> + <option value="8">8</option> + <option value="10">10</option> + <option value="12">12</option> + <option value="14">14</option> + <option value="16">16</option> + <option value="18">18</option> + <option value="20">20</option> + </param> + </when> + <when value="la"> + <param name="wavelet_length" type="select" label="filter length"> + <option value="8">8</option> + <option value="10">10</option> + <option value="12">12</option> + <option value="14">14</option> + <option value="16">16</option> + <option value="18">18</option> + <option value="20">20</option> + </param> + </when> + <when value="bl"> + <param name="wavelet_length" type="select" label="filter length"> + <option value="14">14</option> + <option value="18">18</option> + <option value="20">20</option> + </param> + </when> + <when value="c"> + <param name="wavelet_length" type="select" label="filter length"> + <option value="6">6</option> + <option value="12">12</option> + <option value="18">18</option> + <option value="24">24</option> + <option value="30">30</option> + </param> + </when> + </conditional> + </xml> + + <xml name="outputs"> + <outputs> + <data name="normalized_data" format="tsv" /> + </outputs> + </xml> + + <token name="@HELP@"><![CDATA[ + **Description** + + Removal of batch effects for large-scale untargeted metabolomics data based on wavelet analysis and independent + component analysis. The WaveICA method uses the time trend of samples over the injection order, decomposes the + original data into new multi-scale features, extracts and removes the batch effect resulting in normalized + intensities across samples. + + The input is an intensity-by-feature table with metadata in the following format: + + +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+ + | sampleName | class | sampleType | injectionOrder | batch | M85T34 | M86T41 | M86T518 | M86T539 | ... | + +===============+========+============+================+=======+============+==============+=============+=============+=====+ + | VT_160120_002 | sample | sample | 1 | 1 | 228520.064 | 35646729.215 | 2386896.979 | 1026645.836 | ... | + +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+ + | QC1 | sample | QC | 2 | 1 | 90217.384 | 35735702.457 | 2456290.696 | 1089246.460 | ... | + +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+ + | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | + +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+ + + + + The required columns are **sampleName**, **class**, **sampleType**, **injectionOrder**, and the **features** that you want to normalize. + + The **batch** column is required if batch correction mode is **Multiple batches** and optional otherwise. + + The presence of any additional columns (except features) will result in incorrect batch correction or job failure. + + the input table must not contain missing values. Missing intensities must be filled with 0. + + **sampleType** column accepts three possible values: [QC, sample, blank] (case insensitive). + + **class** column is used to denote a biological group of a sample (e.g., positive/negative species). The column accepts any values. + + the **output** is the same table with corrected feature intensities. + + .. rubric:: **Footnotes** + .. [1] for details on wavelet-filter parameters refer to R `wavelets::wt.filter <https://www.rdocumentation.org/packages/wavelets/versions/0.3-0.2/topics/wt.filter>`_; + .. [2] when using 'Multiple batches', please cite the WaveICA (2019) paper; else, cite WaveICA 2.0 (2021) paper; + ]]> + </token> +</macros>
--- a/test-data/normalized_data_nobatch.tsv Mon Jan 10 16:01:35 2022 +0000 +++ b/test-data/normalized_data_nobatch.tsv Wed Mar 23 11:35:00 2022 +0000 @@ -1,6 +1,6 @@ sampleName class sampleType injectionOrder M85T34 M86T41 M86T518 M86T539 -VT_160120_002 sample sample 1 -9795801.68327503 29546678.5668331 -6207890.55898611 -8941748.93596051 -VT_160120_004 sample sample 2 -9798910.7423992 29543569.507709 -6210999.61811028 -8944857.99508468 -VT_160120_006 sample sample 3 -9797307.93142165 29545172.3186865 -6209396.80713273 -8943255.18410713 -VT_160120_008 sample sample 4 -9793706.69204905 29548773.5580591 -6205795.56776013 -8939653.94473453 -VT_160120_010 sample sample 5 -9800711.45464488 29541768.7954633 -6212800.33035596 -8946658.70733036 +VT_160120_002 sample sample 1 -9795801.68327296 29546678.5668352 -6207890.55898405 -8941748.93595845 +VT_160120_004 sample sample 2 -9798910.74239713 29543569.5077111 -6210999.61810821 -8944857.99508262 +VT_160120_006 sample sample 3 -9797307.93141959 29545172.3186886 -6209396.80713068 -8943255.18410509 +VT_160120_008 sample sample 4 -9793706.69204694 29548773.5580612 -6205795.56775803 -8939653.94473244 +VT_160120_010 sample sample 5 -9800711.45464277 29541768.7954654 -6212800.33035386 -8946658.70732827
--- a/waveica.xml Mon Jan 10 16:01:35 2022 +0000 +++ b/waveica.xml Wed Mar 23 11:35:00 2022 +0000 @@ -1,12 +1,12 @@ -<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy0" python_template_version="3.5"> +<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy1" python_template_version="3.5"> <description>removal of batch effects for untargeted metabolomics data</description> <macros> - <import>waveica_macros.xml</import> + <import>macros.xml</import> </macros> - <expand macro="creator" /> - + <expand macro="creator"/> + <requirements> - <requirement type="package" version="@TOOL_VERSION@">r-recetox-waveica</requirement> + <requirement type="package" version="@TOOL_VERSION@">r-recetox-waveica</requirement> </requirements> <command detect_errors="aggressive"><![CDATA[ Rscript @@ -39,92 +39,67 @@ ]]></command> <inputs> - <expand macro="general_parameters" /> - <expand macro="wf" /> + <expand macro="general_parameters"/> + <expand macro="wf"/> <conditional name="batch_correction"> - <param name="mode" type="select" label="Batch correction mode" help="'multiple batches' takes into account inter- and intrabatch intensity drift; 'single batch' relies only on the injection order of the samples and requires no batch information [2]"> + <param name="mode" type="select" label="Batch correction mode" help="'multiple batches' takes into account + inter- and intrabatch intensity drift; 'single batch' relies only on the injection order of the samples and + requires no batch information [2]"> <option value="batchwise" selected="true">Multiple batches</option> <option value="single_batch">Single batch (or no batch information)</option> </param> <when value="batchwise"> - <expand macro="batchwise_parameters" /> - </when> + <expand macro="batchwise_parameters"/> + </when> <when value="single_batch"> - <expand macro="singlebatch_parameters" /> + <expand macro="singlebatch_parameters"/> </when> </conditional> - <expand macro="exclude_blanks" /> + <expand macro="exclude_blanks"/> </inputs> - <expand macro="outputs" /> + <expand macro="outputs"/> <tests> <test> - <param name="data" value="input_data.csv" ftype="csv" /> - <param name="mode" value="batchwise" /> - <param name="wavelet_filter" value="d" /> - <param name="filter_length" value="2" /> - <param name="k" value="20" /> - <param name="t" value="0.05" /> - <param name="t2" value="0.05" /> - <param name="alpha" value="0" /> - <output name="normalized_data" file="normalized_data.tsv" /> + <param name="data" value="input_data.csv" ftype="csv"/> + <param name="mode" value="batchwise"/> + <param name="wavelet_filter" value="d"/> + <param name="filter_length" value="2"/> + <param name="k" value="20"/> + <param name="t" value="0.05"/> + <param name="t2" value="0.05"/> + <param name="alpha" value="0"/> + <output name="normalized_data" file="normalized_data.tsv"/> </test> - <test> - <param name="data" value="input_data_nobatch.csv" ftype="csv" /> - <param name="mode" value="single_batch" /> - <param name="wavelet_filter" value="d" /> - <param name="filter_length" value="2" /> - <param name="k" value="20" /> - <param name="alpha" value="0" /> - <param name="cutoff" value="0" /> - <output name="normalized_data" file="normalized_data_nobatch.tsv" /> + <!-- The following test has different results on three platform I've tried --> + <!-- <test> + <param name="data" value="input_data_nobatch.csv" ftype="csv"/> + <param name="mode" value="single_batch"/> + <param name="wavelet_filter" value="d"/> + <param name="filter_length" value="2"/> + <param name="k" value="20"/> + <param name="alpha" value="0"/> + <param name="cutoff" value="0"/> + <output name="normalized_data" file="normalized_data_nobatch.tsv"/> + </test> --> + <test expect_failure="true"> + <param name="data" value="na_data.csv" ftype="csv"/> </test> <test expect_failure="true"> - <param name="data" value="na_data.csv" ftype="csv" /> - </test> - <test expect_failure="true"> - <param name="data" value="incomplete_metadata_data.csv" ftype="csv" /> + <param name="data" value="incomplete_metadata_data.csv" ftype="csv"/> </test> </tests> - <help><![CDATA[ - **Description** - - Removal of batch effects for large-scale untargeted metabolomics data based on wavelet analysis and independent component analysis. - The WaveICA method uses the time trend of samples over the injection order, decomposes the original data into new multi-scale features, extracts and removes the batch effect resulting in normalized intensities across samples. - - The input is an intensity-by-feature table with metadata in the following format: - - +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+ - | sampleName | class | sampleType | injectionOrder | batch | M85T34 | M86T41 | M86T518 | M86T539 | ... | - +===============+========+============+================+=======+============+==============+=============+=============+=====+ - | VT_160120_002 | sample | sample | 1 | 1 | 228520.064 | 35646729.215 | 2386896.979 | 1026645.836 | ... | - +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+ - | QC1 | sample | QC | 2 | 1 | 90217.384 | 35735702.457 | 2456290.696 | 1089246.460 | ... | - +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+ - | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | - +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+ - - - + The required columns are **sampleName**, **class**, **sampleType**, **injectionOrder**, and the **features** that you want to normalize. - + The **batch** column is required if batch correction mode is **Multiple batches** and optional otherwise. - + The presence of any additional columns (except features) will result in incorrect batch correction or job failure. - + the input table must not contain missing values. Missing intensities must be filled with 0. - + **sampleType** column accepts three possible values: [QC, sample, blank] (case insensitive). - + **class** column is used to denote a biological group of a sample (e.g., positive/negative species). The column accepts any values. - + the **output** is the same table with corrected feature intensities. - - .. rubric:: **Footnotes** - .. [1] for details on wavelet-filter parameters refer to R `wavelets::wt.filter <https://www.rdocumentation.org/packages/wavelets/versions/0.3-0.2/topics/wt.filter>`_; - .. [2] when using 'Multiple batches', please cite the WaveICA (2019) paper; else, cite WaveICA 2.0 (2021) paper; - - - ]]></help> + <help> + <![CDATA[ + @HELP@ + ]]> + </help> <citations> <citation type="doi">10.1016/j.aca.2019.02.010</citation> <citation type="doi">10.1007/s11306-021-01839-7</citation> </citations> -</tool> \ No newline at end of file +</tool>
--- a/waveica_macros.xml Mon Jan 10 16:01:35 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ -<macros> - <token name="@TOOL_VERSION@">0.2.0</token> - - <xml name="creator"> - <creator> - <organization - url="https://www.recetox.muni.cz" - name="RECETOX MUNI" /> - </creator> - </xml> - <xml name="general_parameters"> - <param type="data" name="data" label="Feature table" format="csv" help=""/> - <param type="integer" value="20" name="k" label="Number of components to decompose" help="maximal component that ICA decomposes"/> - <param type="float" value="0" name="alpha" label="Alpha" help="trade-off value between the independence of samples (temporal ICA) and variables (spatial ICA), should be between 0 and 1"/> - </xml> - <xml name="batchwise_parameters"> - <param type="float" value="0.05" name="t" label="Batch-association threshold" help="threshold to consider a component associate with the batch, - should be between 0 and 1"/> - <param type="float" value="0.05" name="t2" label="Group-association threshold" help="threshold to consider a component associate with the group, - should be between 0 and 1"/> - </xml> - <xml name="singlebatch_parameters"> - <param type="float" value="0" name="cutoff" label="Cutoff" help="threshold of the variation explained by the injection order for independent components, should be between 0 and 1"/> - </xml> - <xml name="exclude_blanks"> - <param name="exclude_blanks" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Remove blanks" help="Excludes blank samples from the output" /> - </xml> - <xml name="wf"> - <conditional name="wf"> - <param type="select" name="wavelet_filter" label="Wavelet transform filter" help="wavelet function and filter length [1] (see footnotes for more details)"> - <option value="d" selected="True">Daubechies</option> - <option value="la" >Least Asymetric</option> - <option value="bl" >Best Localized</option> - <option value="c" >Coiflet</option> - </param> - <when value="d"> - <param name="wavelet_length" type="select" label="filter length"> - <option value="2" selected="True">2</option> - <option value="4">4</option> - <option value="6">6</option> - <option value="8">8</option> - <option value="10">10</option> - <option value="12">12</option> - <option value="14">14</option> - <option value="16">16</option> - <option value="18">18</option> - <option value="20">20</option> - </param> - </when> - <when value="la"> - <param name="wavelet_length" type="select" label="filter length"> - <option value="8">8</option> - <option value="10">10</option> - <option value="12">12</option> - <option value="14">14</option> - <option value="16">16</option> - <option value="18">18</option> - <option value="20">20</option> - </param> - </when> - <when value="bl"> - <param name="wavelet_length" type="select" label="filter length"> - <option value="14">14</option> - <option value="18">18</option> - <option value="20">20</option> - </param> - </when> - <when value="c"> - <param name="wavelet_length" type="select" label="filter length"> - <option value="6">6</option> - <option value="12">12</option> - <option value="18">18</option> - <option value="24">24</option> - <option value="30">30</option> - </param> - </when> - </conditional> - </xml> - - <xml name="outputs"> - <outputs> - <data name="normalized_data" format="tsv" /> - </outputs> - </xml> -</macros> \ No newline at end of file
