changeset 0:2461d20911c9 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 91376ea7a6736351b0cc086ca1bc6c553fdcda97"
author recetox
date Thu, 18 Mar 2021 15:53:38 +0000
parents
children 2bcfd5b450bb
files test-data/features-normalized.tsv test-data/features-test.csv waveica.xml waveica_wrapper.R
diffstat 4 files changed, 232 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/features-normalized.tsv	Thu Mar 18 15:53:38 2021 +0000
@@ -0,0 +1,6 @@
+""	"data_wave.M85T34"	"data_wave.M86T41"	"data_wave.M86T518"	"data_wave.M86T539"
+"VT_160120_002"	355200.506508035	75115889.9077485	6101488.54615418	2007379.02604984
+"VT_160120_004"	216897.826587868	75204863.1495248	6170882.26270475	2069979.64992079
+"VT_160120_006"	362337.195084504	76490295.1450204	12588041.969092	1818589.63912375
+"VT_160120_008"	143303.377379009	83771659.9549148	6181538.46316058	1975712.25920485
+"VT_160120_010"	189065.516447239	84108898.7658797	6103964.42378424	1935671.32085241
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/features-test.csv	Thu Mar 18 15:53:38 2021 +0000
@@ -0,0 +1,6 @@
+sample_name,class,sampleType,injectionOrder,batch,M85T34,M86T41,M86T518,M86T539
+VT_160120_002,sample,sample,1,1,228520.06430737,35646729.21543971,2386896.97966461,1026645.83653468
+VT_160120_004,sample,sample,2,1,90217.384387202,35735702.457215995,2456290.69621518,1089246.46040563
+VT_160120_006,sample,sample,3,1,235656.75288383896,37021134.452711605,8873450.40260241,837856.449608585
+VT_160120_008,sample,sample,4,1,16622.9351783435,44302499.262606,2466946.89667101,994979.069689685
+VT_160120_010,sample,sample,5,1,62385.0742465736,44639738.0735709,2389372.85729467,954938.131337246
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/waveica.xml	Thu Mar 18 15:53:38 2021 +0000
@@ -0,0 +1,133 @@
+<tool id="waveica" name="WaveICA" version="0.1.0+galaxy0" python_template_version="3.5">
+
+    <description>removal of batch effects for untargeted metabolomics data</description>
+    
+    <requirements>
+        <container type="docker">recetox/waveica:0.1.0-recetox0</container>
+    </requirements>
+
+    <command detect_errors="aggressive"><![CDATA[
+        Rscript
+            -e 'source("${__tool_directory__}/waveica_wrapper.R")'
+
+            -e 'normalized_data <- waveica(
+                data = "$data",
+                wavelet_filter = "$parameters.wf.wavelet_filter",
+                wavelet_length = "$parameters.wf.wavelet_length",
+                k = $parameters.k,
+                t = $parameters.t,
+                t2 = $parameters.t2,
+                alpha = $parameters.alpha,
+                exclude_blanks = $optional_parameters.exclude_blanks
+            )'
+
+            -e 'store_data(normalized_data,"$normalized_features")'
+    ]]></command>
+
+    <inputs>
+        <param type="data" name="data" label="Sample-by-matrix data" format="csv" help=""/>
+        <section name="parameters" title="Normalization Parameters" expanded="True">
+            <conditional name="wf">
+                <param type="select" name="wavelet_filter" label="Wavelet transform filter" help="selecting wavelet function and filter length">
+                    <option value="d" selected="True">Daubechies</option>
+                    <option value="la" >Least Asymmetric</option>
+                    <option value="bl" >Best Localized</option>
+                    <option value="c" >Coiflet</option>
+                </param>
+                <when value="d">
+                    <param name="wavelet_length" type="select" label="filter length">
+                        <option value="2" selected="True">2</option>
+                        <option value="4">4</option>
+                        <option value="6">6</option>
+                        <option value="8">8</option>
+                        <option value="10">10</option>
+                        <option value="12">12</option>
+                        <option value="14">14</option>
+                        <option value="16">16</option>
+                        <option value="18">18</option>
+                        <option value="20">20</option>
+                    </param>
+                </when>
+                <when value="la">
+                    <param name="wavelet_length" type="select" label="filter length">
+                        <option value="8">8</option>
+                        <option value="10">10</option>
+                        <option value="12">12</option>
+                        <option value="14">14</option>
+                        <option value="16">16</option>
+                        <option value="18">18</option>
+                        <option value="20">20</option>
+                    </param>
+                </when>
+                <when value="bl">
+                    <param name="wavelet_length" type="select" label="filter length">
+                        <option value="14">14</option>
+                        <option value="18">18</option>
+                        <option value="20">20</option>
+                    </param>
+                </when>
+                <when value="c">
+                    <param name="wavelet_length" type="select" label="filter length">
+                        <option value="6">6</option>
+                        <option value="12">12</option>
+                        <option value="18">18</option>
+                        <option value="24">24</option>
+                        <option value="30">30</option>
+                    </param>
+                </when>
+            </conditional>
+            <param type="integer" value="20" name="k" label="Number of components to decompose" help="the maximal component that ICA decomposes"/>
+            <param type="float" value="0.05" name="t" label="Batch-association threshold" help="the threshold to consider a component associated with the batch,
+ should be between 0 and 1"/>
+            <param type="float" value="0.05" name="t2" label="Group-association threshold" help="the threshold to consider a component associated with the group,
+ should be between 0 and 1"/>
+            <param type="float" value="0" name="alpha" label="Alpha" help="the trade-off value between the independence of samples and those of variables and should be between 0 and 1"/>
+        </section>
+        <section name="optional_parameters" expanded="true" title="Optional Parameters">
+            <param name="exclude_blanks" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Remove blanks" help="Excludes blank samples from the output" />
+        </section>
+    </inputs>
+
+    <outputs>
+        <data name="normalized_features" format="tsv" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="data" value="features-test.csv" ftype="csv" />
+            <param name="wavelet_filter" value="d" />
+            <param name="wavelet_length" value="2" />
+            <param name="k" value="20" />
+            <param name="t" value="0.05" />
+            <param name="t2" value="0.05" />
+            <param name="alpha" value="0" />
+            <output name="normalized_features" file="features-normalized.tsv" /> 
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        **Description**
+
+        Removal of batch effects for large-scale untargeted metabolomics data based on wavelet analysis. The WaveICA R package provides a new algorithm for removing batch effects from metabolomics data.
+        
+        The input is a Sample-by-matrix table, which must include:
+
+        1. Injection order of samples
+
+        2. Types of the samples denoted as "blank", "sample" or "QC"
+
+        3. Batch numbers
+
+        4. Features data
+
+        **Documentation**
+
+        See original repository for further information: https://github.com/dengkuistat/WaveICA
+
+    ]]></help>
+
+    <citations>
+        <citation type="doi">10.1016/j.aca.2019.02.010</citation>
+    </citations>
+
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/waveica_wrapper.R	Thu Mar 18 15:53:38 2021 +0000
@@ -0,0 +1,87 @@
+# Run WaveICA batch-effect normalization on a feature table read from CSV.
+#
+# Arguments:
+#   data            path to a CSV file with a "sample_name" column (used as
+#                   row names); the first four remaining columns are treated
+#                   as metadata and must include "class", "injectionOrder"
+#                   and "batch"; all later columns are treated as features
+#   wavelet_filter  wavelet filter prefix, combined with wavelet_length by
+#                   get_wf to build the filter name
+#   wavelet_length  wavelet filter length
+#   k, t, t2, alpha forwarded to WaveICA::WaveICA as K, t, t2 and alpha
+#   exclude_blanks  if TRUE, blank samples are removed before normalization
+#
+# Returns the value produced by WaveICA::WaveICA.
+waveica <- function(
+    data,
+    wavelet_filter,
+    wavelet_length,
+    k,
+    t,
+    t2,
+    alpha,
+    exclude_blanks
+) {
+
+    # get input from the Galaxy
+    # stringsAsFactors = FALSE keeps "class" a character column on R < 4.0,
+    # where the default would turn it into a factor and the recoding done in
+    # preprocess_data would produce NAs
+    data <- read.csv(
+        data,
+        header = TRUE,
+        row.names = "sample_name",
+        stringsAsFactors = FALSE
+    )
+
+    # fail early with a readable message instead of an obscure error later
+    required_columns <- c("class", "injectionOrder", "batch")
+    missing_columns <- setdiff(required_columns, colnames(data))
+    if (length(missing_columns) > 0) {
+        stop(
+            "Input table is missing required column(s): ",
+            paste(missing_columns, collapse = ", "),
+            call. = FALSE
+        )
+    }
+
+    # sort by injection order and recode sample classes to numbers
+    data <- preprocess_data(data)
+
+    # remove blanks from dataset
+    if (exclude_blanks) {
+        data <- exclude_group(data)
+    }
+
+    # separate data into features, batch and group
+    # the first four columns are metadata; drop = FALSE keeps a data frame
+    # even when only a single feature column is present
+    features <- data[, -c(1:4), drop = FALSE]
+    group <- as.numeric(data$class)
+    batch <- data$batch
+
+    # run WaveICA
+    normalized_data <- WaveICA::WaveICA(
+        data = features,
+        wf = get_wf(wavelet_filter, wavelet_length),
+        batch = batch,
+        group = group,
+        K = k,
+        t = t,
+        t2 = t2,
+        alpha = alpha
+    )
+
+    return(normalized_data)
+}
+
+
+# Sort data by injection order and recode sample classes as numbers.
+#
+# Arguments:
+#   data  data frame with an "injectionOrder" column and a "class" column
+#         holding "blank", "sample" or "QC"
+#
+# Returns the data frame sorted by ascending injection order, with "class"
+# stored as character: "blank" -> "0", "sample" -> "1", "QC" -> "2".
+# Any other class value is left unchanged.
+preprocess_data <- function(data) {
+    # sort data by injection order
+    data <- data[order(data$injectionOrder, decreasing = FALSE), ]
+
+    # a factor column cannot take new values without creating NAs, so work
+    # on the character representation instead
+    data$class <- as.character(data$class)
+
+    class_codes <- c(blank = "0", sample = "1", QC = "2")
+    is_known <- data$class %in% names(class_codes)
+    data$class[is_known] <- unname(class_codes[data$class[is_known]])
+
+    return(data)
+}
+
+
+# Build the wavelet filter name expected by the R wavelets functions by
+# joining the filter prefix and the filter length, e.g. "la" and 8 -> "la8"
+get_wf <- function(wavelet_filter, wavelet_length) {
+    filter_name <- paste0(wavelet_filter, wavelet_length)
+
+    # the combination "d2" is the one exception: it is named "haar" instead
+    if (filter_name == "d2") "haar" else filter_name
+}
+
+
+# Exclude blanks from a dataframe.
+#
+# Arguments:
+#   data  data frame whose "class" column marks blank samples with 0
+#
+# Returns the data frame without the blank rows. When there are no blanks
+# the input is returned unchanged and nothing is printed.
+exclude_group <- function(data) {
+    is_blank <- data$class %in% 0
+
+    # any() also covers the case of exactly one blank row, which a
+    # "more than one" check would leave in the data
+    if (any(is_blank)) {
+        # drop = FALSE keeps a data frame even for a single-column input
+        data <- data[!is_blank, , drop = FALSE]
+        cat("Blank samples have been excluded from the dataframe.\n")
+    }
+
+    return(data)
+}
+
+
+# Write the normalized data to a tab-separated file and report completion
+store_data <- function(normalized_data, output) {
+    # col.names = NA writes an empty header cell above the row-name column
+    write.table(
+        x = normalized_data,
+        file = output,
+        sep = "\t",
+        col.names = NA
+    )
+    cat("Normalization has been completed.\n")
+}