Mercurial > repos > recetox > waveica
changeset 0:2461d20911c9 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 91376ea7a6736351b0cc086ca1bc6c553fdcda97"
| author | recetox |
|---|---|
| date | Thu, 18 Mar 2021 15:53:38 +0000 |
| parents | |
| children | 2bcfd5b450bb |
| files | test-data/features-normalized.tsv test-data/features-test.csv waveica.xml waveica_wrapper.R |
| diffstat | 4 files changed, 232 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/features-normalized.tsv Thu Mar 18 15:53:38 2021 +0000 @@ -0,0 +1,6 @@ +"" "data_wave.M85T34" "data_wave.M86T41" "data_wave.M86T518" "data_wave.M86T539" +"VT_160120_002" 355200.506508035 75115889.9077485 6101488.54615418 2007379.02604984 +"VT_160120_004" 216897.826587868 75204863.1495248 6170882.26270475 2069979.64992079 +"VT_160120_006" 362337.195084504 76490295.1450204 12588041.969092 1818589.63912375 +"VT_160120_008" 143303.377379009 83771659.9549148 6181538.46316058 1975712.25920485 +"VT_160120_010" 189065.516447239 84108898.7658797 6103964.42378424 1935671.32085241
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/features-test.csv Thu Mar 18 15:53:38 2021 +0000 @@ -0,0 +1,6 @@ +sample_name,class,sampleType,injectionOrder,batch,M85T34,M86T41,M86T518,M86T539 +VT_160120_002,sample,sample,1,1,228520.06430737,35646729.21543971,2386896.97966461,1026645.83653468 +VT_160120_004,sample,sample,2,1,90217.384387202,35735702.457215995,2456290.69621518,1089246.46040563 +VT_160120_006,sample,sample,3,1,235656.75288383896,37021134.452711605,8873450.40260241,837856.449608585 +VT_160120_008,sample,sample,4,1,16622.9351783435,44302499.262606,2466946.89667101,994979.069689685 +VT_160120_010,sample,sample,5,1,62385.0742465736,44639738.0735709,2389372.85729467,954938.131337246
<!-- File: waveica.xml (added in the diff "/dev/null to b/waveica.xml", Thu Mar 18 15:53:38 2021 +0000) -->
<!--
    Galaxy tool definition for WaveICA.
    The command sources waveica_wrapper.R from the tool directory, calls its
    waveica() function with the parameters selected below and writes the result
    with store_data() into the "normalized_features" output dataset.
-->
<tool id="waveica" name="WaveICA" version="0.1.0+galaxy0" python_template_version="3.5">

    <description>removal of batch effects for untargeted metabolomics data</description>

    <requirements>
        <container type="docker">recetox/waveica:0.1.0-recetox0</container>
    </requirements>

    <!-- Three R expressions are evaluated in sequence by a single Rscript process. -->
    <command detect_errors="aggressive"><![CDATA[
        Rscript
            -e 'source("${__tool_directory__}/waveica_wrapper.R")'

            -e 'normalized_data <- waveica(
                data = "$data",
                wavelet_filter = "$parameters.wf.wavelet_filter",
                wavelet_length = "$parameters.wf.wavelet_length",
                k = $parameters.k,
                t = $parameters.t,
                t2 = $parameters.t2,
                alpha = $parameters.alpha,
                exclude_blanks = $optional_parameters.exclude_blanks
            )'

            -e 'store_data(normalized_data,"$normalized_features")'
    ]]></command>

    <inputs>
        <param type="data" name="data" label="Sample-by-matrix data" format="csv" help="CSV table with the columns sample_name, class, sampleType, injectionOrder and batch, followed by one column per feature"/>
        <section name="parameters" title="Normalization Parameters" expanded="True">
            <!-- The available filter lengths depend on the selected wavelet family. -->
            <conditional name="wf">
                <param type="select" name="wavelet_filter" label="Wavelet transform filter" help="selecting wavelet function and filter length">
                    <option value="d" selected="True">Daubechies</option>
                    <option value="la">Least Asymmetric</option>
                    <option value="bl">Best Localized</option>
                    <option value="c">Coiflet</option>
                </param>
                <when value="d">
                    <param name="wavelet_length" type="select" label="filter length">
                        <option value="2" selected="True">2</option>
                        <option value="4">4</option>
                        <option value="6">6</option>
                        <option value="8">8</option>
                        <option value="10">10</option>
                        <option value="12">12</option>
                        <option value="14">14</option>
                        <option value="16">16</option>
                        <option value="18">18</option>
                        <option value="20">20</option>
                    </param>
                </when>
                <when value="la">
                    <param name="wavelet_length" type="select" label="filter length">
                        <option value="8">8</option>
                        <option value="10">10</option>
                        <option value="12">12</option>
                        <option value="14">14</option>
                        <option value="16">16</option>
                        <option value="18">18</option>
                        <option value="20">20</option>
                    </param>
                </when>
                <when value="bl">
                    <param name="wavelet_length" type="select" label="filter length">
                        <option value="14">14</option>
                        <option value="18">18</option>
                        <option value="20">20</option>
                    </param>
                </when>
                <when value="c">
                    <param name="wavelet_length" type="select" label="filter length">
                        <option value="6">6</option>
                        <option value="12">12</option>
                        <option value="18">18</option>
                        <option value="24">24</option>
                        <option value="30">30</option>
                    </param>
                </when>
            </conditional>
            <param type="integer" value="20" min="1" name="k" label="Number of components to decompose" help="the maximal component that ICA decomposes"/>
            <!-- The help texts state that these values lie between 0 and 1; min/max enforce that in the form. -->
            <param type="float" value="0.05" min="0" max="1" name="t" label="Batch-association threshold" help="the threshold to consider a component associated with the batch, should be between 0 and 1"/>
            <param type="float" value="0.05" min="0" max="1" name="t2" label="Group-association threshold" help="the threshold to consider a component associated with the group, should be between 0 and 1"/>
            <param type="float" value="0" min="0" max="1" name="alpha" label="Alpha" help="the trade-off value between the independence of samples and those of variables and should be between 0 and 1"/>
        </section>
        <section name="optional_parameters" expanded="true" title="Optional Parameters">
            <param name="exclude_blanks" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Remove blanks" help="Excludes blank samples from the output" />
        </section>
    </inputs>

    <outputs>
        <data name="normalized_features" format="tsv" />
    </outputs>

    <tests>
        <test>
            <param name="data" value="features-test.csv" ftype="csv" />
            <!-- Parameters are addressed through their section/conditional so each one matches a real input. -->
            <section name="parameters">
                <conditional name="wf">
                    <param name="wavelet_filter" value="d" />
                    <param name="wavelet_length" value="2" />
                </conditional>
                <param name="k" value="20" />
                <param name="t" value="0.05" />
                <param name="t2" value="0.05" />
                <param name="alpha" value="0" />
            </section>
            <output name="normalized_features" file="features-normalized.tsv" />
        </test>
    </tests>

    <help><![CDATA[
        **Description**

        Removal of batch effects for large-scale untargeted metabolomics data based on wavelet analysis. The WaveICA R package provides a new algorithm for removing batch effects from metabolomics data.

        The input is a sample-by-matrix table which must include:

        1. Injection order of samples

        2. Types of the samples denoted as "blank", "sample" or "QC"

        3. Batch numbers

        4. Features data

        **Documentation**

        See original repository for further information: https://github.com/dengkuistat/WaveICA

    ]]></help>

    <citations>
        <citation type="doi">10.1016/j.aca.2019.02.010</citation>
    </citations>

</tool>
# File: waveica_wrapper.R (added in the diff "/dev/null -> b/waveica_wrapper.R",
# Thu Mar 18 15:53:38 2021 +0000)
#
# Helper functions sourced by the Galaxy tool command, which calls waveica()
# and then store_data().

#' Read a feature table and normalize it with WaveICA
#'
#' Reads the CSV file, sorts and encodes it with `preprocess_data()`,
#' optionally drops blank samples with `exclude_group()` and forwards the
#' feature columns to `WaveICA::WaveICA()`.
#'
#' @param data Path to a CSV file with a `sample_name` column (used as row
#'   names), the columns `class`, `injectionOrder` and `batch`, and one column
#'   per feature. A `sampleType` column is treated as metadata when present.
#' @param wavelet_filter Wavelet family prefix (e.g. "d", "la", "bl", "c").
#' @param wavelet_length Filter length, combined with `wavelet_filter` by
#'   `get_wf()`.
#' @param k Passed to `WaveICA::WaveICA()` as `K`.
#' @param t Passed to `WaveICA::WaveICA()` as `t`.
#' @param t2 Passed to `WaveICA::WaveICA()` as `t2`.
#' @param alpha Passed to `WaveICA::WaveICA()` as `alpha`.
#' @param exclude_blanks If `TRUE`, samples of class "blank" are removed
#'   before normalization.
#' @return The value returned by `WaveICA::WaveICA()`.
waveica <- function(
  data,
  wavelet_filter,
  wavelet_length,
  k,
  t,
  t2,
  alpha,
  exclude_blanks
) {
  # get input from the Galaxy, preprocess data
  data <- read.csv(data, header = TRUE, row.names = "sample_name")
  if (!"batch" %in% names(data)) {
    stop("Input data is missing required column(s): batch", call. = FALSE)
  }
  data <- preprocess_data(data)

  # remove blanks from dataset
  if (exclude_blanks) {
    data <- exclude_group(data)
  }

  # separate data into features, batch and group; metadata columns are
  # identified by name so the result does not depend on their position, and
  # drop = FALSE keeps a data frame even when only one feature is present
  metadata_columns <- c("class", "sampleType", "injectionOrder", "batch")
  feature_columns <- setdiff(names(data), metadata_columns)
  features <- data[, feature_columns, drop = FALSE]
  group <- as.numeric(data$class)
  batch <- data$batch

  # run WaveICA
  WaveICA::WaveICA(
    data = features,
    wf = get_wf(wavelet_filter, wavelet_length),
    batch = batch,
    group = group,
    K = k,
    t = t,
    t2 = t2,
    alpha = alpha
  )
}


#' Sort data by injection order and encode sample classes numerically
#'
#' Class labels are mapped as "blank" -> 0, "sample" -> 1, "QC" -> 2. Values
#' that are already 0, 1 or 2 are kept, so the function can be applied twice.
#'
#' @param data Data frame with the columns `class` and `injectionOrder`.
#' @return `data` ordered by increasing `injectionOrder`, with `class`
#'   replaced by its numeric code.
preprocess_data <- function(data) {
  missing_columns <- setdiff(c("class", "injectionOrder"), names(data))
  if (length(missing_columns) > 0) {
    stop(
      "Input data is missing required column(s): ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }

  # sort data by injection order
  data <- data[order(data$injectionOrder, decreasing = FALSE), , drop = FALSE]

  # look the labels up by name; as.character() makes this work for factor
  # columns as well, where in-place assignment of new values would yield NA
  class_codes <- c(
    "blank" = 0, "sample" = 1, "QC" = 2,
    "0" = 0, "1" = 1, "2" = 2
  )
  labels <- as.character(data$class)
  codes <- unname(class_codes[labels])

  if (anyNA(codes)) {
    stop(
      "Unknown sample class(es): ",
      paste(unique(labels[is.na(codes)]), collapse = ", "),
      "; expected \"blank\", \"sample\" or \"QC\"",
      call. = FALSE
    )
  }

  data$class <- codes
  data
}


#' Create appropriate input for R wavelets function
#'
#' @param wavelet_filter Wavelet family prefix.
#' @param wavelet_length Filter length.
#' @return Both arguments pasted together without separator, except that
#'   "d2" is returned as "haar".
get_wf <- function(wavelet_filter, wavelet_length) {
  wf <- paste0(wavelet_filter, wavelet_length)

  # exception to the wavelet function
  if (wf == "d2") {
    wf <- "haar"
  }

  wf
}


#' Exclude blanks from a dataframe
#'
#' @param data Data frame whose `class` column uses 0 for blank samples, as
#'   produced by `preprocess_data()`.
#' @return `data` without the rows of class 0. If any row was removed a
#'   message is printed; otherwise `data` is returned unchanged.
exclude_group <- function(data) {
  is_blank <- data$class %in% 0

  # any number of blanks (including exactly one) has to be removed
  if (!any(is_blank)) {
    return(data)
  }

  data_without_blanks <- data[!is_blank, , drop = FALSE]
  msg <- paste("Blank samples have been excluded from the dataframe.\n")
  cat(msg)
  data_without_blanks
}


#' Store output of WaveICA in a tsv file
#'
#' @param normalized_data Object to write, as returned by `waveica()`.
#' @param output Path of the tab-separated file to create.
#' @return `normalized_data`, invisibly.
store_data <- function(normalized_data, output) {
  write.table(normalized_data, file = output, sep = "\t", col.names = NA)
  cat("Normalization has been completed.\n")
  invisible(normalized_data)
}
