diff waveica_wrapper.R @ 2:d08deef1eb44 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit e33ef984e78721ed37d825c6672795a539a461e1"
author recetox
date Fri, 12 Nov 2021 09:14:04 +0000
parents 2bcfd5b450bb
children 8b55efc7d117
line wrap: on
line diff
--- a/waveica_wrapper.R	Wed Jul 28 11:58:20 2021 +0000
+++ b/waveica_wrapper.R	Fri Nov 12 09:14:04 2021 +0000
@@ -1,89 +1,97 @@
-waveica <- function(
-    data,
-    wavelet_filter,
-    wavelet_length,
-    k,
-    t,
-    t2,
-    alpha,
-    exclude_blanks
-) {
+waveica <- function(data,
+                    wavelet_filter,
+                    wavelet_length,
+                    k,
+                    t,
+                    t2,
+                    alpha,
+                    exclude_blanks) {
+
+  # get input from Galaxy, preprocess data
+  data <- read.csv(data, header = TRUE)
 
-    # get input from the Galaxy, preprocess data
-    data <- read.csv(data, header = TRUE, row.names = "sampleName")
+  required_columns <- c("sampleName", "class", "sampleType", "injectionOrder", "batch")
+  if (anyNA(data)) {
+    stop("Error: dataframe cannot contain NULL values!
+Make sure that your dataframe does not contain empty cells")
+  } else if (!all(required_columns %in% colnames(data))) {
+    stop("Error: missing metadata!
+Make sure that the following columns are present in your dataframe: [sampleName, class, sampleType, injectionOrder, batch]")
+  }
 
-    # sort data by injection order
-    data <- data[order(data$injectionOrder, decreasing = FALSE), ]
-
-    data <- enumerate_groups(data)
+  # sort data by batch, then by injection order within each batch
+  data <- data[order(data[, "batch"],
+    data[, "injectionOrder"],
+    decreasing = FALSE
+  ), ]
 
-    # remove blanks from dataset
-    if (exclude_blanks) {
-        data <- exclude_group(data)
-    }
-
-    # separate data into features, batch and group
-    features <- data[, -c(1:4)]
-    group <- as.numeric(data$class)
-    batch <- data$batch
+  # separate data into features, batch and group
+  feature_columns <- colnames(data)[!colnames(data) %in% required_columns]
+  features <- data[, feature_columns]
+  group <- enumerate_groups(as.character(data$sampleType))
+  batch <- data$batch
 
-    # run WaveICA
-    normalized_data <- WaveICA::WaveICA(
-        data = features,
-        wf = get_wf(wavelet_filter, wavelet_length),
-        batch = batch,
-        group = group,
-        K = k,
-        t = t,
-        t2 = t2,
-        alpha = alpha
-        )
+  # run WaveICA
+  features <- WaveICA::WaveICA(
+    data = features,
+    wf = get_wf(wavelet_filter, wavelet_length),
+    batch = batch,
+    group = group,
+    K = k,
+    t = t,
+    t2 = t2,
+    alpha = alpha
+  )
 
-    return(normalized_data)
+  data[, feature_columns] <- features$data_wave
+
+  # remove blanks from dataset
+  if (exclude_blanks) {
+    data <- exclude_group(data, group)
+  }
+
+  return(data)
 }
 
 
 # Match group labels with [blank/sample/qc] and enumerate them
-enumerate_groups <- function(data) {
+enumerate_groups <- function(group) {
+  group[grepl("blank", tolower(group))] <- 0
+  group[grepl("sample", tolower(group))] <- 1
+  group[grepl("qc", tolower(group))] <- 2
 
-    data$sampleType[grepl("blank", tolower(data$sampleType))] <- 0
-    data$sampleType[grepl("sample", tolower(data$sampleType))] <- 1
-    data$sampleType[grepl("qc", tolower(data$sampleType))] <- 2
-
-    return(data)
+  return(group)
 }
 
 
 # Create appropriate input for R wavelets function
 get_wf <- function(wavelet_filter, wavelet_length) {
-    wf <- paste(wavelet_filter, wavelet_length, sep = "")
+  wf <- paste(wavelet_filter, wavelet_length, sep = "")
 
-    # exception to the wavelet function
-    if (wf == "d2") {
-        wf <- "haar"
-        }
+  # exception to the wavelet function
+  if (wf == "d2") {
+    wf <- "haar"
+  }
 
-    return(wf)
+  return(wf)
 }
 
 
 # Exclude blanks from a dataframe
-exclude_group <- function(data) {
-    row_idx_to_exclude <- which(data$class %in% 0)
-    if (length(row_idx_to_exclude) > 0) {
-        data_without_blanks <- data[-c(row_idx_to_exclude), ]
-        msg <- paste("Blank samples have been excluded from the dataframe.\n")
-        cat(msg)
-        return(data_without_blanks)
-        }
-    else {
-        return(data)
-    }
+exclude_group <- function(data, group) {
+  row_idx_to_exclude <- which(group %in% 0)
+  if (length(row_idx_to_exclude) > 0) {
+    data_without_blanks <- data[-c(row_idx_to_exclude), ]
+    cat("Blank samples have been excluded from the dataframe.\n")
+    return(data_without_blanks)
+  } else {
+    return(data)
+  }
 }
 
 
 # Store output of WaveICA in a tsv file
-store_data <- function(normalized_data, output) {
-    write.table(normalized_data, file = output, sep = "\t", col.names = NA)
-    cat("Normalization has been completed.\n")
+store_data <- function(data, output) {
+  write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE)
+  cat("Normalization has been completed.\n")
 }