Mercurial > repos > recetox > waveica
diff waveica_wrapper.R @ 2:d08deef1eb44 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit e33ef984e78721ed37d825c6672795a539a461e1"
| author | recetox |
|---|---|
| date | Fri, 12 Nov 2021 09:14:04 +0000 |
| parents | 2bcfd5b450bb |
| children | 8b55efc7d117 |
line wrap: on
line diff
# Run WaveICA batch-effect correction on a feature table stored as CSV.
#
# Arguments:
#   data            Path (or anything `read.csv` accepts) of a CSV file with a
#                   header row. It must contain the metadata columns
#                   sampleName, class, sampleType, injectionOrder and batch;
#                   every other column is treated as a feature column.
#   wavelet_filter  Wavelet family prefix, combined with `wavelet_length` by
#                   `get_wf()` (e.g. "d" and 2).
#   wavelet_length  Wavelet length, see `wavelet_filter`.
#   k, t, t2, alpha Forwarded unchanged to `WaveICA::WaveICA()` as `K`, `t`,
#                   `t2` and `alpha`.
#   exclude_blanks  If TRUE, rows recognised as blanks are removed from the
#                   result after the correction has been applied.
#
# Returns the input data frame, sorted by batch and injection order, with the
# feature columns replaced by the `data_wave` element of the WaveICA result.
#
# Stops with an error if the table contains any NA or if one of the required
# metadata columns is missing.
waveica <- function(data,
                    wavelet_filter,
                    wavelet_length,
                    k,
                    t,
                    t2,
                    alpha,
                    exclude_blanks) {
  # get input from the Galaxy, preprocess data
  data <- read.csv(data, header = TRUE)

  required_columns <- c(
    "sampleName", "class", "sampleType", "injectionOrder", "batch"
  )
  if (anyNA(data)) {
    stop(paste0(
      "Error: dataframe cannot contain NULL values!\n",
      "Make sure that your dataframe does not contain empty cells"
    ))
  } else if (!all(required_columns %in% colnames(data))) {
    stop(paste0(
      "Error: missing metadata!\n",
      "Make sure that the following columns are present in your dataframe: [",
      paste(required_columns, collapse = ", "),
      "]"
    ))
  }

  # sort data by batch first, then by injection order within each batch
  row_order <- order(
    data[, "batch"],
    data[, "injectionOrder"],
    decreasing = FALSE
  )
  data <- data[row_order, ]

  # separate data into features, batch and group
  feature_columns <- setdiff(colnames(data), required_columns)
  # drop = FALSE keeps a data frame even when there is a single feature column
  features <- data[, feature_columns, drop = FALSE]
  group <- enumerate_groups(as.character(data$sampleType))
  batch <- data$batch

  # run WaveICA
  corrected <- WaveICA::WaveICA(
    data = features,
    wf = get_wf(wavelet_filter, wavelet_length),
    batch = batch,
    group = group,
    K = k,
    t = t,
    t2 = t2,
    alpha = alpha
  )

  # write the corrected intensities back next to the untouched metadata
  data[, feature_columns] <- corrected$data_wave

  # remove blanks from dataset (only after they took part in the correction)
  if (exclude_blanks) {
    data <- exclude_group(data, group)
  }

  data
}

# Match group labels with [blank/sample/qc] and enumerate them.
#
# Matching is a case-insensitive substring search. A label that matches more
# than one keyword is resolved with the precedence blank > sample > qc.
# Labels matching none of the keywords are returned unchanged.
#
# Returns a character vector of the same length as `group` in which blanks
# are "0", samples are "1" and QCs are "2".
enumerate_groups <- function(group) {
  group <- as.character(group)
  labels <- tolower(group)

  is_blank <- grepl("blank", labels)
  is_sample <- !is_blank & grepl("sample", labels)
  is_qc <- !is_blank & !is_sample & grepl("qc", labels)

  group[is_blank] <- "0"
  group[is_sample] <- "1"
  group[is_qc] <- "2"

  group
}

# Create appropriate input for R wavelets function.
#
# Concatenates the filter prefix and the length into a single name; the
# combination "d2" is mapped to "haar".
get_wf <- function(wavelet_filter, wavelet_length) {
  wf <- paste0(wavelet_filter, wavelet_length)

  # exception to the wavelet function
  if (wf == "d2") {
    wf <- "haar"
  }

  wf
}

# Exclude blanks from a dataframe.
#
# `group` holds one enumerated label per row of `data` (as produced by
# `enumerate_groups()`); rows whose label is 0 are dropped. When nothing is
# dropped, `data` is returned untouched and no message is printed.
exclude_group <- function(data, group) {
  row_idx_to_exclude <- which(group %in% 0)
  if (length(row_idx_to_exclude) == 0) {
    return(data)
  }

  # drop = FALSE keeps a data frame even when `data` has a single column
  data_without_blanks <- data[-row_idx_to_exclude, , drop = FALSE]
  cat("Blank samples have been excluded from the dataframe.\n")
  data_without_blanks
}

# Store output of WaveICA in a tsv file.
#
# Writes `data` tab-separated to `output` without row names or quoting and
# returns `data` invisibly.
store_data <- function(data, output) {
  write.table(
    data,
    file = output,
    sep = "\t",
    row.names = FALSE,
    quote = FALSE
  )
  cat("Normalization has been completed.\n")
  invisible(data)
}
