Mercurial > repos > recetox > waveica
comparison waveica_wrapper.R @ 2:d08deef1eb44 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit e33ef984e78721ed37d825c6672795a539a461e1"
| author | recetox |
|---|---|
| date | Fri, 12 Nov 2021 09:14:04 +0000 |
| parents | 2bcfd5b450bb |
| children | 8b55efc7d117 |
comparison
legend: equal | deleted | inserted | replaced
| 1:2bcfd5b450bb | 2:d08deef1eb44 |
|---|---|
| 1 waveica <- function( | 1 waveica <- function(data, |
| 2 data, | 2 wavelet_filter, |
| 3 wavelet_filter, | 3 wavelet_length, |
| 4 wavelet_length, | 4 k, |
| 5 k, | 5 t, |
| 6 t, | 6 t2, |
| 7 t2, | 7 alpha, |
| 8 alpha, | 8 exclude_blanks) { |
| 9 exclude_blanks | |
| 10 ) { | |
| 11 | 9 |
| 12 # get input from the Galaxy, preprocess data | 10 # get input from the Galaxy, preprocess data |
| 13 data <- read.csv(data, header = TRUE, row.names = "sampleName") | 11 data <- read.csv(data, header = TRUE) |
| 14 | 12 |
| 15 # sort data by injection order | 13 required_columns <- c("sampleName", "class", "sampleType", "injectionOrder", "batch") |
| 16 data <- data[order(data$injectionOrder, decreasing = FALSE), ] | 14 if (anyNA(data)) { |
| 15 stop("Error: dataframe cannot contain NULL values! | |
| 16 Make sure that your dataframe does not contain empty cells") | |
| 17 } else if (!all(required_columns %in% colnames(data))) { | |
| 18 stop("Error: missing metadata! | |
| 19 Make sure that the following columns are present in your dataframe: [sampleName, class, sampleType, injectionOrder, batch]") | |
| 20 } | |
| 17 | 21 |
| 18 data <- enumerate_groups(data) | 22 # sort data by injection order |
| 23 data <- data[order(data[, "batch"], | |
| 24 data[, "injectionOrder"], | |
| 25 decreasing = FALSE | |
| 26 ), ] | |
| 19 | 27 |
| 20 # remove blanks from dataset | 28 # separate data into features, batch and group |
| 21 if (exclude_blanks) { | 29 feature_columns <- colnames(data)[!colnames(data) %in% required_columns] |
| 22 data <- exclude_group(data) | 30 features <- data[, feature_columns] |
| 23 } | 31 group <- enumerate_groups(as.character(data$sampleType)) |
| 32 batch <- data$batch | |
| 24 | 33 |
| 25 # separate data into features, batch and group | 34 # run WaveICA |
| 26 features <- data[, -c(1:4)] | 35 features <- WaveICA::WaveICA( |
| 27 group <- as.numeric(data$class) | 36 data = features, |
| 28 batch <- data$batch | 37 wf = get_wf(wavelet_filter, wavelet_length), |
| 38 batch = batch, | |
| 39 group = group, | |
| 40 K = k, | |
| 41 t = t, | |
| 42 t2 = t2, | |
| 43 alpha = alpha | |
| 44 ) | |
| 29 | 45 |
| 30 # run WaveICA | 46 data[, feature_columns] <- features$data_wave |
| 31 normalized_data <- WaveICA::WaveICA( | |
| 32 data = features, | |
| 33 wf = get_wf(wavelet_filter, wavelet_length), | |
| 34 batch = batch, | |
| 35 group = group, | |
| 36 K = k, | |
| 37 t = t, | |
| 38 t2 = t2, | |
| 39 alpha = alpha | |
| 40 ) | |
| 41 | 47 |
| 42 return(normalized_data) | 48 # remove blanks from dataset |
| 49 if (exclude_blanks) { | |
| 50 data <- exclude_group(data, group) | |
| 51 } | |
| 52 | |
| 53 return(data) | |
| 43 } | 54 } |
| 44 | 55 |
| 45 | 56 |
| 46 # Match group labels with [blank/sample/qc] and enumerate them | 57 # Match group labels with [blank/sample/qc] and enumerate them |
| 47 enumerate_groups <- function(data) { | 58 enumerate_groups <- function(group) { |
| 59 group[grepl("blank", tolower(group))] <- 0 | |
| 60 group[grepl("sample", tolower(group))] <- 1 | |
| 61 group[grepl("qc", tolower(group))] <- 2 | |
| 48 | 62 |
| 49 data$sampleType[grepl("blank", tolower(data$sampleType))] <- 0 | 63 return(group) |
| 50 data$sampleType[grepl("sample", tolower(data$sampleType))] <- 1 | |
| 51 data$sampleType[grepl("qc", tolower(data$sampleType))] <- 2 | |
| 52 | |
| 53 return(data) | |
| 54 } | 64 } |
| 55 | 65 |
| 56 | 66 |
| 57 # Create appropriate input for R wavelets function | 67 # Create appropriate input for R wavelets function |
| 58 get_wf <- function(wavelet_filter, wavelet_length) { | 68 get_wf <- function(wavelet_filter, wavelet_length) { |
| 59 wf <- paste(wavelet_filter, wavelet_length, sep = "") | 69 wf <- paste(wavelet_filter, wavelet_length, sep = "") |
| 60 | 70 |
| 61 # exception to the wavelet function | 71 # exception to the wavelet function |
| 62 if (wf == "d2") { | 72 if (wf == "d2") { |
| 63 wf <- "haar" | 73 wf <- "haar" |
| 64 } | 74 } |
| 65 | 75 |
| 66 return(wf) | 76 return(wf) |
| 67 } | 77 } |
| 68 | 78 |
| 69 | 79 |
| 70 # Exclude blanks from a dataframe | 80 # Exclude blanks from a dataframe |
| 71 exclude_group <- function(data) { | 81 exclude_group <- function(data, group) { |
| 72 row_idx_to_exclude <- which(data$class %in% 0) | 82 row_idx_to_exclude <- which(group %in% 0) |
| 73 if (length(row_idx_to_exclude) > 0) { | 83 if (length(row_idx_to_exclude) > 0) { |
| 74 data_without_blanks <- data[-c(row_idx_to_exclude), ] | 84 data_without_blanks <- data[-c(row_idx_to_exclude), ] |
| 75 msg <- paste("Blank samples have been excluded from the dataframe.\n") | 85 cat("Blank samples have been excluded from the dataframe.\n") |
| 76 cat(msg) | 86 return(data_without_blanks) |
| 77 return(data_without_blanks) | 87 } else { |
| 78 } | 88 return(data) |
| 79 else { | 89 } |
| 80 return(data) | |
| 81 } | |
| 82 } | 90 } |
| 83 | 91 |
| 84 | 92 |
| 85 # Store output of WaveICA in a tsv file | 93 # Store output of WaveICA in a tsv file |
| 86 store_data <- function(normalized_data, output) { | 94 store_data <- function(data, output) { |
| 87 write.table(normalized_data, file = output, sep = "\t", col.names = NA) | 95 write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE) |
| 88 cat("Normalization has been completed.\n") | 96 cat("Normalization has been completed.\n") |
| 89 } | 97 } |
