Mercurial > repos > devteam > dwt_var_perfeature
comparison execute_dwt_var_perFeature.R @ 4:e6e495fa6a79 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/dwt_var_perfeature commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
author | devteam |
---|---|
date | Mon, 06 Jul 2020 18:13:13 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:93b71985efd6 | 4:e6e495fa6a79 |
---|---|
1 ##################################################################### | |
2 ## plot multiscale wavelet variance | |
3 ## create null bands by permuting the original data series | |
4 ## generate plots and table of wavelet variance including p-values | |
5 ####################################################################### | |
6 options(echo = FALSE) | |
7 library("wavethresh"); | |
8 library("waveslim"); | |
9 library("bitops"); | |
10 | |
11 ## to determine if data is properly formatted 2^N observations | |
## Test whether `x` is a power of two.
## A non-zero value is a power of two when it shares no set bits with its
## predecessor (x AND (x - 1) is zero). Zero is rejected first and, thanks to
## the short-circuit, the bitwise test is then never evaluated for it.
is_power2 <- function(x) {
  x != 0 && bitops::bitAnd(x, x - 1) == 0
}
15 | |
16 ## dwt : discrete wavelet transform; uses the Haar wavelet filter (the simplest wavelet function) by default, and a different filter can be selected through the function arguments | |
## Helper: per-scale wavelet variance of one series.
## Runs waveslim::dwt() with `n_levels` levels, feeds the result to
## waveslim::wave.variance(), then keeps the first column and drops row
## `n_levels + 1`, leaving one value per wavelet scale.
.dwt_scale_variance <- function(series, n_levels, wf, boundary) {
  coefs <- waveslim::dwt(series, wf = wf, n.levels = n_levels, boundary = boundary)
  waveslim::wave.variance(coefs)[-(n_levels + 1), 1]
}

## Multiscale wavelet variance with permutation-based null bands and p-values.
##
## For every feature (column of `data`, labelled by `names`) the function
##   1. checks that the series is numeric and has a power-of-two length,
##   2. computes the observed wavelet variance at each scale,
##   3. builds a null distribution from `n_perm` random permutations,
##   4. plots observed variance together with the lower/upper null bands on the
##      active graphics device,
##   5. derives a one-sided p-value per scale: right tail when the observed
##      variance is at or above the null median, left tail otherwise.
##
## Arguments:
##   data     vector, matrix or data frame; column i belongs to names[i]
##   names    feature labels, one per column, used as plot titles
##   alpha    significance level; the bands are rows alpha/2 * n_perm and
##            (1 - alpha/2) * n_perm of the sorted null distribution
##   filter, family, bc
##            passed to wavethresh::wd(), which is used only to obtain the
##            number of decomposition levels
##   method   accepted for backward compatibility; not used by this function
##   wf, boundary
##            passed to waveslim::dwt()
##   n_perm   number of permutations (default 1000, the former fixed value)
##
## Returns a character matrix with columns Wavelet, Variance, Pvalue, Test and
## one row per feature and scale (zero rows when `names` is empty).
## Stops with an error if a series is not numeric or its length is not a
## power of two.
dwt_var_permut_get_max <- function(data, names, alpha, filter = 1,
                                   family = "DaubExPhase", bc = "symmetric",
                                   method = "kendall", wf = "haar",
                                   boundary = "reflection", n_perm = 1000) {
  header <- c("Wavelet", "Variance", "Pvalue", "Test")
  final_pvalue <- NULL

  print(class(data))
  print(names)
  print(alpha)

  ## Graphics settings are deliberately left in place afterwards so that the
  ## side effects seen by existing callers do not change.
  par(mar = c(5, 4, 4, 3), oma = c(4, 4, 3, 2), xaxt = "s", cex = 1, las = 1)

  print(header)

  ## Convert once instead of on every column access.
  data_matrix <- as.matrix(data)

  ## Rows of the sorted null distribution that serve as band limits. They are
  ## truncated exactly as plain indexing would do, then clamped to
  ## 1..n_perm so a tiny alpha cannot produce index 0 and drop a band.
  limit_lower <- alpha / 2 * n_perm
  limit_upper <- (1 - alpha / 2) * n_perm
  row_lower <- min(max(trunc(limit_lower), 1), n_perm)
  row_upper <- min(max(trunc(limit_upper), 1), n_perm)

  for (i in seq_along(names)) {
    series <- data_matrix[, i]

    ## formatting checks
    print(is.numeric(series))
    if (!is.numeric(series)) {
      stop("data must be a numeric vector")
    }
    print(length(series))
    print(is_power2(length(series)))
    if (!is_power2(length(series))) {
      stop("data length must be a power of two")
    }

    j <- wavethresh::wd(series, filter.number = filter, family = family, bc = bc)$nlevels
    print(j)

    observed <- .dwt_scale_variance(series, j, wf, boundary)
    print(observed)

    print(limit_lower)
    print(limit_upper)

    ## Null distribution: one row per permutation, one column per scale.
    ## A permutation has the same length as the original series, so the level
    ## count `j` is reused rather than recomputed for every shuffle.
    null_var <- matrix(NA_real_, nrow = n_perm, ncol = length(observed))
    for (k in seq_len(n_perm)) {
      shuffled <- sample(series, length(series), replace = FALSE)
      null_var[k, ] <- .dwt_scale_variance(shuffled, j, wf, boundary)
    }
    ## Sort each scale independently; matrix() keeps the shape when
    ## n_perm is 1 and apply() would otherwise return a plain vector.
    null_var <- matrix(apply(null_var, 2, sort, na.last = TRUE), nrow = n_perm)
    var_lower <- null_var[row_lower, ]
    var_upper <- null_var[row_upper, ]
    med <- apply(null_var, 2, median, na.rm = TRUE)

    ## plot
    results <- cbind(temp = observed, var_lower = var_lower, var_upper = var_upper)
    print(results)
    matplot(results, type = "b", pch = "*", lty = 1, col = c(1, 2, 2), xaxt = "n",
            xlab = "Wavelet Scale", ylab = "Wavelet variance")
    mtext(names[i], side = 3, line = 0.5, cex = 1)
    axis(1, at = 1:j, labels = c(2 ^ (0:(j - 1))), las = 3, cex.axis = 1)

    ## p-values by comparison with the null distribution. Rows are collected
    ## per feature; the former shared accumulator was never reset, so earlier
    ## features were appended again for every later feature.
    scale_rows <- vector("list", length(observed))
    for (m in seq_along(observed)) {
      print(paste("scale", m, sep = " "))
      print(paste("var", observed[m], sep = " "))
      print(paste("med", med[m], sep = " "))

      null_m <- null_var[, m]
      n_valid <- sum(!is.na(null_m))
      if (observed[m] >= med[m]) {
        ## right-tail test
        print("R")
        side <- "R"
        pv <- length(which(null_m >= observed[m])) / n_valid
      } else {
        ## left-tail test
        print("L")
        side <- "L"
        pv <- length(which(null_m <= observed[m])) / n_valid
      }
      print(pv)

      scale_rows[[m]] <- c(paste("Scale", 2 ^ (m - 1), sep = "_"),
                           format(observed[m], digits = 3), pv, side)
    }
    final_pvalue <- rbind(final_pvalue, do.call(rbind, scale_rows))
  }

  ## With no features there is nothing to bind; return an empty, correctly
  ## shaped table instead of failing on colnames(NULL).
  if (is.null(final_pvalue)) {
    final_pvalue <- matrix(character(0), nrow = 0, ncol = length(header))
  }
  colnames(final_pvalue) <- header
  final_pvalue
}
126 | |
127 ## execute | |
128 ## read in data | |
## Command-line driver.
## Positional arguments, as used below:
##   1  tab-delimited input file without a header row
##   2  comma-separated list of 1-based column numbers to analyse
##   3  alpha (significance level)
##   4  output file for the tab-delimited results table
##   5  output PDF file for the plots
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 5) {
  stop("expected 5 arguments: input file, columns, alpha, output table, output pdf")
}

data_test <- read.delim(args[1], header = FALSE)
alpha <- as.double(args[3])

## strsplit() returns a list holding one character vector; take that vector so
## each requested column is handled on its own rather than the whole set
## reaching a scalar `if` at once.
column_tokens <- strsplit(args[2], ",")[[1]]
columns <- suppressWarnings(as.integer(column_tokens))
invalid <- is.na(columns) | columns < 1 | columns > ncol(data_test)
if (any(invalid)) {
  stop(paste("column", column_tokens[invalid][1], "doesn't exist"))
}

pdf(file = args[5], width = 11, height = 8)
final <- tryCatch(
  do.call(rbind, lapply(columns, function(f) {
    dwt_var_permut_get_max(data_test[, f], colnames(data_test)[f], alpha)
  })),
  ## close the PDF device even when the analysis stops with an error
  finally = invisible(dev.off())
)

write.table(final, file = args[4], sep = "\t", quote = FALSE, row.names = FALSE)