Mercurial > repos > ethevenot > univariate
comparison univariate_script.R @ 0:ab2ee3414e4e draft
planemo upload for repository https://github.com/workflow4metabolomics/univariate.git commit 98e8f4464b2f7321acb010e26e2a1c82fe37096e
author | ethevenot |
---|---|
date | Tue, 24 Oct 2017 08:57:25 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ab2ee3414e4e |
---|---|
1 univariateF <- function(datMN, | |
2 samDF, | |
3 varDF, | |
4 facC, | |
5 tesC = c("ttest", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[1], | |
6 adjC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7], | |
7 thrN = 0.05, | |
8 pdfC) { | |
9 | |
10 | |
11 ## Option | |
12 | |
13 strAsFacL <- options()$stringsAsFactors | |
14 options(stingsAsFactors = FALSE) | |
15 options(warn = -1) | |
16 | |
17 ## Getting the response (either a factor or a numeric) | |
18 | |
19 if(mode(samDF[, facC]) == "character") { | |
20 facFcVn <- factor(samDF[, facC]) | |
21 facLevVc <- levels(facFcVn) | |
22 } else | |
23 facFcVn <- samDF[, facC] | |
24 | |
25 cat("\nPerforming '", tesC, "'\n", sep="") | |
26 | |
27 varPfxC <- paste0(make.names(facC), "_", tesC, "_") | |
28 | |
29 | |
30 if(tesC %in% c("ttest", "wilcoxon", "pearson", "spearman")) { | |
31 | |
32 | |
33 switch(tesC, | |
34 ttest = { | |
35 staF <- function(y) diff(tapply(y, facFcVn, function(x) mean(x, na.rm = TRUE))) | |
36 tesF <- function(y) t.test(y ~ facFcVn)[["p.value"]] | |
37 }, | |
38 wilcoxon = { | |
39 staF <- function(y) diff(tapply(y, facFcVn, function(x) median(x, na.rm = TRUE))) | |
40 tesF <- function(y) wilcox.test(y ~ facFcVn)[["p.value"]] | |
41 }, | |
42 pearson = { | |
43 staF <- function(y) cor(facFcVn, y, method = "pearson", use = "pairwise.complete.obs") | |
44 tesF <- function(y) cor.test(facFcVn, y, method = "pearson", use = "pairwise.complete.obs")[["p.value"]] | |
45 }, | |
46 spearman = { | |
47 staF <- function(y) cor(facFcVn, y, method = "spearman", use = "pairwise.complete.obs") | |
48 tesF <- function(y) cor.test(facFcVn, y, method = "spearman", use = "pairwise.complete.obs")[["p.value"]] | |
49 }) | |
50 | |
51 staVn <- apply(datMN, 2, staF) | |
52 | |
53 adjVn <- p.adjust(apply(datMN, | |
54 2, | |
55 tesF), | |
56 method = adjC) | |
57 | |
58 sigVn <- as.numeric(adjVn < thrN) | |
59 | |
60 if(tesC %in% c("ttest", "wilcoxon")) | |
61 varPfxC <- paste0(varPfxC, paste(rev(facLevVc), collapse = "."), "_") | |
62 | |
63 varDF[, paste0(varPfxC, ifelse(tesC %in% c("ttest", "wilcoxon"), "dif", "cor"))] <- staVn | |
64 | |
65 varDF[, paste0(varPfxC, adjC)] <- adjVn | |
66 | |
67 varDF[, paste0(varPfxC, "sig")] <- sigVn | |
68 | |
69 ## graphic | |
70 | |
71 pdf(pdfC, onefile = TRUE) | |
72 | |
73 varVi <- which(sigVn > 0) | |
74 | |
75 if(tesC %in% c("ttest", "wilcoxon")) { | |
76 | |
77 facVc <- as.character(facFcVn) | |
78 names(facVc) <- rownames(samDF) | |
79 | |
80 for(varI in varVi) { | |
81 | |
82 varC <- rownames(varDF)[varI] | |
83 | |
84 boxF(facFcVn, | |
85 datMN[, varI], | |
86 paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")"), | |
87 facVc) | |
88 | |
89 } | |
90 | |
91 } else { ## pearson or spearman | |
92 | |
93 for(varI in varVi) { | |
94 | |
95 varC <- rownames(varDF)[varI] | |
96 | |
97 mod <- lm(datMN[, varI] ~ facFcVn) | |
98 | |
99 plot(facFcVn, datMN[, varI], | |
100 xlab = facC, | |
101 ylab = "", | |
102 pch = 18, | |
103 main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ", R2 = ", signif(summary(mod)$r.squared, 2), ")")) | |
104 | |
105 abline(mod, col = "red") | |
106 | |
107 } | |
108 | |
109 } | |
110 | |
111 dev.off() | |
112 | |
113 | |
114 } else if(tesC == "anova") { | |
115 | |
116 | |
117 ## getting the names of the pairwise comparisons 'class1Vclass2' | |
118 prwVc <- rownames(TukeyHSD(aov(datMN[, 1] ~ facFcVn))[["facFcVn"]]) | |
119 | |
120 prwVc <- gsub("-", ".", prwVc, fixed = TRUE) ## 2016-08-05: '-' character in dataframe column names seems not to be converted to "." by write.table on ubuntu R-3.3.1 | |
121 | |
122 ## omnibus and post-hoc tests | |
123 | |
124 aovMN <- t(apply(datMN, 2, function(varVn) { | |
125 | |
126 aovMod <- aov(varVn ~ facFcVn) | |
127 pvaN <- summary(aovMod)[[1]][1, "Pr(>F)"] | |
128 hsdMN <- TukeyHSD(aovMod)[["facFcVn"]] | |
129 c(pvaN, c(hsdMN[, c("diff", "p adj")])) | |
130 | |
131 })) | |
132 | |
133 difVi <- 1:length(prwVc) + 1 | |
134 | |
135 ## difference of the means for each pairwise comparison | |
136 | |
137 difMN <- aovMN[, difVi] | |
138 colnames(difMN) <- paste0(varPfxC, prwVc, "_dif") | |
139 | |
140 ## correction for multiple testing | |
141 | |
142 aovMN <- aovMN[, -difVi, drop = FALSE] | |
143 aovMN <- apply(aovMN, 2, function(pvaVn) p.adjust(pvaVn, method = adjC)) | |
144 | |
145 ## significance coding (0 = not significant, 1 = significant) | |
146 | |
147 adjVn <- aovMN[, 1] | |
148 sigVn <- as.numeric(adjVn < thrN) | |
149 | |
150 aovMN <- aovMN[, -1, drop = FALSE] | |
151 colnames(aovMN) <- paste0(varPfxC, prwVc, "_", adjC) | |
152 | |
153 aovSigMN <- aovMN < thrN | |
154 mode(aovSigMN) <- "numeric" | |
155 colnames(aovSigMN) <- paste0(varPfxC, prwVc, "_sig") | |
156 | |
157 ## final aggregated table | |
158 | |
159 resMN <- cbind(adjVn, sigVn, difMN, aovMN, aovSigMN) | |
160 colnames(resMN)[1:2] <- paste0(varPfxC, c(adjC, "sig")) | |
161 | |
162 varDF <- cbind.data.frame(varDF, as.data.frame(resMN)) | |
163 | |
164 ## graphic | |
165 | |
166 pdf(pdfC, onefile = TRUE) | |
167 | |
168 for(varI in 1:nrow(varDF)) { | |
169 | |
170 if(sum(aovSigMN[varI, ]) > 0) { | |
171 | |
172 varC <- rownames(varDF)[varI] | |
173 | |
174 boxplot(datMN[, varI] ~ facFcVn, | |
175 main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")")) | |
176 | |
177 for(prwI in 1:length(prwVc)) { | |
178 | |
179 if(aovSigMN[varI, paste0(varPfxC, prwVc[prwI], "_sig")] == 1) { | |
180 | |
181 claVc <- unlist(strsplit(prwVc[prwI], ".", fixed = TRUE)) | |
182 aovClaVl <- facFcVn %in% claVc | |
183 aovFc <- facFcVn[aovClaVl, drop = TRUE] | |
184 aovVc <- as.character(aovFc) | |
185 names(aovVc) <- rownames(samDF)[aovClaVl] | |
186 boxF(aovFc, | |
187 datMN[aovClaVl, varI], | |
188 paste0(varC, " (", adjC, " = ", signif(aovMN[varI, paste0(varPfxC, prwVc[prwI], "_", adjC)], 2), ")"), | |
189 aovVc) | |
190 | |
191 } | |
192 | |
193 } | |
194 | |
195 } | |
196 | |
197 } | |
198 | |
199 dev.off() | |
200 | |
201 | |
202 } else if(tesC == "kruskal") { | |
203 | |
204 | |
205 ## getting the names of the pairwise comparisons 'class1.class2' | |
206 | |
207 nemMN <- posthoc.kruskal.nemenyi.test(datMN[, 1], facFcVn, "Tukey")[["p.value"]] | |
208 nemVl <- c(lower.tri(nemMN, diag = TRUE)) | |
209 nemClaMC <- cbind(rownames(nemMN)[c(row(nemMN))][nemVl], | |
210 colnames(nemMN)[c(col(nemMN))][nemVl]) | |
211 nemNamVc <- paste0(nemClaMC[, 1], ".", nemClaMC[, 2]) | |
212 pfxNemVc <- paste0(varPfxC, nemNamVc) | |
213 | |
214 ## omnibus and post-hoc tests | |
215 | |
216 nemMN <- t(apply(datMN, 2, function(varVn) { | |
217 | |
218 pvaN <- kruskal.test(varVn ~ facFcVn)[["p.value"]] | |
219 varNemMN <- posthoc.kruskal.nemenyi.test(varVn, facFcVn, "Tukey")[["p.value"]] | |
220 c(pvaN, c(varNemMN)) | |
221 | |
222 })) | |
223 | |
224 ## correction for multiple testing | |
225 | |
226 nemMN <- apply(nemMN, 2, | |
227 function(pvaVn) p.adjust(pvaVn, method = adjC)) | |
228 adjVn <- nemMN[, 1] | |
229 sigVn <- as.numeric(adjVn < thrN) | |
230 nemMN <- nemMN[, c(FALSE, nemVl)] | |
231 colnames(nemMN) <- paste0(pfxNemVc, "_", adjC) | |
232 | |
233 ## significance coding (0 = not significant, 1 = significant) | |
234 | |
235 nemSigMN <- nemMN < thrN | |
236 mode(nemSigMN) <- "numeric" | |
237 colnames(nemSigMN) <- paste0(pfxNemVc, "_sig") | |
238 | |
239 ## difference of the medians for each pairwise comparison | |
240 | |
241 difMN <- sapply(1:nrow(nemClaMC), function(prwI) { | |
242 prwVc <- nemClaMC[prwI, ] | |
243 prwVi <- which(facFcVn %in% prwVc) | |
244 prwFacFc <- factor(as.character(facFcVn)[prwVi], levels = prwVc) | |
245 apply(datMN[prwVi, ], 2, function(varVn) -diff(as.numeric(tapply(varVn, prwFacFc, function(x) median(x, na.rm = TRUE))))) | |
246 }) | |
247 colnames(difMN) <- gsub("_sig", "_dif", colnames(nemSigMN)) | |
248 | |
249 ## final aggregated table | |
250 | |
251 resMN <- cbind(adjVn, sigVn, difMN, nemMN, nemSigMN) | |
252 colnames(resMN)[1:2] <- paste0(varPfxC, c(adjC, "sig")) | |
253 | |
254 varDF <- cbind.data.frame(varDF, as.data.frame(resMN)) | |
255 | |
256 ## graphic | |
257 | |
258 pdf(pdfC, onefile = TRUE) | |
259 | |
260 for(varI in 1:nrow(varDF)) { | |
261 | |
262 if(sum(nemSigMN[varI, ]) > 0) { | |
263 | |
264 varC <- rownames(varDF)[varI] | |
265 | |
266 boxplot(datMN[, varI] ~ facFcVn, | |
267 main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")")) | |
268 | |
269 for(nemI in 1:length(nemNamVc)) { | |
270 | |
271 if(nemSigMN[varI, paste0(varPfxC, nemNamVc[nemI], "_sig")] == 1) { | |
272 | |
273 nemClaVc <- nemClaMC[nemI, ] | |
274 nemClaVl <- facFcVn %in% nemClaVc | |
275 nemFc <- facFcVn[nemClaVl, drop = TRUE] | |
276 nemVc <- as.character(nemFc) | |
277 names(nemVc) <- rownames(samDF)[nemClaVl] | |
278 boxF(nemFc, | |
279 datMN[nemClaVl, varI], | |
280 paste0(varC, " (", adjC, " = ", signif(nemMN[varI, paste0(varPfxC, nemNamVc[nemI], "_", adjC)], 2), ")"), | |
281 nemVc) | |
282 | |
283 } | |
284 | |
285 } | |
286 | |
287 } | |
288 | |
289 } | |
290 | |
291 dev.off() | |
292 | |
293 } | |
294 | |
295 names(sigVn) <- rownames(varDF) | |
296 sigSumN <- sum(sigVn, na.rm = TRUE) | |
297 if(sigSumN) { | |
298 cat("\nThe following ", sigSumN, " variable", ifelse(sigSumN > 1, "s", ""), " (", round(sigSumN / length(sigVn) * 100), "%) ", ifelse(sigSumN > 1, "were", "was"), " found significant at the ", thrN, " level:\n", sep = "") | |
299 cat(paste(rownames(varDF)[sigVn > 0], collapse = "\n"), "\n", sep = "") | |
300 } else | |
301 cat("\nNo significant variable found at the selected ", thrN, " level\n", sep = "") | |
302 | |
303 options(stingsAsFactors = strAsFacL) | |
304 | |
305 return(varDF) | |
306 | |
307 } | |
308 | |
309 | |
310 boxF <- function(xFc, | |
311 yVn, | |
312 maiC, | |
313 xVc) { | |
314 | |
315 boxLs <- boxplot(yVn ~ xFc, | |
316 main = maiC) | |
317 | |
318 outVn <- boxLs[["out"]] | |
319 | |
320 if(length(outVn)) { | |
321 | |
322 for(outI in 1:length(outVn)) { | |
323 levI <- which(levels(xFc) == xVc[names(outVn)[outI]]) | |
324 text(levI, | |
325 outVn[outI], | |
326 labels = names(outVn)[outI], | |
327 pos = ifelse(levI == 2, 2, 4)) | |
328 } | |
329 | |
330 } | |
331 | |
332 } |