#################################################################################################
#                                       CORRELATION TABLE                                       #
#                                                                                               #
#                                                                                               #
# Input : 2 tables with common samples                                                          #
# Output : Correlation table ; Heatmap (pdf)                                                    #
#                                                                                               #
# Dependencies : Libraries "ggplot2" and "reshape2"                                             #
#                                                                                               #
#################################################################################################


# Parameters (for dev)
#
# This block is disabled on purpose (`if (FALSE)`): it is never executed when the
# file is sourced. During development, run the assignments by hand in an
# interactive session to get a complete set of arguments for correlation.tab().
# The two input paths are relative to the development root, so set the working
# directory yourself before running them; the block no longer wipes the
# workspace or changes the working directory on your behalf.
if (FALSE) {

  # Input tables and where their samples are stored
  tab1.name <- "Test/Ressources/Inputs/CT2_DM.tabular"
  tab2.name <- "Test/Ressources/Inputs/CT2_base_Diapason_14ClinCES_PRIN.txt"
  param1.samples <- "column"
  param2.samples <- "row"

  # Correlation and significance test
  corr.method <- "pearson"
  test.corr <- "yes"
  alpha <- 0.05
  multi.name <- "none"

  # Filtering and ordering of the variables
  filter <- "yes"
  filters.choice <- "filters_0_thr"
  threshold <- 0.2
  reorder.var <- "yes"

  # Heatmap colouring
  color.heatmap <- "yes"
  type.classes <- "irregular"
  reg.value <- 1/3
  irreg.vect <- c(-0.3, -0.2, -0.1, 0, 0.3, 0.4)

  # Output files
  output1 <- "Correlation_table.txt"
  output2 <- "Heatmap.pdf"

}



# Helper: read one tab-separated table and return it with the samples in rows,
# sorted by sample identifier.
# `samples` is "row" or "column" (where the samples are in the file).
ct.load.table <- function(path, samples) {
  tab <- read.table(path, sep = "\t", header = TRUE, check.names = FALSE, row.names = 1)

  if (samples == "column") {
    tab <- t(tab)
    # t() returns a character matrix as soon as one cell of the table is text.
    # Every variable would then look qualitative, so the column types are
    # rebuilt by re-parsing each variable from its text form.
    if (is.character(tab) && ncol(tab) > 0) {
      if (anyDuplicated(rownames(tab)) > 0) {
        stop("\nDuplicated sample identifiers found in table: ", path, "\nPlease check your data.\n")
      }
      typed.cols <- lapply(seq_len(ncol(tab)), function(k) {
        type.convert(trimws(unname(tab[, k])), as.is = TRUE)
      })
      names(typed.cols) <- colnames(tab)
      typed <- data.frame(typed.cols, check.names = FALSE, stringsAsFactors = FALSE)
      rownames(typed) <- rownames(tab)
      tab <- typed
    }
  }

  # drop = FALSE keeps a one-variable table two-dimensional
  tab[order(rownames(tab)), , drop = FALSE]
}

# Helper: message lines listing (at most 3 of) the identifiers of the `from`
# table that are absent from the `to` table; NULL when nothing is missing.
ct.describe.missing <- function(missing.ids, from, to) {
  if (length(missing.ids) == 0) {
    return(NULL)
  }
  intro <- if (length(missing.ids) < 4) {
    paste0("\nThe following identifier(s) found in the ", from,
           " table do not appear in the ", to, " table:\n")
  } else {
    paste0("\nFor example, the following identifiers found in the ", from,
           " table do not appear in the ", to, " table:\n")
  }
  c(intro, " ", paste(head(missing.ids, 3), collapse = "\n "), "\n")
}

# Helper: message lines listing (at most 3 of) the qualitative variables
# removed from table number `index`; NULL when none was removed.
ct.describe.qualitative <- function(dropped, index) {
  if (length(dropped) == 0) {
    return(NULL)
  }
  intro <- if (length(dropped) < 4) {
    paste0("In table ", index, ", the following qualitative variable(s) have been removed:\n")
  } else {
    paste0("For example, in table ", index, ", the following qualitative variables have been removed:\n")
  }
  c(intro, " ", paste(head(dropped, 3), collapse = "\n "), "\n")
}

# Helper: logical vector, TRUE for the columns of `tab` that are not numeric.
ct.is.qualitative <- function(tab) {
  !vapply(seq_len(ncol(tab)), function(k) is.numeric(tab[, k]), logical(1))
}

# Helper: order of the variables (columns) of `tab` given by a hierarchical
# clustering (Ward) on 1 - correlation. With fewer than 2 variables there is
# nothing to cluster and the current order is returned.
ct.hca.order <- function(tab, corr.method) {
  if (ncol(tab) < 2) {
    return(seq_len(ncol(tab)))
  }
  cormat <- cor(tab, method = corr.method, use = "pairwise.complete.obs")
  hclust(as.dist(1 - cormat), method = "ward.D2")$order
}

# Helper: increasing vector of class bounds, from -1 to 1, used to colour the heatmap.
ct.class.breaks <- function(type.classes, reg.value, irreg.vect) {
  if (type.classes == "regular") {
    # Steps of reg.value from -1 towards 0, mirrored on the positive side, with 0
    # always present as a bound. A step that lands on 0 only up to rounding
    # error is treated as 0 instead of creating a degenerate class.
    neg <- seq(-1, 0, reg.value)
    neg <- neg[neg < -sqrt(.Machine$double.eps)]
    c(neg, 0, -rev(neg))
  } else if (type.classes == "irregular") {
    # User bounds are sorted and de-duplicated; values outside ]-1;1[ are ignored
    # because -1 and 1 are always added as the outer bounds.
    inner <- irreg.vect[!is.na(irreg.vect) & irreg.vect > -1 & irreg.vect < 1]
    c(-1, sort(unique(inner)), 1)
  } else {
    stop("\n'type.classes' must be \"regular\" or \"irregular\".\n")
  }
}

# Helper: assign each correlation coefficient to a class and give each class a colour.
# Returns a list with:
#   - classe: class label of each value (NA for a missing coefficient)
#   - labels: all class labels, from the highest class to the lowest
#   - colors: fill colour of each label (green for positive, white around 0, red for negative)
ct.color.classes <- function(values, vect) {
  n.int <- length(vect) - 1
  lower <- vect[-length(vect)]
  upper <- vect[-1]

  # Labels: the first class is closed on the left, the others are ]lower;upper].
  # A class ending at 0 is open on the right because 0 gets a class of its own.
  cl <- paste0(c("[", rep("]", n.int - 1)),
               c(vect[1], round(lower[-1], 3)), ";", round(upper, 3),
               ifelse(upper == 0, "[", "]"))

  classe <- as.character(cut(values, breaks = vect, labels = cl, include.lowest = TRUE, right = TRUE))

  zero.pos <- which(vect == 0)
  if (length(zero.pos) == 1) {
    # 0 is a bound: exact zeros (e.g. non-significant coefficients) get a white class "0"
    classe[!is.na(values) & values == 0] <- "0"
    cl <- append(cl, "0", after = zero.pos - 1)
    white <- zero.pos
  } else {
    # 0 is not a bound: the class containing 0 is the white one
    white <- which(lower < 0 & upper > 0)
  }

  # From here on the classes go from the highest to the lowest
  cl <- rev(cl)
  white <- length(cl) - white + 1
  n.pos <- white - 1
  n.neg <- length(cl) - white

  # Color palette: each side uses its half of a green-white-red ramp centred on white
  myPal <- colorRampPalette(c("#00CC00", "white", "red"), space = "Lab", interpolate = "spline")
  colors <- c(if (n.pos > 0) head(myPal(2 * n.pos + 1), n.pos),
              "#FFFFFF",
              if (n.neg > 0) tail(myPal(2 * n.neg + 1), n.neg))

  list(classe = classe, labels = cl, colors = colors)
}


correlation.tab <- function(tab1.name, tab2.name, param1.samples, param2.samples, corr.method, test.corr, alpha,
                            multi.name, filter, filters.choice, threshold, reorder.var, color.heatmap, type.classes,
                            reg.value, irreg.vect, output1, output2) {

  # This function computes the correlation between the variables of two tables sharing the
  # same samples, writes the correlation table and saves it as a heatmap (pdf).
  #
  # Parameters:
  # - tab1.name: table 1 file's access (tab-separated, header, identifiers in the first column)
  # - tab2.name: table 2 file's access (same format)
  # - param1.samples ("row" or "column"): where the samples are in tab1
  # - param2.samples ("row" or "column"): where the samples are in tab2
  # - corr.method ("pearson", "spearman", "kendall"): correlation coefficient to compute
  # - test.corr ("yes" or "no"): test the significance of each correlation coefficient;
  #     non-significant coefficients are set to 0
  # - alpha (value between 0 and 1): risk for the correlation significance test
  # - multi.name ("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"): correction of multiple tests
  # - filter ("yes", "no"): use the zero filter or/and the threshold filter
  # - filters.choice ("filter_0" or "filters_0_thr"): "filter_0" removes variables with all their correlation
  #     coefficients = 0; otherwise variables with all their coefficients <= threshold in absolute value are removed
  # - threshold (value between 0 and 1): threshold for the threshold filter
  # - reorder.var ("yes" or "no"): reorder variables in the correlation table thanks to the HCA
  # - color.heatmap ("yes" or "no"): color the heatmap with classes defined by the user
  # - type.classes ("regular" or "irregular"): choose to color the heatmap with regular or irregular classes
  # - reg.value (value between 0 and 1): width of the regular classes
  # - irreg.vect (vector with values between -1 and 1): bounds of the intervals (irregular classes)
  # - output1: correlation table file's access
  # - output2: heatmap (colored correlation table) file's access
  #
  # Stops with an explicit message when the sample identifiers of both tables differ, when
  # a table has no quantitative variable, or when the filters remove every variable.


  # Input ----------------------------------------------------------------------------------------------

  # Tables are returned with samples in rows, sorted in alphabetical order of the samples
  tab1 <- ct.load.table(tab1.name, param1.samples)
  tab2 <- ct.load.table(tab2.name, param2.samples)


  # Check if the 2 datasets match regarding samples identifiers
  # Adapt from functions "check.err" and "match2", RcheckLibrary.R

  id1 <- rownames(tab1)
  id2 <- rownames(tab2)

  # identical() also catches tables with a different number of samples, which an
  # element-wise comparison can miss because the shorter vector is recycled.
  if (!identical(id1, id2)) {
    only1 <- id1[!(id1 %in% id2)]
    only2 <- id2[!(id2 %in% id1)]

    err.stock <- c("\nBoth tables do not match regarding samples identifiers.",
                   ct.describe.missing(only1, "first", "second"),
                   ct.describe.missing(only2, "second", "first"))
    if (length(only1) == 0 && length(only2) == 0) {
      err.stock <- c(err.stock,
                     "\nThe same identifiers are found in both tables, but not the same number of times.\n")
    }
    err.stock <- c(err.stock, "\nPlease check your data.\n")

    stop("\n- - - - - - - - -\n", err.stock, "\n- - - - - - - - -\n")
  }


  # Check qualitative variables in each input tables
  quali1 <- ct.is.qualitative(tab1)
  quali2 <- ct.is.qualitative(tab2)

  if (any(quali1) || any(quali2)) {
    err.msg <- c("\nThere are qualitative variables in your input tables which have been removed to realize the correlation table.\n\n",
                 ct.describe.qualitative(colnames(tab1)[quali1], 1),
                 ct.describe.qualitative(colnames(tab2)[quali2], 2))
    cat("\n- - - - - - - - -\n", err.msg, "\n- - - - - - - - -\n")
  }

  tab1 <- tab1[, !quali1, drop = FALSE]
  tab2 <- tab2[, !quali2, drop = FALSE]

  if (ncol(tab1) == 0 || ncol(tab2) == 0) {
    stop("\n- - - - - - - - -\n",
         "\nAt least one input table has no quantitative variable: the correlation table cannot be computed.\n",
         "\nPlease check your data.\n",
         "\n- - - - - - - - -\n")
  }


  # Correlation table ---------------------------------------------------------------------------------

  # Rows: variables of table 2 ; columns: variables of table 1
  tab.corr <- cor(tab2, tab1, method = corr.method, use = "pairwise.complete.obs")
  colnames(tab.corr) <- colnames(tab1)
  rownames(tab.corr) <- colnames(tab2)


  # Significance of correlation test ------------------------------------------------------------------

  if (test.corr == "yes") {

    pvalue <- matrix(NA_real_, nrow = nrow(tab.corr), ncol = ncol(tab.corr))
    for (i in seq_len(nrow(tab.corr))) {
      for (j in seq_len(ncol(tab.corr))) {
        pvalue[i, j] <- cor.test(tab2[, i], tab1[, j], method = corr.method)$p.value
      }
    }

    if (multi.name != "none") {
      pvalue <- matrix(p.adjust(pvalue, method = multi.name), nrow = nrow(tab.corr), ncol = ncol(tab.corr))
    }

    # which() skips missing p-values (e.g. constant variable): those coefficients stay as computed
    tab.corr[which(pvalue > alpha)] <- 0
  }


  # Filter settings ------------------------------------------------------------------------------------

  if (filter == "yes") {

    # Remove variables with all their correlation coefficients = 0 :
    if (filters.choice == "filter_0") {
      threshold <- 0
    }

    # A missing coefficient never counts as "below the threshold"
    weak <- !is.na(tab.corr) & abs(tab.corr) <= threshold

    # Variables of table 2 (rows) with only weak coefficients
    drop2 <- rowSums(weak) == ncol(tab.corr)
    tab.corr <- tab.corr[!drop2, , drop = FALSE]
    tab2 <- tab2[, !drop2, drop = FALSE]
    weak <- weak[!drop2, , drop = FALSE]

    # Variables of table 1 (columns) with only weak coefficients among the remaining rows
    drop1 <- colSums(weak) == nrow(tab.corr)
    tab.corr <- tab.corr[, !drop1, drop = FALSE]
    tab1 <- tab1[, !drop1, drop = FALSE]

    if (length(tab.corr) == 0) {
      stop("\n- - - - - - - - -\n",
           "\nNo variable left after filtering: all the correlation coefficients are filtered out.\n",
           "\nPlease check your filter settings.\n",
           "\n- - - - - - - - -\n")
    }
  }


  # Reorder variables in the correlation table (with the HCA) ------------------------------------------

  if (reorder.var == "yes") {
    tab.corr <- tab.corr[ct.hca.order(tab2, corr.method), ct.hca.order(tab1, corr.method), drop = FALSE]
  }


  # Output 1 : Correlation table -----------------------------------------------------------------------

  # Export correlation table
  # NOTE(review): data.frame() makes the column names syntactically valid here, so the header
  # can differ from the variable names of table 1 - confirm this is wanted before changing it.
  write.table(x = data.frame(name = rownames(tab.corr), tab.corr), file = output1, sep = "\t",
              quote = FALSE, row.names = FALSE)


  # Create the heatmap ---------------------------------------------------------------------------------

  library(ggplot2)
  library(reshape2)

  # Melt the correlation table :
  melted.tab.corr <- melt(tab.corr)
  legend.title <- paste(corr.method, "correlation", sep = "\n")

  if (color.heatmap == "yes") {

    # Class of each correlation coefficient and color of each class
    vect <- ct.class.breaks(type.classes, reg.value, irreg.vect)
    classes <- ct.color.classes(melted.tab.corr$value, vect)

    # Only the classes actually present in the table appear in the legend
    present <- classes$labels %in% classes$classe
    melted <- melted.tab.corr
    melted$classe <- factor(classes$classe, levels = classes$labels[present])

    # Heatmap if color.heatmap = yes :
    heat.plot <- ggplot(melted, aes(Var2, Var1, fill = classe)) +
      ggtitle("Colored correlation table" ) + xlab("Table 1") + ylab("Table 2") +
      geom_tile(color = "ghostwhite") +
      scale_fill_manual(breaks = levels(melted$classe),
                        values = classes$colors[present],
                        name = legend.title) +
      theme_classic() +
      theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
            plot.title = element_text(hjust = 0.5))

  } else {

    # Heatmap if color.heatmap = no :
    heat.plot <- ggplot(melted.tab.corr, aes(Var2, Var1, fill = value)) +
      ggtitle("Colored correlation table" ) + xlab("Table 1") + ylab("Table 2") +
      geom_tile(color = "ghostwhite") +
      scale_fill_gradient2(low = "red", high = "#00CC00", mid = "white", midpoint = 0, limit = c(-1,1),
                           name = legend.title) +
      theme_classic() +
      theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
            plot.title = element_text(hjust = 0.5))
  }


  # Output 2 : Heatmap ---------------------------------------------------------------------------------

  # The plot is passed explicitly instead of relying on the last plot created
  ggsave(output2, plot = heat.plot, device = "pdf", width = 10 + 0.075 * ncol(tab.corr),
         height = 5 + 0.075 * nrow(tab.corr), limitsize = FALSE)

} # End of correlation.tab


# Function call
# correlation.tab(tab1.name, tab2.name, param1.samples, param2.samples, corr.method, test.corr, alpha, multi.name, filter,
#                 filters.choice, threshold, reorder.var, color.heatmap, type.classes,
#                 reg.value, irreg.vect, output1, output2)