3
|
1 ---
|
|
2 title: 'WGCNA: Module-Trait Association Report.'
|
|
3 output:
|
|
4 html_document:
|
|
5 number_sections: false
|
|
6 toc: true
|
|
7 ---
|
|
8
|
|
9 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
|
11
|
10 knitr::opts_chunk$set(error = FALSE, echo = FALSE)
|
3
|
11 ```
|
|
12 ```{r}
|
|
13 # Load the data from the previous step.
|
|
14 load(file=opt$r_data)
|
|
15 ```
|
7
|
16 # Trait/Phenotype Data
|
3
|
17
|
7
|
18 The table below shows the list of trait/phenotype data provided, but with any ignored columns removed and any categorical columns converted to a one-hot enconding (e.g. 0 when present 1 when not present).
|
3
|
19
|
|
20 ```{r}
|
7
|
21 # Load the trait data file.
|
3
|
22 trait_data = data.frame()
|
|
23 trait_data = read.csv(opt$trait_data, header = TRUE, row.names = opt$sname_col)
|
|
24 sample_names = rownames(gemt)
|
|
25 trait_rows = match(sample_names, rownames(trait_data))
|
|
26 trait_data = trait_data[trait_rows, ]
|
7
|
27
|
|
28 # Remove ignored columns.
|
|
29 ignore = strsplit(opt$ignore_cols, ',')
|
|
30 if (length(ignore[[1]]) > 0) {
|
|
31 print(paste('You chose to ignore the following fields:', paste(ignore[[1]], collapse=", ")))
|
|
32 trait_data = trait_data[, colnames(trait_data)[!(colnames(trait_data) %in% ignore[[1]])]]
|
|
33 }
|
|
34
|
|
35 # Change any categorical fields to 1 hot encoding as requested by the caller.
|
|
36 one_hot_cols = strsplit(opt$one_hot_cols, ',')
|
|
37 if (length(one_hot_cols[[1]]) > 0) {
|
|
38 print(paste('You chose to 1-hot encode the following fields:', paste(one_hot_cols[[1]], collapse=", ")))
|
|
39
|
|
40 # Perform the 1-hot encoding for specified fields.
|
|
41 swap_cols = colnames(trait_data)[(colnames(trait_data) %in% one_hot_cols[[1]])]
|
|
42 temp = as.data.frame(trait_data[, swap_cols])
|
|
43 colnames(temp) = swap_cols
|
|
44 temp = apply(temp, 2, make.names)
|
|
45 dmy <- dummyVars(" ~ .", data = temp)
|
|
46 encoded <- data.frame(predict(dmy, newdata = temp))
|
|
47 encoded = sapply(encoded, as.integer)
|
|
48
|
|
49 # Make a new trait_data table with these new 1-hot fields.
|
|
50 keep_cols = colnames(trait_data)[!(colnames(trait_data) %in% one_hot_cols[[1]])]
|
|
51 keep = as.data.frame(trait_data[, keep_cols])
|
|
52 colnames(keep) = keep_cols
|
|
53
|
|
54 # Make a new trait_data object that has the columns to keep and the new 1-hot columns.
|
|
55 trait_data = cbind(keep, encoded)
|
|
56 }
|
|
57
|
3
|
58 datatable(trait_data)
|
|
59 ```
|
11
|
60 # Module-Condition Association
|
|
61
|
7
|
62 Now that we have trait/phenotype data, we can explore if any of the network modules are asociated with these features. First, is an empirical exploration by viewing again the sample dendrogram but with traits added and colored by category or numerical intensity, as appropriate. If groups of samples with similar expression also share similar annotations then the same colors will appear "in blocks" under the clustered samples. This view does not indicate associations but can help visualize when some modules might be associated.
|
3
|
63
|
|
64 ```{r fig.align='center', fig.width=8, fig.height=9}
|
|
65
|
|
66 # Determine the column types within the trait annotation data.
|
|
67 trait_types = sapply(trait_data, class)
|
11
|
68
|
|
69 # So that we can merge colors together with a cbind, create a
|
|
70 # data frame with an empty column
|
7
|
71 trait_colors = data.frame(empty = rep(1:dim(trait_data)[1]))
|
3
|
72
|
|
73 # Set the colors for the quantitative data.
|
7
|
74 quantitative_fields = colnames(trait_data)[which(trait_types == "numeric")]
|
|
75 if (length(quantitative_fields) > 0) {
|
|
76 qdata = as.data.frame(trait_data[,quantitative_fields])
|
|
77 quantitative_colors = numbers2colors(qdata, signed = FALSE)
|
|
78 colnames(quantitative_colors) = quantitative_fields
|
|
79 trait_colors = cbind(trait_colors,quantitative_colors)
|
|
80 }
|
3
|
81
|
|
82 # Set the colors for the categorical data.
|
7
|
83 categorical_fields = colnames(trait_data)[which(trait_types == "factor")]
|
|
84 if (length(categorical_fields) > 0) {
|
|
85 cdata = as.data.frame(trait_data[,categorical_fields])
|
|
86 categorical_colors = labels2colors(cdata)
|
|
87 colnames(categorical_colors) = categorical_fields
|
|
88 trait_colors = cbind(trait_colors,categorical_colors)
|
|
89 }
|
3
|
90
|
|
91 # Set the colors for the ordinal data.
|
7
|
92 ordinal_fields = colnames(trait_data)[which(trait_types == "integer")]
|
|
93 if (length(ordinal_fields) > 0) {
|
|
94 odata = as.data.frame(trait_data[,ordinal_fields])
|
|
95 ordinal_colors = numbers2colors(odata, signed = FALSE)
|
|
96 colnames(ordinal_colors) = ordinal_fields
|
|
97 trait_colors = cbind(trait_colors, ordinal_colors)
|
|
98 }
|
3
|
99
|
11
|
100 # Remove the empty column from teh trait_colors dataframe and
|
|
101 # reorder the colors to match the same order of columns in the trait_data df.
|
7
|
102 trait_colors = subset(trait_colors, select=-c(empty))
|
|
103 trait_colors = trait_colors[,colnames(trait_data)]
|
|
104 options(repr.plot.width=15, repr.plot.height=10)
|
|
105 plotDendroAndColors(sampleTree, trait_colors,
|
3
|
106 groupLabels = names(trait_data),
|
|
107 main = "Sample Dendrogram and Annotation Heatmap",
|
|
108 cex.dendroLabels = 0.5)
|
|
109
|
|
110 ```
|
|
111
|
|
112 To statistically identify the associations, correlation tests are performed of the eigengenes of each module with the annotation data. The following heatmap shows the results between each annotation feature and each module. Modules with a signficant positive assocation have a correlation value near 1. Modules with a significant negative association have a correlation value near -1. Modules with no correlation have a value near 0.
|
|
113
|
|
114 ```{r fig.align='center', fig.width=15, fig.height=15}
|
|
115 MEs = orderMEs(MEs)
|
|
116 moduleTraitCor = cor(MEs, trait_data, use = "p");
|
|
117 moduleTraitPvalue = corPvalueStudent(moduleTraitCor, n_samples);
|
|
118
|
7
|
119 # The WGCNA labeledHeatmap function is too overloaded with detail, we'll create a simpler plot.
|
3
|
120
|
7
|
121 plotData = melt(moduleTraitCor)
|
|
122 # We want to makes sure the order is the same as in the
|
|
123 # labeledHeatmap function (example above)
|
|
124 plotData$Var1 = factor(plotData$Var1, levels = rev(colnames(MEs)), ordered=TRUE)
|
|
125 # Now use ggplot2 to make a nicer image.
|
|
126 p <- ggplot(plotData, aes(Var2, Var1, fill=value)) +
|
|
127 geom_tile() + xlab('Experimental Conditions') + ylab('WGCNA Modules') +
|
|
128 scale_fill_gradient2(low = "#0072B2", high = "#D55E00",
|
|
129 mid = "white", midpoint = 0,
|
|
130 limit = c(-1,1), name="PCC") +
|
|
131 theme_bw() +
|
|
132 theme(axis.text.x = element_text(angle = 45, hjust=1, vjust=1, size=15),
|
|
133 axis.text.y = element_text(angle = 0, hjust=1, vjust=0.5, size=15),
|
|
134 legend.text=element_text(size=15),
|
|
135 panel.border = element_blank(),
|
|
136 panel.grid.major = element_blank(),
|
|
137 panel.grid.minor = element_blank(),
|
|
138 axis.line = element_blank())
|
|
139 print(p)
|
3
|
140 ```
|
|
141
|
|
142 ```{r}
|
|
143 output = cbind(moduleTraitCor, moduleTraitPvalue)
|
|
144 write.csv(output, file = opt$module_association_file, quote=FALSE, row.names=TRUE)
|
|
145 ```
|
|
146 A file has been generated named `module_association.csv` which conatins the list of modules, and their correlation values as well as p-values indicating the strength of the associations.
|
|
147 ```{r}
|
|
148 # names (colors) of the modules
|
|
149 modNames = substring(names(MEs), 3)
|
|
150 geneModuleMembership = as.data.frame(cor(gemt, MEs, use = "p"));
|
|
151 MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), n_samples));
|
|
152 names(geneModuleMembership) = paste("MM", modNames, sep="");
|
|
153 names(MMPvalue) = paste("p.MM", modNames, sep="");
|
|
154
|
|
155 # Calculate the gene trait significance as a Pearson's R and p-value.
|
|
156 gts = as.data.frame(cor(gemt, trait_data, use = "p"));
|
|
157 gtsp = as.data.frame(corPvalueStudent(as.matrix(gts), n_samples));
|
|
158 colnames(gtsp) = c(paste("p", names(trait_data), sep="."))
|
|
159 colnames(gts) = c(paste("GS", names(trait_data), sep="."))
|
|
160
|
|
161 # Write out the gene information.
|
|
162 output = cbind(Module = module_labels, gts, gtsp)
|
|
163 write.csv(output, file = opt$gene_association_file, quote=FALSE, row.names=TRUE)
|
|
164
|
|
165 ```
|
7
|
166 Genes themselves can also have assocation with traits. This is calculated via a traditional correlation test as well. Another file has been generated named `gene_association.csv` which provides the list of genes, the modules they belong to and the assocaition of each gene to the trait features.
|