annotate aurora_wgcna_trait.Rmd @ 11:01ace2c8a593 draft

Uploaded
author spficklin
date Fri, 22 Nov 2019 01:47:07 +0000
parents ffbafe466107
children 3ed8495d7df7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
1 ---
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
2 title: 'WGCNA: Module-Trait Association Report.'
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
3 output:
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
4 html_document:
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
5 number_sections: false
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
6 toc: true
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
7 ---
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
8
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
9 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
11
01ace2c8a593 Uploaded
spficklin
parents: 7
diff changeset
10 knitr::opts_chunk$set(error = FALSE, echo = FALSE)
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
11 ```
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
12 ```{r}
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
13 # Load the data from the previous step.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
14 load(file=opt$r_data)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
15 ```
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
16 # Trait/Phenotype Data
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
17
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
18 The table below shows the list of trait/phenotype data provided, but with any ignored columns removed and any categorical columns converted to a one-hot enconding (e.g. 0 when present 1 when not present).
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
19
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
20 ```{r}
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
21 # Load the trait data file.
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
22 trait_data = data.frame()
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
23 trait_data = read.csv(opt$trait_data, header = TRUE, row.names = opt$sname_col)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
24 sample_names = rownames(gemt)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
25 trait_rows = match(sample_names, rownames(trait_data))
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
26 trait_data = trait_data[trait_rows, ]
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
27
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
28 # Remove ignored columns.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
29 ignore = strsplit(opt$ignore_cols, ',')
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
30 if (length(ignore[[1]]) > 0) {
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
31 print(paste('You chose to ignore the following fields:', paste(ignore[[1]], collapse=", ")))
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
32 trait_data = trait_data[, colnames(trait_data)[!(colnames(trait_data) %in% ignore[[1]])]]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
33 }
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
34
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
35 # Change any categorical fields to 1 hot encoding as requested by the caller.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
36 one_hot_cols = strsplit(opt$one_hot_cols, ',')
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
37 if (length(one_hot_cols[[1]]) > 0) {
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
38 print(paste('You chose to 1-hot encode the following fields:', paste(one_hot_cols[[1]], collapse=", ")))
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
39
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
40 # Perform the 1-hot encoding for specified fields.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
41 swap_cols = colnames(trait_data)[(colnames(trait_data) %in% one_hot_cols[[1]])]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
42 temp = as.data.frame(trait_data[, swap_cols])
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
43 colnames(temp) = swap_cols
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
44 temp = apply(temp, 2, make.names)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
45 dmy <- dummyVars(" ~ .", data = temp)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
46 encoded <- data.frame(predict(dmy, newdata = temp))
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
47 encoded = sapply(encoded, as.integer)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
48
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
49 # Make a new trait_data table with these new 1-hot fields.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
50 keep_cols = colnames(trait_data)[!(colnames(trait_data) %in% one_hot_cols[[1]])]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
51 keep = as.data.frame(trait_data[, keep_cols])
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
52 colnames(keep) = keep_cols
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
53
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
54 # Make a new trait_data object that has the columns to keep and the new 1-hot columns.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
55 trait_data = cbind(keep, encoded)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
56 }
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
57
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
58 datatable(trait_data)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
59 ```
11
01ace2c8a593 Uploaded
spficklin
parents: 7
diff changeset
60 # Module-Condition Association
01ace2c8a593 Uploaded
spficklin
parents: 7
diff changeset
61
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
62 Now that we have trait/phenotype data, we can explore if any of the network modules are asociated with these features. First, is an empirical exploration by viewing again the sample dendrogram but with traits added and colored by category or numerical intensity, as appropriate. If groups of samples with similar expression also share similar annotations then the same colors will appear "in blocks" under the clustered samples. This view does not indicate associations but can help visualize when some modules might be associated.
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
63
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
64 ```{r fig.align='center', fig.width=8, fig.height=9}
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
65
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
66 # Determine the column types within the trait annotation data.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
67 trait_types = sapply(trait_data, class)
11
01ace2c8a593 Uploaded
spficklin
parents: 7
diff changeset
68
01ace2c8a593 Uploaded
spficklin
parents: 7
diff changeset
69 # So that we can merge colors together with a cbind, create a
01ace2c8a593 Uploaded
spficklin
parents: 7
diff changeset
70 # data frame with an empty column
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
71 trait_colors = data.frame(empty = rep(1:dim(trait_data)[1]))
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
72
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
73 # Set the colors for the quantitative data.
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
74 quantitative_fields = colnames(trait_data)[which(trait_types == "numeric")]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
75 if (length(quantitative_fields) > 0) {
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
76 qdata = as.data.frame(trait_data[,quantitative_fields])
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
77 quantitative_colors = numbers2colors(qdata, signed = FALSE)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
78 colnames(quantitative_colors) = quantitative_fields
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
79 trait_colors = cbind(trait_colors,quantitative_colors)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
80 }
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
81
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
82 # Set the colors for the categorical data.
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
83 categorical_fields = colnames(trait_data)[which(trait_types == "factor")]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
84 if (length(categorical_fields) > 0) {
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
85 cdata = as.data.frame(trait_data[,categorical_fields])
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
86 categorical_colors = labels2colors(cdata)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
87 colnames(categorical_colors) = categorical_fields
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
88 trait_colors = cbind(trait_colors,categorical_colors)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
89 }
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
90
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
91 # Set the colors for the ordinal data.
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
92 ordinal_fields = colnames(trait_data)[which(trait_types == "integer")]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
93 if (length(ordinal_fields) > 0) {
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
94 odata = as.data.frame(trait_data[,ordinal_fields])
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
95 ordinal_colors = numbers2colors(odata, signed = FALSE)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
96 colnames(ordinal_colors) = ordinal_fields
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
97 trait_colors = cbind(trait_colors, ordinal_colors)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
98 }
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
99
11
01ace2c8a593 Uploaded
spficklin
parents: 7
diff changeset
100 # Remove the empty column from teh trait_colors dataframe and
01ace2c8a593 Uploaded
spficklin
parents: 7
diff changeset
101 # reorder the colors to match the same order of columns in the trait_data df.
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
102 trait_colors = subset(trait_colors, select=-c(empty))
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
103 trait_colors = trait_colors[,colnames(trait_data)]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
104 options(repr.plot.width=15, repr.plot.height=10)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
105 plotDendroAndColors(sampleTree, trait_colors,
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
106 groupLabels = names(trait_data),
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
107 main = "Sample Dendrogram and Annotation Heatmap",
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
108 cex.dendroLabels = 0.5)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
109
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
110 ```
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
111
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
112 To statistically identify the associations, correlation tests are performed of the eigengenes of each module with the annotation data. The following heatmap shows the results between each annotation feature and each module. Modules with a signficant positive assocation have a correlation value near 1. Modules with a significant negative association have a correlation value near -1. Modules with no correlation have a value near 0.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
113
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
114 ```{r fig.align='center', fig.width=15, fig.height=15}
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
115 MEs = orderMEs(MEs)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
116 moduleTraitCor = cor(MEs, trait_data, use = "p");
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
117 moduleTraitPvalue = corPvalueStudent(moduleTraitCor, n_samples);
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
118
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
119 # The WGCNA labeledHeatmap function is too overloaded with detail, we'll create a simpler plot.
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
120
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
121 plotData = melt(moduleTraitCor)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
122 # We want to makes sure the order is the same as in the
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
123 # labeledHeatmap function (example above)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
124 plotData$Var1 = factor(plotData$Var1, levels = rev(colnames(MEs)), ordered=TRUE)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
125 # Now use ggplot2 to make a nicer image.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
126 p <- ggplot(plotData, aes(Var2, Var1, fill=value)) +
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
127 geom_tile() + xlab('Experimental Conditions') + ylab('WGCNA Modules') +
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
128 scale_fill_gradient2(low = "#0072B2", high = "#D55E00",
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
129 mid = "white", midpoint = 0,
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
130 limit = c(-1,1), name="PCC") +
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
131 theme_bw() +
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
132 theme(axis.text.x = element_text(angle = 45, hjust=1, vjust=1, size=15),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
133 axis.text.y = element_text(angle = 0, hjust=1, vjust=0.5, size=15),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
134 legend.text=element_text(size=15),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
135 panel.border = element_blank(),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
136 panel.grid.major = element_blank(),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
137 panel.grid.minor = element_blank(),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
138 axis.line = element_blank())
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
139 print(p)
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
140 ```
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
141
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
142 ```{r}
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
143 output = cbind(moduleTraitCor, moduleTraitPvalue)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
144 write.csv(output, file = opt$module_association_file, quote=FALSE, row.names=TRUE)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
145 ```
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
146 A file has been generated named `module_association.csv` which conatins the list of modules, and their correlation values as well as p-values indicating the strength of the associations.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
147 ```{r}
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
148 # names (colors) of the modules
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
149 modNames = substring(names(MEs), 3)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
150 geneModuleMembership = as.data.frame(cor(gemt, MEs, use = "p"));
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
151 MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), n_samples));
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
152 names(geneModuleMembership) = paste("MM", modNames, sep="");
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
153 names(MMPvalue) = paste("p.MM", modNames, sep="");
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
154
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
155 # Calculate the gene trait significance as a Pearson's R and p-value.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
156 gts = as.data.frame(cor(gemt, trait_data, use = "p"));
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
157 gtsp = as.data.frame(corPvalueStudent(as.matrix(gts), n_samples));
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
158 colnames(gtsp) = c(paste("p", names(trait_data), sep="."))
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
159 colnames(gts) = c(paste("GS", names(trait_data), sep="."))
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
160
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
161 # Write out the gene information.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
162 output = cbind(Module = module_labels, gts, gtsp)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
163 write.csv(output, file = opt$gene_association_file, quote=FALSE, row.names=TRUE)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
164
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
165 ```
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
166 Genes themselves can also have assocation with traits. This is calculated via a traditional correlation test as well. Another file has been generated named `gene_association.csv` which provides the list of genes, the modules they belong to and the assocaition of each gene to the trait features.