annotate aurora_wgcna_trait.Rmd @ 7:ffbafe466107 draft

Uploaded
author spficklin
date Thu, 21 Nov 2019 09:26:30 +0000
parents b915c2c92e66
children 01ace2c8a593
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
1 ---
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
2 title: 'WGCNA: Module-Trait Association Report.'
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
3 output:
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
4 html_document:
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
5 number_sections: false
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
6 toc: true
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
7 ---
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
8
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
9 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
10 knitr::opts_chunk$set(error = TRUE, echo = FALSE)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
11 ```
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
12 ```{r}
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
13 # Load the data from the previous step.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
14 load(file=opt$r_data)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
15 ```
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
16 # Trait/Phenotype Data
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
17
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
18 The table below shows the list of trait/phenotype data provided, but with any ignored columns removed and any categorical columns converted to a one-hot enconding (e.g. 0 when present 1 when not present).
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
19
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
20 ```{r}
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
21 # Load the trait data file.
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
22 trait_data = data.frame()
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
23 trait_data = read.csv(opt$trait_data, header = TRUE, row.names = opt$sname_col)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
24 sample_names = rownames(gemt)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
25 trait_rows = match(sample_names, rownames(trait_data))
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
26 trait_data = trait_data[trait_rows, ]
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
27
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
28 # Remove ignored columns.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
29 ignore = strsplit(opt$ignore_cols, ',')
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
30 if (length(ignore[[1]]) > 0) {
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
31 print(paste('You chose to ignore the following fields:', paste(ignore[[1]], collapse=", ")))
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
32 trait_data = trait_data[, colnames(trait_data)[!(colnames(trait_data) %in% ignore[[1]])]]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
33 }
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
34
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
35 # Change any categorical fields to 1 hot encoding as requested by the caller.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
36 one_hot_cols = strsplit(opt$one_hot_cols, ',')
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
37 if (length(one_hot_cols[[1]]) > 0) {
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
38 print(paste('You chose to 1-hot encode the following fields:', paste(one_hot_cols[[1]], collapse=", ")))
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
39
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
40 # Perform the 1-hot encoding for specified fields.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
41 swap_cols = colnames(trait_data)[(colnames(trait_data) %in% one_hot_cols[[1]])]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
42 temp = as.data.frame(trait_data[, swap_cols])
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
43 colnames(temp) = swap_cols
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
44 temp = apply(temp, 2, make.names)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
45 dmy <- dummyVars(" ~ .", data = temp)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
46 encoded <- data.frame(predict(dmy, newdata = temp))
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
47 encoded = sapply(encoded, as.integer)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
48
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
49 # Make a new trait_data table with these new 1-hot fields.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
50 keep_cols = colnames(trait_data)[!(colnames(trait_data) %in% one_hot_cols[[1]])]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
51 keep = as.data.frame(trait_data[, keep_cols])
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
52 colnames(keep) = keep_cols
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
53
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
54 # Make a new trait_data object that has the columns to keep and the new 1-hot columns.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
55 trait_data = cbind(keep, encoded)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
56 }
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
57
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
58 datatable(trait_data)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
59 ```
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
60 # Module-Condition Association.
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
61 Now that we have trait/phenotype data, we can explore if any of the network modules are asociated with these features. First, is an empirical exploration by viewing again the sample dendrogram but with traits added and colored by category or numerical intensity, as appropriate. If groups of samples with similar expression also share similar annotations then the same colors will appear "in blocks" under the clustered samples. This view does not indicate associations but can help visualize when some modules might be associated.
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
62
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
63 ```{r fig.align='center', fig.width=8, fig.height=9}
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
64
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
65 # Determine the column types within the trait annotation data.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
66 trait_types = sapply(trait_data, class)
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
67 trait_colors = data.frame(empty = rep(1:dim(trait_data)[1]))
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
68
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
69 # Set the colors for the quantitative data.
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
70 quantitative_fields = colnames(trait_data)[which(trait_types == "numeric")]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
71 if (length(quantitative_fields) > 0) {
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
72 qdata = as.data.frame(trait_data[,quantitative_fields])
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
73 quantitative_colors = numbers2colors(qdata, signed = FALSE)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
74 colnames(quantitative_colors) = quantitative_fields
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
75 trait_colors = cbind(trait_colors,quantitative_colors)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
76 }
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
77
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
78 # Set the colors for the categorical data.
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
79 categorical_fields = colnames(trait_data)[which(trait_types == "factor")]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
80 if (length(categorical_fields) > 0) {
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
81 cdata = as.data.frame(trait_data[,categorical_fields])
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
82 categorical_colors = labels2colors(cdata)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
83 colnames(categorical_colors) = categorical_fields
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
84 trait_colors = cbind(trait_colors,categorical_colors)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
85 }
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
86
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
87 # Set the colors for the ordinal data.
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
88 ordinal_fields = colnames(trait_data)[which(trait_types == "integer")]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
89 if (length(ordinal_fields) > 0) {
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
90 odata = as.data.frame(trait_data[,ordinal_fields])
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
91 ordinal_colors = numbers2colors(odata, signed = FALSE)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
92 colnames(ordinal_colors) = ordinal_fields
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
93 trait_colors = cbind(trait_colors, ordinal_colors)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
94 }
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
95
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
96 trait_colors = subset(trait_colors, select=-c(empty))
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
97 trait_colors = trait_colors[,colnames(trait_data)]
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
98 options(repr.plot.width=15, repr.plot.height=10)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
99 plotDendroAndColors(sampleTree, trait_colors,
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
100 groupLabels = names(trait_data),
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
101 main = "Sample Dendrogram and Annotation Heatmap",
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
102 cex.dendroLabels = 0.5)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
103
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
104 ```
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
105
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
106 To statistically identify the associations, correlation tests are performed of the eigengenes of each module with the annotation data. The following heatmap shows the results between each annotation feature and each module. Modules with a signficant positive assocation have a correlation value near 1. Modules with a significant negative association have a correlation value near -1. Modules with no correlation have a value near 0.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
107
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
108 ```{r fig.align='center', fig.width=15, fig.height=15}
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
109 MEs = orderMEs(MEs)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
110 moduleTraitCor = cor(MEs, trait_data, use = "p");
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
111 moduleTraitPvalue = corPvalueStudent(moduleTraitCor, n_samples);
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
112
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
113 # The WGCNA labeledHeatmap function is too overloaded with detail, we'll create a simpler plot.
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
114
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
115 plotData = melt(moduleTraitCor)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
116 # We want to makes sure the order is the same as in the
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
117 # labeledHeatmap function (example above)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
118 plotData$Var1 = factor(plotData$Var1, levels = rev(colnames(MEs)), ordered=TRUE)
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
119 # Now use ggplot2 to make a nicer image.
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
120 p <- ggplot(plotData, aes(Var2, Var1, fill=value)) +
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
121 geom_tile() + xlab('Experimental Conditions') + ylab('WGCNA Modules') +
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
122 scale_fill_gradient2(low = "#0072B2", high = "#D55E00",
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
123 mid = "white", midpoint = 0,
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
124 limit = c(-1,1), name="PCC") +
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
125 theme_bw() +
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
126 theme(axis.text.x = element_text(angle = 45, hjust=1, vjust=1, size=15),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
127 axis.text.y = element_text(angle = 0, hjust=1, vjust=0.5, size=15),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
128 legend.text=element_text(size=15),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
129 panel.border = element_blank(),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
130 panel.grid.major = element_blank(),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
131 panel.grid.minor = element_blank(),
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
132 axis.line = element_blank())
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
133 print(p)
3
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
134 ```
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
135
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
136 ```{r}
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
137 output = cbind(moduleTraitCor, moduleTraitPvalue)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
138 write.csv(output, file = opt$module_association_file, quote=FALSE, row.names=TRUE)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
139 ```
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
140 A file has been generated named `module_association.csv` which conatins the list of modules, and their correlation values as well as p-values indicating the strength of the associations.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
141 ```{r}
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
142 # names (colors) of the modules
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
143 modNames = substring(names(MEs), 3)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
144 geneModuleMembership = as.data.frame(cor(gemt, MEs, use = "p"));
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
145 MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), n_samples));
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
146 names(geneModuleMembership) = paste("MM", modNames, sep="");
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
147 names(MMPvalue) = paste("p.MM", modNames, sep="");
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
148
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
149 # Calculate the gene trait significance as a Pearson's R and p-value.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
150 gts = as.data.frame(cor(gemt, trait_data, use = "p"));
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
151 gtsp = as.data.frame(corPvalueStudent(as.matrix(gts), n_samples));
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
152 colnames(gtsp) = c(paste("p", names(trait_data), sep="."))
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
153 colnames(gts) = c(paste("GS", names(trait_data), sep="."))
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
154
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
155 # Write out the gene information.
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
156 output = cbind(Module = module_labels, gts, gtsp)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
157 write.csv(output, file = opt$gene_association_file, quote=FALSE, row.names=TRUE)
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
158
b915c2c92e66 Uploaded
spficklin
parents:
diff changeset
159 ```
7
ffbafe466107 Uploaded
spficklin
parents: 3
diff changeset
160 Genes themselves can also have assocation with traits. This is calculated via a traditional correlation test as well. Another file has been generated named `gene_association.csv` which provides the list of genes, the modules they belong to and the assocaition of each gene to the trait features.