Mercurial > repos > spficklin > aurora_wgcna
comparison aurora_wgcna_trait.Rmd @ 7:ffbafe466107 draft
Uploaded
author | spficklin |
---|---|
date | Thu, 21 Nov 2019 09:26:30 +0000 |
parents | b915c2c92e66 |
children | 01ace2c8a593 |
comparison
equal
deleted
inserted
replaced
6:083886b3f3db | 7:ffbafe466107 |
---|---|
11 ``` | 11 ``` |
12 ```{r} | 12 ```{r} |
13 # Load the data from the previous step. | 13 # Load the data from the previous step. |
14 load(file=opt$r_data) | 14 load(file=opt$r_data) |
15 ``` | 15 ``` |
16 # Sample Annotation Data | 16 # Trait/Phenotype Data |
17 | 17 |
18 The table below shows the list of sample annotations you provided. Sample annotations can include categorical data related to experimental conditions, or numerical trait or phenotype data for each sample. | 18 The table below shows the list of trait/phenotype data provided, but with any ignored columns removed and any categorical columns converted to a one-hot enconding (e.g. 0 when present 1 when not present). |
19 | 19 |
20 ```{r} | 20 ```{r} |
21 # Load the trait data file. | |
21 trait_data = data.frame() | 22 trait_data = data.frame() |
22 trait_data = read.csv(opt$trait_data, header = TRUE, row.names = opt$sname_col) | 23 trait_data = read.csv(opt$trait_data, header = TRUE, row.names = opt$sname_col) |
23 sample_names = rownames(gemt) | 24 sample_names = rownames(gemt) |
24 trait_rows = match(sample_names, rownames(trait_data)) | 25 trait_rows = match(sample_names, rownames(trait_data)) |
25 trait_data = trait_data[trait_rows, ] | 26 trait_data = trait_data[trait_rows, ] |
27 | |
28 # Remove ignored columns. | |
29 ignore = strsplit(opt$ignore_cols, ',') | |
30 if (length(ignore[[1]]) > 0) { | |
31 print(paste('You chose to ignore the following fields:', paste(ignore[[1]], collapse=", "))) | |
32 trait_data = trait_data[, colnames(trait_data)[!(colnames(trait_data) %in% ignore[[1]])]] | |
33 } | |
34 | |
35 # Change any categorical fields to 1 hot encoding as requested by the caller. | |
36 one_hot_cols = strsplit(opt$one_hot_cols, ',') | |
37 if (length(one_hot_cols[[1]]) > 0) { | |
38 print(paste('You chose to 1-hot encode the following fields:', paste(one_hot_cols[[1]], collapse=", "))) | |
39 | |
40 # Perform the 1-hot encoding for specified fields. | |
41 swap_cols = colnames(trait_data)[(colnames(trait_data) %in% one_hot_cols[[1]])] | |
42 temp = as.data.frame(trait_data[, swap_cols]) | |
43 colnames(temp) = swap_cols | |
44 temp = apply(temp, 2, make.names) | |
45 dmy <- dummyVars(" ~ .", data = temp) | |
46 encoded <- data.frame(predict(dmy, newdata = temp)) | |
47 encoded = sapply(encoded, as.integer) | |
48 | |
49 # Make a new trait_data table with these new 1-hot fields. | |
50 keep_cols = colnames(trait_data)[!(colnames(trait_data) %in% one_hot_cols[[1]])] | |
51 keep = as.data.frame(trait_data[, keep_cols]) | |
52 colnames(keep) = keep_cols | |
53 | |
54 # Make a new trait_data object that has the columns to keep and the new 1-hot columns. | |
55 trait_data = cbind(keep, encoded) | |
56 } | |
57 | |
26 datatable(trait_data) | 58 datatable(trait_data) |
27 ``` | 59 ``` |
28 # Module-Condition Association. | 60 # Module-Condition Association. |
29 Now that we have sample annotations, we can explore if any of the network modules are asociated with these features. First, is an empirical exploration by viewing again the sample dendrogram but with sample annotations added and colored by category or numerical intensity, as appropriate. If groups of samples with similar expression also share similar annotations then the same colors will appear "in blocks" under the clustered samples. This view does not indicate associations but can help visualize when some modules might be associated. | 61 Now that we have trait/phenotype data, we can explore if any of the network modules are asociated with these features. First, is an empirical exploration by viewing again the sample dendrogram but with traits added and colored by category or numerical intensity, as appropriate. If groups of samples with similar expression also share similar annotations then the same colors will appear "in blocks" under the clustered samples. This view does not indicate associations but can help visualize when some modules might be associated. |
30 | 62 |
31 ```{r fig.align='center', fig.width=8, fig.height=9} | 63 ```{r fig.align='center', fig.width=8, fig.height=9} |
32 | 64 |
33 # Determine the column types within the trait annotation data. | 65 # Determine the column types within the trait annotation data. |
34 trait_types = sapply(trait_data, class) | 66 trait_types = sapply(trait_data, class) |
67 trait_colors = data.frame(empty = rep(1:dim(trait_data)[1])) | |
35 | 68 |
36 # Set the colors for the quantitative data. | 69 # Set the colors for the quantitative data. |
37 quantitative_fields = which(trait_types == "numeric") | 70 quantitative_fields = colnames(trait_data)[which(trait_types == "numeric")] |
38 quantitative_colors = numbers2colors(trait_data[,quantitative_fields], signed = FALSE) | 71 if (length(quantitative_fields) > 0) { |
39 colnames(quantitative_colors) = colnames(trait_data[,quantitative_fields]) | 72 qdata = as.data.frame(trait_data[,quantitative_fields]) |
73 quantitative_colors = numbers2colors(qdata, signed = FALSE) | |
74 colnames(quantitative_colors) = quantitative_fields | |
75 trait_colors = cbind(trait_colors,quantitative_colors) | |
76 } | |
40 | 77 |
41 # Set the colors for the categorical data. | 78 # Set the colors for the categorical data. |
42 categorical_fields = which(trait_types == "factor") | 79 categorical_fields = colnames(trait_data)[which(trait_types == "factor")] |
43 categorical_colors = labels2colors(trait_data[,categorical_fields]) | 80 if (length(categorical_fields) > 0) { |
44 colnames(categorical_colors) = colnames(trait_data[,categorical_fields]) | 81 cdata = as.data.frame(trait_data[,categorical_fields]) |
82 categorical_colors = labels2colors(cdata) | |
83 colnames(categorical_colors) = categorical_fields | |
84 trait_colors = cbind(trait_colors,categorical_colors) | |
85 } | |
45 | 86 |
46 # Set the colors for the ordinal data. | 87 # Set the colors for the ordinal data. |
47 ordinal_fields = which(trait_types == "integer") | 88 ordinal_fields = colnames(trait_data)[which(trait_types == "integer")] |
48 ordinal_colors = numbers2colors(trait_data[,ordinal_fields], signed = FALSE) | 89 if (length(ordinal_fields) > 0) { |
49 colnames(ordinal_colors) = colnames(trait_data[,ordinal_fields]) | 90 odata = as.data.frame(trait_data[,ordinal_fields]) |
91 ordinal_colors = numbers2colors(odata, signed = FALSE) | |
92 colnames(ordinal_colors) = ordinal_fields | |
93 trait_colors = cbind(trait_colors, ordinal_colors) | |
94 } | |
50 | 95 |
51 # Combine the colors in their original order as the trait_data data frame. | 96 trait_colors = subset(trait_colors, select=-c(empty)) |
52 traitColors = cbind(quantitative_colors, categorical_colors, ordinal_colors) | 97 trait_colors = trait_colors[,colnames(trait_data)] |
53 | 98 options(repr.plot.width=15, repr.plot.height=10) |
54 plotDendroAndColors(sampleTree, traitColors, | 99 plotDendroAndColors(sampleTree, trait_colors, |
55 groupLabels = names(trait_data), | 100 groupLabels = names(trait_data), |
56 main = "Sample Dendrogram and Annotation Heatmap", | 101 main = "Sample Dendrogram and Annotation Heatmap", |
57 cex.dendroLabels = 0.5) | 102 cex.dendroLabels = 0.5) |
58 | 103 |
59 ``` | 104 ``` |
63 ```{r fig.align='center', fig.width=15, fig.height=15} | 108 ```{r fig.align='center', fig.width=15, fig.height=15} |
64 MEs = orderMEs(MEs) | 109 MEs = orderMEs(MEs) |
65 moduleTraitCor = cor(MEs, trait_data, use = "p"); | 110 moduleTraitCor = cor(MEs, trait_data, use = "p"); |
66 moduleTraitPvalue = corPvalueStudent(moduleTraitCor, n_samples); | 111 moduleTraitPvalue = corPvalueStudent(moduleTraitCor, n_samples); |
67 | 112 |
68 textMatrix = paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = ""); | 113 # The WGCNA labeledHeatmap function is too overloaded with detail, we'll create a simpler plot. |
69 dim(textMatrix) = dim(moduleTraitCor) | |
70 par(mar = c(6, 8.5, 3, 3)); | |
71 | 114 |
72 # Display the correlation values within a heatmap plot | 115 plotData = melt(moduleTraitCor) |
73 labeledHeatmap(Matrix = moduleTraitCor, | 116 # We want to makes sure the order is the same as in the |
74 xLabels = names(trait_data), | 117 # labeledHeatmap function (example above) |
75 yLabels = names(MEs), | 118 plotData$Var1 = factor(plotData$Var1, levels = rev(colnames(MEs)), ordered=TRUE) |
76 ySymbols = names(MEs), | 119 # Now use ggplot2 to make a nicer image. |
77 colorLabels = FALSE, | 120 p <- ggplot(plotData, aes(Var2, Var1, fill=value)) + |
78 colors = blueWhiteRed(50), | 121 geom_tile() + xlab('Experimental Conditions') + ylab('WGCNA Modules') + |
79 textMatrix = textMatrix, | 122 scale_fill_gradient2(low = "#0072B2", high = "#D55E00", |
80 setStdMargins = FALSE, | 123 mid = "white", midpoint = 0, |
81 cex.text = 0.5, | 124 limit = c(-1,1), name="PCC") + |
82 zlim = c(-1,1), | 125 theme_bw() + |
83 main = paste("Module-trait relationships")) | 126 theme(axis.text.x = element_text(angle = 45, hjust=1, vjust=1, size=15), |
127 axis.text.y = element_text(angle = 0, hjust=1, vjust=0.5, size=15), | |
128 legend.text=element_text(size=15), | |
129 panel.border = element_blank(), | |
130 panel.grid.major = element_blank(), | |
131 panel.grid.minor = element_blank(), | |
132 axis.line = element_blank()) | |
133 print(p) | |
84 ``` | 134 ``` |
85 | 135 |
86 ```{r} | 136 ```{r} |
87 output = cbind(moduleTraitCor, moduleTraitPvalue) | 137 output = cbind(moduleTraitCor, moduleTraitPvalue) |
88 write.csv(output, file = opt$module_association_file, quote=FALSE, row.names=TRUE) | 138 write.csv(output, file = opt$module_association_file, quote=FALSE, row.names=TRUE) |
105 # Write out the gene information. | 155 # Write out the gene information. |
106 output = cbind(Module = module_labels, gts, gtsp) | 156 output = cbind(Module = module_labels, gts, gtsp) |
107 write.csv(output, file = opt$gene_association_file, quote=FALSE, row.names=TRUE) | 157 write.csv(output, file = opt$gene_association_file, quote=FALSE, row.names=TRUE) |
108 | 158 |
109 ``` | 159 ``` |
110 Genes themselves can also have assocation with sample annotations. This is calculated via a traditional correlation test as well. Another file has been generated named `gene_association.csv` which provides the list of genes, the modules they belong to and the assocaition of each gene to the sample annotation features. | 160 Genes themselves can also have assocation with traits. This is calculated via a traditional correlation test as well. Another file has been generated named `gene_association.csv` which provides the list of genes, the modules they belong to and the assocaition of each gene to the trait features. |
111 |