aurora_wgcna: aurora_wgcna_trait.Rmd annotate

annotate aurora_wgcna_trait.Rmd @ 11:01ace2c8a593 draft

Uploaded

author	spficklin
date	Fri, 22 Nov 2019 01:47:07 +0000
parents	ffbafe466107
children	3ed8495d7df7

rev	line source
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	1 ---
b915c2c92e66 Uploaded spficklin parents: diff changeset	2 title: 'WGCNA: Module-Trait Association Report.'
b915c2c92e66 Uploaded spficklin parents: diff changeset	3 output:
b915c2c92e66 Uploaded spficklin parents: diff changeset	4 html_document:
b915c2c92e66 Uploaded spficklin parents: diff changeset	5 number_sections: false
b915c2c92e66 Uploaded spficklin parents: diff changeset	6 toc: true
b915c2c92e66 Uploaded spficklin parents: diff changeset	7 ---
b915c2c92e66 Uploaded spficklin parents: diff changeset	8
b915c2c92e66 Uploaded spficklin parents: diff changeset	9 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
11 01ace2c8a593 Uploaded spficklin parents: 7 diff changeset	10 knitr::opts_chunk$set(error = FALSE, echo = FALSE)
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	11 ```
b915c2c92e66 Uploaded spficklin parents: diff changeset	12 ```{r}
b915c2c92e66 Uploaded spficklin parents: diff changeset	13 # Load the data from the previous step.
b915c2c92e66 Uploaded spficklin parents: diff changeset	14 load(file=opt$r_data)
b915c2c92e66 Uploaded spficklin parents: diff changeset	15 ```
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	16 # Trait/Phenotype Data
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	17
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	18 The table below shows the list of trait/phenotype data provided, but with any ignored columns removed and any categorical columns converted to a one-hot enconding (e.g. 0 when present 1 when not present).
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	19
b915c2c92e66 Uploaded spficklin parents: diff changeset	20 ```{r}
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	21 # Load the trait data file.
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	22 trait_data = data.frame()
b915c2c92e66 Uploaded spficklin parents: diff changeset	23 trait_data = read.csv(opt$trait_data, header = TRUE, row.names = opt$sname_col)
b915c2c92e66 Uploaded spficklin parents: diff changeset	24 sample_names = rownames(gemt)
b915c2c92e66 Uploaded spficklin parents: diff changeset	25 trait_rows = match(sample_names, rownames(trait_data))
b915c2c92e66 Uploaded spficklin parents: diff changeset	26 trait_data = trait_data[trait_rows, ]
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	27
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	28 # Remove ignored columns.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	29 ignore = strsplit(opt$ignore_cols, ',')
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	30 if (length(ignore[[1]]) > 0) {
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	31 print(paste('You chose to ignore the following fields:', paste(ignore[[1]], collapse=", ")))
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	32 trait_data = trait_data[, colnames(trait_data)[!(colnames(trait_data) %in% ignore[[1]])]]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	33 }
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	34
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	35 # Change any categorical fields to 1 hot encoding as requested by the caller.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	36 one_hot_cols = strsplit(opt$one_hot_cols, ',')
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	37 if (length(one_hot_cols[[1]]) > 0) {
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	38 print(paste('You chose to 1-hot encode the following fields:', paste(one_hot_cols[[1]], collapse=", ")))
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	39
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	40 # Perform the 1-hot encoding for specified fields.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	41 swap_cols = colnames(trait_data)[(colnames(trait_data) %in% one_hot_cols[[1]])]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	42 temp = as.data.frame(trait_data[, swap_cols])
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	43 colnames(temp) = swap_cols
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	44 temp = apply(temp, 2, make.names)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	45 dmy <- dummyVars(" ~ .", data = temp)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	46 encoded <- data.frame(predict(dmy, newdata = temp))
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	47 encoded = sapply(encoded, as.integer)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	48
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	49 # Make a new trait_data table with these new 1-hot fields.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	50 keep_cols = colnames(trait_data)[!(colnames(trait_data) %in% one_hot_cols[[1]])]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	51 keep = as.data.frame(trait_data[, keep_cols])
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	52 colnames(keep) = keep_cols
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	53
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	54 # Make a new trait_data object that has the columns to keep and the new 1-hot columns.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	55 trait_data = cbind(keep, encoded)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	56 }
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	57
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	58 datatable(trait_data)
b915c2c92e66 Uploaded spficklin parents: diff changeset	59 ```
11 01ace2c8a593 Uploaded spficklin parents: 7 diff changeset	60 # Module-Condition Association
01ace2c8a593 Uploaded spficklin parents: 7 diff changeset	61
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	62 Now that we have trait/phenotype data, we can explore if any of the network modules are asociated with these features. First, is an empirical exploration by viewing again the sample dendrogram but with traits added and colored by category or numerical intensity, as appropriate. If groups of samples with similar expression also share similar annotations then the same colors will appear "in blocks" under the clustered samples. This view does not indicate associations but can help visualize when some modules might be associated.
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	63
b915c2c92e66 Uploaded spficklin parents: diff changeset	64 ```{r fig.align='center', fig.width=8, fig.height=9}
b915c2c92e66 Uploaded spficklin parents: diff changeset	65
b915c2c92e66 Uploaded spficklin parents: diff changeset	66 # Determine the column types within the trait annotation data.
b915c2c92e66 Uploaded spficklin parents: diff changeset	67 trait_types = sapply(trait_data, class)
11 01ace2c8a593 Uploaded spficklin parents: 7 diff changeset	68
01ace2c8a593 Uploaded spficklin parents: 7 diff changeset	69 # So that we can merge colors together with a cbind, create a
01ace2c8a593 Uploaded spficklin parents: 7 diff changeset	70 # data frame with an empty column
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	71 trait_colors = data.frame(empty = rep(1:dim(trait_data)[1]))
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	72
b915c2c92e66 Uploaded spficklin parents: diff changeset	73 # Set the colors for the quantitative data.
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	74 quantitative_fields = colnames(trait_data)[which(trait_types == "numeric")]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	75 if (length(quantitative_fields) > 0) {
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	76 qdata = as.data.frame(trait_data[,quantitative_fields])
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	77 quantitative_colors = numbers2colors(qdata, signed = FALSE)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	78 colnames(quantitative_colors) = quantitative_fields
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	79 trait_colors = cbind(trait_colors,quantitative_colors)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	80 }
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	81
b915c2c92e66 Uploaded spficklin parents: diff changeset	82 # Set the colors for the categorical data.
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	83 categorical_fields = colnames(trait_data)[which(trait_types == "factor")]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	84 if (length(categorical_fields) > 0) {
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	85 cdata = as.data.frame(trait_data[,categorical_fields])
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	86 categorical_colors = labels2colors(cdata)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	87 colnames(categorical_colors) = categorical_fields
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	88 trait_colors = cbind(trait_colors,categorical_colors)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	89 }
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	90
b915c2c92e66 Uploaded spficklin parents: diff changeset	91 # Set the colors for the ordinal data.
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	92 ordinal_fields = colnames(trait_data)[which(trait_types == "integer")]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	93 if (length(ordinal_fields) > 0) {
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	94 odata = as.data.frame(trait_data[,ordinal_fields])
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	95 ordinal_colors = numbers2colors(odata, signed = FALSE)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	96 colnames(ordinal_colors) = ordinal_fields
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	97 trait_colors = cbind(trait_colors, ordinal_colors)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	98 }
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	99
11 01ace2c8a593 Uploaded spficklin parents: 7 diff changeset	100 # Remove the empty column from teh trait_colors dataframe and
01ace2c8a593 Uploaded spficklin parents: 7 diff changeset	101 # reorder the colors to match the same order of columns in the trait_data df.
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	102 trait_colors = subset(trait_colors, select=-c(empty))
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	103 trait_colors = trait_colors[,colnames(trait_data)]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	104 options(repr.plot.width=15, repr.plot.height=10)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	105 plotDendroAndColors(sampleTree, trait_colors,
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	106 groupLabels = names(trait_data),
b915c2c92e66 Uploaded spficklin parents: diff changeset	107 main = "Sample Dendrogram and Annotation Heatmap",
b915c2c92e66 Uploaded spficklin parents: diff changeset	108 cex.dendroLabels = 0.5)
b915c2c92e66 Uploaded spficklin parents: diff changeset	109
b915c2c92e66 Uploaded spficklin parents: diff changeset	110 ```
b915c2c92e66 Uploaded spficklin parents: diff changeset	111
b915c2c92e66 Uploaded spficklin parents: diff changeset	112 To statistically identify the associations, correlation tests are performed of the eigengenes of each module with the annotation data. The following heatmap shows the results between each annotation feature and each module. Modules with a signficant positive assocation have a correlation value near 1. Modules with a significant negative association have a correlation value near -1. Modules with no correlation have a value near 0.
b915c2c92e66 Uploaded spficklin parents: diff changeset	113
b915c2c92e66 Uploaded spficklin parents: diff changeset	114 ```{r fig.align='center', fig.width=15, fig.height=15}
b915c2c92e66 Uploaded spficklin parents: diff changeset	115 MEs = orderMEs(MEs)
b915c2c92e66 Uploaded spficklin parents: diff changeset	116 moduleTraitCor = cor(MEs, trait_data, use = "p");
b915c2c92e66 Uploaded spficklin parents: diff changeset	117 moduleTraitPvalue = corPvalueStudent(moduleTraitCor, n_samples);
b915c2c92e66 Uploaded spficklin parents: diff changeset	118
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	119 # The WGCNA labeledHeatmap function is too overloaded with detail, we'll create a simpler plot.
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	120
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	121 plotData = melt(moduleTraitCor)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	122 # We want to makes sure the order is the same as in the
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	123 # labeledHeatmap function (example above)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	124 plotData$Var1 = factor(plotData$Var1, levels = rev(colnames(MEs)), ordered=TRUE)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	125 # Now use ggplot2 to make a nicer image.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	126 p <- ggplot(plotData, aes(Var2, Var1, fill=value)) +
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	127 geom_tile() + xlab('Experimental Conditions') + ylab('WGCNA Modules') +
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	128 scale_fill_gradient2(low = "#0072B2", high = "#D55E00",
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	129 mid = "white", midpoint = 0,
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	130 limit = c(-1,1), name="PCC") +
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	131 theme_bw() +
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	132 theme(axis.text.x = element_text(angle = 45, hjust=1, vjust=1, size=15),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	133 axis.text.y = element_text(angle = 0, hjust=1, vjust=0.5, size=15),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	134 legend.text=element_text(size=15),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	135 panel.border = element_blank(),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	136 panel.grid.major = element_blank(),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	137 panel.grid.minor = element_blank(),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	138 axis.line = element_blank())
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	139 print(p)
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	140 ```
b915c2c92e66 Uploaded spficklin parents: diff changeset	141
b915c2c92e66 Uploaded spficklin parents: diff changeset	142 ```{r}
b915c2c92e66 Uploaded spficklin parents: diff changeset	143 output = cbind(moduleTraitCor, moduleTraitPvalue)
b915c2c92e66 Uploaded spficklin parents: diff changeset	144 write.csv(output, file = opt$module_association_file, quote=FALSE, row.names=TRUE)
b915c2c92e66 Uploaded spficklin parents: diff changeset	145 ```
b915c2c92e66 Uploaded spficklin parents: diff changeset	146 A file has been generated named `module_association.csv` which conatins the list of modules, and their correlation values as well as p-values indicating the strength of the associations.
b915c2c92e66 Uploaded spficklin parents: diff changeset	147 ```{r}
b915c2c92e66 Uploaded spficklin parents: diff changeset	148 # names (colors) of the modules
b915c2c92e66 Uploaded spficklin parents: diff changeset	149 modNames = substring(names(MEs), 3)
b915c2c92e66 Uploaded spficklin parents: diff changeset	150 geneModuleMembership = as.data.frame(cor(gemt, MEs, use = "p"));
b915c2c92e66 Uploaded spficklin parents: diff changeset	151 MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), n_samples));
b915c2c92e66 Uploaded spficklin parents: diff changeset	152 names(geneModuleMembership) = paste("MM", modNames, sep="");
b915c2c92e66 Uploaded spficklin parents: diff changeset	153 names(MMPvalue) = paste("p.MM", modNames, sep="");
b915c2c92e66 Uploaded spficklin parents: diff changeset	154
b915c2c92e66 Uploaded spficklin parents: diff changeset	155 # Calculate the gene trait significance as a Pearson's R and p-value.
b915c2c92e66 Uploaded spficklin parents: diff changeset	156 gts = as.data.frame(cor(gemt, trait_data, use = "p"));
b915c2c92e66 Uploaded spficklin parents: diff changeset	157 gtsp = as.data.frame(corPvalueStudent(as.matrix(gts), n_samples));
b915c2c92e66 Uploaded spficklin parents: diff changeset	158 colnames(gtsp) = c(paste("p", names(trait_data), sep="."))
b915c2c92e66 Uploaded spficklin parents: diff changeset	159 colnames(gts) = c(paste("GS", names(trait_data), sep="."))
b915c2c92e66 Uploaded spficklin parents: diff changeset	160
b915c2c92e66 Uploaded spficklin parents: diff changeset	161 # Write out the gene information.
b915c2c92e66 Uploaded spficklin parents: diff changeset	162 output = cbind(Module = module_labels, gts, gtsp)
b915c2c92e66 Uploaded spficklin parents: diff changeset	163 write.csv(output, file = opt$gene_association_file, quote=FALSE, row.names=TRUE)
b915c2c92e66 Uploaded spficklin parents: diff changeset	164
b915c2c92e66 Uploaded spficklin parents: diff changeset	165 ```
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	166 Genes themselves can also have assocation with traits. This is calculated via a traditional correlation test as well. Another file has been generated named `gene_association.csv` which provides the list of genes, the modules they belong to and the assocaition of each gene to the trait features.

Mercurial > repos > spficklin > aurora_wgcna

annotate aurora_wgcna_trait.Rmd @ 11:01ace2c8a593 draft