aurora_wgcna: aurora_wgcna_trait.Rmd annotate

annotate aurora_wgcna_trait.Rmd @ 7:ffbafe466107 draft

Uploaded

author	spficklin
date	Thu, 21 Nov 2019 09:26:30 +0000
parents	b915c2c92e66
children	01ace2c8a593

rev	line source
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	1 ---
b915c2c92e66 Uploaded spficklin parents: diff changeset	2 title: 'WGCNA: Module-Trait Association Report.'
b915c2c92e66 Uploaded spficklin parents: diff changeset	3 output:
b915c2c92e66 Uploaded spficklin parents: diff changeset	4 html_document:
b915c2c92e66 Uploaded spficklin parents: diff changeset	5 number_sections: false
b915c2c92e66 Uploaded spficklin parents: diff changeset	6 toc: true
b915c2c92e66 Uploaded spficklin parents: diff changeset	7 ---
b915c2c92e66 Uploaded spficklin parents: diff changeset	8
b915c2c92e66 Uploaded spficklin parents: diff changeset	9 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
b915c2c92e66 Uploaded spficklin parents: diff changeset	10 knitr::opts_chunk$set(error = TRUE, echo = FALSE)
b915c2c92e66 Uploaded spficklin parents: diff changeset	11 ```
b915c2c92e66 Uploaded spficklin parents: diff changeset	12 ```{r}
b915c2c92e66 Uploaded spficklin parents: diff changeset	13 # Load the data from the previous step.
b915c2c92e66 Uploaded spficklin parents: diff changeset	14 load(file=opt$r_data)
b915c2c92e66 Uploaded spficklin parents: diff changeset	15 ```
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	16 # Trait/Phenotype Data
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	17
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	18 The table below shows the list of trait/phenotype data provided, but with any ignored columns removed and any categorical columns converted to a one-hot enconding (e.g. 0 when present 1 when not present).
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	19
b915c2c92e66 Uploaded spficklin parents: diff changeset	20 ```{r}
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	21 # Load the trait data file.
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	22 trait_data = data.frame()
b915c2c92e66 Uploaded spficklin parents: diff changeset	23 trait_data = read.csv(opt$trait_data, header = TRUE, row.names = opt$sname_col)
b915c2c92e66 Uploaded spficklin parents: diff changeset	24 sample_names = rownames(gemt)
b915c2c92e66 Uploaded spficklin parents: diff changeset	25 trait_rows = match(sample_names, rownames(trait_data))
b915c2c92e66 Uploaded spficklin parents: diff changeset	26 trait_data = trait_data[trait_rows, ]
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	27
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	28 # Remove ignored columns.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	29 ignore = strsplit(opt$ignore_cols, ',')
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	30 if (length(ignore[[1]]) > 0) {
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	31 print(paste('You chose to ignore the following fields:', paste(ignore[[1]], collapse=", ")))
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	32 trait_data = trait_data[, colnames(trait_data)[!(colnames(trait_data) %in% ignore[[1]])]]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	33 }
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	34
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	35 # Change any categorical fields to 1 hot encoding as requested by the caller.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	36 one_hot_cols = strsplit(opt$one_hot_cols, ',')
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	37 if (length(one_hot_cols[[1]]) > 0) {
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	38 print(paste('You chose to 1-hot encode the following fields:', paste(one_hot_cols[[1]], collapse=", ")))
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	39
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	40 # Perform the 1-hot encoding for specified fields.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	41 swap_cols = colnames(trait_data)[(colnames(trait_data) %in% one_hot_cols[[1]])]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	42 temp = as.data.frame(trait_data[, swap_cols])
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	43 colnames(temp) = swap_cols
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	44 temp = apply(temp, 2, make.names)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	45 dmy <- dummyVars(" ~ .", data = temp)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	46 encoded <- data.frame(predict(dmy, newdata = temp))
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	47 encoded = sapply(encoded, as.integer)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	48
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	49 # Make a new trait_data table with these new 1-hot fields.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	50 keep_cols = colnames(trait_data)[!(colnames(trait_data) %in% one_hot_cols[[1]])]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	51 keep = as.data.frame(trait_data[, keep_cols])
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	52 colnames(keep) = keep_cols
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	53
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	54 # Make a new trait_data object that has the columns to keep and the new 1-hot columns.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	55 trait_data = cbind(keep, encoded)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	56 }
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	57
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	58 datatable(trait_data)
b915c2c92e66 Uploaded spficklin parents: diff changeset	59 ```
b915c2c92e66 Uploaded spficklin parents: diff changeset	60 # Module-Condition Association.
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	61 Now that we have trait/phenotype data, we can explore if any of the network modules are asociated with these features. First, is an empirical exploration by viewing again the sample dendrogram but with traits added and colored by category or numerical intensity, as appropriate. If groups of samples with similar expression also share similar annotations then the same colors will appear "in blocks" under the clustered samples. This view does not indicate associations but can help visualize when some modules might be associated.
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	62
b915c2c92e66 Uploaded spficklin parents: diff changeset	63 ```{r fig.align='center', fig.width=8, fig.height=9}
b915c2c92e66 Uploaded spficklin parents: diff changeset	64
b915c2c92e66 Uploaded spficklin parents: diff changeset	65 # Determine the column types within the trait annotation data.
b915c2c92e66 Uploaded spficklin parents: diff changeset	66 trait_types = sapply(trait_data, class)
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	67 trait_colors = data.frame(empty = rep(1:dim(trait_data)[1]))
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	68
b915c2c92e66 Uploaded spficklin parents: diff changeset	69 # Set the colors for the quantitative data.
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	70 quantitative_fields = colnames(trait_data)[which(trait_types == "numeric")]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	71 if (length(quantitative_fields) > 0) {
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	72 qdata = as.data.frame(trait_data[,quantitative_fields])
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	73 quantitative_colors = numbers2colors(qdata, signed = FALSE)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	74 colnames(quantitative_colors) = quantitative_fields
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	75 trait_colors = cbind(trait_colors,quantitative_colors)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	76 }
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	77
b915c2c92e66 Uploaded spficklin parents: diff changeset	78 # Set the colors for the categorical data.
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	79 categorical_fields = colnames(trait_data)[which(trait_types == "factor")]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	80 if (length(categorical_fields) > 0) {
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	81 cdata = as.data.frame(trait_data[,categorical_fields])
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	82 categorical_colors = labels2colors(cdata)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	83 colnames(categorical_colors) = categorical_fields
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	84 trait_colors = cbind(trait_colors,categorical_colors)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	85 }
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	86
b915c2c92e66 Uploaded spficklin parents: diff changeset	87 # Set the colors for the ordinal data.
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	88 ordinal_fields = colnames(trait_data)[which(trait_types == "integer")]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	89 if (length(ordinal_fields) > 0) {
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	90 odata = as.data.frame(trait_data[,ordinal_fields])
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	91 ordinal_colors = numbers2colors(odata, signed = FALSE)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	92 colnames(ordinal_colors) = ordinal_fields
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	93 trait_colors = cbind(trait_colors, ordinal_colors)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	94 }
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	95
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	96 trait_colors = subset(trait_colors, select=-c(empty))
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	97 trait_colors = trait_colors[,colnames(trait_data)]
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	98 options(repr.plot.width=15, repr.plot.height=10)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	99 plotDendroAndColors(sampleTree, trait_colors,
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	100 groupLabels = names(trait_data),
b915c2c92e66 Uploaded spficklin parents: diff changeset	101 main = "Sample Dendrogram and Annotation Heatmap",
b915c2c92e66 Uploaded spficklin parents: diff changeset	102 cex.dendroLabels = 0.5)
b915c2c92e66 Uploaded spficklin parents: diff changeset	103
b915c2c92e66 Uploaded spficklin parents: diff changeset	104 ```
b915c2c92e66 Uploaded spficklin parents: diff changeset	105
b915c2c92e66 Uploaded spficklin parents: diff changeset	106 To statistically identify the associations, correlation tests are performed of the eigengenes of each module with the annotation data. The following heatmap shows the results between each annotation feature and each module. Modules with a signficant positive assocation have a correlation value near 1. Modules with a significant negative association have a correlation value near -1. Modules with no correlation have a value near 0.
b915c2c92e66 Uploaded spficklin parents: diff changeset	107
b915c2c92e66 Uploaded spficklin parents: diff changeset	108 ```{r fig.align='center', fig.width=15, fig.height=15}
b915c2c92e66 Uploaded spficklin parents: diff changeset	109 MEs = orderMEs(MEs)
b915c2c92e66 Uploaded spficklin parents: diff changeset	110 moduleTraitCor = cor(MEs, trait_data, use = "p");
b915c2c92e66 Uploaded spficklin parents: diff changeset	111 moduleTraitPvalue = corPvalueStudent(moduleTraitCor, n_samples);
b915c2c92e66 Uploaded spficklin parents: diff changeset	112
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	113 # The WGCNA labeledHeatmap function is too overloaded with detail, we'll create a simpler plot.
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	114
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	115 plotData = melt(moduleTraitCor)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	116 # We want to makes sure the order is the same as in the
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	117 # labeledHeatmap function (example above)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	118 plotData$Var1 = factor(plotData$Var1, levels = rev(colnames(MEs)), ordered=TRUE)
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	119 # Now use ggplot2 to make a nicer image.
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	120 p <- ggplot(plotData, aes(Var2, Var1, fill=value)) +
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	121 geom_tile() + xlab('Experimental Conditions') + ylab('WGCNA Modules') +
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	122 scale_fill_gradient2(low = "#0072B2", high = "#D55E00",
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	123 mid = "white", midpoint = 0,
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	124 limit = c(-1,1), name="PCC") +
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	125 theme_bw() +
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	126 theme(axis.text.x = element_text(angle = 45, hjust=1, vjust=1, size=15),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	127 axis.text.y = element_text(angle = 0, hjust=1, vjust=0.5, size=15),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	128 legend.text=element_text(size=15),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	129 panel.border = element_blank(),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	130 panel.grid.major = element_blank(),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	131 panel.grid.minor = element_blank(),
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	132 axis.line = element_blank())
ffbafe466107 Uploaded spficklin parents: 3 diff changeset	133 print(p)
3 b915c2c92e66 Uploaded spficklin parents: diff changeset	134 ```
b915c2c92e66 Uploaded spficklin parents: diff changeset	135
b915c2c92e66 Uploaded spficklin parents: diff changeset	136 ```{r}
b915c2c92e66 Uploaded spficklin parents: diff changeset	137 output = cbind(moduleTraitCor, moduleTraitPvalue)
b915c2c92e66 Uploaded spficklin parents: diff changeset	138 write.csv(output, file = opt$module_association_file, quote=FALSE, row.names=TRUE)
b915c2c92e66 Uploaded spficklin parents: diff changeset	139 ```
b915c2c92e66 Uploaded spficklin parents: diff changeset	140 A file has been generated named `module_association.csv` which conatins the list of modules, and their correlation values as well as p-values indicating the strength of the associations.
b915c2c92e66 Uploaded spficklin parents: diff changeset	141 ```{r}
b915c2c92e66 Uploaded spficklin parents: diff changeset	142 # names (colors) of the modules
b915c2c92e66 Uploaded spficklin parents: diff changeset	143 modNames = substring(names(MEs), 3)
b915c2c92e66 Uploaded spficklin parents: diff changeset	144 geneModuleMembership = as.data.frame(cor(gemt, MEs, use = "p"));
b915c2c92e66 Uploaded spficklin parents: diff changeset	145 MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), n_samples));
b915c2c92e66 Uploaded spficklin parents: diff changeset	146 names(geneModuleMembership) = paste("MM", modNames, sep="");
b915c2c92e66 Uploaded spficklin parents: diff changeset	147 names(MMPvalue) = paste("p.MM", modNames, sep="");
b915c2c92e66 Uploaded spficklin parents: diff changeset	148
b915c2c92e66 Uploaded spficklin parents: diff changeset	149 # Calculate the gene trait significance as a Pearson's R and p-value.
b915c2c92e66 Uploaded spficklin parents: diff changeset	150 gts = as.data.frame(cor(gemt, trait_data, use = "p"));
b915c2c92e66 Uploaded spficklin parents: diff changeset	151 gtsp = as.data.frame(corPvalueStudent(as.matrix(gts), n_samples));
b915c2c92e66 Uploaded spficklin parents: diff changeset	152 colnames(gtsp) = c(paste("p", names(trait_data), sep="."))
b915c2c92e66 Uploaded spficklin parents: diff changeset	153 colnames(gts) = c(paste("GS", names(trait_data), sep="."))
b915c2c92e66 Uploaded spficklin parents: diff changeset	154
b915c2c92e66 Uploaded spficklin parents: diff changeset	155 # Write out the gene information.
b915c2c92e66 Uploaded spficklin parents: diff changeset	156 output = cbind(Module = module_labels, gts, gtsp)
b915c2c92e66 Uploaded spficklin parents: diff changeset	157 write.csv(output, file = opt$gene_association_file, quote=FALSE, row.names=TRUE)
b915c2c92e66 Uploaded spficklin parents: diff changeset	158
b915c2c92e66 Uploaded spficklin parents: diff changeset	159 ```
7 ffbafe466107 Uploaded spficklin parents: 3 diff changeset	160 Genes themselves can also have assocation with traits. This is calculated via a traditional correlation test as well. Another file has been generated named `gene_association.csv` which provides the list of genes, the modules they belong to and the assocaition of each gene to the trait features.

Mercurial > repos > spficklin > aurora_wgcna

annotate aurora_wgcna_trait.Rmd @ 7:ffbafe466107 draft