# HG changeset patch # User spficklin # Date 1574328342 0 # Node ID 3a7b350927d51fc668a231047af390999ea0c733 # Parent b915c2c92e66911a492482a39122c7f9420ada24 Uploaded diff -r b915c2c92e66 -r 3a7b350927d5 aurora_wgcna.Rmd --- a/aurora_wgcna.Rmd Wed Nov 20 00:56:30 2019 +0000 +++ b/aurora_wgcna.Rmd Thu Nov 21 09:25:42 2019 +0000 @@ -15,7 +15,7 @@ The table below shows the first 100 rows of the Gene Expression Matrix (GEM) file that you provided. ```{r} -gem = read.csv(opt$expression_data, header = TRUE, row.names = 1) +gem = read.csv(opt$expression_data, header = TRUE, row.names = 1, na.strings = opt$missing_value) table_data = head(gem, 100) datatable(table_data) ``` @@ -31,13 +31,6 @@ gsg = goodSamplesGenes(gemt, verbose = 3) if (!gsg$allOK) { - if (sum(!gsg$goodGenes)>0) { - printFlush(paste("Removing genes:", paste(names(gemt)[!gsg$goodGenes], collapse = ", "))); - } - if (sum(!gsg$goodSamples)>0) { - printFlush(paste("Removing samples:", paste(rownames(gemt)[!gsg$goodSamples], collapse = ", "))); - # Remove the offending genes and samples from the data: - } gemt = gemt[gsg$goodSamples, gsg$goodGenes] } else { print('all genes are OK!') @@ -68,8 +61,11 @@ n_genes = ncol(gemt) n_samples = nrow(gemt) removed = length(which(keepSamples == FALSE)) -print(paste("A total of", removed, "sample(s) was/were removed")) - +if (removed == 1) { + print(paste("A total of", removed, "sample was removed")) +} else { + print(paste("A total of", removed, "samples were removed")) +} ``` ```{r fig.align='center'} @@ -178,13 +174,16 @@ ```{r} # Plot the dendrogram and the module colors underneath for (i in blocks) { - plotDendroAndColors(net$dendrograms[[i]], net$colors[net$blockGenes[[i]]], + options(repr.plot.width=15, repr.plot.height=10) + colors = module_labels[net$blockGenes[[i]]] + colors = sub('ME','', colors) + plotDendroAndColors(net$dendrograms[[i]], colors, "Module colors", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main=paste('Cluster Dendgrogram, Block', i)) } ``` -The network is housed in a *n* x *n* similarity matrix known as the the Topological Overlap Matrix (TOM), where *n* is the number of genes and the value in each cell indicates the measure of similarity in terms of correlation of expression and interconnectedness. The following heat maps organizes the genes using the dendrograms from above and the color intensity indicates genes that tend to be more similar. Genes with similar expression and in modules will cluster along the diagonal. +The network is housed in a *n* x *n* similarity matrix known as the the Topological Overlap Matrix (TOM), where *n* is the number of genes and the value in each cell indicates the measure of similarity in terms of correlation of expression and interconnectedness. The following heat maps shows the TOM. Note, that the dendrograms in the TOM heat map may from what is shown above. This is because a subset of genes were selected to draw the heat maps in order to save on computational time. ```{r} for (i in blocks) { @@ -192,16 +191,23 @@ load(net$TOMFiles[i]) TOM_size = length(which(net$blocks == i)) TOM = as.matrix(TOM, nrow=TOM_size, ncol=TOM_size) + dissTOM = 1-TOM - # Calculate the dissimilarity and raise it to apower to make it more visible on the plot. - dissTOM = 1-TOM - plotTOM = dissTOM^7 - diag(plotTOM) = NA + # For reproducibility, we set the random seed + set.seed(10); + select = sample(dim(TOM)[1], size = 1000); + selectColors = module_labels[net$blockGenes[[i]][select]] + selectTOM = dissTOM[select, select]; - # Now plot the heatmap - geneTree = net$dendrograms[[i]] - colors = sub('ME','', module_labels[which(net$blocks == i)]) - TOMplot(plotTOM, geneTree, colors, main = paste("Network Heatmap, Block", i)) + # There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster. + selectTree = hclust(as.dist(selectTOM), method = "average") + + # Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing + # the color palette; setting the diagonal to NA also improves the clarity of the plot + plotDiss = selectTOM^7; + diag(plotDiss) = NA; + colors = sub('ME','', selectColors) + TOMplot(plotDiss, selectTree, colors, main = "Network heatmap plot, selected genes") } ```