Mercurial > repos > artbio > gsc_high_dimensions_visualisation
comparison high_dim_visu.R @ 2:701af13901fd draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_high_dimension_visualization commit ef87a68f9a33f8418699d97627eb5f49a5e2c4a6
| author | artbio |
|---|---|
| date | Sun, 21 May 2023 16:26:02 +0000 |
| parents | 8e6ce12edd90 |
| children |
comparison
equal
deleted
inserted
replaced
| 1:8e6ce12edd90 | 2:701af13901fd |
|---|---|
| 1 # load packages that are provided in the conda env | 1 options(show.error.messages = FALSE, |
| 2 options( show.error.messages=F, | 2 error = function() { |
| 3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 3 cat(geterrmessage(), file = stderr()) |
| 4 q("no", 1, FALSE) | |
| 5 } | |
| 6 ) | |
| 4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
| 5 warnings() | 8 warnings() |
| 6 library(optparse) | 9 library(optparse) |
| 7 library(FactoMineR) | 10 library(FactoMineR) |
| 8 library(factoextra) | 11 library(factoextra) |
| 10 library(ggplot2) | 13 library(ggplot2) |
| 11 library(ggfortify) | 14 library(ggfortify) |
| 12 library(RColorBrewer) | 15 library(RColorBrewer) |
| 13 library(ClusterR) | 16 library(ClusterR) |
| 14 library(data.table) | 17 library(data.table) |
| 18 library(Polychrome) | |
| 15 | 19 |
| 16 # Arguments | 20 # Arguments |
| 17 option_list = list( | 21 option_list <- list( |
| 18 make_option( | 22 make_option( |
| 19 "--data", | 23 "--data", |
| 20 default = NA, | 24 default = NA, |
| 21 type = 'character', | 25 type = "character", |
| 22 help = "Input file that contains expression value to visualise" | 26 help = "Input file that contains expression value to visualise" |
| 23 ), | 27 ), |
| 24 make_option( | 28 make_option( |
| 25 "--sep", | 29 "--sep", |
| 26 default = '\t', | 30 default = "\t", |
| 27 type = 'character', | 31 type = "character", |
| 28 help = "File separator [default : '%default' ]" | 32 help = "File separator [default : '%default' ]" |
| 29 ), | 33 ), |
| 30 make_option( | 34 make_option( |
| 31 "--colnames", | 35 "--colnames", |
| 32 default = TRUE, | 36 default = TRUE, |
| 33 type = 'logical', | 37 type = "logical", |
| 34 help = "Consider first line as header ? [default : '%default' ]" | 38 help = "Consider first line as header ? [default : '%default' ]" |
| 35 ), | 39 ), |
| 36 make_option( | 40 make_option( |
| 37 "--out", | 41 "--out", |
| 38 default = "res.tab", | 42 default = "res.tab", |
| 39 type = 'character', | 43 type = "character", |
| 40 help = "Output name [default : '%default' ]" | 44 help = "Output name [default : '%default' ]" |
| 41 ), | 45 ), |
| 42 make_option( | 46 make_option( |
| 43 "--labels", | 47 "--labels", |
| 44 default = FALSE, | 48 default = FALSE, |
| 45 type = 'logical', | 49 type = "logical", |
| 46 help = "add labels in scatter plots [default : '%default' ]" | 50 help = "add labels in scatter plots [default : '%default' ]" |
| 47 ), | 51 ), |
| 48 make_option( | 52 make_option( |
| 49 "--factor", | 53 "--factor", |
| 50 default = '', | 54 default = "", |
| 51 type = 'character', | 55 type = "character", |
| 52 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]" | 56 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]" |
| 53 ), | 57 ), |
| 54 make_option( | 58 make_option( |
| 55 "--visu_choice", | 59 "--visu_choice", |
| 56 default = 'PCA', | 60 default = "PCA", |
| 57 type = 'character', | 61 type = "character", |
| 58 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" | 62 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" |
| 59 ), | 63 ), |
| 60 make_option( | 64 make_option( |
| 61 "--table_coordinates", | 65 "--table_coordinates", |
| 62 default = '', | 66 default = "", |
| 63 type = 'character', | 67 type = "character", |
| 64 help = "Table with plot coordinates [default : '%default' ]" | 68 help = "Table with plot coordinates [default : '%default' ]" |
| 65 ), | 69 ), |
| 66 make_option( | 70 make_option( |
| 67 "--Rtsne_seed", | 71 "--Rtsne_seed", |
| 68 default = 42, | 72 default = 42, |
| 69 type = 'integer', | 73 type = "integer", |
| 70 help = "Seed value for reproducibility [default : '%default' ]" | 74 help = "Seed value for reproducibility [default : '%default' ]" |
| 71 ), | 75 ), |
| 72 make_option( | 76 make_option( |
| 73 "--Rtsne_dims", | 77 "--Rtsne_dims", |
| 74 default = 2, | 78 default = 2, |
| 75 type = 'integer', | 79 type = "integer", |
| 76 help = "Output dimensionality [default : '%default' ]" | 80 help = "Output dimensionality [default : '%default' ]" |
| 77 ), | 81 ), |
| 78 make_option( | 82 make_option( |
| 79 "--Rtsne_initial_dims", | 83 "--Rtsne_initial_dims", |
| 80 default = 50, | 84 default = 50, |
| 81 type = 'integer', | 85 type = "integer", |
| 82 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]" | 86 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]" |
| 83 ), | 87 ), |
| 84 make_option( | 88 make_option( |
| 85 "--Rtsne_perplexity", | 89 "--Rtsne_perplexity", |
| 86 default = 5.0, | 90 default = 5.0, |
| 87 type = 'numeric', | 91 type = "numeric", |
| 88 help = "perplexity [default : '%default' ]" | 92 help = "perplexity [default : '%default' ]" |
| 89 ), | 93 ), |
| 90 make_option( | 94 make_option( |
| 91 "--Rtsne_theta", | 95 "--Rtsne_theta", |
| 92 default = 1.0, | 96 default = 1.0, |
| 93 type = 'numeric', | 97 type = "numeric", |
| 94 help = "theta [default : '%default' ]" | 98 help = "theta [default : '%default' ]" |
| 95 ), | 99 ), |
| 96 make_option( | 100 make_option( |
| 97 "--Rtsne_max_iter", | 101 "--Rtsne_max_iter", |
| 98 default = 1000, | 102 default = 1000, |
| 99 type = 'integer', | 103 type = "integer", |
| 100 help = "max_iter [default : '%default' ]" | 104 help = "max_iter [default : '%default' ]" |
| 101 ), | 105 ), |
| 102 make_option( | 106 make_option( |
| 103 "--Rtsne_pca", | 107 "--Rtsne_pca", |
| 104 default = TRUE, | 108 default = TRUE, |
| 105 type = 'logical', | 109 type = "logical", |
| 106 help = "Whether an initial PCA step should be performed [default : '%default' ]" | 110 help = "Whether an initial PCA step should be performed [default : '%default' ]" |
| 107 ), | 111 ), |
| 108 make_option( | 112 make_option( |
| 109 "--Rtsne_pca_center", | 113 "--Rtsne_pca_center", |
| 110 default = TRUE, | 114 default = TRUE, |
| 111 type = 'logical', | 115 type = "logical", |
| 112 help = "Should data be centered before pca is applied? [default : '%default' ]" | 116 help = "Should data be centered before pca is applied? [default : '%default' ]" |
| 113 ), | 117 ), |
| 114 make_option( | 118 make_option( |
| 115 "--Rtsne_pca_scale", | 119 "--Rtsne_pca_scale", |
| 116 default = FALSE, | 120 default = FALSE, |
| 117 type = 'logical', | 121 type = "logical", |
| 118 help = "Should data be scaled before pca is applied? [default : '%default' ]" | 122 help = "Should data be scaled before pca is applied? [default : '%default' ]" |
| 119 ), | 123 ), |
| 120 make_option( | 124 make_option( |
| 121 "--Rtsne_normalize", | 125 "--Rtsne_normalize", |
| 122 default = TRUE, | 126 default = TRUE, |
| 123 type = 'logical', | 127 type = "logical", |
| 124 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]" | 128 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]" |
| 125 ), | 129 ), |
| 126 make_option( | 130 make_option( |
| 127 "--Rtsne_exaggeration_factor", | 131 "--Rtsne_exaggeration_factor", |
| 128 default = 12.0, | 132 default = 12.0, |
| 129 type = 'numeric', | 133 type = "numeric", |
| 130 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" | 134 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" |
| 131 ), | 135 ), |
| 132 make_option( | 136 make_option( |
| 133 "--PCA_npc", | 137 "--PCA_npc", |
| 134 default = 5, | 138 default = 5, |
| 135 type = 'integer', | 139 type = "integer", |
| 136 help = "number of dimensions kept in the results [default : '%default' ]" | 140 help = "number of dimensions kept in the results [default : '%default' ]" |
| 137 ), | 141 ), |
| 138 make_option( | 142 make_option( |
| 139 "--PCA_x_axis", | 143 "--PCA_x_axis", |
| 140 default = 1, | 144 default = 1, |
| 141 type = 'integer', | 145 type = "integer", |
| 142 help = "PC to plot in the x axis [default : '%default' ]" | 146 help = "PC to plot in the x axis [default : '%default' ]" |
| 143 ), | 147 ), |
| 144 make_option( | 148 make_option( |
| 145 "--PCA_y_axis", | 149 "--PCA_y_axis", |
| 146 default = 2, | 150 default = 2, |
| 147 type = 'integer', | 151 type = "integer", |
| 148 help = "PC to plot in the y axis [default : '%default' ]" | 152 help = "PC to plot in the y axis [default : '%default' ]" |
| 149 ), | 153 ), |
| 150 make_option( | 154 make_option( |
| 151 "--HCPC_ncluster", | 155 "--HCPC_ncluster", |
| 152 default = -1, | 156 default = -1, |
| 153 type = 'numeric', | 157 type = "numeric", |
| 154 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" | 158 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" |
| 155 ), | 159 ), |
| 156 make_option( | 160 make_option( |
| 157 "--HCPC_npc", | 161 "--HCPC_npc", |
| 158 default = 5, | 162 default = 5, |
| 159 type = 'integer', | 163 type = "integer", |
| 160 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" | 164 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" |
| 161 ), | 165 ), |
| 162 make_option( | 166 make_option( |
| 163 "--HCPC_metric", | 167 "--HCPC_metric", |
| 164 default = 'euclidean', | 168 default = "euclidean", |
| 165 type = 'character', | 169 type = "character", |
| 166 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" | 170 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" |
| 167 ), | 171 ), |
| 168 make_option( | 172 make_option( |
| 169 "--HCPC_method", | 173 "--HCPC_method", |
| 170 default = 'ward', | 174 default = "ward", |
| 171 type = 'character', | 175 type = "character", |
| 172 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']" | 176 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']" |
| 173 ), | 177 ), |
| 174 make_option( | 178 make_option( |
| 175 "--pdf_out", | 179 "--pdf_out", |
| 176 default = "out.pdf", | 180 default = "out.pdf", |
| 177 type = 'character', | 181 type = "character", |
| 178 help = "pdf of plots [default : '%default' ]" | 182 help = "pdf of plots [default : '%default' ]" |
| 179 ), | 183 ), |
| 180 make_option( | 184 make_option( |
| 181 "--HCPC_consol", | 185 "--HCPC_consol", |
| 182 default = 'TRUE', | 186 default = "TRUE", |
| 183 type = 'logical', | 187 type = "logical", |
| 184 help = "If TRUE, a k-means consolidation is performed [default :'%default']" | 188 help = "If TRUE, a k-means consolidation is performed [default :'%default']" |
| 185 ), | 189 ), |
| 186 make_option( | 190 make_option( |
| 187 "--HCPC_itermax", | 191 "--HCPC_itermax", |
| 188 default = '10', | 192 default = "10", |
| 189 type = 'integer', | 193 type = "integer", |
| 190 help = "The maximum number of iterations for the consolidation [default :'%default']" | 194 help = "The maximum number of iterations for the consolidation [default :'%default']" |
| 191 ), | 195 ), |
| 192 make_option( | 196 make_option( |
| 193 "--HCPC_min", | 197 "--HCPC_min", |
| 194 default = '3', | 198 default = "3", |
| 195 type = 'integer', | 199 type = "integer", |
| 196 help = "The least possible number of clusters suggested [default :'%default']" | 200 help = "The least possible number of clusters suggested [default :'%default']" |
| 197 ), | 201 ), |
| 198 make_option( | 202 make_option( |
| 199 "--HCPC_max", | 203 "--HCPC_max", |
| 200 default = -1, | 204 default = -1, |
| 201 type = 'integer', | 205 type = "integer", |
| 202 help = "The higher possible number of clusters suggested [default :'%default']" | 206 help = "The higher possible number of clusters suggested [default :'%default']" |
| 203 ), | 207 ), |
| 204 make_option( | 208 make_option( |
| 205 "--HCPC_clusterCA", | 209 "--HCPC_clusterCA", |
| 206 default = 'rows', | 210 default = "rows", |
| 207 type = 'character', | 211 type = "character", |
| 208 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" | 212 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" |
| 209 ), | 213 ), |
| 210 make_option( | 214 make_option( |
| 211 "--HCPC_kk", | 215 "--HCPC_kk", |
| 212 default = Inf, | 216 default = Inf, |
| 213 type = 'numeric', | 217 type = "numeric", |
| 214 help = "The maximum number of iterations for the consolidation [default :'%default']" | 218 help = "The maximum number of iterations for the consolidation [default :'%default']" |
| 215 ), | 219 ), |
| 216 make_option( | 220 make_option( |
| 217 "--HCPC_clust", | 221 "--HCPC_clust", |
| 218 default = "", | 222 default = "", |
| 219 type = 'character', | 223 type = "character", |
| 220 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" | 224 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" |
| 221 ), | 225 ), |
| 222 make_option( | 226 make_option( |
| 223 "--HCPC_mutual_info", | 227 "--HCPC_mutual_info", |
| 224 default = "", | 228 default = "", |
| 231 type = "character", | 235 type = "character", |
| 232 help = "Output file with variables most contributing to clustering [default :'%default']" | 236 help = "Output file with variables most contributing to clustering [default :'%default']" |
| 233 ) | 237 ) |
| 234 ) | 238 ) |
| 235 | 239 |
| 236 opt = parse_args(OptionParser(option_list = option_list), | 240 opt <- parse_args(OptionParser(option_list = option_list), |
| 237 args = commandArgs(trailingOnly = TRUE)) | 241 args = commandArgs(trailingOnly = TRUE)) |
| 238 | 242 |
| 239 if (opt$sep == "tab") {opt$sep <- "\t"} | 243 if (opt$sep == "tab") { |
| 240 if (opt$sep == "comma") {opt$sep <- ","} | 244 opt$sep <- "\t" |
| 241 if(opt$HCPC_max == -1) {opt$HCPC_max <- NULL} | 245 } |
| 242 if(opt$HCPC_kk == -1) {opt$HCPC_kk <- Inf} | 246 if (opt$sep == "comma") { |
| 247 opt$sep <- "," | |
| 248 } | |
| 249 if (opt$HCPC_max == -1) { | |
| 250 opt$HCPC_max <- NULL | |
| 251 } | |
| 252 if (opt$HCPC_kk == -1) { | |
| 253 opt$HCPC_kk <- Inf | |
| 254 } | |
| 243 | 255 |
| 244 ##Add legend to plot() | 256 ##Add legend to plot() |
| 245 legend.col <- function(col, lev){ | 257 legend_col <- function(col, lev) { |
| 246 | 258 |
| 247 opar <- par | 259 opar <- par |
| 248 | 260 |
| 249 n <- length(col) | 261 n <- length(col) |
| 250 | 262 |
| 251 bx <- par("usr") | 263 bx <- par("usr") |
| 252 | 264 |
| 253 box.cx <- c(bx[2] + (bx[2] - bx[1]) / 1000, | 265 box_cx <- c(bx[2] + (bx[2] - bx[1]) / 1000, |
| 254 bx[2] + (bx[2] - bx[1]) / 1000 + (bx[2] - bx[1]) / 50) | 266 bx[2] + (bx[2] - bx[1]) / 1000 + (bx[2] - bx[1]) / 50) |
| 255 box.cy <- c(bx[3], bx[3]) | 267 box_cy <- c(bx[3], bx[3]) |
| 256 box.sy <- (bx[4] - bx[3]) / n | 268 box_sy <- (bx[4] - bx[3]) / n |
| 257 | 269 |
| 258 xx <- rep(box.cx, each = 2) | 270 xx <- rep(box_cx, each = 2) |
| 259 | 271 |
| 260 par(xpd = TRUE) | 272 par(xpd = TRUE) |
| 261 for(i in 1:n){ | 273 for (i in 1:n) { |
| 262 | 274 yy <- c(box_cy[1] + (box_sy * (i - 1)), |
| 263 yy <- c(box.cy[1] + (box.sy * (i - 1)), | 275 box_cy[1] + (box_sy * (i)), |
| 264 box.cy[1] + (box.sy * (i)), | 276 box_cy[1] + (box_sy * (i)), |
| 265 box.cy[1] + (box.sy * (i)), | 277 box_cy[1] + (box_sy * (i - 1))) |
| 266 box.cy[1] + (box.sy * (i - 1))) | 278 polygon(xx, yy, col = col[i], border = col[i]) |
| 267 polygon(xx, yy, col = col[i], border = col[i]) | |
| 268 | |
| 269 } | 279 } |
| 270 par(new = TRUE) | 280 par(new = TRUE) |
| 271 plot(0, 0, type = "n", | 281 plot(0, 0, type = "n", |
| 272 ylim = c(min(lev), max(lev)), | 282 ylim = c(min(lev), max(lev)), |
| 273 yaxt = "n", ylab = "", | 283 yaxt = "n", ylab = "", |
| 275 frame.plot = FALSE) | 285 frame.plot = FALSE) |
| 276 axis(side = 4, las = 2, tick = FALSE, line = .25) | 286 axis(side = 4, las = 2, tick = FALSE, line = .25) |
| 277 par <- opar | 287 par <- opar |
| 278 } | 288 } |
| 279 | 289 |
| 280 | 290 data <- read.delim( |
| 281 data = read.table( | |
| 282 opt$data, | 291 opt$data, |
| 283 check.names = FALSE, | 292 check.names = FALSE, |
| 284 header = opt$colnames, | 293 header = opt$colnames, |
| 285 row.names = 1, | 294 row.names = 1, |
| 286 sep = opt$sep | 295 sep = opt$sep |
| 287 ) | 296 ) |
| 288 | 297 |
| 289 # Contrasting factor and its colors | 298 # Contrasting factor and its colors |
| 290 if (opt$factor != '') { | 299 if (opt$factor != "") { |
| 291 contrasting_factor <- read.delim( | 300 contrasting_factor <- read.delim( |
| 292 opt$factor, | 301 opt$factor, |
| 293 header = TRUE | 302 header = TRUE |
| 294 ) | 303 ) |
| 295 rownames(contrasting_factor) <- contrasting_factor[,1] | 304 rownames(contrasting_factor) <- contrasting_factor[, 1] |
| 296 contrasting_factor <- contrasting_factor[colnames(data),] | 305 contrasting_factor <- contrasting_factor[colnames(data), ] |
| 297 colnames(contrasting_factor) <- c("id","factor") | 306 colnames(contrasting_factor) <- c("id", "factor") |
| 298 if(is.numeric(contrasting_factor$factor)){ | 307 if (is.numeric(contrasting_factor$factor)) { |
| 299 factor_cols <- rev(brewer.pal(n = 11, name = "RdYlGn"))[contrasting_factor$factor] | 308 factor_cols <- rev(brewer.pal(n = 11, name = "RdYlGn"))[contrasting_factor$factor] |
| 300 } else { | 309 } else { |
| 301 contrasting_factor$factor <- as.factor(contrasting_factor$factor) | 310 contrasting_factor$factor <- as.factor(contrasting_factor$factor) |
| 302 if(nlevels(contrasting_factor$factor) == 2){ | 311 if (nlevels(contrasting_factor$factor) == 2) { |
| 303 colors_labels <- c("#E41A1C", "#377EB8") | 312 colors_labels <- c("#E41A1C", "#377EB8") |
| 304 } else { | 313 } else { |
| 305 colors_labels <- brewer.pal(nlevels(contrasting_factor$factor), name = 'Paired') | 314 set.seed(567629) |
| 315 colors_labels <- createPalette(nlevels(contrasting_factor$factor), c("#5A5156", "#E4E1E3", "#F6222E")) | |
| 316 names(colors_labels) <- NULL | |
| 306 } | 317 } |
| 307 factorColors <- | 318 factor_colors <- |
| 308 with(contrasting_factor, | 319 with(contrasting_factor, |
| 309 data.frame(factor = levels(contrasting_factor$factor), | 320 data.frame(factor = levels(contrasting_factor$factor), |
| 310 color = I(colors_labels) | 321 color = I(colors_labels) |
| 311 ) | 322 ) |
| 312 ) | 323 ) |
| 313 factor_cols <- factorColors$color[match(contrasting_factor$factor, | 324 factor_cols <- factor_colors$color[match(contrasting_factor$factor, |
| 314 factorColors$factor)] | 325 factor_colors$factor)] |
| 315 } | 326 } |
| 316 } else { | 327 } else { |
| 317 factor_cols <- rep("deepskyblue4", length(rownames(data))) | 328 factor_cols <- rep("deepskyblue4", length(rownames(data))) |
| 318 } | 329 } |
| 319 | 330 |
| 320 ################ t-SNE #################### | 331 ################ t-SNE #################### |
| 321 if (opt$visu_choice == 'tSNE') { | 332 if (opt$visu_choice == "tSNE") { |
| 322 # filter and transpose df for tsne and pca | 333 # filter and transpose df for tsne and pca |
| 323 tdf = t(data) | 334 tdf <- t(data) |
| 324 # make tsne and plot results | 335 # make tsne and plot results |
| 325 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility | 336 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility |
| 326 | 337 |
| 327 tsne_out <- Rtsne(tdf, | 338 tsne_out <- Rtsne(tdf, |
| 328 dims = opt$Rtsne_dims, | 339 dims = opt$Rtsne_dims, |
| 329 initial_dims = opt$Rtsne_initial_dims, | 340 initial_dims = opt$Rtsne_initial_dims, |
| 330 perplexity = opt$Rtsne_perplexity , | 341 perplexity = opt$Rtsne_perplexity, |
| 331 theta = opt$Rtsne_theta, | 342 theta = opt$Rtsne_theta, |
| 332 max_iter = opt$Rtsne_max_iter, | 343 max_iter = opt$Rtsne_max_iter, |
| 333 pca = opt$Rtsne_pca, | 344 pca = opt$Rtsne_pca, |
| 334 pca_center = opt$Rtsne_pca_center, | 345 pca_center = opt$Rtsne_pca_center, |
| 335 pca_scale = opt$Rtsne_pca_scale, | 346 pca_scale = opt$Rtsne_pca_scale, |
| 336 normalize = opt$Rtsne_normalize, | 347 normalize = opt$Rtsne_normalize, |
| 337 exaggeration_factor=opt$Rtsne_exaggeration_factor) | 348 exaggeration_factor = opt$Rtsne_exaggeration_factor) |
| 338 | 349 |
| 339 embedding <- as.data.frame(tsne_out$Y[,1:2]) | 350 embedding <- as.data.frame(tsne_out$Y[, 1:2]) |
| 340 embedding$Class <- as.factor(rownames(tdf)) | 351 embedding$Class <- as.factor(rownames(tdf)) |
| 341 gg_legend = theme(legend.position="right") | 352 gg_legend <- theme(legend.position = "right") |
| 342 if (opt$factor == '') { | 353 if (opt$factor == "") { |
| 343 ggplot(embedding, aes(x=V1, y=V2)) + | 354 ggplot(embedding, aes(x = V1, y = V2)) + |
| 344 geom_point(size=1, color='deepskyblue4') + | 355 geom_point(size = 1, color = "deepskyblue4") + |
| 345 gg_legend + | 356 gg_legend + |
| 346 xlab("t-SNE 1") + | 357 xlab("t-SNE 1") + |
| 347 ylab("t-SNE 2") + | 358 ylab("t-SNE 2") + |
| 348 ggtitle('t-SNE') + | 359 ggtitle("t-SNE") + |
| 349 if (opt$labels) { | 360 if (opt$labels) { |
| 350 geom_text(aes(label=Class),hjust=-0.2, vjust=-0.5, size=1.5, color='deepskyblue4') | 361 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = 1.5, color = "deepskyblue4") |
| 351 } | 362 } |
| 352 } else { | 363 } else { |
| 353 if(is.numeric(contrasting_factor$factor)){ | 364 if (is.numeric(contrasting_factor$factor)) { |
| 354 embedding$factor <- contrasting_factor$factor | 365 embedding$factor <- contrasting_factor$factor |
| 355 } else { | 366 } else { |
| 356 embedding$factor <- as.factor(contrasting_factor$factor) | 367 embedding$factor <- as.factor(contrasting_factor$factor) |
| 357 } | 368 } |
| 358 | 369 |
| 359 ggplot(embedding, aes(x=V1, y=V2, color=factor)) + | 370 ggplot(embedding, aes(x = V1, y = V2, color = factor)) + |
| 360 geom_point(size=1) + #, color=factor_cols | 371 geom_point(size = 1) + |
| 361 gg_legend + | 372 gg_legend + |
| 362 xlab("t-SNE 1") + | 373 xlab("t-SNE 1") + |
| 363 ylab("t-SNE 2") + | 374 ylab("t-SNE 2") + |
| 364 ggtitle('t-SNE') + | 375 ggtitle("t-SNE") + |
| 365 if (opt$labels) { | 376 if (opt$labels) { |
| 366 geom_text(aes(label=Class, colour=factor),hjust=-0.2, vjust=-0.5, size=1.5) | 377 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = 1.5) |
| 367 } | 378 } |
| 368 } | 379 } |
| 369 ggsave(file=opt$pdf_out, device="pdf") | 380 ggsave(file = opt$pdf_out, device = "pdf") |
| 370 | 381 |
| 371 #save coordinates table | 382 #save coordinates table |
| 372 if(opt$table_coordinates != ''){ | 383 if (opt$table_coordinates != "") { |
| 373 coord_table <- cbind(rownames(tdf), round(as.data.frame(tsne_out$Y), 6)) | 384 coord_table <- cbind(rownames(tdf), round(as.data.frame(tsne_out$Y), 6)) |
| 374 colnames(coord_table)=c("Cells",paste0("DIM",(1:opt$Rtsne_dims))) | 385 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$Rtsne_dims))) |
| 375 } | 386 } |
| 376 } | 387 } |
| 377 | 388 |
| 378 | 389 |
| 379 ######### make PCA with FactoMineR ################# | 390 ######### make PCA with FactoMineR ################# |
| 380 if (opt$visu_choice == 'PCA') { | 391 if (opt$visu_choice == "PCA") { |
| 381 pca <- PCA(t(data), ncp=opt$PCA_npc, graph=FALSE) | 392 pca <- PCA(t(data), ncp = opt$PCA_npc, graph = FALSE) |
| 382 pdf(opt$pdf_out) | 393 pdf(opt$pdf_out) |
| 383 if (opt$labels == FALSE) { | 394 if (opt$labels == FALSE) { |
| 384 plot(pca, axes = c(opt$PCA_x_axis,opt$PCA_y_axis), label="none" , col.ind = factor_cols) | 395 plot(pca, axes = c(opt$PCA_x_axis, opt$PCA_y_axis), label = "none", col.ind = factor_cols) |
| 385 } else { | 396 } else { |
| 386 plot(pca, axes = c(opt$PCA_x_axis,opt$PCA_y_axis), cex=0.2 , col.ind = factor_cols) | 397 plot(pca, axes = c(opt$PCA_x_axis, opt$PCA_y_axis), cex = 0.2, col.ind = factor_cols) |
| 387 } | 398 } |
| 388 if (opt$factor != '') { | 399 if (opt$factor != "") { |
| 389 if(is.factor(contrasting_factor$factor)) { | 400 if (is.factor(contrasting_factor$factor)) { |
| 390 legend(x = 'topright', | 401 legend(x = "topright", |
| 391 legend = as.character(factorColors$factor), | 402 legend = as.character(factor_colors$factor), |
| 392 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) | 403 col = factor_colors$color, pch = 16, bty = "n", xjust = 1, cex = 0.7) |
| 393 } else { | 404 } else { |
| 394 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) | 405 legend_col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) |
| 395 } | 406 } |
| 396 } | 407 } |
| 397 dev.off() | 408 dev.off() |
| 398 | 409 |
| 399 #save coordinates table | 410 #save coordinates table |
| 400 if(opt$table_coordinates != ''){ | 411 if (opt$table_coordinates != "") { |
| 401 coord_table <- cbind(rownames(pca$ind$coord), round(as.data.frame(pca$ind$coord), 6)) | 412 coord_table <- cbind(rownames(pca$ind$coord), round(as.data.frame(pca$ind$coord), 6)) |
| 402 colnames(coord_table)=c("Cells",paste0("DIM",(1:opt$PCA_npc))) | 413 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$PCA_npc))) |
| 403 } | 414 } |
| 404 | 415 |
| 405 } | 416 } |
| 406 | 417 |
| 407 ########### make HCPC with FactoMineR ########## | 418 ########### make HCPC with FactoMineR ########## |
| 408 if (opt$visu_choice == 'HCPC') { | 419 if (opt$visu_choice == "HCPC") { |
| 409 | 420 |
| 410 # HCPC starts with a PCA | 421 # HCPC starts with a PCA |
| 411 pca <- PCA( | 422 pca <- PCA( |
| 412 t(data), | 423 t(data), |
| 413 ncp = opt$HCPC_npc, | 424 ncp = opt$HCPC_npc, |
| 414 graph = FALSE, | 425 graph = FALSE, |
| 415 ) | 426 ) |
| 416 | 427 |
| 417 PCA_IndCoord = as.data.frame(pca$ind$coord) # coordinates of observations in PCA | 428 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA |
| 418 | 429 |
| 419 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering | 430 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering |
| 420 res.hcpc <- HCPC(pca, | 431 res_hcpc <- HCPC(pca, |
| 421 nb.clust=opt$HCPC_ncluster, metric=opt$HCPC_metric, method=opt$HCPC_method, | 432 nb.clust = opt$HCPC_ncluster, metric = opt$HCPC_metric, method = opt$HCPC_method, |
| 422 graph=F,consol=opt$HCPC_consol,iter.max=opt$HCPC_itermax,min=opt$HCPC_min,max=opt$HCPC_max, | 433 graph = FALSE, consol = opt$HCPC_consol, iter.max = opt$HCPC_itermax, min = opt$HCPC_min, max = opt$HCPC_max, |
| 423 cluster.CA=opt$HCPC_clusterCA,kk=opt$HCPC_kk) | 434 cluster.CA = opt$HCPC_clusterCA, kk = opt$HCPC_kk) |
| 424 # HCPC plots | 435 # HCPC plots |
| 425 dims <- head(as.data.frame(res.hcpc$call$t$res$eig),2) # dims variances in column 2 | 436 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2 |
| 426 pdf(opt$pdf_out) | 437 pdf(opt$pdf_out) |
| 427 plot(res.hcpc, choice="tree") | 438 plot(res_hcpc, choice = "tree") |
| 428 plot(res.hcpc, choice="bar") | 439 plot(res_hcpc, choice = "bar") |
| 429 plot(res.hcpc, choice="3D.map") | 440 plot(res_hcpc, choice = "3D.map") |
| 430 if (opt$labels == FALSE) { | 441 if (opt$labels == FALSE) { |
| 431 plot(res.hcpc, choice="map", label="none") | 442 plot(res_hcpc, choice = "map", label = "none") |
| 432 } else { | 443 } else { |
| 433 plot(res.hcpc, choice="map") | 444 plot(res_hcpc, choice = "map") |
| 434 } | 445 } |
| 435 | 446 |
| 436 # user contrasts on the pca | 447 # user contrasts on the pca |
| 437 if (opt$factor != '') { | 448 if (opt$factor != "") { |
| 438 plot(pca, label="none", col.ind = factor_cols) | 449 plot(pca, label = "none", col.ind = factor_cols) |
| 439 if(is.factor(contrasting_factor$factor)) { | 450 if (is.factor(contrasting_factor$factor)) { |
| 440 legend(x = 'topright', | 451 legend(x = "topright", |
| 441 legend = as.character(factorColors$factor), | 452 legend = as.character(factor_colors$factor), |
| 442 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) | 453 col = factor_colors$color, pch = 16, bty = "n", xjust = 1, cex = 0.7) |
| 443 | 454 |
| 444 ## Normalized Mutual Information | 455 ## Normalized Mutual Information |
| 445 sink(opt$HCPC_mutual_info) | 456 sink(opt$HCPC_mutual_info) |
| 446 res <- external_validation( | 457 res <- external_validation( |
| 447 true_labels = as.numeric(contrasting_factor$factor), | 458 true_labels = as.numeric(contrasting_factor$factor), |
| 448 clusters = as.numeric(res.hcpc$data.clust$clust), | 459 clusters = as.numeric(res_hcpc$data.clust$clust), |
| 449 summary_stats = TRUE | 460 summary_stats = TRUE |
| 450 ) | 461 ) |
| 451 sink() | 462 sink() |
| 452 | 463 |
| 453 } else { | 464 } else { |
| 454 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) | 465 legend_col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) |
| 455 } | 466 } |
| 456 } | 467 } |
| 457 | |
| 458 ## Clusters to which individual observations belong # used ? | |
| 459 # Clust <- data.frame(Cluster = res.hcpc$data.clust[, (nrow(data) + 1)], | |
| 460 # Observation = rownames(res.hcpc$data.clust)) | |
| 461 # metadata <- data.frame(Observation=colnames(data), row.names=colnames(data)) | |
| 462 # metadata = merge(y = metadata, | |
| 463 # x = Clust, | |
| 464 # by = "Observation") | |
| 465 | |
| 466 # unclear utility | |
| 467 # ObsNumberPerCluster = as.data.frame(table(metadata$Cluster)) | |
| 468 # colnames(ObsNumberPerCluster) = c("Cluster", "ObsNumber") | |
| 469 # | |
| 470 # ## Silhouette Plot # not used | |
| 471 # hc.cut = hcut(PCA_IndCoord, | |
| 472 # k = nlevels(metadata$Cluster), | |
| 473 # hc_method = "ward.D2") | |
| 474 # | |
| 475 # Sil = fviz_silhouette(hc.cut) | |
| 476 # sil1 = as.data.frame(Sil$data) | |
| 477 | 468 |
| 478 dev.off() | 469 dev.off() |
| 479 | 470 |
| 480 if(opt$table_coordinates != ''){ | 471 if (opt$table_coordinates != "") { |
| 481 coord_table <- cbind(Cell=rownames(res.hcpc$call$X), | 472 coord_table <- cbind(Cell = rownames(res_hcpc$call$X), |
| 482 round(as.data.frame(res.hcpc$call$X[, -length(res.hcpc$call$X)]), 6), | 473 round(as.data.frame(res_hcpc$call$X[, -length(res_hcpc$call$X)]), 6), |
| 483 as.data.frame(res.hcpc$call$X[, length(res.hcpc$call$X)]) | 474 as.data.frame(res_hcpc$call$X[, length(res_hcpc$call$X)]) |
| 484 ) | 475 ) |
| 485 colnames(coord_table)=c("Cells",paste0("DIM",(1:opt$HCPC_npc)),"Cluster") | 476 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$HCPC_npc)), "Cluster") |
| 486 } | 477 } |
| 487 | 478 |
| 488 if(opt$HCPC_clust != ""){ | 479 if (opt$HCPC_clust != "") { |
| 489 res_clustering <- data.frame(Cell = rownames(res.hcpc$data.clust), | 480 res_clustering <- data.frame(Cell = rownames(res_hcpc$data.clust), |
| 490 Cluster = res.hcpc$data.clust$clust) | 481 Cluster = res_hcpc$data.clust$clust) |
| 491 | 482 |
| 492 } | 483 } |
| 493 | 484 |
| 494 # Description of cluster by most contributing variables / gene expressions | 485 # Description of cluster by most contributing variables / gene expressions |
| 495 | 486 |
| 496 # first transform list of vectors in a list of dataframes | 487 # first transform list of vectors in a list of dataframes |
| 497 extract_description <- lapply(res.hcpc$desc.var$quanti, as.data.frame) | 488 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame) |
| 498 # second, transfer rownames (genes) to column in the dataframe, before rbinding | 489 # second, transfer rownames (genes) to column in the dataframe, before rbinding |
| 499 extract_description_w_genes <- Map(cbind, | 490 extract_description_w_genes <- Map(cbind, |
| 500 extract_description, | 491 extract_description, |
| 501 genes= lapply(extract_description, rownames) | 492 genes = lapply(extract_description, rownames) |
| 502 ) | 493 ) |
| 503 # Then use data.table to collapse all generated dataframe, with the cluster id in first column | 494 # Then use data.table to collapse all generated dataframe, with the cluster id in first column |
| 504 # using the {data.table} rbindlist function | 495 # using the {data.table} rbindlist function |
| 505 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") | 496 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") |
| 506 cluster_description = cluster_description[ ,c(8, 1, 2, 3,4,5,6,7)] # reorganize columns | 497 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # reorganize columns |
| 507 | 498 |
| 508 | 499 |
| 509 # Finally, output cluster description data frame | 500 # Finally, output cluster description data frame |
| 510 write.table( | 501 write.table( |
| 511 cluster_description, | 502 cluster_description, |
| 512 file = opt$HCPC_cluster_description, | 503 file = opt$HCPC_cluster_description, |
| 513 sep = "\t", | 504 sep = "\t", |
| 514 quote = F, | 505 quote = FALSE, |
| 515 col.names = T, | 506 col.names = TRUE, |
| 516 row.names = F | 507 row.names = FALSE |
| 517 ) | 508 ) |
| 518 | 509 |
| 519 } | 510 } |
| 520 | 511 |
| 521 ## Return coordinates file to user | 512 ## Return coordinates file to user |
| 522 | 513 |
| 523 if(opt$table_coordinates != ''){ | 514 if (opt$table_coordinates != "") { |
| 524 write.table( | 515 write.table( |
| 525 coord_table, | 516 coord_table, |
| 526 file = opt$table_coordinates, | 517 file = opt$table_coordinates, |
| 527 sep = "\t", | 518 sep = "\t", |
| 528 quote = F, | 519 quote = FALSE, |
| 529 col.names = T, | 520 col.names = TRUE, |
| 530 row.names = F | 521 row.names = FALSE |
| 531 ) | 522 ) |
| 532 } | 523 } |
| 533 | 524 |
| 534 | 525 |
| 535 if(opt$HCPC_clust != ""){ | 526 if (opt$HCPC_clust != "") { |
| 536 write.table( | 527 write.table( |
| 537 res_clustering, | 528 res_clustering, |
| 538 file = opt$HCPC_clust, | 529 file = opt$HCPC_clust, |
| 539 sep = "\t", | 530 sep = "\t", |
| 540 quote = F, | 531 quote = FALSE, |
| 541 col.names = T, | 532 col.names = TRUE, |
| 542 row.names = F | 533 row.names = FALSE |
| 543 ) | 534 ) |
| 544 } | 535 } |
