Mercurial > repos > artbio > gsc_high_dimensions_visualisation
comparison high_dim_visu.R @ 1:8e6ce12edd90 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_high_dimension_visualization commit 1b98c85982a2a9f9df4b318f672b9b68cff66a93"
| author | artbio |
|---|---|
| date | Mon, 02 Sep 2019 04:38:32 -0400 |
| parents | 241dd93219d7 |
| children | 701af13901fd |
comparison
equal
deleted
inserted
replaced
| 0:241dd93219d7 | 1:8e6ce12edd90 |
|---|---|
| 1 # load packages that are provided in the conda env | 1 # load packages that are provided in the conda env |
| 2 options( show.error.messages=F, | 2 options( show.error.messages=F, |
| 3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) |
| 4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
| 5 requiredPackages = c('optparse', 'Rtsne', 'ggplot2', 'ggfortify') | |
| 6 warnings() | 5 warnings() |
| 7 library(optparse) | 6 library(optparse) |
| 8 library(FactoMineR) | 7 library(FactoMineR) |
| 9 library(factoextra) | 8 library(factoextra) |
| 10 library(Rtsne) | 9 library(Rtsne) |
| 11 library(ggplot2) | 10 library(ggplot2) |
| 12 library(ggfortify) | 11 library(ggfortify) |
| 13 library(RColorBrewer) | 12 library(RColorBrewer) |
| 14 library(ClusterR) | 13 library(ClusterR) |
| 14 library(data.table) | |
| 15 | 15 |
| 16 # Arguments | 16 # Arguments |
| 17 option_list = list( | 17 option_list = list( |
| 18 make_option( | 18 make_option( |
| 19 "--data", | 19 "--data", |
| 159 type = 'integer', | 159 type = 'integer', |
| 160 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" | 160 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" |
| 161 ), | 161 ), |
| 162 make_option( | 162 make_option( |
| 163 "--HCPC_metric", | 163 "--HCPC_metric", |
| 164 default = 'euclidian', | 164 default = 'euclidean', |
| 165 type = 'character', | 165 type = 'character', |
| 166 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidian' or 'manhattan' [default : '%default' ]" | 166 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" |
| 167 ), | 167 ), |
| 168 make_option( | 168 make_option( |
| 169 "--HCPC_method", | 169 "--HCPC_method", |
| 170 default = 'ward', | 170 default = 'ward', |
| 171 type = 'character', | 171 type = 'character', |
| 207 type = 'character', | 207 type = 'character', |
| 208 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" | 208 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" |
| 209 ), | 209 ), |
| 210 make_option( | 210 make_option( |
| 211 "--HCPC_kk", | 211 "--HCPC_kk", |
| 212 default = -1, | 212 default = Inf, |
| 213 type = 'numeric', | 213 type = 'numeric', |
| 214 help = "The maximum number of iterations for the consolidation [default :'%default']" | 214 help = "The maximum number of iterations for the consolidation [default :'%default']" |
| 215 ), | 215 ), |
| 216 make_option( | 216 make_option( |
| 217 "--HCPC_clust", | 217 "--HCPC_clust", |
| 218 default = "", | 218 default = "", |
| 219 type = 'character', | 219 type = 'character', |
| 220 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" | 220 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" |
| 221 ), | 221 ), |
| 222 make_option( | 222 make_option( |
| 223 "--mutual_info", | 223 "--HCPC_mutual_info", |
| 224 default = "", | 224 default = "", |
| 225 type = "character", | 225 type = "character", |
| 226 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" | 226 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" |
| 227 ), | |
| 228 make_option( | |
| 229 "--HCPC_cluster_description", | |
| 230 default = "", | |
| 231 type = "character", | |
| 232 help = "Output file with variables most contributing to clustering [default :'%default']" | |
| 227 ) | 233 ) |
| 228 ) | 234 ) |
| 229 | 235 |
| 230 opt = parse_args(OptionParser(option_list = option_list), | 236 opt = parse_args(OptionParser(option_list = option_list), |
| 231 args = commandArgs(trailingOnly = TRUE)) | 237 args = commandArgs(trailingOnly = TRUE)) |
| 434 legend(x = 'topright', | 440 legend(x = 'topright', |
| 435 legend = as.character(factorColors$factor), | 441 legend = as.character(factorColors$factor), |
| 436 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) | 442 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) |
| 437 | 443 |
| 438 ## Normalized Mutual Information | 444 ## Normalized Mutual Information |
| 439 sink(opt$mutual_info) | 445 sink(opt$HCPC_mutual_info) |
| 440 res <- external_validation( | 446 res <- external_validation( |
| 441 true_labels = as.numeric(contrasting_factor$factor), | 447 true_labels = as.numeric(contrasting_factor$factor), |
| 442 clusters = as.numeric(res.hcpc$data.clust$clust), | 448 clusters = as.numeric(res.hcpc$data.clust$clust), |
| 443 summary_stats = TRUE | 449 summary_stats = TRUE |
| 444 ) | 450 ) |
| 446 | 452 |
| 447 } else { | 453 } else { |
| 448 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) | 454 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) |
| 449 } | 455 } |
| 450 } | 456 } |
| 457 | |
| 451 ## Clusters to which individual observations belong # used ? | 458 ## Clusters to which individual observations belong # used ? |
| 452 # Clust <- data.frame(Cluster = res.hcpc$data.clust[, (nrow(data) + 1)], | 459 # Clust <- data.frame(Cluster = res.hcpc$data.clust[, (nrow(data) + 1)], |
| 453 # Observation = rownames(res.hcpc$data.clust)) | 460 # Observation = rownames(res.hcpc$data.clust)) |
| 454 # metadata <- data.frame(Observation=colnames(data), row.names=colnames(data)) | 461 # metadata <- data.frame(Observation=colnames(data), row.names=colnames(data)) |
| 455 # metadata = merge(y = metadata, | 462 # metadata = merge(y = metadata, |
| 482 res_clustering <- data.frame(Cell = rownames(res.hcpc$data.clust), | 489 res_clustering <- data.frame(Cell = rownames(res.hcpc$data.clust), |
| 483 Cluster = res.hcpc$data.clust$clust) | 490 Cluster = res.hcpc$data.clust$clust) |
| 484 | 491 |
| 485 } | 492 } |
| 486 | 493 |
| 494 # Description of cluster by most contributing variables / gene expressions | |
| 495 | |
| 496 # first transform list of vectors in a list of dataframes | |
| 497 extract_description <- lapply(res.hcpc$desc.var$quanti, as.data.frame) | |
| 498 # second, transfer rownames (genes) to column in the dataframe, before rbinding | |
| 499 extract_description_w_genes <- Map(cbind, | |
| 500 extract_description, | |
| 501 genes= lapply(extract_description, rownames) | |
| 502 ) | |
| 503 # Then use data.table to collapse all generated dataframe, with the cluster id in first column | |
| 504 # using the {data.table} rbindlist function | |
| 505 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") | |
| 506 cluster_description = cluster_description[ ,c(8, 1, 2, 3,4,5,6,7)] # reorganize columns | |
| 507 | |
| 508 | |
| 509 # Finally, output cluster description data frame | |
| 510 write.table( | |
| 511 cluster_description, | |
| 512 file = opt$HCPC_cluster_description, | |
| 513 sep = "\t", | |
| 514 quote = F, | |
| 515 col.names = T, | |
| 516 row.names = F | |
| 517 ) | |
| 518 | |
| 487 } | 519 } |
| 488 | 520 |
| 489 ## Return coordinates file to user | 521 ## Return coordinates file to user |
| 490 | 522 |
| 491 if(opt$table_coordinates != ''){ | 523 if(opt$table_coordinates != ''){ |
| 507 sep = "\t", | 539 sep = "\t", |
| 508 quote = F, | 540 quote = F, |
| 509 col.names = T, | 541 col.names = T, |
| 510 row.names = F | 542 row.names = F |
| 511 ) | 543 ) |
| 512 } | 544 } |
| 513 | |
| 514 | |
| 515 | |
| 516 | |
| 517 | |
| 518 | |
| 519 | |
| 520 | |
| 521 | |
| 522 | |
| 523 |
