gsc_high_dimensions_visualisation: high_dim

comparison high_dim_visu.R @ 1:8e6ce12edd90 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_high_dimension_visualization commit 1b98c85982a2a9f9df4b318f672b9b68cff66a93"

author	artbio
date	Mon, 02 Sep 2019 04:38:32 -0400
parents	241dd93219d7
children	701af13901fd

comparison

equal deleted inserted replaced

-:241dd93219d7
+:8e6ce12edd90
 # load packages that are provided in the conda env
 options( show.error.messages=F,
 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
-requiredPackages = c('optparse', 'Rtsne', 'ggplot2', 'ggfortify')
 warnings()
 library(optparse)
 library(FactoMineR)
 library(factoextra)
 library(Rtsne)
 library(ggplot2)
 library(ggfortify)
 library(RColorBrewer)
 library(ClusterR)
+library(data.table)
 # Arguments
 option_list = list(
 make_option(
 "--data",
 type = 'integer',
 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]"
 ),
 make_option(
 "--HCPC_metric",
-default = 'euclidian',
+default = 'euclidean',
 type = 'character',
-help = "Metric to be used for calculating dissimilarities between observations, available 'euclidian' or 'manhattan' [default : '%default' ]"
+help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]"
 ),
 make_option(
 "--HCPC_method",
 default = 'ward',
 type = 'character',
 type = 'character',
 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']"
 ),
 make_option(
 "--HCPC_kk",
-default = -1,
+default = Inf,
 type = 'numeric',
 help = "The maximum number of iterations for the consolidation [default :'%default']"
 ),
 make_option(
 "--HCPC_clust",
 default = "",
 type = 'character',
 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']"
 ),
 make_option(
-"--mutual_info",
+"--HCPC_mutual_info",
 default = "",
 type = "character",
 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']"
+),
+make_option(
+"--HCPC_cluster_description",
+default = "",
+type = "character",
+help = "Output file with variables most contributing to clustering [default :'%default']"
 )
 )
 opt = parse_args(OptionParser(option_list = option_list),
 args = commandArgs(trailingOnly = TRUE))
 legend(x = 'topright',
 legend = as.character(factorColors$factor),
 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7)
 ## Normalized Mutual Information
-sink(opt$mutual_info)
+sink(opt$HCPC_mutual_info)
 res <- external_validation(
 true_labels = as.numeric(contrasting_factor$factor),
 clusters = as.numeric(res.hcpc$data.clust$clust),
 summary_stats = TRUE
 )
 } else {
 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE))
 }
 }
 ## Clusters to which individual observations belong # used ?
 # Clust <- data.frame(Cluster = res.hcpc$data.clust[, (nrow(data) + 1)],
 #                     Observation = rownames(res.hcpc$data.clust))
 # metadata <- data.frame(Observation=colnames(data), row.names=colnames(data))
 # metadata = merge(y = metadata,
 res_clustering <- data.frame(Cell = rownames(res.hcpc$data.clust),
 Cluster = res.hcpc$data.clust$clust)
 }
+# Describe each cluster by its most contributing variables (gene expressions),
+# taken from res.hcpc$desc.var$quanti, and write them as one tab-separated table.
+# Step 1: coerce every element of the per-cluster list to a data frame.
+extract_description <- lapply(res.hcpc$desc.var$quanti, as.data.frame)
+# Step 2: copy the row names (genes) into a regular "genes" column so that they
+# are still available after the data frames are stacked.
+extract_description_w_genes <- Map(cbind,
+extract_description,
+genes = lapply(extract_description, rownames)
+)
+# Step 3: stack all data frames with the {data.table} rbindlist function; the
+# "cluster_id" column records which list element each row came from.
+cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id")
+# Step 4: move "genes", then "cluster_id", to the front. Columns are selected by
+# name rather than by position, so the reordering does not depend on how many
+# statistics columns the description tables contain; the other columns keep
+# their relative order.
+setcolorder(cluster_description, c("genes", "cluster_id"))
+# Finally, output the cluster description table.
+# NOTE(review): this write is unconditional; with the default value "" of
+# --HCPC_cluster_description, write.table() prints to the console instead of a
+# file - confirm whether a guard on the option is wanted here.
+write.table(
+cluster_description,
+file = opt$HCPC_cluster_description,
+sep = "\t",
+quote = FALSE,
+col.names = TRUE,
+row.names = FALSE
+)
 }
 ## Return coordinates file to user
 if(opt$table_coordinates != ''){
 sep = "\t",
 quote = F,
 col.names = T,
 row.names = F
 )
 }

Mercurial > repos > artbio > gsc_high_dimensions_visualisation

comparison high_dim_visu.R @ 1:8e6ce12edd90 draft