Mercurial > repos > artbio > pathifier
comparison pathifier.R @ 0:dd01ae279778 draft default tip
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/pathifier commit b94cfc7bf8df30aa8e9249b75ea31332ee2bada1"
| author | artbio |
|---|---|
| date | Mon, 12 Apr 2021 09:54:52 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dd01ae279778 |
|---|---|
| 1 ################################################################################################## | |
| 2 # Running PATHIFIER (Drier et al., 2013) | |
| 3 # Based on the work of Author: Miguel Angel Garcia-Campos - Github: https://github.com/AngelCampos | |
| 4 ################################################################################################## | |
| 5 | |
| 6 | |
| 7 options(show.error.messages = F, error = function() { | |
| 8 cat(geterrmessage(), file = stderr()); q("no", 1, F) | |
| 9 } | |
| 10 ) | |
| 11 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
| 12 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
| 13 | |
| 14 library(pathifier) | |
| 15 library(optparse) | |
| 16 library(pheatmap) | |
| 17 library(scatterplot3d) | |
| 18 library(circlize) | |
| 19 | |
| 20 option_list <- list( | |
| 21 make_option( | |
| 22 "--exp", | |
| 23 type = "character", | |
| 24 help = "Expression matrix"), | |
| 25 make_option( | |
| 26 "--sep", | |
| 27 type = "character", | |
| 28 default = "\t", | |
| 29 help = "File separator [default : '%default']" | |
| 30 ), | |
| 31 make_option( | |
| 32 "--genes", | |
| 33 type = "character", | |
| 34 help = "Gene sets Pathways : gmt format (one pathway per line : Name, description, genes (one by column), tab separated)"), | |
| 35 make_option( | |
| 36 "--is_normal", | |
| 37 default = F, | |
| 38 type = "logical", | |
| 39 help = "Define normals cells in your data"), | |
| 40 make_option( | |
| 41 "--normals", | |
| 42 type = "character", | |
| 43 help = "A vector of sample status : 1 = Healthy, 0 = Tumor. Must be in the same order as in expression data"), | |
| 44 make_option( | |
| 45 "--logfile", | |
| 46 type = "character", | |
| 47 default = "log.txt", | |
| 48 help = "Log file name [default : '%default']" | |
| 49 ), | |
| 50 make_option( | |
| 51 "--max_stability", | |
| 52 type = "logical", | |
| 53 default = T, | |
| 54 help = "If true, throw away components leading to low stability of sampling noise [default : '%default']" | |
| 55 ), | |
| 56 make_option( | |
| 57 "--attempts", | |
| 58 type = "integer", | |
| 59 default = 10, | |
| 60 help = "Number of runs to determine stability. [default : '%default']" | |
| 61 ), | |
| 62 make_option( | |
| 63 "--min_std", | |
| 64 type = "character", | |
| 65 default = "0.4", | |
| 66 help = "Minimum of standard deviation to filter out low variable genes. | |
| 67 Use --min.std data, to use the minimum std of your data [default : '%default']" | |
| 68 ), | |
| 69 make_option( | |
| 70 "--min_exp", | |
| 71 type = "character", | |
| 72 default = "4", | |
| 73 help = "Minimum of gene expression to filter out low expressed genes. | |
| 74 Use --min.exp data, to use the minimum expression of your data [default : '%default']" | |
| 75 ), | |
| 76 make_option( | |
| 77 "--pds", | |
| 78 type = "character", | |
| 79 default = "PDS.tsv", | |
| 80 help = "Output PDS (Pathway deregulation score) of Pathifier in tabular file [default : '%default']" | |
| 81 ), | |
| 82 make_option( | |
| 83 "--heatmap_cluster_cells", | |
| 84 type = "logical", | |
| 85 default = TRUE, | |
| 86 help = "Cluster columns (cells) in the heatmap [default : '%default']" | |
| 87 ), | |
| 88 make_option( | |
| 89 "--heatmap_cluster_pathways", | |
| 90 type = "logical", | |
| 91 default = TRUE, | |
| 92 help = "Cluster rows (pathways) in the heatmap [default : '%default']" | |
| 93 ), | |
| 94 make_option( | |
| 95 "--heatmap_show_cell_labels", | |
| 96 type = "logical", | |
| 97 default = FALSE, | |
| 98 help = "Print column names (cells) on the heatmap [default : '%default']" | |
| 99 ), | |
| 100 make_option( | |
| 101 "--heatmap_show_pathway_labels", | |
| 102 type = "logical", | |
| 103 default = FALSE, | |
| 104 help = "Print row names (pathways) on the heatmap [default : '%default']" | |
| 105 ), | |
| 106 make_option( | |
| 107 "--plot", | |
| 108 type = "character", | |
| 109 default = "./plot.pdf", | |
| 110 help = "Pathifier visualization [default : '%default']" | |
| 111 ), | |
| 112 make_option( | |
| 113 "--rdata", | |
| 114 type = "character", | |
| 115 default = "./results.rdata", | |
| 116 help = "Pathifier object (S4) [default : '%default']")) | |
| 117 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) | |
| 118 args <- parse_args(parser) | |
| 119 if (args$sep == "tab") { | |
| 120 args$sep <- "\t" | |
| 121 } | |
| 122 | |
| 123 | |
| 124 # set seed for reproducibility | |
| 125 set.seed(123) | |
| 126 | |
| 127 # Load expression data for PATHIFIER | |
| 128 exp_matrix <- as.matrix(read.delim(file = args$exp, | |
| 129 as.is = T, | |
| 130 row.names = 1, | |
| 131 sep = args$sep, | |
| 132 check.names = F)) | |
| 133 | |
| 134 # Load Genesets annotation | |
| 135 gene_sets_file <- file(args$genes, open = "r") | |
| 136 gene_sets <- readLines(gene_sets_file) | |
| 137 close(gene_sets_file) | |
| 138 | |
| 139 # Generate a list that contains genes in genesets | |
| 140 gs <- strsplit(gene_sets, "\t") | |
| 141 names(gs) <- lapply(gs, function(x) x[1]) | |
| 142 gs <- lapply(gs, function(x) x[-c(1:2)]) | |
| 143 | |
| 144 # Generate a list that contains the names of the genesets used | |
| 145 pathwaynames <- names(gs) | |
| 146 | |
| 147 # Prepare data and parameters ################################################## | |
| 148 # Extract information from binary phenotypes. 1 = Normal, 0 = Tumor | |
| 149 if (args$is_normal == T) { | |
| 150 normals <- read.delim(file = args$normals, h = F) | |
| 151 normals <- as.logical(normals[, 2]) | |
| 152 n_exp_matrix <- exp_matrix[, normals] | |
| 153 } else n_exp_matrix <- exp_matrix | |
| 154 | |
| 155 # Calculate MIN_STD | |
| 156 rsd <- apply(n_exp_matrix, 1, sd) | |
| 157 min_std <- quantile(rsd, 0.25) | |
| 158 | |
| 159 # Calculate MIN_EXP | |
| 160 min_exp <- quantile(as.vector(as.matrix(exp_matrix)), | |
| 161 0.1) # Percentile 10 of data | |
| 162 | |
| 163 # Filter low value genes. At least 10% of samples with values over min_exp | |
| 164 # Set expression levels < MIN_EXP to MIN_EXP | |
| 165 over <- apply(exp_matrix, 1, function(x) x > min_exp) | |
| 166 g_over <- apply(over, 2, mean) | |
| 167 g_over <- names(g_over)[g_over > 0.1] | |
| 168 exp_matrix_filtered <- exp_matrix[g_over, ] | |
| 169 exp_matrix_filtered[exp_matrix_filtered < min_exp] <- min_exp | |
| 170 | |
| 171 # Set maximum 5000 genes with more variance | |
| 172 variable_genes <- names(sort(apply(exp_matrix_filtered, 1, var), decreasing = T))[1:5000] | |
| 173 variable_genes <- variable_genes[!is.na(variable_genes)] | |
| 174 exp_matrix_filtered <- exp_matrix_filtered[variable_genes, ] | |
| 175 allgenes <- as.vector(rownames(exp_matrix_filtered)) | |
| 176 | |
| 177 | |
| 178 if (args$min_std == "data") { | |
| 179 args$min_std <- min_std | |
| 180 } else args$min_std <- as.numeric(args$min_std) | |
| 181 | |
| 182 if (args$min_exp == "data") { | |
| 183 args$min_exp <- min_exp | |
| 184 } else args$min_exp <- as.numeric(args$min_exp) | |
| 185 | |
| 186 | |
| 187 # Open pdf | |
| 188 pdf(args$plot) | |
| 189 | |
| 190 # Construct continuous color scale | |
| 191 col_score_fun <- colorRamp2(c(0, 0.5, 1), c("#4575B4", "#FFFFBF", "#D73027")) | |
| 192 | |
| 193 # Run Pathifier | |
| 194 if (args$is_normal == T) { | |
| 195 pds <- quantify_pathways_deregulation(exp_matrix_filtered, | |
| 196 allgenes, | |
| 197 gs, | |
| 198 pathwaynames, | |
| 199 normals, | |
| 200 maximize_stability = args$max_stability, | |
| 201 attempts = args$attempts, | |
| 202 logfile = args$logfile, | |
| 203 min_std = args$min_std, | |
| 204 min_exp = args$min_exp) | |
| 205 for (i in pathwaynames) { | |
| 206 df <- data.frame(pds$curves[[i]][, 1:3], | |
| 207 normal = normals, | |
| 208 PDS = as.numeric(pds$scores[[i]]), | |
| 209 curve_order = as.vector(pds$curves_order[[i]])) | |
| 210 ordered <- df[df$curve_order, ] | |
| 211 | |
| 212 | |
| 213 layout(cbind(1:2, 1:2), heights = c(7, 1)) | |
| 214 sc3 <- scatterplot3d(ordered[, 1:3], | |
| 215 main = paste("Principal curve of", i), | |
| 216 box = F, pch = 19, type = "l") | |
| 217 sc3$points3d(ordered[, 1:3], box = F, pch = 19, | |
| 218 col = col_score_fun(ordered$PDS)) | |
| 219 | |
| 220 # Plot color scale legend | |
| 221 par(mar = c(5, 3, 0, 3)) | |
| 222 plot(seq(min(ordered$PDS), max(ordered$PDS), length = 100), rep(0, 100), pch = 15, | |
| 223 axes = TRUE, yaxt = "n", xlab = "Color scale of PDS", ylab = "", bty = "n", | |
| 224 col = col_score_fun(seq(min(ordered$PDS), max(ordered$PDS), length = 100))) | |
| 225 | |
| 226 | |
| 227 cols_status <- ifelse(ordered$normal, "blue", "red") | |
| 228 sc3 <- scatterplot3d(ordered[, 1:3], | |
| 229 main = paste("Principal curve of", i), | |
| 230 box = F, pch = "", type = "l") | |
| 231 sc3$points3d(ordered[, 1:3], box = F, | |
| 232 pch = ifelse(ordered$normal, 19, 8), | |
| 233 col = ifelse(ordered$normal, "blue", "red")) | |
| 234 legend("topright", pch = c(19, 8), yjust = 0, | |
| 235 legend = c("normal", "cancer"), | |
| 236 col = c("blue", "red"), cex = 1.1) | |
| 237 | |
| 238 ## annotation for heatmap | |
| 239 sample_status <- data.frame(Status = factor(ifelse(df$normal, "normal", "tumor"))) | |
| 240 rownames(sample_status) <- colnames(exp_matrix_filtered) | |
| 241 color_status_heatmap <- list(Status = c(normal = "blue", tumor = "red")) | |
| 242 } | |
| 243 } else{ | |
| 244 pds <- quantify_pathways_deregulation(exp_matrix_filtered, | |
| 245 allgenes, | |
| 246 gs, | |
| 247 pathwaynames, | |
| 248 maximize_stability = args$max_stability, | |
| 249 attempts = args$attempts, | |
| 250 logfile = args$logfile, | |
| 251 min_std = args$min_std, | |
| 252 min_exp = args$min_exp) | |
| 253 for (i in pathwaynames) { | |
| 254 df <- data.frame(pds$curves[[i]][, 1:3], | |
| 255 PDS = as.numeric(pds$scores[[i]]), | |
| 256 curve_order = as.vector(pds$curves_order[[i]])) | |
| 257 ordered <- df[df$curve_order, ] | |
| 258 | |
| 259 layout(cbind(1:2, 1:2), heights = c(7, 1)) | |
| 260 sc3 <- scatterplot3d(ordered[, 1:3], | |
| 261 main = paste("Principal curve of", i), | |
| 262 box = F, pch = 19, type = "l") | |
| 263 sc3$points3d(ordered[, 1:3], box = F, pch = 19, | |
| 264 col = col_score_fun(ordered$PDS)) | |
| 265 | |
| 266 # Plot color scale legend | |
| 267 par(mar = c(5, 3, 0, 3)) | |
| 268 plot(seq(min(ordered$PDS), max(ordered$PDS), length = 100), rep(0, 100), pch = 15, | |
| 269 axes = TRUE, yaxt = "n", xlab = "Color scale of PDS", ylab = "", bty = "n", | |
| 270 col = col_score_fun(seq(min(ordered$PDS), max(ordered$PDS), length = 100))) | |
| 271 | |
| 272 | |
| 273 ## annotation for heatmap (for the moment none for this situation) | |
| 274 sample_status <- NA | |
| 275 color_status_heatmap <- NA | |
| 276 } | |
| 277 } | |
| 278 | |
| 279 ## Create dataframe from Pathifier list and round score to 4 digits | |
| 280 pds_scores <- mapply(FUN = function(x) cbind(round(x, 4)), pds$scores) | |
| 281 dimnames(pds_scores) <- list(colnames(exp_matrix_filtered), names(pds$scores)) | |
| 282 | |
| 283 ## plot heatmap | |
| 284 if (ncol(pds_scores) > 1) { | |
| 285 pheatmap(t(pds_scores), | |
| 286 main = "Heatmap of Pathway Deregulation Score", # heat map title | |
| 287 cluster_rows = args$heatmap_cluster_pathways, # apply clustering method | |
| 288 cluster_cols = args$heatmap_cluster_cells, # apply clustering method | |
| 289 | |
| 290 #Additional Options | |
| 291 ## Color labeled columns | |
| 292 annotation_col = sample_status, | |
| 293 annotation_colors = color_status_heatmap, | |
| 294 show_rownames = args$heatmap_show_pathway_labels, | |
| 295 show_colnames = args$heatmap_show_cell_labels, | |
| 296 border_color = NA, | |
| 297 legend = TRUE) | |
| 298 } | |
| 299 dev.off() | |
| 300 | |
| 301 | |
| 302 ## write table | |
| 303 write.table(pds_scores, | |
| 304 args$pds, | |
| 305 row.names = T, | |
| 306 col.names = T, | |
| 307 quote = F, | |
| 308 sep = "\t") | |
| 309 | |
| 310 ## write S4 pathifier object | |
| 311 save(pds, file = args$rdata) |
