Mercurial > repos > morinlab > oncoprintplus
view oncoprintplus.R @ 3:1e33e2121edb draft
Uploaded
author | morinlab |
---|---|
date | Mon, 12 Sep 2016 15:46:13 -0400 |
parents | 62e8263df333 |
children |
line wrap: on
line source
suppressMessages(library(BoutrosLab.plotting.general)); suppressMessages(library(dplyr)); ### GET ARGUMENTS ################################################################################## suppressMessages(library(argparse)); parser <- ArgumentParser(description="Create a Gene by Patient Heatmap BPGs multiplot fucntion"); ### VARIANT INPUT ARGUMENTS ######################################################################## parser$add_argument( "--input_snv", "-snv", required="True", help="Input SNV files in MAF format" ); parser$add_argument( "--input_cnv", "-cnv", help="Input CNV files in MAF format" ); ### GENE INPUT ARGUMENTS ########################################################################### parser$add_argument( "--gene_list", "-gl", help="" ); parser$add_argument( "--gene_oncodrive", "-go", help="" ); parser$add_argument( "--gene_mutsig", "-gc", help="" ) ### GENERAL ARGUMENTS ############################################################################## parser$add_argument( "--center_plot", choices=c('impact', 'variant_classification'), required=TRUE, help="What should be displayed in the Center Heatmap?" ); parser$add_argument( "--cancer_genomics_functions_path", "-cgfp", required='True', help="Path to cancer genomics functions in R" ) parser$add_argument( "--output", "-o", required="True", help="Output Image File" ) ### PATIENT ARGUMENTS ############################################################################## parser$add_argument( "--patient_covariate_data", help="Input Patient Covariate Matrix" ); parser$add_argument( "--plot_patient_covariate", action="append", help="Should We Plot the Input Covariate Data?" ); parser$add_argument( "--plot_patient_snv_counts", action='store_true', help="Should We Plot Patient SNV Counts?" ); parser$add_argument( "--plot_patient_snv_distribution", action='store_true', help="Should We Plot Patient SNV Distribution?" ); parser$add_argument( "--patient_order", action="append", help="How Should We Determine Patient Order?" ); ### GENE ARGUMENTS ################################################################################# parser$add_argument( "--plot_gene_snv_counts", action="store_true", help="Should We Plot Gene SNV Counts?" ); parser$add_argument( "--plot_gene_mutsig", action="store_true", help="Should We Plot Gene Significance?" ); parser$add_argument( "--plot_gene_oncodrive", action="store_true", help="How Should We Determine Gene Order?" ); parser$add_argument( "--gene_order", choices=c('mutsig', 'oncodrive', 'snv_counts', 'gene_list'), help="How Should We Determine Gene Order?" ); #################################################################################################### args <- parser$parse_args(); source(paste(args$cancer_genomics_functions_path, "functions.R", sep="/")); ### ORGANIZE GENE DATA ############################################################################# snv_data <- read_vcf2maf_maf(args$input_snv); snv_data$patient snv_counts <- calculate_gene_snv_counts(snv_data); patient_covariate_data <- calculate_patient_snv_counts(snv_data); patient_snv_distribution <- calculate_patient_snv_frequency(snv_data); if (!is.null(args$patient_covariate_data)) { tmp_patient_covariate_data <- read_patient_covariate_data(args$patient_covariate_data); if (!all(rownames(patient_covariate_data) %in% rownames(tmp_patient_covariate_data))) { stop("Some patients identified in the MAF file are not present in patient covariate data file"); } indexes <- which( rownames(patient_covariate_data) %in% rownames(tmp_patient_covariate_data)) current_names <- colnames(patient_covariate_data); patient_covariate_data <- cbind(patient_covariate_data, tmp_patient_covariate_data[rownames(patient_covariate_data)[indexes],-1]); colnames(patient_covariate_data) <- c(current_names, colnames(tmp_patient_covariate_data)[-1]); } ################################## gene_list <- NULL; mutsig <- NULL; mutsig_sig <- NULL; oncodrive <- NULL; oncodrive_sig <- NULL; if (!is.null(args$gene_list)) { gene_list <- read_gene_list(args$gene_list); } if (!is.null(args$gene_mutsig)) { mutsig <- read_mutsigCV_genes(args$gene_mutsig); mutsig_sig <- filter_gene_list(mutsig); }; if (!is.null(args$gene_oncodrive)) { oncodrive <- read_oncodriveFM_genes(args$gene_oncodrive); oncodrive_sig <- filter_gene_list(oncodrive); }; genes_of_interest <- c( gene_list$gene, mutsig_sig$gene, oncodrive_sig$gene ); if (!is.null(args$gene_list)) { genes_of_interest <- gene_list$gene } genes_of_interest <- unique(genes_of_interest); if (is.null(genes_of_interest)) { genes_of_interest <- calculate_gene_snv_counts(snv_data, best=TRUE)$gene } gene_data <- data.frame( gene = genes_of_interest, user = genes_of_interest %in% gene_list, mutsig = genes_of_interest %in% mutsig_sig$gene, oncodrive = genes_of_interest %in% oncodrive_sig$gene, stringsAsFactors = F ); gene_order <- NULL; if (is.null(args$gene_order)) { gene_order <- gene_data$gene; } else if (args$gene_order == 'mutsig') { add_to_end <- NULL; if (!all(gene_data$gene %in% mutsig$gene)) { warning("mutsigCV input file is missing some genes, assuming a p value of 1"); add_to_end <- gene_data$gene[!gene_data$gene %in% mutsig$gene] } gene_order <- c(mutsig$gene[mutsig$gene %in% gene_data$gene], add_to_end); } else if (args$gene_order == 'oncodrive') { add_to_end <- NULL; if (!all(gene_data$gene %in% oncodrive$gene)) { warning("oncodriveFM input file is missing some genes, assuming a p value of 1"); add_to_end <- gene_data$gene[!gene_data$gene %in% oncodrive$gene] } gene_order <- c(oncodrive$gene[oncodrive$gene %in% gene_data$gene], add_to_end); } else if (args$gene_order == 'snv_counts') { add_to_end <- NULL; if ( !all(gene_data$gene %in% snv_counts$gene)) { warning("snv input file is missing some genes, assuming snv count of 0"); add_to_end <- gene_data$gene[!gene_data$gene %in% snv_counts$gene] } gene_order <- c( snv_counts[snv_counts$gene %in% gene_data$gene,]$gene, add_to_end); } patient_order <- NULL; if ( is.null(args$patient_order) ) { patient_order <- unique(snv_data$patient); patient_covariate_data <- patient_covariate_data[patient_order,]; } else { for (covariate in args$patient_order) { print(covariate) print(colnames(patient_covariate_data)) current_order <- order(patient_covariate_data[,covariate]); patient_covariate_data <- patient_covariate_data[current_order,]; } patient_order <- rownames(patient_covariate_data); } ### GENE PLOTS ##################################################################################### gene_plots <- list(); if (args$plot_gene_snv_counts) { plot_data <- data.frame( gene = gene_order, order = length(gene_order):1, counts = log10(snv_counts[gene_order,]$counts) ); max <- ceiling(max(plot_data$counts)) middle <- round(median(plot_data$counts)) plot <- create.barplot( data = plot_data, formula = order ~ counts, plot.horizontal = TRUE, xlimits = c(-0.1, max+0.1), xat = c(0, middle, max), xaxis.lab = c( expression(10^0), bquote(10^.(middle)), bquote(10^.(max)) ), yat = 0 ); gene_plots <- c(gene_plots, list(plot)); } if (args$plot_gene_mutsig) { plot_data <- data.frame( gene = gene_order, order = length(gene_order):1, p.value = -1*log10(mutsig[gene_order,]$p.value), q.value = mutsig[gene_order,]$q.value, stringsAsFactors=F ); if (any( is.infinite(plot_data$p.value) ) ) { plot_data$p.value[is.infinite(plot_data$p.value)] <- max( c( plot_data$p.value[!is.infinite(plot_data$p.value)], 10) ); } plot_data$p.value[is.na(plot_data$p.value)] <- 1 plot_data$q.value[is.na(plot_data$q.value)] <- 1 plot_data$colour = c("black", "grey80")[(plot_data$q.value <= 0.05) + 1] max <- ceiling(max(plot_data$p.value)) middle <- round(median(plot_data$p.value)) plot <- create.barplot( data = plot_data, formula = order ~ p.value, col = rev(plot_data$colour), plot.horizontal = TRUE, xlimits = c(-1, max+1), xat = c(0, middle, max), xaxis.lab = c( expression(10^-0), bquote(10^-.(middle)), bquote(10^-.(max)) ) , yat = 0, ); gene_plots <- c(gene_plots, list(plot)) } if (args$plot_gene_oncodrive) { plot_data <- data.frame( gene = gene_order, order = length(gene_order):1, p.value = -1 * log10(oncodrive[gene_order,]$p.value), q.value = oncodrive[gene_order,]$q.value, stringsAsFactors=F ); plot_data$p.value[is.na(plot_data$p.value)] <- 1 plot_data$q.value[is.na(plot_data$q.value)] <- 1 plot_data$colour = c("black", "grey80")[(plot_data$q.value <= 0.05) + 1] max <- ceiling(max(plot_data$p.value)) middle <- round(median(plot_data$p.value)) plot <- create.barplot( data = plot_data, formula = order ~ p.value, col = rev(plot_data$colour), plot.horizontal = TRUE, xlimits = c(-1, max), xat = c(0, middle, max), xaxis.lab = c( expression(10^-0), bquote(10^-.(middle)), bquote(10^-.(max)) ), yat = 0 ); gene_plots <- c(gene_plots, list(plot)) } ### PATIENT PLOTS ################################################################################## patient_plots <- list(); if (args$plot_patient_snv_counts) { plot_data <- data.frame( patient = patient_order, order = 1:length(patient_order), counts = log10(patient_covariate_data[patient_order,]$snv_counts) ) middle <- round(median(plot_data$counts)) max <- ceiling(max(plot_data$counts)) plot <- create.barplot( data = plot_data, formula = counts ~ order, ylimits = c(-0.1, max+0.1), yaxis.lab = c(expression(10^0), bquote(10^.(middle)), bquote(10^.(max)) ), yat = c(0,middle,max), xat=0 ); patient_plots <- c(patient_plots, list(plot)); } if (args$plot_patient_snv_distribution) { patient_to_order <- data.frame( order = 1:length(patient_order) ); rownames(patient_to_order) <- patient_order; plot_data <- data.frame( patient = patient_snv_distribution$patient, category = patient_snv_distribution$category, counts = patient_snv_distribution$counts, order = patient_to_order[patient_snv_distribution$patient,], colours = rep(c("#0e8efa", "#010101","#fe0000", "#bfbfbf", "#00f100", "#ffbfcb"), by=6), stringsAsFactors=F ); plot <- create.barplot( data = plot_data, formula = counts ~ order, groups = category, stack = TRUE, xat = 0, yaxis.lab = c("0.0", "0.2", "0.4", "0.6", "0.8", "1.0"), ylimits = c(-0.1, 1.1), yat = c(0, .2, .4, .6, .8, 1.0), col = plot_data$colours ); patient_plots <- c(patient_plots, list(plot)); } ### PATIENT COVARIATE PLOT ######################################################################### patient_covariate_plots <- list(); patient_covariate_legend <- list(); if (!is.null(args$plot_patient_covariate)) { cov.data <- patient_covariate_data[patient_order,c('snv_counts', args$plot_patient_covariate)]; cov.colors <- c(); for (cov in args$plot_patient_covariate) { tmp <- paste0(cov, "_col"); if ( any(tmp == colnames(patient_covariate_data)) ) { tmp.colors <- unique(patient_covariate_data[,tmp]); for (col_index in (length(cov.colors)+1):((length(cov.colors)+1)+length(tmp.colors))) { col <- tmp.colors[col_index - length(cov.colors)]; cov.data[which(col == patient_covariate_data[,tmp]), cov] <- col_index; } } cov.colors <- c(cov.colors, tmp.colors); } cov.data <- cov.data[,-1]; covariates <- create.heatmap( x = matrix(as.numeric(as.matrix(cov.data)), ncol=length(args$plot_patient_covariate)), clustering.method = 'none', colour.scheme = as.vector(cov.colors), total.colours = length(cov.colors)+1, row.colour = 'white', col.colour = 'white', grid.row = TRUE, grid.col = TRUE, yaxis.lab = args$plot_patient_covariate, yat = 1:length(args$plot_patient_covariate), xaxis.lab = patient_order, xat = 1:length(patient_order), yaxis.tck = 0, print.colour.key = FALSE ); patient_covariate_plots <- c(patient_covariate_plots, list(covariates)); for ( cov in args$plot_patient_covariate) { legend <- list( legend = list( colours = rev(unique(patient_covariate_data[, paste0(cov, "_col")])), labels = rev(unique(patient_covariate_data[, cov])), border = 'white', title = cov, pch = 15 ) ); patient_covariate_legend <- c(patient_covariate_legend, legend); } } ### CENTRAL PLOT ################################################################################### central_plot <- NULL; if ( args$center_plot == 'impact' ) { plot_data <- matrix( data = rep(0, length(patient_order) * length(gene_order)), nrow = length(gene_order) ) rownames(plot_data) <- rev(gene_order); colnames(plot_data) <- patient_order; for (patient in patient_order) { for (gene in gene_order) { truth <- snv_data$patient == patient & snv_data$gene == gene; if (any(truth)) { if (any(snv_data$impact[truth] == "HIGH")) { plot_data[gene, patient] <- 3; } else if (any(snv_data$impact[truth] == "MODERATE")) { plot_data[gene, patient] <- 2; } else if (any(snv_data$impact[truth] == "LOW")) { plot_data[gene, patient] <- 1; } } } } gene.labs <- rownames(plot_data); patient.labs <- colnames(plot_data); if (!is.null(args$plot_patient_covariate)) { patient.labs <- rep("", length(patient.labs)); } plot <- create.heatmap( t(plot_data), clustering.method = "none", colour.scheme = c('grey95',c('#ffeda0', '#feb24c','#f03b20')), total.colours = 5, xaxis.tck = 0, yaxis.tck = 0, print.colour.key = F, xaxis.lab = patient.labs, yaxis.lab = gene.labs, row.colour = 'white', col.colour = 'white', grid.row = TRUE, grid.col = TRUE ); central_plot <- list(plot); } which_present <- rep(FALSE, 9) if (args$center_plot == 'variant_classification') { plot_data <- matrix( data = rep(0, length(patient_order) * length(gene_order)), nrow = length(gene_order) ) rownames(plot_data) <- rev(gene_order); colnames(plot_data) <- patient_order; order.of.variants <- data.frame( Truncation = 'Truncation', Splicing = 'Splicing', Missense = 'Missense', UTR = 'UTR', Non_coding = "Non_coding", Regulatory = "Regulatory", miRNA = "miRNA", Synonymous = "Synonymous", stringsAsFactors = F ) categories <- recode( snv_data[,]$ensembl_variant_class, # Truncating variants stop_gained = "Truncation", frameshift_variant = "Truncation", stop_lost = "Truncation", start_lost = "Truncation", incomplete_terminal_codon_variant = "Truncation", transcript_ablation = "Truncation", # Missense(-ish) variants missense_variant = "Missense", inframe_deletion = "Missense", inframe_insertion = "Missense", protein_altering_variant = "Missense", # UTR Variants `5_prime_UTR_variant` = "UTR", `3_prime_UTR_variant` = "UTR", # Splicing variants splice_acceptor_variant = "Splicing", splice_donor_variant = "Splicing", splice_region_variant = "Splicing", # Non-coding variants non_coding_transcript_exon_variant = "Non_coding", intron_variant = "Non_coding", upstream_gene_variant = "Non_coding", downstream_gene_variant = "Non_coding", intergenic_variant = "Non_coding", # Regulatory variants TF_binding_site_variant = "Regulatory", regulatory_region_variant = "Regulatory", # Synonymous variants synonymous_variant = "Synonymous", coding_sequence_variant = "Synonymous", stop_retained_variant = "Synonymous", # miRNA variants mature_miRNA_variant = "miRNA", # Default .default = NA_character_) for( gene in gene_order ) { for( patient in patient_order ) { truth <- snv_data$patient == patient & snv_data$gene == gene; if (gene == "SRSF7" && any(truth)) { } if (any(truth)) { vars <- colnames(order.of.variants) %in% categories[truth] if(any(vars)) { plot_data[gene, patient] <- which(vars)[1] which_present[which(vars)[1]] <- TRUE } else { which_present[9] <- TRUE } } } } col_one <- "#512C6F"; col_two <- "#0F6A99"; col_thr <- "#46823C"; col_fou <- "#B367A7"; col_fiv <- "#64B4D5"; col_six <- "#F7D72E"; col_sev <- "#EF922A"; col_eig <- "#B12025"; gene.labs <- rownames(plot_data); patient.labs <- colnames(plot_data); if (!is.null(args$plot_patient_covariate)) { patient.labs <- rep("", length(patient.labs)); } plot <- create.heatmap( t(plot_data), clustering.method = "none", colour.scheme = c('grey95', c(col_one, col_two, col_thr, col_fou, col_fiv, col_six, col_sev, col_eig) ), total.colours = 10, xaxis.tck = 0, yaxis.tck = 0, print.colour.key = F, xaxis.lab = patient.labs, yaxis.lab = gene.labs, row.colour = 'white', col.colour = 'white', grid.row = TRUE, grid.col = TRUE ); central_plot <- list(plot); } ### LEGENDS ######################################################################################## legends <- list(); if (args$plot_patient_snv_distribution ) { legend <- list( legend = list( colours = rev(c("#0e8efa", "#010101","#fe0000", "#bfbfbf", "#00f100", "#ffbfcb" )), labels = rev(expression(C > A,C > G,C > T, T > A, T > C, T > G)), border = 'white', title = 'Mutation Frequencies', pch = 15 ) ); legends <- c(legends, legend); } if (args$plot_gene_mutsig || args$plot_gene_oncodrive) { legend <- list( legend = list( colours = c('grey80', 'black'), labels = expression(Q <= 10^-1, Q > 10^-1), border = 'white', title = 'Q-value Groupings', pch=15 ) ) legends <- c(legends, legend); } if ( args$center_plot == 'impact' ) { legend <- list( legend = list( colours = c('grey95',c('#ffeda0', '#feb24c','#f03b20')), labels = c("None", "Low", "Moderate", "High"), border = 'white', title = 'SNV Impact', pch = 15 ) ) legends <- c(legends, legend); } if ( args$center_plot == 'variant_classification' ) { col_one <- "#512C6F"; col_two <- "#0F6A99"; col_thr <- "#46823C"; col_fou <- "#B367A7"; col_fiv <- "#64B4D5"; col_six <- "#F7D72E"; col_sev <- "#EF922A"; col_eig <- "#B12025"; legend <- list( legend = list( colours = c(c(col_one, col_two, col_thr, col_fou, col_fiv, col_six, col_sev, col_eig), 'grey95' )[which_present], labels = c("Truncation", "Splicing", "Missense", "UTR", "Non-coding", "Regulatory", "miRNA", "Synonymous", "None")[which_present], border = 'white', title = 'SNV Category', pch = 15 ) ) legends <- c(legends, legend); } if (length(patient_covariate_legend) > 0) { legends <- c(legends, patient_covariate_legend); } legend_grob <- legend.grob( legends = legends, title.just = 'left', label.cex = 1.0, title.cex = 1.0 ); ### MULTIPLOT ###################################################################################### plots <- c( patient_covariate_plots, central_plot, gene_plots, patient_plots ); plot.layout <- c(1 + length(gene_plots), 1 + length(patient_plots)); layout.skip <- c(F, rep(F, length(gene_plots)), rep(c(F, rep(T, length(gene_plots))), length(patient_plots))); panel.heights <- c(rep(3, length(patient_plots)),20); panel.widths <- c(20,rep(3, length(gene_plots))); if (length(patient_covariate_plots) > 0) { plot.layout[2] <- plot.layout[2] + 1; layout.skip <- c(F, rep(T, length(gene_plots)), layout.skip); panel.heights <- c(panel.heights, 0.5 * length(args$plot_patient_covariate) ); } create.multiplot( filename = args$output, res = 300, plot.objects = plots, plot.layout = plot.layout, layout.skip = layout.skip, panel.heights = panel.heights, panel.widths = panel.widths, height = 16, width = 16, x.spacing = -.8, y.spacing = -.33 * max(nchar(patient_order)), left.padding = 4, xaxis.cex = .65, yaxis.cex = .9, xaxis.rot = 90, retrieve.plot.labels=T, legend = list( right = list( x = 0.10, y = 0.50, fun = legend_grob ) ), print.new.legend= TRUE );