Mercurial > repos > eschen42 > mqppep_anova

diff mqppep_anova_script.Rmd @ 24:8582a9797c18 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit c9e47049958ea3b12e30b9bd8884d48147c45edd
author: eschen42
date: Thu, 14 Jul 2022 02:12:33 +0000
parents: 61adb8801b73
children: f9cd87ac8006
--- a/mqppep_anova_script.Rmd	Mon Jul 11 13:51:14 2022 +0000
+++ b/mqppep_anova_script.Rmd	Thu Jul 14 02:12:33 2022 +0000
@@ -21,13 +21,13 @@
   inputFile:            "test-data/test_input_for_anova.tabular"
   preprocDb:            "test-data/test_input_for_anova.sqlite"
   kseaAppPrepDb:        !r c(":memory:", "test-data/mqppep.sqlite")[2]
+  regexSampleNames:     "\\.\\d+[A-Z]$"
+  regexSampleGrouping:  "\\d+"
   show_toc:             true
   firstDataColumn:      "^Intensity[^_]"
-  imputationMethod:     !r c("group-median", "median", "mean", "random")[1]
+  imputationMethod:     !r c("group-median", "median", "mean", "random")[4]
   meanPercentile:       1
   sdPercentile:         1.0
-  regexSampleNames:     "\\.\\d+[A-Z]$"
-  regexSampleGrouping:  "\\d+"
   imputedDataFilename:  "test-data/limbo/imputedDataFilename.txt"
   imputedQNLTDataFile:  "test-data/limbo/imputedQNLTDataFile.txt"
   anovaKseaMetadata:    "test-data/limbo/anovaKseaMetadata.txt"
@@ -39,23 +39,36 @@
   intensityHeatmapRows: 75
 ---
 <!--
-  kseaCutoffStatistic:  !r c("p.value", "FDR")[2]
-  kseaCutoffThreshold:  !r c(0.05, 0.1)[1]
-
   alphaFile:            "test-data/alpha_levels.tabular"
   inputFile:            "test-data/test_input_for_anova.tabular"
   preprocDb:            "test-data/test_input_for_anova.sqlite"
   kseaAppPrepDb:        !r c(":memory:", "test-data/mqppep.sqlite")[2]
+  regexSampleNames:     "\\.\\d+[A-Z]$"
+  regexSampleGrouping:  "\\d+"
+
+  alphaFile:            "test-data/alpha_levels.tabular"
+  inputFile:            "test-data/PDX_pST_by_trt.ppep_intensities.ppep_map.preproc_tab.tabular"
+  preprocDb:            "test-data/PDX_pST_by_trt.ppep_intensities.ppep_map.preproc_sqlite.sqlite"
+  kseaAppPrepDb:        !r c(":memory:", "test-data/mqppep.sqlite")[2]
+  regexSampleNames:     "\\.\\w+\\.\\d+[A-Z]$"
+  regexSampleGrouping:  "\\w+"
+
+  kseaCutoffStatistic:  !r c("p.value", "FDR")[2]
+  kseaCutoffThreshold:  !r c(0.05, 0.1)[1]
 
   alphaFile:            "test-data/alpha_levels.tabular"
   inputFile:            "test-data/UT_phospho_ST_sites.preproc.tabular"
   preprocDb:            "test-data/UT_phospho_ST_sites.preproc.sqlite"
   kseaAppPrepDb:        !r c(":memory:", "test-data/UT_phospho_ST_sites.ksea.sqlite")[2]
+  regexSampleNames:     "\\.\\d+[A-Z]$"
+  regexSampleGrouping:  "\\d+"
 
   alphaFile:            "test-data/alpha_levels.tabular"
   inputFile:            "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.tabular"
   preprocDb:            "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.sqlite"
-  kseaAppPrepDb:        !r c(":memory:", "test-data/pST_Sites_NancyDu.ksea.sqlite")[2]
+  kseaAppPrepDb:        !r c(":memory:", "test-data/pY_Sites_NancyDu.ksea.sqlite")[2]
+  regexSampleNames:     "\\.\\d+[A-Z]$"
+  regexSampleGrouping:  "\\d+"
 
   alphaFile:            "test-data/alpha_levels.tabular"
   inputFile:            "test-data/pST_Sites_NancyDu.txt.preproc.tabular"
@@ -668,7 +681,7 @@
 
     k <- k[selector < ksea_cutoff_threshold, ]
 
-    if (nrow(k) > 1) {
+    if (nrow(k) > 0) {
       op <- par(mai = c(1, 1.5, 0.4, 0.4))
       numeric_z_score <- as.numeric(k$z_score)
       z_score_order <- order(numeric_z_score)
@@ -687,13 +700,14 @@
         border = NA,
         xpd = FALSE,
         cex.names = 1.0,
-        cex.axis = 1.0,
         main = long_caption,
         cex.main = my_cex_caption,
         names.arg = kinase_name[z_score_order],
         horiz = TRUE,
         srt = 45,
-        las = 1)
+        las = 1,
+        cex.axis = 0.9
+        )
       par(op)
     }
   }
@@ -852,6 +866,8 @@
 
 # create_breaks is a helper for ksea_heatmap
 create_breaks <- function(merged_scores) {
+  if (sum(!is.na(merged_scores)) < 2)
+    return(NULL)
   if (min(merged_scores, na.rm = TRUE) < -1.6) {
     breaks_neg <- seq(-1.6, 0, length.out = 30)
     breaks_neg <-
@@ -909,39 +925,41 @@
         )
       )
   } else if (nrow(x) < 2) {
-    cat("No plot because matrix x has ", nrow(x), " rows.\n\n")
-    cat("\\begin{verbatim}\n")
-    str(x)
-    cat("\\end{verbatim}\n")
+    cat("No plot because matrix has ", nrow(x), " rows.\n\n")
+    return(FALSE)
   } else if (ncol(x) < 2) {
     cat("No plot because matrix x has ", ncol(x), " columns.\n\n")
-    cat("\\begin{verbatim}\n")
-    str(x)
-    cat("\\end{verbatim}\n")
+    return(FALSE)
   } else {
+    my_limit <- 25
+    my_cex_col <- my_limit / (my_limit + ncol(x))
+    my_cex_row <- my_limit / (my_limit + nrow(x))
+    my_scale <- 12.0
+    if (ncol(x) < 10 && nrow(x) < 10)
+      my_scale <- my_scale * 10 / (10 - nrow(x)) * 10 / (10 - ncol(x))
     gplots::heatmap.2(
       x            = merged_scores,
       Colv         = sample_cluster,
-      scale        = "none",
+      breaks       = color_breaks[[1]],
       cellnote     = merged_asterisk,
+      cexCol       = 0.9 * my_cex_col,
+      cexRow       = 2 * my_cex_row,
+      col          = color_breaks[[2]],
+      density.info = "none",
+      key          = FALSE,
+      lhei         = c(0.4, 8.0, 1.1),
+      lmat         = rbind(c(0, 3), c(2, 1), c(0, 4)),
+      lwid         = c(0.5, 3),
+      margins      = margins,
+      notecex      = my_scale * my_cex_row * my_cex_col,
       notecol      = "white",
-      cexCol       = 0.9,
-      # Heuristically assign size of row labels
-      cexRow       = min(1.0, ((3 * my_cex_row) ^ 1.7) / 2.25),
+      scale        = "none",
       srtCol       = 45,
       srtRow       = 45,
-      notecex      = 3 * my_cex_row,
-      col          = color_breaks[[2]],
-      density.info = "none",
       trace        = "none",
-      breaks       = color_breaks[[1]],
-      lmat         = rbind(c(0, 3), c(2, 1), c(0, 4)),
-      lhei         = c(0.4, 8.0, 1.1),
-      lwid         = c(0.5, 3),
-      key          = FALSE,
-      margins      = margins,
       ...
     )
+    return(TRUE)
   }
 }
 
@@ -993,7 +1011,7 @@
   master <-
     Reduce(
       f = function(...) {
-        base::merge(..., by = "Kinase.Gene", all = FALSE)
+        base::merge(..., by = "Kinase.Gene", all = TRUE)
       },
       x = score_list_m
       )
@@ -1025,8 +1043,8 @@
   names(asterisk_rows) <- all_rows
   non_asterisk_rows <- names(asterisk_rows[asterisk_rows == FALSE])
   asterisk_rows <- names(asterisk_rows[asterisk_rows == TRUE])
-  merged_scores_asterisk <- merged_scores[names(asterisk_rows), ]
-  merged_scores_non_asterisk <- merged_scores[names(non_asterisk_rows), ]
+  merged_scores_asterisk <- merged_scores[names(asterisk_rows), , drop = FALSE]
+  merged_scores_non_asterisk <- merged_scores[names(non_asterisk_rows), , drop = FALSE]
   # end hack to print only significant rows
 
   row_list <- list()
@@ -1036,11 +1054,15 @@
 
   i <- which_kinases
   my_row_names <- row_list[[i]]
-  scrs <- merged_scores[my_row_names, ]
-  stts <- merged_stats[my_row_names, ]
+  scrs <- merged_scores[my_row_names, , drop = FALSE]
+  stts <- merged_stats[my_row_names, , drop = FALSE]
   merged_asterisk <- as.matrix(asterisk(stts, p_cutoff))
 
   color_breaks <- create_breaks(scrs)
+  if (is.null(color_breaks)) {
+    cat("No plot because matrix has too many missing values.\n\n")
+    return(NULL)
+  }
   plot_height <- nrow(scrs) ^ 0.55
   plot_width <- ncol(scrs) ^ 0.7
   my_cex_row <- 0.25 * 16 / plot_height
@@ -1053,7 +1075,7 @@
       pointsize = 14
     )
   }
-  draw_kseaapp_summary_heatmap(
+  did_draw <- draw_kseaapp_summary_heatmap(
     x               = scrs,
     sample_cluster  = sample_cluster,
     merged_asterisk = merged_asterisk,
@@ -1064,12 +1086,14 @@
   if (export == "TRUE") {
     dev.off()
   }
+  if (!did_draw)
+    return(NULL)
   return(my_row_names)
 }
 
 # helper for heatmaps of phosphopeptide intensities
 
-draw_intensity_heatmap <-
+draw_ppep_heatmap <-
   function(
     m,                              # matrix with rownames already formatted
     cutoff,                         # cutoff used by hm_heading_function
@@ -1084,7 +1108,7 @@
     # emit the heading for the heatmap
     if (hm_heading_function(m, cutoff)) {
       peptide_count <- min(max_peptide_count, nrow(m))
-      if (nrow(m) > 1) {
+      if (nrow(m) > 0) {
         m_margin <- m[peptide_count:1, ]
         # Margin setting was heuristically derived
         margins <-
@@ -1092,39 +1116,82 @@
             max(80, sqrt(nchar(rownames(m_margin)))) * 5 / 16  # row
           )
         }
-      if (nrow(m) > 1) {
+      if (nrow(m) > 0) {
+        hm_call <- NULL
         tryCatch(
           {
             old_oma <- par("oma")
             par(cex.main = 0.6)
             # Heuristically determined character size adjustment formula
-            char_contractor <-
+            my_cex_row <-
               250000 / (
                 max(4500, (nchar(rownames(m_margin)))^2) * intensity_hm_rows
                 )
-            heatmap(
-              m[peptide_count:1, ],
-              Rowv = if (suppress_row_dendrogram) NA else NULL,
-              Colv = NA,
-              cexRow = char_contractor,
-              cexCol = char_contractor * 50 / max_peptide_count,
-              scale = "row",
-              margins = margins,
-              main =
-                "Unimputed, unnormalized log(intensities)",
-              xlab = "",
-              las = 1,
-              ...
-              )
+            m_hm <-  m[peptide_count:1, , drop = FALSE]
+            my_limit <- 60
+            my_cex_col <- 0.75 * my_limit / (my_limit + ncol(m_hm))
+            hm_call <- function(x, scaling, title) {
+              heatmap(
+                x,
+                Rowv = if (suppress_row_dendrogram) NA else NULL,
+                Colv = NA,
+                cexRow = my_cex_row,
+                cexCol = my_cex_col,
+                scale = scaling,
+                margins = margins,
+                main = title,
+                xlab = "",
+                las = 1,
+                ...
+                )
+            }
+            if (sum(rowSums(!is.na(m_hm)) < 2))
+              hm_call(
+                m_hm,
+                "none",
+                "log(intensities), unscaled, unimputed, and unnormalized"
+                )
+            else
+              hm_call(
+                m_hm,
+                "row",
+                "log(intensities), row-scaled, unimputed, and unnormalized"
+                )
           },
           error = function(e) {
-            cat(
-              sprintf(
-                "\nCould not draw heatmap, possibly because of too many missing values.  Internal message: %s\n",
-                e$message
+            if (!is.null(hm_call)) {
+              m_hm[is.na(m_hm)] <- 0
+              tryCatch(
+                {
+                  if (nrow(m_hm) > 1)
+                    hm_call(
+                      m_hm,
+                      "none",
+                      paste(
+                        "log(intensities), unscaled, unimputed,",
+                        "NAs zeroed, unnormalized"
+                        )
+                      )
+                  else
+                    cat("\nThere are too few peptides to produce a heatmap.\n")
+                },
+                error = function(r) {
+                  cat(
+                    sprintf(
+                      "\n%s %s Internal message: %s\n",
+                      "Could not draw heatmap,",
+                      "possibly because of too many missing values.",
+                      r$message
+                      )
+                    )
+                }
+              )
+            } else {
+              cat(
+                "\nCould not draw heatmap, possibly because of too many missing values.\n"
                 )
-              )
-            },
+            }
+          },
           finally = par(old_oma)
         )
       }
@@ -1291,9 +1358,9 @@
   )
 ```
 
-# Extract Sample Names and Treatment Levels
-
-Column names parsed from input file are shown in Table 1; sample names and treatment levels, in Table 2.
+# Extract Sample Classes and Names
+
+Column names parsed from input file are shown in Table 1; sample classes and names, in Table 2.
 
 ```{r echo = FALSE, results = 'asis'}
 
@@ -1323,6 +1390,7 @@
   column = seq_len(length(colnames(full_data))),
   name = paste0("\\verb@", colnames(full_data), "@")
   )
+cat("\n\\begin{tiny}\n")
 data_frame_latex(
   x = column_name_df,
   justification = "l l",
@@ -1331,6 +1399,7 @@
   anchor = const_table_anchor_bp,
   underscore_whack = FALSE
   )
+cat("\n\\end{tiny}\n")
 
 ```
 
@@ -1358,17 +1427,29 @@
 sample_treatment_levels <- as.factor(regmatches(sample_name_matches, rx_match))
 number_of_samples <- length(sample_name_matches)
 sample_treatment_df <- data.frame(
-  level = sample_treatment_levels,
+  class = sample_treatment_levels,
+  sample = sample_name_matches
+  )
+# reorder data
+if (TRUE) {
+  my_order <- with(sample_treatment_df, order(class, sample))
+  quant_data <- quant_data[, my_order]
+  sample_name_matches <- sample_name_matches[my_order]
+  sample_treatment_levels <- sample_treatment_levels[my_order]
+}
+sample_treatment_df <- data.frame(
+  class = sample_treatment_levels,
   sample = sample_name_matches
   )
 data_frame_latex(
   x = sample_treatment_df,
   justification = "rp{0.2\\linewidth} lp{0.3\\linewidth}",
   centered = TRUE,
-  caption = "Treatment levels",
+  caption = "Sample classes",
   anchor = const_table_anchor_tbp,
   underscore_whack = FALSE
   )
+sample_name_shrink <- 10 / (10 + max(nchar(sample_name_matches)))
 ```
 
 ```{r echo = FALSE, results = 'asis'}
@@ -1395,7 +1476,8 @@
 # Vertical plot
 boxplot(
   quant_data_log
-, las = 1
+, las = 2
+, cex.axis = 0.9 * sample_name_shrink
 , col = const_boxplot_fill
 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
 , xlab = "Sample"
@@ -1785,7 +1867,8 @@
   colnames(blue_dots) <- sample_name_matches
   boxplot(
       blue_dots
-    , las = 1 # "always horizontal"
+    , las = 2 # "always vertical"
+    , cex.axis = 0.9 * sample_name_shrink
     , col = const_boxplot_fill
     , ylim = ylim
     , main = "Peptide intensities after eliminating unusable peptides"
@@ -1831,6 +1914,8 @@
       ylim = ylim_save,
       main = "Distributions of observed and imputed data",
       sub = "Light blue = observed data; Pink = imputed data",
+      las = 2,
+      cex.axis = 0.9 * sample_name_shrink,
       xlab = "Sample",
       ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
       )
@@ -2043,7 +2128,8 @@
   colnames(quant_data_log) <- sample_name_matches
   boxplot(
     quant_data_log
-  , las = 1
+  , las = 2
+  , cex.axis = 0.9 * sample_name_shrink
   , col = const_boxplot_fill
   , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
   , xlab = "Sample"
@@ -2084,7 +2170,7 @@
 colnames(connect_df) <- c("Phosphopeptide", "Intensity")
 ```
 
-```{r echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
+```{r anova, echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
 count_of_treatment_levels <- length(levels(sample_treatment_levels))
 if (count_of_treatment_levels < 2) {
   nuke_control_sequences <-
@@ -2156,10 +2242,8 @@
   p_value_data_anova_ps_fdr <-
     p.adjust(p_value_data_anova_ps, method = "fdr")
   p_value_data <- data.frame(
-    phosphopeptide = full_data[, 1]
-    ,
-    raw_anova_p = p_value_data_anova_ps
-    ,
+    phosphopeptide = full_data[, 1],
+    raw_anova_p = p_value_data_anova_ps,
     fdr_adjusted_anova_p = p_value_data_anova_ps_fdr
   )
 
@@ -2257,7 +2341,8 @@
         boxplot(
           filtered_data_filtered,
           main = "Imputed, normalized intensities", # no line plot
-          las = 1,
+          las = 2,
+          cex.axis = 0.9 * sample_name_shrink,
           col = const_boxplot_fill,
           ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
         ),
@@ -2309,16 +2394,16 @@
         X = filtered_p$fdr_adjusted_anova_p
         ,
         FUN = function(x) {
-          if (x > 0.0001)
-            paste0("(%0.", 1 + ceiling(-log10(x)), "f) %s")
+          if (x > 0.01)
+            paste0("%s (%0.", 1 + ceiling(-log10(x)), "f)")
           else
-            paste0("(%0.4e) %s")
+            paste0("%s (%0.2e)")
         }
       )
 
       cat_hm_heading <- function(m, cutoff) {
-        cat("\\newpage\n")
         if (nrow(m) > intensity_hm_rows) {
+          cat("\\newpage\n")
           subsection_header(
             paste(
               sprintf("Heatmap for the %d most-significant peptides",
@@ -2327,7 +2412,7 @@
             )
           )
         } else {
-          if (nrow(m) == 1) {
+          if (nrow(m) == 0) {
             return(FALSE)
           } else {
             subsection_header(
@@ -2354,8 +2439,8 @@
           FUN = function(i) {
             sprintf(
               anova_filtered_merge_format[i],
-              filtered_p$fdr_adjusted_anova_p[i],
-              rownames_m[i]
+              rownames_m[i],
+              signif(filtered_p$fdr_adjusted_anova_p[i], 2)
             )
           }
         )
@@ -2363,18 +2448,21 @@
       # draw the heading and heatmap
       if (nrow(m) > 0) {
           number_of_peptides_found <-
-            draw_intensity_heatmap(
+            draw_ppep_heatmap(
               m                       = m,
               cutoff                  = cutoff,
               hm_heading_function     = cat_hm_heading,
-              hm_main_title           = "Unimputed, unnormalized log(intensities)",
+              hm_main_title           =
+                "log(intensities), row-scaled, unimputed, unnormalized",
               suppress_row_dendrogram = FALSE
             )
       }
     }
   }
 }
-cat("\\leavevmode\n\n\n")
+cat("\\leavevmode\n")
+cat("The adjusted ANOVA \\textit{p}-value is shown in parentheses
+  after the phosphopeptide sequence.\n\n")
 ```
 
 ```{r sqlite, echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
@@ -2710,7 +2798,7 @@
         MARGIN = 1, # apply to rows
         FUN = anova_func,
         grouping_factor =
-          as.factor(as.numeric(grouping_factor$level)), # anova_func arg2
+          as.factor(grouping_factor$level), # anova_func arg2
         one_way_f = one_way_two_categories, # anova_func arg3
         simplify = TRUE # TRUE is the default for simplify
         )
@@ -3046,21 +3134,9 @@
   contrast_label <- sprintf("%s -> %s", cntrst_b_level, cntrst_a_level)
   contrast_longlabel <- (
     sprintf(
-      "Trt %s {%s} -> Trt %s {%s}",
+      "Class %s -> Class %s",
       contrast_metadata_df[i_cntrst, "b_level"],
-      gsub(
-        pattern = ";",
-        replacement = ", ",
-        x = contrast_metadata_df[i_cntrst, "b_samples"],
-        fixed = TRUE
-      ),
-      contrast_metadata_df[i_cntrst, "a_level"],
-      gsub(
-        pattern = ";",
-        replacement = ", ",
-        x = contrast_metadata_df[i_cntrst, "a_samples"],
-        fixed = TRUE
-      )
+      contrast_metadata_df[i_cntrst, "a_level"]
     )
   )
   kseaapp_input <-
@@ -3165,9 +3241,11 @@
       # - 3 : non-significant kinases
       which_kinases = which_kinases
     )
-    cat("\\begin{center}\n")
-    cat("Color intensities reflects $z$-score magnitudes; hue reflects $z$-score sign.  Asterisks reflect significance.\n")
-    cat("\\end{center}\n")
+    if (!is.null(plotted_kinases)) {
+      cat("\\begin{center}\n")
+      cat("Color intensity reflects $z$-score magnitudes; hue reflects $z$-score sign.  Asterisks reflect significance.\n")
+      cat("\\end{center}\n")
+    }
   } # end for (i in ...
 } # end if (length ...
 
@@ -3179,21 +3257,9 @@
   contrast_label <- sprintf("%s -> %s", cntrst_b_level, cntrst_a_level)
   contrast_longlabel <- (
     sprintf(
-      "Trt %s {%s} -> Trt %s {%s}",
+      "Class %s -> Class %s",
       contrast_metadata_df[i_cntrst, "b_level"],
-      gsub(
-        pattern = ";",
-        replacement = ", ",
-        x = contrast_metadata_df[i_cntrst, "b_samples"],
-        fixed = TRUE
-      ),
-      contrast_metadata_df[i_cntrst, "a_level"],
-      gsub(
-        pattern = ";",
-        replacement = ", ",
-        x = contrast_metadata_df[i_cntrst, "a_samples"],
-        fixed = TRUE
-      )
+      contrast_metadata_df[i_cntrst, "a_level"]
     )
   )
   main_title <- (
@@ -3233,13 +3299,18 @@
   SELECT
     gene AS kinase,
     ppep,
-    '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label
+    sub_gene,
+    '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label,
+    fdr_adjusted_anova_p
   FROM (
     SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep
       FROM pseudo_ksdata
-      WHERE GENE IN (SELECT kinase FROM enriched_kinases)
-    )
+      WHERE gene IN (SELECT kinase FROM enriched_kinases)
+    ),
+    p_value_data
+  WHERE ppep = phosphopeptide
   GROUP BY ppep
+  ORDER BY fdr_adjusted_anova_p
   ")
 
 # helper used to label per-kinase substrate enrichment figure
@@ -3262,7 +3333,7 @@
       )
     )
   } else {
-    if (nrow(m) == 1) {
+    if (nrow(m) == 0) {
       return(FALSE)
     } else {
       subsection_header(
@@ -3287,7 +3358,7 @@
 }
 
 # Disabling heatmaps for substrates pending decision whether to eliminate them altogether
-if (FALSE)
+if (TRUE)
   for (kinase_name in sort(enriched_kinases$kinase)) {
     enriched_substrates <-
       all_enriched_substrates[
@@ -3295,14 +3366,20 @@
         ,
         drop = FALSE
         ]
+    enriched_substrates$label <- with(
+      enriched_substrates,
+      sprintf(
+        "(%s-%s) %s (%0.2g)",
+        kinase,
+        sub("$FAILED_MATCH_GENE_NAME", "unidentified", sub_gene, fixed = TRUE),
+        ppep,
+        fdr_adjusted_anova_p
+        )
+      )
     # Get the intensity values for the heatmap
     enriched_intensities <-
       as.matrix(unimputed_quant_data_log[enriched_substrates$ppep, , drop = FALSE])
     # Remove rows having too many NA values to be relevant
-    na_counter <- is.na(enriched_intensities)
-    na_counts <- apply(na_counter, 1, sum)
-    enriched_intensities <-
-      enriched_intensities[na_counts < ncol(enriched_intensities) / 2, , drop = FALSE]
     # Rename the rows with the display-name for the heatmap
     rownames(enriched_intensities) <-
       sapply(
@@ -3321,7 +3398,7 @@
       cut_args$statistic <- ksea_cutoff_statistic
       cut_args$threshold <- ksea_cutoff_threshold
       number_of_peptides_found <-
-        draw_intensity_heatmap(
+        draw_ppep_heatmap(
           m                       = m,
           cutoff                  = cut_args,
           hm_heading_function     = cat_enriched_heading,
@@ -3329,6 +3406,25 @@
             = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates",
           suppress_row_dendrogram = FALSE
         )
+        if (number_of_peptides_found > 1) {
+          cat("\\leavevmode\n")
+          cat("The kinase-subsrate pair is shown in parentheses
+            before the phosphopeptide sequence.\n\n")
+          cat("The adjusted ANOVA \\textit{p}-value is shown in parentheses
+            after the phosphopeptide sequence.\n\n")
+        }
+      if (nrow(m) == 1) {
+        cat(
+          sprintf(
+            "\n\nSubstrate is %s,
+            \nphopshopeptide is %s,
+            \n\nand adjusted ANOVA \\textit{p}-value is %0.2g.\n",
+            enriched_substrates[1, "sub_gene"],
+            enriched_substrates[1, "ppep"],
+            enriched_substrates[1, "fdr_adjusted_anova_p"]
+            )
+        )
+      }
     }
   }
 
@@ -3473,7 +3569,11 @@
 param_unlist <- unlist(as.list(params))
 param_df <- data.frame(
   parameter = paste0("\\verb@", names(param_unlist), "@"),
-  value = paste0("\\verb@", gsub("$", "\\$", param_unlist, fixed = TRUE), "@")
+  value = paste0(
+    "\n\\begin{tiny}\n\\verb@",
+    gsub("$", "\\$", param_unlist, fixed = TRUE),
+    "@\n\\end{tiny}"
+    )
   )
 
 data_frame_latex(
author	eschen42
date	Thu, 14 Jul 2022 02:12:33 +0000
parents	61adb8801b73
children	f9cd87ac8006