Mercurial > repos > eschen42 > mqppep_anova

--- a/macros.xml	Tue Mar 15 18:17:55 2022 +0000
+++ b/macros.xml	Tue Mar 22 20:47:40 2022 +0000
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">0.1.3</token>
+    <token name="@TOOL_VERSION@">0.1.4</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <xml name="requirements">
         <requirements>
--- a/mqppep_anova.R	Tue Mar 15 18:17:55 2022 +0000
+++ b/mqppep_anova.R	Tue Mar 22 20:47:40 2022 +0000
@@ -1,207 +1,243 @@
-#!/usr/bin/env Rscript
-# libraries
-library(optparse)
-library(data.table)
-library(stringr)
-# bioconductor-preprocesscore
-#  - libopenblas
-#  - r-data.table
-#  - r-rmarkdown
-#  - r-ggplot2
-#  - texlive-core
-
-# ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
-
-# parse options
-option_list <- list(
-  make_option(
-    c("-i", "--inputFile"),
-    action = "store",
-    default = NA,
-    type = "character",
-    help = "Phosphopeptide Intensities sparse input file path"
-  ),
-  make_option(
-    c("-a", "--alphaFile"),
-    action = "store",
-    default = NA,
-    type = "character",
-    help = paste0("List of alpha cutoff values for significance testing;",
-             " path to text file having one column and no header")
-  ),
-  make_option(
-    c("-f", "--firstDataColumn"),
-    action = "store",
-    default = "10",
-    type = "character",
-    help = "First column of intensity values"
-  ),
-  make_option(
-    c("-m", "--imputationMethod"),
-    action = "store",
-    default = "group-median",
-    type = "character",
-    help = paste0("Method for missing-value imputation,",
-             " one of c('group-median','median','mean','random')")
-  ),
-  make_option(
-    c("-p", "--meanPercentile"),
-    action = "store",
-    default = 3,
-    type = "integer",
-    help = paste0("Mean percentile for randomly generated imputed values;",
-              ", range [1,99]")
-  ),
-  make_option(
-    c("-d", "--sdPercentile"),
-    action = "store",
-    default = 3,
-    type = "double",
-    help = paste0("Adjustment value for standard deviation of",
-              " randomly generated imputed values; real")
-  ),
-  make_option(
-    c("-s", "--regexSampleNames"),
-    action = "store",
-    default = "\\.(\\d+)[A-Z]$",
-    type = "character",
-    help = "Regular expression extracting sample-names"
-  ),
-  make_option(
-    c("-g", "--regexSampleGrouping"),
-    action = "store",
-    default = "(\\d+)",
-    type = "character",
-    help = paste0("Regular expression extracting sample-group",
-             " from an extracted sample-name")
-  ),
-  make_option(
-    c("-o", "--imputedDataFile"),
-    action = "store",
-    default = "output_imputed.tsv",
-    type = "character",
-    help = "Imputed Phosphopeptide Intensities output file path"
-  ),
-  make_option(
-    c("-r", "--reportFile"),
-    action = "store",
-    default = "QuantDataProcessingScript.html",
-    type = "character",
-    help = "HTML report file path"
-  )
-)
-args <- parse_args(OptionParser(option_list = option_list))
-
-# Check parameter values
-
-if (! file.exists(args$inputFile)) {
-  stop((paste("Input file", args$inputFile, "does not exist")))
-}
-input_file <- args$inputFile
-alpha_file <- args$alphaFile
-first_data_column <- args$firstDataColumn
-imputation_method <- args$imputationMethod
-mean_percentile <- args$meanPercentile
-sd_percentile <- args$sdPercentile
-
-regex_sample_names    <- gsub("^[ \t\n]*", "",
-                         readChar(args$regexSampleNames,  1000)
-                       )
-regex_sample_names    <- gsub("[ \t\n]*$", "",
-                         regex_sample_names
-                       )
-cat(regex_sample_names)
-cat("\n")
-
-regex_sample_grouping <- gsub("^[ \t\n]*", "",
-                           readChar(args$regexSampleGrouping, 1000)
-                         )
-regex_sample_grouping <- gsub("[ \t\n]*$", "",
-                           regex_sample_grouping
-                         )
-cat(regex_sample_grouping)
-cat("\n")
-
-imputed_data_file_name <- args$imputedDataFile
-report_file_name <- args$reportFile
-
-print("args is:")
-cat(str(args))
-
-print("regex_sample_names is:")
-cat(str(regex_sample_names))
-
-print("regex_sample_grouping is:")
-cat(str(regex_sample_grouping))
-
-# from: https://github.com/molgenis/molgenis-pipelines/wiki/
-#   How-to-source-another_file.R-from-within-your-R-script
-# Function location_of_this_script returns the location of this .R script
-#   (may be needed to source other files in same dir)
-location_of_this_script <- function() {
-    this_file <- NULL
-    # This file may be 'sourced'
-    for (i in - (1:sys.nframe())) {
-        if (identical(sys.function(i), base::source)) {
-            this_file <- (normalizePath(sys.frame(i)$ofile))
-        }
-    }
-
-    if (!is.null(this_file)) return(dirname(this_file))
-
-    # But it may also be called from the command line
-    cmd_args <- commandArgs(trailingOnly = FALSE)
-    cmd_args_trailing <- commandArgs(trailingOnly = TRUE)
-    cmd_args <- cmd_args[
-      seq.int(
-        from = 1,
-        length.out = length(cmd_args) - length(cmd_args_trailing)
-        )
-      ]
-    res <- gsub("^(?:--file=(.*)|.*)$", "\\1", cmd_args)
-
-    # If multiple --file arguments are given, R uses the last one
-    res <- tail(res[res != ""], 1)
-    if (0 < length(res)) return(dirname(res))
-
-    # Both are not the case. Maybe we are in an R GUI?
-    return(NULL)
-}
-
-script_dir <-  location_of_this_script()
-
-rmarkdown_params <- list(
-    inputFile = input_file
-  , alphaFile = alpha_file
-  , firstDataColumn = first_data_column
-  , imputationMethod = imputation_method
-  , meanPercentile = mean_percentile
-  , sdPercentile = sd_percentile
-  , regexSampleNames = regex_sample_names
-  , regexSampleGrouping = regex_sample_grouping
-  , imputedDataFilename = imputed_data_file_name
-  )
-
-str(rmarkdown_params)
-
-# BUG
-# Must render as HTML for the time being until this issue is resolved:
-#   https://github.com/conda-forge/texlive-core-feedstock/issues/19
-# for reason:
-#   "The following dependencies are not available in conda"
-# reported here:
-#   https://github.com/ami-iit/bipedal-locomotion-framework/pull/457
-
-# freeze the random number generator so the same results will be produced
-#  from run to run
-set.seed(28571)
-
-
-library(tinytex)
-tinytex::install_tinytex()
-rmarkdown::render(
-  input = paste(script_dir, "mqppep_anova_script.Rmd", sep = "/")
-, output_format = rmarkdown::pdf_document()
-, output_file = report_file_name
-, params = rmarkdown_params
-)
+#!/usr/bin/env Rscript
+# libraries
+library(optparse)
+library(data.table)
+library(stringr)
+# bioconductor-preprocesscore
+#  - libopenblas
+#  - r-data.table
+#  - r-rmarkdown
+#  - r-ggplot2
+#  - texlive-core
+
+# ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
+
+# parse options
+option_list <- list(
+  make_option(
+    c("-i", "--inputFile"),
+    action = "store",
+    default = NA,
+    type = "character",
+    help = "Phosphopeptide Intensities sparse input file path"
+  ),
+  make_option(
+    c("-a", "--alphaFile"),
+    action = "store",
+    default = NA,
+    type = "character",
+    help = paste0("List of alpha cutoff values for significance testing;",
+             " path to text file having one column and no header")
+  ),
+  make_option(
+    c("-f", "--firstDataColumn"),
+    action = "store",
+    default = "10",
+    type = "character",
+    help = "First column of intensity values"
+  ),
+  make_option(
+    c("-m", "--imputationMethod"),
+    action = "store",
+    default = "random",
+    type = "character",
+    help = paste0("Method for missing-value imputation,",
+             " one of c('group-median','median','mean','random')")
+  ),
+  make_option(
+    c("-p", "--meanPercentile"),
+    action = "store",
+    default = 3,
+    type = "integer",
+    help = paste0("Mean percentile for randomly generated imputed values;",
+              ", range [1,99]")
+  ),
+  make_option(
+    c("-d", "--sdPercentile"),
+    action = "store",
+    default = 3,
+    type = "double",
+    help = paste0("Adjustment value for standard deviation of",
+              " randomly generated imputed values; real")
+  ),
+  make_option(
+    c("-s", "--regexSampleNames"),
+    action = "store",
+    default = "\\.(\\d+)[A-Z]$",
+    type = "character",
+    help = "Regular expression extracting sample-names"
+  ),
+  make_option(
+    c("-g", "--regexSampleGrouping"),
+    action = "store",
+    default = "(\\d+)",
+    type = "character",
+    help = paste0("Regular expression extracting sample-group",
+             " from an extracted sample-name")
+  ),
+  make_option(
+    c("-o", "--imputedDataFile"),
+    action = "store",
+    default = "output_imputed.tsv",
+    type = "character",
+    help = "Imputed Phosphopeptide Intensities output file path"
+  ),
+  make_option(
+    c("-n", "--imputedQNLTDataFile"),
+    action = "store",
+    default = "output_imp_qn_lt.tsv",
+    type = "character",
+    help =
+      paste(
+        "Imputed, Quantile-Normalized Log-Transformed Phosphopeptide",
+        "Intensities output file path"
+        )
+  ),
+  make_option(
+    c("-r", "--reportFile"),
+    action = "store",
+    default = "QuantDataProcessingScript.html",
+    type = "character",
+    help = "HTML report file path"
+  )
+)
+args <- parse_args(OptionParser(option_list = option_list))
+print("args is:")
+cat(str(args))
+
+# Check parameter values
+
+if (! file.exists(args$inputFile)) {
+  stop((paste("Input file", args$inputFile, "does not exist")))
+}
+input_file <- args$inputFile
+alpha_file <- args$alphaFile
+first_data_column <- args$firstDataColumn
+imputation_method <- args$imputationMethod
+print(
+  grepl(
+    pattern = imputation_method,
+    x = c("group-median", "median", "mean", "random")
+    )
+  )
+
+if (
+  sum(
+    grepl(
+      pattern = imputation_method,
+      x = c("group-median", "median", "mean", "random")
+      )
+    ) < 1
+  ) {
+    print(sprintf("bad imputationMethod argument: %s", imputation_method))
+    return(-1)
+    }
+
+mean_percentile <- args$meanPercentile
+print("mean_percentile is:")
+cat(str(mean_percentile))
+
+sd_percentile <- args$sdPercentile
+print("sd_percentile is:")
+cat(str(mean_percentile))
+
+
+regex_sample_names    <- gsub("^[ \t\n]*", "",
+                         readChar(args$regexSampleNames,  1000)
+                       )
+regex_sample_names    <- gsub("[ \t\n]*$", "",
+                         regex_sample_names
+                       )
+cat(regex_sample_names)
+cat("\n")
+
+regex_sample_grouping <- gsub("^[ \t\n]*", "",
+                           readChar(args$regexSampleGrouping, 1000)
+                         )
+regex_sample_grouping <- gsub("[ \t\n]*$", "",
+                           regex_sample_grouping
+                         )
+cat(regex_sample_grouping)
+cat("\n")
+
+imputed_data_file_name <- args$imputedDataFile
+imp_qn_lt_data_filenm <-  args$imputedQNLTDataFile
+report_file_name <- args$reportFile
+
+print("regex_sample_names is:")
+cat(str(regex_sample_names))
+
+print("regex_sample_grouping is:")
+cat(str(regex_sample_grouping))
+
+# from: https://github.com/molgenis/molgenis-pipelines/wiki/
+#   How-to-source-another_file.R-from-within-your-R-script
+# Function location_of_this_script returns the location of this .R script
+#   (may be needed to source other files in same dir)
+location_of_this_script <- function() {
+    this_file <- NULL
+    # This file may be 'sourced'
+    for (i in - (1:sys.nframe())) {
+        if (identical(sys.function(i), base::source)) {
+            this_file <- (normalizePath(sys.frame(i)$ofile))
+        }
+    }
+
+    if (!is.null(this_file)) return(dirname(this_file))
+
+    # But it may also be called from the command line
+    cmd_args <- commandArgs(trailingOnly = FALSE)
+    cmd_args_trailing <- commandArgs(trailingOnly = TRUE)
+    cmd_args <- cmd_args[
+      seq.int(
+        from = 1,
+        length.out = length(cmd_args) - length(cmd_args_trailing)
+        )
+      ]
+    res <- gsub("^(?:--file=(.*)|.*)$", "\\1", cmd_args)
+
+    # If multiple --file arguments are given, R uses the last one
+    res <- tail(res[res != ""], 1)
+    if (0 < length(res)) return(dirname(res))
+
+    # Both are not the case. Maybe we are in an R GUI?
+    return(NULL)
+}
+
+script_dir <-  location_of_this_script()
+
+rmarkdown_params <- list(
+    inputFile = input_file
+  , alphaFile = alpha_file
+  , firstDataColumn = first_data_column
+  , imputationMethod = imputation_method
+  , meanPercentile = mean_percentile
+  , sdPercentile = sd_percentile
+  , regexSampleNames = regex_sample_names
+  , regexSampleGrouping = regex_sample_grouping
+  , imputedDataFilename = imputed_data_file_name
+  , imputedQNLTDataFile = imp_qn_lt_data_filenm
+  )
+
+print("rmarkdown_params")
+str(rmarkdown_params)
+
+# freeze the random number generator so the same results will be produced
+#  from run to run
+set.seed(28571)
+
+# BUG (or "opportunity")
+# To render as PDF for the time being requires installing the conda
+# package `r-texlive` until this issue in `texlive-core` is resolved:
+#   https://github.com/conda-forge/texlive-core-feedstock/issues/19
+# This workaround is detailed in the fourth comment of:
+#   https://github.com/conda-forge/texlive-core-feedstock/issues/61
+
+library(tinytex)
+tinytex::install_tinytex()
+rmarkdown::render(
+  input = paste(script_dir, "mqppep_anova_script.Rmd", sep = "/")
+, output_format = rmarkdown::pdf_document(toc = TRUE)
+, output_file = report_file_name
+, params = rmarkdown_params
+)
--- a/mqppep_anova.xml	Tue Mar 15 18:17:55 2022 +0000
+++ b/mqppep_anova.xml	Tue Mar 22 20:47:40 2022 +0000
@@ -19,21 +19,24 @@
       cd \$TEMP;
       cp '$__tool_directory__/mqppep_anova_script.Rmd' . || exit 0;
       cp '$__tool_directory__/mqppep_anova.R' . || exit 0;
-      \${CONDA_PREFIX}/bin/Rscript \$TEMP/mqppep_anova.R
-      --inputFile '$input_file'
-      --alphaFile $alpha_file
-      --firstDataColumn $first_data_column
-      --imputationMethod $imputation.imputation_method
-      #if '$imputation_method' == 'random':
-        --meanPercentile '$meanPercentile'
-        --sdPercentile   '$sdPercentile'
-      #end if
-      --regexSampleNames $sample_names_regex_f
-      --regexSampleGrouping $sample_grouping_regex_f
-      --imputedDataFile $imputed_data_file
-      --reportFile $report_file;
+      \${CONDA_PREFIX}/bin/Rscript \$TEMP/mqppep_anova.R
+        --inputFile '$input_file'
+        --alphaFile '$alpha_file'
+        --firstDataColumn $first_data_column
+        --imputationMethod $imputation.imputation_method
+        #if $imputation.imputation_method == "random"
+          --meanPercentile '$imputation.meanPercentile'
+          --sdPercentile   '$imputation.sdPercentile'
+        #end if
+        --regexSampleNames $sample_names_regex_f
+        --regexSampleGrouping $sample_grouping_regex_f
+        --imputedDataFile $imputed_data_file
+        --imputedQNLTDataFile '$imp_qn_lt_file'
+        --reportFile '$report_file';
+      export RESULT=\$?;
       cd \${OLD_PWD};
-      rm -rf \$HOME
+      rm -rf \$HOME;
+      exit \${RESULT}
     ]]></command>
     <configfiles>
       <configfile name="sample_names_regex_f">
@@ -98,7 +101,8 @@
         </param>
     </inputs>
     <outputs>
-        <data name="imputed_data_file" format="tabular" label="${input_file.name}.${imputation.imputation_method}-imputed_QN_LT_intensities" ></data>
+        <data name="imputed_data_file" format="tabular" label="${input_file.name}.${imputation.imputation_method}-imputed_intensities" ></data>
+        <data name="imp_qn_lt_file" format="tabular" label="${input_file.name}.${imputation.imputation_method}-imputed_QN_LT_intensities" ></data>
         <!--
         <data name="report_file" format="html" label="${input_file.name}.${imputation.imputation_method}-imputed_report (download/unzip to view)" ></data>
         -->
@@ -112,7 +116,7 @@
             <param name="imputation_method" value="group-median"/>
             <param name="sample_names_regex" value="\.\d+[A-Z]$"/>
             <param name="sample_grouping_regex" value="\d+"/>
-            <output name="imputed_data_file">
+            <output name="imp_qn_lt_file">
                 <assert_contents>
                     <has_text text="Phosphopeptide" />
                     <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
@@ -130,11 +134,11 @@
             <param name="sdPercentile" value="0.2" />
             <param name="sample_names_regex" value="\.\d+[A-Z]$"/>
             <param name="sample_grouping_regex" value="\d+"/>
-            <output name="imputed_data_file">
+            <output name="imp_qn_lt_file">
                 <assert_contents>
                     <has_text text="Phosphopeptide" />
                     <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
-                    <has_text text="0.82258" />
+                    <has_text text="8.392287" />
                     <has_text text="pSQKQEEENPAEETGEEK" />
                 </assert_contents>
             </output>
@@ -192,11 +196,14 @@

 **Outputs**

-``intensities_*-imputed_QN_LT``
+``imputed_intensities``
+  Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, in tabular format.
+
+``imputed_QN_LT_intensities``
   Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, quantile-normalized (**QN**), and log10-transformed (**LT**), in tabular format.

 ``report_file``
-  Summary report for normalization, imputation, and ANOVA, in PDF format.
+  Summary report for normalization, imputation, and **ANOVA**, in PDF format.

 **Authors**
--- a/mqppep_anova_script.Rmd	Tue Mar 15 18:17:55 2022 +0000
+++ b/mqppep_anova_script.Rmd	Tue Mar 22 20:47:40 2022 +0000
@@ -1,24 +1,50 @@
 ---
 title: "MaxQuant Phospho-Proteomic Enrichment Pipeline ANOVA"
 author: "Larry Cheng; Art Eschenlauer"
-date: "May 28, 2018; Nov 16, 2021"
+date: "May 28, 2018; Mar 16, 2022"
 output:
-  pdf_document: default
+  pdf_document:
+    toc: true
+  latex_document:
+    toc: true
 params:
   inputFile: "test-data/test_input_for_anova.tabular"
   alphaFile: "test-data/alpha_levels.tabular"
   firstDataColumn: "Intensity"
   imputationMethod: !r c("group-median", "median", "mean", "random")[1]
   meanPercentile: 1
-  sdPercentile: 0.2
+  sdPercentile: 1.0
   regexSampleNames: "\\.\\d+[A-Z]$"
   regexSampleGrouping: "\\d+"
-  imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt"
+  imputedDataFilename: "test-data/imputedDataFilename.txt"
+  imputedQNLTDataFile: "test-data/imputedQNLTDataFile.txt"
+  show_toc: true
 ---
+<!--
+  latex_document: default
+  inputFile: "test-data/test_input_for_anova.tabular"
+  inputFile: "test-data/density_failure.preproc_tab.tabular"
+  inputFile: "test-data/UT_Phospho_STY_Sites.preproc_tab"
+date: "May 28, 2018; Mar 16, 2022"
+-->
 ```{r setup, include = FALSE}
 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10))

+# freeze the random number generator so the same results will be produced
+#  from run to run
+set.seed(28571)
+
+### CONSTANTS
+
+const_parfin <- par("fin")
+const_boxplot_fill <- "grey94"
+const_stripchart_cex <- 0.5
+const_stripsmall_cex <-
+  sqrt(const_stripchart_cex * const_stripchart_cex / 2)
+const_stripchart_jitter <- 0.3
+const_write_debug_files <- FALSE
+
 ### FUNCTIONS

 #ANOVA filter function
@@ -27,9 +53,155 @@
   pvalue <- summary(x_aov)[[1]][["Pr(>F)"]][1]
   pvalue
 }
+
+write_debug_file <- function(s) {
+  if (const_write_debug_files) {
+    s_path <- sprintf("test-data/%s.txt", deparse(substitute(s)))
+    write.table(
+      s,
+      file = s_path,
+      sep = "\t",
+      col.names = TRUE,
+      row.names = TRUE,
+      quote = FALSE
+    )
+  }
+}
+
+latex_collapsed_vector <- function(collapse_string, v) {
+  cat(
+    paste0(
+      gsub("_", "\\\\_", v),
+      collapse = collapse_string
+      )
+    )
+}
+
+latex_itemized_collapsed <- function(collapse_string, v) {
+  cat("\\begin{itemize}\n\\item ")
+  latex_collapsed_vector(collapse_string, v)
+  cat("\n\\end{itemize}\n")
+}
+
+latex_itemized_list <- function(v) {
+  latex_itemized_collapsed("\n\\item ", v)
+}
+
+latex_enumerated_collapsed <- function(collapse_string, v) {
+  cat("\\begin{enumerate}\n\\item ")
+  latex_collapsed_vector(collapse_string, v)
+  cat("\n\\end{enumerate}\n")
+}
+
+latex_enumerated_list <- function(v) {
+  latex_enumerated_collapsed("\n\\item ", v)
+}
+
+latex_table_row <- function(v) {
+  latex_collapsed_vector(" & ", v)
+  cat(" \\\\\n")
+}
+
+# Use this like print.data.frame, from which it is adapted:
+print_data_frame_latex <-
+  function(
+    x,
+    ...,
+    # digits to pass to format.data.frame
+    digits = NULL,
+    # TRUE -> right-justify columns; FALSE -> left-justify
+    right = TRUE,
+    # maximumn number of rows to print
+    max = NULL,
+    # string with justification of each column
+    justification = NULL,
+    # TRUE to center on page
+    centered = FALSE,
+    # optional capttion
+    caption = NULL,
+    # h(inline); b(bottom); t (top) or p (separate page)
+    anchor = "h"
+  ) {
+    if (is.null(justification))
+      justification <-
+        Reduce(
+          f = paste,
+          x = rep_len(if (right) "r" else "l", length(colnames(x)))
+          )
+    n <- length(rownames(x))
+    if (length(x) == 0L) {
+      cat(
+        sprintf(
+          # if n is one, use singular 'row', else use plural 'rows'
+          ngettext(
+            n,
+            "data frame with 0 columns and %d row",
+            "data frame with 0 columns and %d rows"
+            ),
+          n
+          ),
+        "\n",
+        sep = ""
+        )
+    }
+    else if (n == 0L) {
+      cat("0 rows for:\n")
+      latex_itemized_list(names(x))
+    }
+    else {
+      if (is.null(max))
+        max <- getOption("max.print", 99999L)
+      if (!is.finite(max))
+        stop("invalid 'max' / getOption(\"max.print\"): ",
+          max)
+      omit <- (n0 <- max %/% length(x)) < n
+      m <- as.matrix(
+        format.data.frame(
+          if (omit) x[seq_len(n0), , drop = FALSE] else x,
+          digits = digits,
+          na.encode = FALSE
+          )
+        )
+      cat(
+        # h(inline); b(bottom); t (top) or p (separate page)
+        paste0("\\begin{table}[", anchor, "]\n")
+        )
+      if (!is.null(caption))
+        cat(paste0(" \\caption{", caption, "}"))
+      if (centered) cat("\\centering\n")
+      cat(
+        paste(
+          " \\begin{tabular}{",
+          justification,
+          "}\n",
+          sep = ""
+          )
+        )
+      if (!is.null(caption))
+        cat(" \\hline\\hline\n")
+      latex_table_row(colnames(m))
+      cat("\\hline\n")
+      for (i in seq_len(length(m[, 1]))) {
+        latex_table_row(m[i, ])
+      }
+      cat(
+        paste(
+          " \\end{tabular}",
+          "\\end{table}",
+          sep = "\n"
+          )
+        )
+      if (omit)
+        cat(" [ reached 'max' / getOption(\"max.print\") -- omitted",
+          n - n0, "rows ]\n")
+    }
+    invisible(x)
+  }
+
 ```

 ## Purpose:
+
 Perform imputation of missing values, quantile normalization, and ANOVA.

 <!--
@@ -58,6 +230,7 @@

 #Imputed Data filename
 imputed_data_filename <- params$imputedDataFilename
+imp_qn_lt_data_filenm <- params$imputedQNLTDataFile

 #ANOVA data filename
 ```
@@ -78,7 +251,7 @@
 regex_sample_names <- params$regexSampleNames

 # Regular expression to extract Sample Grouping from Sample Name;
-#   if error occurs, compare sample_factor_levels and temp_matches
+#   if error occurs, compare sample_factor_levels and sample_name_matches
 #   to see if groupings/pairs line up
 #   e.g., "(\\d+)"
 regex_sample_grouping <- params$regexSampleGrouping
@@ -101,25 +274,32 @@
   )
 ```

-### Column names from input file
-
-```{r echo = FALSE, results = 'markup'}
-print(colnames(full_data))
-data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE)
-cat(sprintf("First data column:  %d\n", min(data_column_indices)))
-cat(sprintf("Last data column:   %d\n", max(data_column_indices)))
-```
+### Parse column names, sample names, and factor levels from input file

 ```{r echo = FALSE, results = 'asis'}
-cat("\\newpage\n")
-```
+# Write column naames as an enumerated list.
+column_name_df <- data.frame(
+  column = seq_len(length(colnames(full_data))),
+  name = colnames(full_data)
+  )
+print_data_frame_latex(
+  x = column_name_df,
+  justification = "l l",
+  centered = TRUE,
+  caption = "Input data column name",
+  anchor = "h"
+  )

-### Checking that log-transformed sample distributions are similar:
-
-```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}
+data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE)
+cat(
+  sprintf(
+    "\n\nData columns: [%d,%d]\n\n",
+    min(data_column_indices),
+    max(data_column_indices)
+    )
+  )

 if (FALSE == fdc_is_integer) {
-
   if (length(data_column_indices) > 0) {
     first_data_column <- data_column_indices[1]
   } else {
@@ -127,45 +307,114 @@
   }
 }

-quant_data0 <- full_data[first_data_column:length(full_data)]
+```
+
+```{r echo = FALSE, results = 'asis'}
+#```{r echo = FALSE, results = 'asis'}
 quant_data <- full_data[first_data_column:length(full_data)]
+quant_data[quant_data == 0] <- NA
+rownames(quant_data) <- full_data$Phosphopeptide
+# Get factors -> group replicates (as indicated by terminal letter)
+#   by the preceding digits;
+# Assuming that regex_sample_names <- "\\.(\\d+)[A-Z]$"
+#   get factors ->
+#      group runs (samples) by ignoring terminal [A-Z] in sample names
+#   e.g.
+#     group .1A .1B .1C into group 1;
+#     group .2A .2B .2C, into group 2;
+#     etc.
+m <- regexpr(regex_sample_names, colnames(quant_data), perl = TRUE)
+sample_name_matches <- regmatches(names(quant_data), m)
+colnames(quant_data) <- sample_name_matches
+
+write_debug_file(quant_data)
+
+m2 <- regexpr(regex_sample_grouping, sample_name_matches, perl = TRUE)
+sample_factor_levels <- as.factor(regmatches(sample_name_matches, m2))
+number_of_samples <- length(sample_name_matches)
+sample_factor_df <- data.frame(
+  sample = sample_name_matches,
+  level = sample_factor_levels
+  )
+print_data_frame_latex(
+  x = sample_factor_df,
+  justification = "c c",
+  centered = TRUE,
+  caption = "Factor level",
+  anchor = "h"
+  )
+```
+```{r echo = FALSE, results = 'asis'}
+cat("\\newpage\n")
+```
+
+### Are the log-transformed sample distributions similar?
+
+```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}
+
 quant_data[quant_data == 0] <- NA  #replace 0 with NA
 quant_data_log <- log10(quant_data)

-rownames(quant_data_log) <- full_data$Phosphopeptide
+rownames(quant_data_log) <- rownames(quant_data)
+colnames(quant_data_log) <- sample_name_matches
+
+write_debug_file(quant_data_log)

 # data visualization
 old_par <- par(
   mai = par("mai") + c(0.5, 0, 0, 0)
 )
+# ref: https://r-charts.com/distribution/add-points-boxplot/
+# Vertical plot
 boxplot(
   quant_data_log
-, las = 2
+, las = 1
+, col = const_boxplot_fill
 )
+# Points
+stripchart(
+  quant_data_log, # Data
+  method = "jitter",          # Random noise
+  jitter = const_stripchart_jitter,
+  pch = 19,                   # Pch symbols
+  cex = const_stripchart_cex, # Size of symbols reduced
+  col = "goldenrod",          # Color of the symbol
+  vertical = TRUE,            # Vertical mode
+  add = TRUE                  # Add it over
+  )
 par(old_par)


-cat("\\newline\n")
-cat("\\newline\n")
+cat("\n\n\n")
+cat("\n\n\n")

 ```

-```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4), warning = FALSE}
-quant_data_log_stack <- stack(quant_data_log)
+```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4), results = 'asis'}
 library(ggplot2)
-ggplot(
-  quant_data_log_stack,
-  aes(x = values)) + geom_density(aes(group = ind, colour = ind))
+if (nrow(quant_data_log) > 1) {
+  quant_data_log_stack <- stack(quant_data_log)
+  ggplot(
+    quant_data_log_stack,
+    aes(x = values)
+    ) +
+    geom_density(
+      aes(group = ind, colour = ind),
+      na.rm = TRUE
+    )
+} else {
+  cat("No density plot because there are too few peptides.\n\n")
+}
 ```

-### Globally, are phosphopeptide intensities are approximately unimodal?
+### Globally, are peptide intensities are approximately unimodal?

 <!--
 # ref for bquote below particularly and plotting math expressions generally:
 #   https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/
 -->
-```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5)}
+```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5), results = 'asis'}

 # identify the location of missing values
 fin <- is.finite(as.numeric(as.matrix(quant_data_log)))
@@ -185,92 +434,63 @@
 )
 ```

-### Distribution of standard deviations of phosphopeptides, ignoring missing values:
+### Distribution of standard deviations of $log_{10}(\text{intensity})$, ignoring missing values:

-```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5)}
+```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5), results = 'asis'}
 # determine quantile
 q1 <- quantile(logvalues, probs = mean_percentile)[1]

 # determine standard deviation of quantile to impute
 sd_finite <- function(x) {
   ok <- is.finite(x)
-  sd(x[ok]) * sd_percentile
+  sd(x[ok])
 }
 # 1 = row of matrix (ie, phosphopeptide)
 sds <- apply(quant_data_log, 1, sd_finite)
-plot(
-  density(sds, na.rm = T)
-, main = "Smoothed estimated probability density vs. std. deviation"
-, sub = "(probability estimation made with Gaussian smoothing)"
-)
-
-m1 <- median(sds, na.rm = T) #sd to be used is the median sd
+if (sum(!is.na(sds)) > 2) {
+  plot(
+    density(sds, na.rm = T)
+  , main = "Smoothed estimated probability density vs. std. deviation"
+  , sub = "(probability estimation made with Gaussian smoothing)"
+  )
+} else {
+  cat(
+    "At least two non-missing values are required to plot",
+    "probability density.\n\n"
+    )
+}

 ```

-
-
-<!--
-The number of missing values are:
--->
 ```{r echo = FALSE}
-#Determine number of cells to impute
+# Determine number of cells to impute
 temp <- quant_data[is.na(quant_data)]

-#Determine number of values to impute
+# Determine number of values to impute
 number_to_impute <- length(temp)
-```
-
-<!--
-% of values that are missing:
--->
-```{r echo = FALSE}
-pct_missing_values <- length(temp) / (length(logvalues) + length(temp)) * 100
-```

-<!--
-First few rows of data before imputation:
--->
-```{r echo = FALSE, results = 'asis'}
-cat("\\newpage\n")
+# Determine percent of missing values
+pct_missing_values <-
+  round(length(temp) / (length(logvalues) + length(temp)) * 100)
 ```

-## Parse sample names
-
-Parse the names of the samples to deduce the factor level for each sample:
-
 ```{r echo = FALSE}

 # prep for trt-median based imputation

-# Assuming that regex_sample_names <- "\\.(\\d+)[A-Z]$"
-#   get factors ->
-#      group runs (samples) by ignoring terminal [A-Z] in sample names
-
-m <- regexpr(regex_sample_names, names(quant_data), perl = TRUE)
-temp_matches <- regmatches(names(quant_data), m)
-print("Extracted sample names")
-print(temp_matches)
-m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE)
-sample_factor_levels <- as.factor(regmatches(temp_matches, m2))
-print("Factor levels")
-print(sample_factor_levels)
-
 ```
-## Impute missing values
+## Impute Missing Values

 ```{r echo = FALSE}

+imp_smry_potential_before <- length(logvalues)
+imp_smry_missing_before   <- number_to_impute
+imp_smry_pct_missing      <- pct_missing_values
+
+```
+
+```{r echo = FALSE}
 #Determine number of cells to impute
-cat("Before imputation,",
-  sprintf(
-    "there are:\n  %d peptides\n  %d missing values (%2.0f%s)",
-    sum(rep.int(TRUE, nrow(quant_data))),
-    sum(is.na(quant_data)),
-    pct_missing_values,
-    "%"
-    )
-)

 ```
 ```{r echo = FALSE}
@@ -282,15 +502,16 @@
 ind <- which(is.na(quant_data_imp), arr.ind = TRUE)

 ```
-```{r echo = FALSE}
+```{r echo = FALSE, results = 'asis'}

 # Apply imputation
 switch(
   imputation_method
 , "group-median" = {
-    cat("Imputation method:\n   substitute missing value",
-      "with median peptide-intensity for sample-group\n")
-
+    imputation_method_description <-
+      paste("Substitute missing value with",
+        "median peptide intensity for sample group\n"
+        )
     sample_level_integers <- as.integer(sample_factor_levels)
     for (i in seq_len(length(levels(sample_factor_levels)))) {
       level_cols <- i == sample_level_integers
@@ -301,47 +522,90 @@
     good_rows <- !is.na(rowMeans(quant_data_imp))
   }
 , "median" = {
-    cat("Imputation method:\n   substitute missing value with",
-      "median peptide-intensity across all sample classes\n")
+    imputation_method_description <-
+      paste("Substitute missing value with",
+        "median peptide intensity across all sample classes\n"
+        )
     quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]]
     good_rows <- !is.na(rowMeans(quant_data_imp))
   }
 , "mean" = {
-    cat("Imputation method:\n   substitute missing value with",
-      "mean peptide-intensity across all sample classes\n")
+    imputation_method_description <-
+      paste("Substitute missing value with",
+        "mean peptide intensity across all sample classes\n"
+        )
     quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]]
     good_rows <- !is.na(rowMeans(quant_data_imp))
   }
 , "random" = {
-    cat(
-      "Imputation method:\n   substitute missing value with\n  ",
-      sprintf(
-        "random intensity N ~ (%0.2f, %0.2f)\n"
-      , q1, m1
-      )
-    )
-    quant_data_imp[is.na(quant_data_imp)] <-
+    m1 <- median(sds, na.rm = T) * sd_percentile #sd to be used is the median sd
+    # If you want results to be reproducible, you will want to call
+    #   base::set.seed before calling stats::rnorm
+    imputation_method_description <-
+      paste("Substitute each missing value with random intensity",
+        sprintf(
+          "random intensity $N \\sim (%0.2f, %0.2f)$\n",
+          q1, m1
+          )
+        )
+    cat(sprintf("mean_percentile (from input parameter) is %2.0f\n\n",
+      100 * mean_percentile))
+    cat(sprintf("sd_percentile (from input parameter) is %0.2f\n\n",
+      sd_percentile))
+    #ACE cat(sprintf("sd for rnorm is %0.4f\n\n", m1))
+    quant_data_imp[ind] <-
       10 ^ rnorm(number_to_impute, mean = q1, sd = m1)
     good_rows <- !is.na(rowMeans(quant_data_imp))
   }
 )

+if (length(good_rows) < 1) {
+  print("ERROR: Cannot impute data; there are no good rows!")
+  return(-1)
+  }
 ```
+
+```{r echo = FALSE, results = 'asis'}
+cat("\\quad\n\nImputation method:\n\n\n", imputation_method_description)
+```
+
 ```{r echo = FALSE}

-#Determine number of cells to impute
-temp <- quant_data_imp[is.na(quant_data_imp)]
-cat("After imputation, there are:",
+imp_smry_potential_after <- sum(good_rows)
+imp_smry_rejected_after  <- sum(!good_rows)
+imp_smry_missing_after   <- sum(is.na(quant_data_imp[good_rows, ]))
+```
+```{r echo = FALSE, results = 'asis'}
+# ref: http://www1.maths.leeds.ac.uk/latex/TableHelp1.pdf
+tabular_lines <- paste(
+  "\\begin{table}[hb]", # h(inline); b(bottom); t (top) or p (separate page)
+  " \\caption{Imputation Results}",
+  " \\centering", # \centering centers the table on the page
+  " \\begin{tabular}{l c c c}",
+  "  \\hline\\hline",
+  "  \\  & potential peptides   & missing values & rejected",
+  "    peptides \\\\ [0.5ex]",
+  "  \\hline",
+  "  before imputation & %d     & %d (%d\\%s)    &    \\\\",
+  "  after imputation  & %d     & %d             & %d \\\\ [1ex]",
+  "  \\hline",
+  " \\end{tabular}",
+  #" \\label{table:nonlin}", # may be used to refer this table in the text
+  "\\end{table}",
+  sep = "\n"
+  )
+tabular_lines <-
   sprintf(
-    "\n  %d missing values\n  %d usable peptides analysis"
-  , sum(is.na(quant_data_imp[good_rows, ]))
-  , sum(good_rows)
-  ),
-  sprintf(
-    "\n  %d peptides with too many missing values for further analysis"
-  , sum(!good_rows)
-  )
-)
+    tabular_lines,
+    imp_smry_potential_before,
+    imp_smry_missing_before,
+    imp_smry_pct_missing,
+    "%",
+    imp_smry_potential_after,
+    imp_smry_missing_after,
+    imp_smry_rejected_after
+    )
+cat(tabular_lines)
 ```
 ```{r echo = FALSE}

@@ -349,27 +613,51 @@
 # Zap rows where imputation was ineffective
 full_data         <- full_data        [good_rows, ]
 quant_data        <- quant_data       [good_rows, ]
-quant_data_imp <- quant_data_imp[good_rows, ]
+
+write_debug_file(quant_data_imp)

+quant_data_imp <- quant_data_imp[good_rows, ]
+quant_data_imp_good_rows <- quant_data_imp
+
+write_debug_file(quant_data_imp_good_rows)
 ```
-```{r echo = FALSE}
+
+```{r echo = FALSE, results = 'asis'}

-d_combined <- (density(as.numeric(as.matrix(
-  log10(quant_data_imp)
-))))
+can_plot_before_after_imp <- TRUE
+d_combined <-
+  as.numeric(
+    as.matrix(
+      log10(quant_data_imp)
+      )
+    )
 d_original <-
-  density(as.numeric(as.matrix(
-    log10(quant_data_imp[!is.na(quant_data)]))))
+  as.numeric(
+    as.matrix(
+      log10(quant_data_imp[!is.na(quant_data)])
+      )
+    )

-```
-```{r echo = FALSE}
+if (sum(!is.na(d_combined)) > 2 && sum(!is.na(d_original)) > 2) {
+  d_combined <- density(d_combined)
+  d_original <- density(d_original)
+} else {
+  can_plot_before_after_imp <- FALSE
+}

 if (sum(is.na(quant_data)) > 0) {
   # There ARE missing values
   d_imputed <-
-    (density(as.numeric(as.matrix(
-      log10(quant_data_imp[is.na(quant_data)])
-    ))))
+    as.numeric(
+      as.matrix(
+        log10(quant_data_imp[is.na(quant_data)])
+        )
+      )
+  if (sum(!is.na(d_combined)) > 2) {
+    d_imputed <- (density(d_imputed))
+  } else {
+    can_plot_before_after_imp <- FALSE
+  }
 } else {
   # There are NO missing values
   d_imputed <- d_combined
@@ -377,16 +665,92 @@

 ```

-```{r echo = FALSE, fig.dim = c(9, 5)}
-ylim <- c(0, max(d_combined$y, d_original$y, d_imputed$y))
-plot(
-  d_combined,
-  ylim = ylim,
-  sub = "Blue = data before imputation; Red = imputed data",
-  main = "Density vs. log10(intensity) before and after imputation"
-)
-lines(d_original, col = "blue")
-lines(d_imputed, col = "red")
+```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}
+if (sum(is.na(quant_data)) > 0) {
+  cat("\\leavevmode\\newpage\n")
+  # data visualization
+  old_par <- par(
+    mai = par("mai") + c(0.5, 0, 0, 0)
+  )
+  x <- quant_data
+  x <- blue_dots <- x / x
+  blue_dots <- log10(blue_dots * quant_data)
+  x[is.na(x)] <- 0
+  x[x == 1] <- NA
+  x[x == 0] <- 1
+  quant_data_imp_log10 <-
+    log10(quant_data_imp)
+
+  write_debug_file(quant_data_imp_log10)
+
+  red_dots <- quant_data_imp_log10 * x
+  ylim <- c(
+    min(red_dots, blue_dots, na.rm = TRUE),
+    max(red_dots, blue_dots, na.rm = TRUE)
+    )
+  # ref: https://r-charts.com/distribution/add-points-boxplot/
+  # Vertical plot
+  colnames(blue_dots) <- sample_name_matches
+  boxplot(
+      blue_dots
+    , las = 1 # "always horizontal"
+    , col = const_boxplot_fill
+    , ylim = ylim
+    , main = "Peptide intensities before and after imputation"
+    , sub = "Light blue = data before imputation; Red = imputed data"
+    , xlab = "Sample"
+    , ylab = "log10(peptide intensity)"
+    )
+  # Points
+  # NA values are not plotted
+  stripchart(
+    blue_dots,                 # Data
+    method = "jitter",          # Random noise
+    jitter = const_stripchart_jitter,
+    pch = 19,                   # Pch symbols
+    cex = const_stripsmall_cex, # Size of symbols reduced
+    col = "lightblue",          # Color of the symbol
+    vertical = TRUE,            # Vertical mode
+    add = TRUE                  # Add it over
+    )
+  stripchart(
+    red_dots,                   # Data
+    method = "jitter",          # Random noise
+    jitter = const_stripchart_jitter,
+    pch = 19,                   # Pch symbols
+    cex = const_stripsmall_cex, # Size of symbols reduced
+    col = "red",                # Color of the symbol
+    vertical = TRUE,            # Vertical mode
+    add = TRUE                  # Add it over
+    )
+  par(old_par)
+
+  # density plot
+  cat("\\leavevmode\n\n\n\n\n\n\n")
+  if (can_plot_before_after_imp) {
+    ylim <- c(0, max(d_combined$y, d_original$y, d_imputed$y))
+    plot(
+      d_combined,
+      ylim = ylim,
+      sub =
+        paste(
+          "Blue = data before imputation; Red = imputed data;",
+          "Black = combined"
+          ),
+      main = "Density of peptide intensity before and after imputation",
+      xlab = "log10(peptide intensity)",
+      ylab = "Probability density"
+    )
+    lines(d_original, col = "blue")
+    lines(d_imputed, col = "red")
+  } else {
+    cat(
+      "There are too few points to plot the density of peptide intensity",
+      "before and after imputation."
+      )
+  }
+  cat("\\leavevmode\\newpage\n")
+}
 ```

 ## Perform Quantile Normalization
@@ -448,10 +812,10 @@
 #       http://bmbolstad.com/misc/normalize/normalize.html
 # ...
 -->
-```{r echo = FALSE}
+```{r echo = FALSE, results = 'asis'}
 library(preprocessCore)

-if (TRUE) {
+if (nrow(quant_data_imp) > 0) {
   quant_data_imp_qn <- normalize.quantiles(as.matrix(quant_data_imp))
 } else {
   quant_data_imp_qn <- as.matrix(quant_data_imp)
@@ -459,28 +823,39 @@

 quant_data_imp_qn <- as.data.frame(quant_data_imp_qn)
 names(quant_data_imp_qn) <- names(quant_data_imp)
-quant_data_imp_qn_log <- log10(quant_data_imp_qn)
+
+write_debug_file(quant_data_imp_qn)

-rownames(quant_data_imp_qn_log) <- full_data[, 1]
+quant_data_imp_qn_log <- log10(quant_data_imp_qn)
+rownames(quant_data_imp_qn_log) <- rownames(quant_data_imp)
+
+write_debug_file(quant_data_imp_qn_log)

 quant_data_imp_qn_ls <- t(scale(t(log10(quant_data_imp_qn))))
+
 any_nan <- function(x) {
   !any(x == "NaN")
 }
 sel <- apply(quant_data_imp_qn_ls, 1, any_nan)
 quant_data_imp_qn_ls2 <- quant_data_imp_qn_ls[which(sel), ]
 quant_data_imp_qn_ls2 <- as.data.frame(quant_data_imp_qn_ls2)
+rownames(quant_data_imp_qn_ls2) <- rownames(quant_data_imp)[sel]
+
+quant_data_imp_qn_ls <- as.data.frame(quant_data_imp_qn_ls)
+rownames(quant_data_imp_qn_ls) <- rownames(quant_data_imp)
+
+write_debug_file(quant_data_imp_qn_ls)
+write_debug_file(quant_data_imp_qn_ls2)

 #output quantile normalized data
 data_table_imp_qn_lt <- cbind(full_data[1:9], quant_data_imp_qn_log)
 write.table(
   data_table_imp_qn_lt,
-  file = paste(paste(
-    strsplit(imputed_data_filename, ".txt"), "QN_LT", sep = "_"
-  ), ".txt", sep = ""),
+  file = imp_qn_lt_data_filenm,
   sep = "\t",
   col.names = TRUE,
-  row.names = FALSE
+  row.names = FALSE,
+  quote = FALSE
 )

 ```
@@ -490,7 +865,6 @@

 ```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}

-
 # Save unimputed quant_data_log for plotting below
 unimputed_quant_data_log <- quant_data_log

@@ -499,10 +873,9 @@
 quant_data_imp_qn[quant_data_imp_qn == 0] <- .000000001
 quant_data_log <- log10(quant_data_imp_qn)

-# Output quantile-normalized log-transformed dataset
-#   with imputed, normalized data
+# Output with imputed, un-normalized data

-data_table_imputed <- cbind(full_data[1:9], quant_data_log)
+data_table_imputed <- cbind(full_data[1:9], quant_data_imp)
 write.table(
     data_table_imputed
   , file = imputed_data_filename
@@ -512,38 +885,72 @@
   , quote = FALSE
   )

+how_many_peptides <- nrow(quant_data_log)
+
+if ((how_many_peptides) > 0) {
+  cat(
+    sprintf(
+      "Intensities for %d peptides:\n\n\n",
+      how_many_peptides
+      )
+    )
+  cat("\n\n\n")


-# data visualization
-old_par <- par(
-  mai = par("mai") + c(0.5, 0, 0, 0)
-, oma = par("oma") + c(0.5, 0, 0, 0)
-)
-boxplot(
-  quant_data_log
-, las = 2
-)
-par(old_par)
+  # data visualization
+  old_par <- par(
+    mai = par("mai") + c(0.5, 0, 0, 0)
+  , oma = par("oma") + c(0.5, 0, 0, 0)
+  )
+  # ref: https://r-charts.com/distribution/add-points-boxplot/
+  # Vertical plot
+  colnames(quant_data_log) <- sample_name_matches
+  boxplot(
+    quant_data_log
+  , las = 1
+  , col = const_boxplot_fill
+  )
+  # Points
+  stripchart(
+    quant_data_log, # Data
+    method = "jitter",          # Random noise
+    jitter = const_stripchart_jitter,
+    pch = 19,                   # Pch symbols
+    cex = const_stripchart_cex, # Size of symbols reduced
+    col = "goldenrod",          # Color of the symbol
+    vertical = TRUE,            # Vertical mode
+    add = TRUE                  # Add it over
+    )
+  par(old_par)
+} else {
+  cat("There are no peptides to plot\n")
+}

-
-
-cat("\\newline\n")
-cat("\\newline\n")
+cat("\n\n\n")

 ```

-```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4)}
-quant_data_log_stack <- stack(quant_data_log)
-ggplot(
-  quant_data_log_stack,
-  aes(x = values)
-  ) + geom_density(aes(group = ind, colour = ind))
+```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4), results = 'asis'}
+if (nrow(quant_data_log) > 1) {
+  quant_data_log_stack <- stack(quant_data_log)
+  ggplot(
+    quant_data_log_stack,
+    aes(x = values)
+    ) +
+    geom_density(
+      aes(group = ind, colour = ind),
+      na.rm = TRUE
+      )
+} else {
+  cat("No density plot because there are fewer than two peptides to plot.\n\n")
+}
+```
+```{r echo = FALSE, results = 'asis'}
+cat("\\leavevmode\\newpage\n")
 ```

 ## Perform ANOVA filters

-(see following pages)
-
 ```{r, echo = FALSE}
 # Make new data frame containing only Phosphopeptides
 #   to connect preANOVA to ANOVA (connect_df)
@@ -555,22 +962,8 @@
 ```

 ```{r echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
-# Get factors -> group replicates (as indicated by terminal letter)
-#   by the preceding digits;
-#   e.g., group .1A .1B .1C into group 1; .2A .2B .2C, into group 2; etc..
-m <-
-  regexpr(regex_sample_names, names(quant_data_imp_qn_log), perl = TRUE)
-
-temp_matches <- regmatches(names(quant_data_imp_qn_log), m)
-
-number_of_samples <- length(temp_matches)
-
-m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE)
-
-
-sample_factor_levels <- as.factor(regmatches(temp_matches, m2))
-
-if (length(levels(sample_factor_levels)) < 2) {
+count_of_factor_levels <- length(levels(sample_factor_levels))
+if (count_of_factor_levels < 2) {
   nuke_control_sequences <-
     function(s) {
       s <- gsub("[\\]", "xyzzy_plugh", s)
@@ -584,46 +977,46 @@
   )
   cat(
     "ERROR: ANOVA analysis",
-    "requires two or more factor levels!\\newline\n"
+    "requires two or more factor levels!\n\n\n"
   )

-  cat("\\newline\\newline\n")
-  cat("Unparsed sample names are:\\newline\n",
+  cat("\n\n\n\n\n")
+  cat("Unparsed sample names are:\n\n\n",
     "\\begin{quote}\n",
-    paste(names(quant_data_imp_qn_log), collapse = "\\newline\n"),
+    paste(names(quant_data_imp_qn_log), collapse = "\n\n\n"),
     "\n\\end{quote}\n\n")

   regex_sample_names <- nuke_control_sequences(regex_sample_names)

-  cat("\\leavevmode\\newline\n")
+  cat("\\leavevmode\n\n\n")
   cat("Parsing rule for SampleNames is",
-    "\\newline\n",
+    "\n\n\n",
     "\\text{'",
     regex_sample_names,
-    "'}\\newline\n",
+    "'}\n\n\n",
     sep = ""
     )

   cat("\nParsed sample names are:\n",
     "\\begin{quote}\n",
-    paste(temp_matches, collapse = "\\newline\n"),
+    paste(sample_name_matches, collapse = "\n\n\n"),
     "\n\\end{quote}\n\n")

   regex_sample_grouping <- nuke_control_sequences(regex_sample_grouping)

-  cat("\\leavevmode\\newline\n")
+  cat("\\leavevmode\n\n\n")
   cat("Parsing rule for SampleGrouping is",
-    "\\newline\n",
+    "\n\n\n",
     "\\text{'",
     regex_sample_grouping,
-    "'}\\newline\n",
+    "'}\n\n\n",
     sep = ""
     )

-  cat("\\newline\n")
+  cat("\n\n\n")
   cat("Sample group assignments are:\n",
     "\\begin{quote}\n",
-    paste(regmatches(temp_matches, m2), collapse = "\\newline\n"),
+    paste(regmatches(sample_name_matches, m2), collapse = "\n\n\n"),
     "\n\\end{quote}\n\n")

 } else {
@@ -649,14 +1042,19 @@
   #   e.g.    "Outputfile_pST_ANOVA_STEP5.txt"
   #   becomes "Outpufile_pST_ANOVA_STEP5_FDR0.05.txt"

-  # Re-output quantile-normalized log-transformed dataset
-  #   with imputed, normalized data to include p-values
+  # Re-output datasets to include p-values
+  write.table(
+    cbind(full_data[1:9], p_value_data[, 2:3], quant_data_imp),
+    file = imputed_data_filename,
+    sep = "\t",
+    col.names = TRUE,
+    row.names = FALSE,
+    quote = FALSE
+    )

-  data_table_imputed <-
-    cbind(full_data[1:9], p_value_data[, 2:3], quant_data_log)
   write.table(
-    data_table_imputed,
-    file = imputed_data_filename,
+    cbind(full_data[1:9], p_value_data[, 2:3], quant_data_imp_qn_log),
+    file = imp_qn_lt_data_filenm,
     sep = "\t",
     col.names = TRUE,
     row.names = FALSE,
@@ -667,70 +1065,94 @@
   p_value_data <-
     p_value_data[order(p_value_data$fdr_adjusted_anova_p), ]

+  first_page_suppress <- 1
+  number_of_peptides_found <- 0
   cutoff <- val_fdr[1]
   for (cutoff in val_fdr) {
+    if (number_of_peptides_found > 49) {
+      cat("\\leavevmode\n\n\n")
+      break
+      }
+
     #loop through FDR cutoffs

     filtered_p <-
       p_value_data[
         which(p_value_data$fdr_adjusted_anova_p < cutoff),
-        ,
-        drop = FALSE
+        , drop = FALSE
         ]
     filtered_data_filtered <-
       quant_data_imp_qn_log[
         rownames(filtered_p),
-        ,
-        drop = FALSE
+        , drop = FALSE
         ]
     filtered_data_filtered <-
       filtered_data_filtered[
         order(filtered_p$fdr_adjusted_anova_p),
-        ,
-        drop = FALSE
+        , drop = FALSE
         ]

     # <!-- ACE insertion start -->
-    old_oma <- par("oma")
-    old_par <- par(
-      mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1),
-      oma = old_oma * c(1, 1, 0.3, 1),
-      cex.main = 0.9,
-      cex.axis = 0.7
-      )

-    cat("\\newpage\n")
-    if (nrow(filtered_data_filtered) > 0) {
+    if (nrow(filtered_p) && nrow(filtered_data_filtered) > 0) {
+      if (first_page_suppress == 1) {
+        first_page_suppress <- 0
+        }
+        else {
+          cat("\\newpage\n")
+        }
       cat(sprintf(
-        "Intensities for peptides whose adjusted p-value < %0.2f\n",
+        "Intensities for %d peptides whose adjusted p-value < %0.2f:\n",
+        nrow(filtered_data_filtered),
         cutoff
       ))
-      cat("\\newline\n")
-      cat("\\newline\n")
+      cat("\n\n\n")
+      cat("\n\n\n")

+      old_oma <- par("oma")
+      old_par <- par(
+        mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1),
+        oma = old_oma * c(1, 1, 0.3, 1),
+        cex.main = 0.9,
+        cex.axis = 0.7,
+        fin = c(9, 7.25)
+        )
+      # ref: https://r-charts.com/distribution/add-points-boxplot/
+      # Vertical plot
+      colnames(filtered_data_filtered) <- sample_name_matches
       boxplot(
         filtered_data_filtered,
         main = "Imputed, normalized intensities", # no line plot
-        las = 2,
+        las = 1,
+        col = const_boxplot_fill,
         ylab = expression(log[10](intensity))
       )
+      # Points
+      stripchart(
+        filtered_data_filtered,     # Data
+        method = "jitter",          # Random noise
+        jitter = const_stripchart_jitter,
+        pch = 19,                   # Pch symbols
+        cex = const_stripchart_cex, # Size of symbols reduced
+        col = "goldenrod",          # Color of the symbol
+        vertical = TRUE,            # Vertical mode
+        add = TRUE                  # Add it over
+        )
+      par(old_par)
     } else {
       cat(sprintf(
-        "No peptides were found to have cutoff adjusted p-value < %0.2f\n",
+        "%s < %0.2f\n\n\n\n\n",
+        "No peptides were found to have cutoff adjusted p-value <",
         cutoff
       ))
     }
-    par(old_par)

     if (nrow(filtered_data_filtered) > 0) {
       #Add Phosphopeptide column to anova_filtered table
       anova_filtered_merge <- merge(
-        x = connect_df
-        ,
-        y = filtered_data_filtered
-        ,
-        by.x = "Intensity"
-        ,
+        x = connect_df,
+        y = filtered_data_filtered,
+        by.x = "Intensity",
         by.y = 1
       )
       anova_filtered_merge_order <- rownames(filtered_p)
@@ -759,12 +1181,9 @@
       # Merge qualitative columns into the ANOVA data
       output_table <- data.frame(anova_filtered$Phosphopeptide)
       output_table <- merge(
-        x = output_table
-        ,
-        y = data_table_imp_qn_lt
-        ,
-        by.x = "anova_filtered.Phosphopeptide"
-        ,
+        x = output_table,
+        y = data_table_imp_qn_lt,
+        by.x = "anova_filtered.Phosphopeptide",
         by.y = "Phosphopeptide"
       )

@@ -777,7 +1196,7 @@
           nrow = nrow(m)
           )
         )
-      m <- m[!m_nan_rows, ]
+      m <- m[!m_nan_rows, , drop = FALSE]
       if (nrow(m) > 0) {
         rownames_m <- rownames(m)
         rownames(m) <- sapply(
@@ -791,11 +1210,15 @@
             )
           }
         )
-        margins <-
-          c(max(nchar(colnames(m))) * 10 / 16 # col
-            , max(nchar(rownames(m))) * 5 / 16 # row
-            )
         how_many_peptides <- min(50, nrow(m))
+        number_of_peptides_found <- how_many_peptides
+        if (nrow(m) > 1) {
+          m_margin <- m[how_many_peptides:1, ]
+          margins <-
+            c(max(nchar(colnames(m_margin))) * 10 / 16 # col
+              , max(nchar(rownames(m_margin))) * 5 / 16 # row
+              )
+          }

         cat("\\newpage\n")
         if (nrow(m) > 50) {
@@ -805,16 +1228,19 @@
               cutoff)
           )
         } else {
-          cat("Heatmap for peptides whose",
-            sprintf("adjusted p-value < %0.2f\n",
-            cutoff)
-          )
+          if (nrow(m) == 1) {
+            cat(
+              sprintf("Heatmap for %d usable peptides whose", nrow(m)),
+              sprintf("adjusted p-value < %0.2f\n", cutoff)
+            )
+            next
+          }
         }
-        cat("\\newline\n")
-        cat("\\newline\n")
-        op <- par("cex.main")
+        cat("\n\n\n")
+        cat("\n\n\n")
         try(
           if (nrow(m) > 1) {
+            old_oma <- par("oma")
             par(cex.main = 0.6)
             heatmap(
               m[how_many_peptides:1, ],
@@ -823,19 +1249,19 @@
               cexRow = 0.7,
               cexCol = 0.8,
               scale = "row",
-              #ACE scale = "none",
               margins = margins,
               main =
-                "Heatmap of unimputed, unnormalized intensities",
-              xlab = ""
+                "Unimputed, unnormalized intensities",
+              xlab = "",
+              las = 1 #, fin = c(9, 5.5)
               )
           }
         )
-        par(op)
       }
     }
   }
 }
+cat("\\leavevmode\n\n\n")
 ```

 <!--
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/workflow/ppenrich_suite_wf.ga	Tue Mar 22 20:47:40 2022 +0000
@@ -0,0 +1,652 @@
+{
+    "a_galaxy_workflow": "true",
+    "annotation": "phoshpoproteomic enrichment data pre-processing and ANOVA",
+    "creator": [
+        {
+            "class": "Person",
+            "identifier": "0000-0002-2882-0508",
+            "name": "Art Eschenlauer"
+        }
+    ],
+    "format-version": "0.1",
+    "license": "MIT",
+    "name": "ppenrich_suite_wf",
+    "steps": {
+        "0": {
+            "annotation": "The Phospho (STY)Sites.txt file produced by MaxQuant (found in the txt folder).",
+            "content_id": null,
+            "errors": null,
+            "id": 0,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "The Phospho (STY)Sites.txt file produced by MaxQuant (found in the txt folder).",
+                    "name": "Phospho (STY)Sites.txt"
+                }
+            ],
+            "label": "Phospho (STY)Sites.txt",
+            "name": "Input dataset",
+            "outputs": [],
+            "position": {
+                "bottom": 346.3999938964844,
+                "height": 81.89999389648438,
+                "left": 495,
+                "right": 695,
+                "top": 264.5,
+                "width": 200,
+                "x": 495,
+                "y": 264.5
+            },
+            "tool_id": null,
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_version": null,
+            "type": "data_input",
+            "uuid": "21c3c29d-9e8c-4ece-b585-9e68fed7a93f",
+            "workflow_outputs": []
+        },
+        "1": {
+            "annotation": "FASTA file of all human canonical isoforms, derived from Swiss-Prot (e.g., merge of https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot_varsplic.fasta.gz and https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz)",
+            "content_id": null,
+            "errors": null,
+            "id": 1,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "FASTA file of all human canonical isoforms, derived from Swiss-Prot (e.g., merge of https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot_varsplic.fasta.gz and https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz)",
+                    "name": "SwissProt_Human_Canonical_Isoform.fasta"
+                }
+            ],
+            "label": "SwissProt_Human_Canonical_Isoform.fasta",
+            "name": "Input dataset",
+            "outputs": [],
+            "position": {
+                "bottom": 708.8000030517578,
+                "height": 102.30000305175781,
+                "left": 685,
+                "right": 885,
+                "top": 606.5,
+                "width": 200,
+                "x": 685,
+                "y": 606.5
+            },
+            "tool_id": null,
+            "tool_state": "{\"optional\": false, \"format\": [\"fasta\"]}",
+            "tool_version": null,
+            "type": "data_input",
+            "uuid": "5da7317c-4def-48f3-8eac-af95bd18b290",
+            "workflow_outputs": []
+        },
+        "2": {
+            "annotation": "Derived from https://networkin.info/download/networkin_human_predictions_3.1.tsv.xz (which is free for non-commercial use - for required citation, see https://networkin.info/)",
+            "content_id": null,
+            "errors": null,
+            "id": 2,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "Derived from https://networkin.info/download/networkin_human_predictions_3.1.tsv.xz (which is free for non-commercial use - for required citation, see https://networkin.info/)",
+                    "name": "NetworKIN_cutoffscore2.0.tabular"
+                }
+            ],
+            "label": "NetworKIN_cutoffscore2.0.tabular",
+            "name": "Input dataset",
+            "outputs": [],
+            "position": {
+                "bottom": 853.8000030517578,
+                "height": 102.30000305175781,
+                "left": 696,
+                "right": 896,
+                "top": 751.5,
+                "width": 200,
+                "x": 696,
+                "y": 751.5
+            },
+            "tool_id": null,
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_version": null,
+            "type": "data_input",
+            "uuid": "2edff8de-4379-45e2-b6b9-6ed4706bbf00",
+            "workflow_outputs": []
+        },
+        "3": {
+            "annotation": "Derived from http://hprd.org/serine_motifs, http://hprd.org/tyrosine_motifs, and http://pegasus.biochem.mpg.de/phosida/help/motifs.aspx",
+            "content_id": null,
+            "errors": null,
+            "id": 3,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "Derived from http://hprd.org/serine_motifs, http://hprd.org/tyrosine_motifs, and http://pegasus.biochem.mpg.de/phosida/help/motifs.aspx",
+                    "name": "pSTY_Motifs.tabular"
+                }
+            ],
+            "label": "pSTY_Motifs.tabular",
+            "name": "Input dataset",
+            "outputs": [],
+            "position": {
+                "bottom": 977.3999938964844,
+                "height": 81.89999389648438,
+                "left": 708,
+                "right": 908,
+                "top": 895.5,
+                "width": 200,
+                "x": 708,
+                "y": 895.5
+            },
+            "tool_id": null,
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_version": null,
+            "type": "data_input",
+            "uuid": "86ebaaf2-b050-4eca-a88b-23a4c1af39f5",
+            "workflow_outputs": []
+        },
+        "4": {
+            "annotation": "Derived from Kinase_Substrate_Dataset.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use  - see that link for citation.)",
+            "content_id": null,
+            "errors": null,
+            "id": 4,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "Derived from Kinase_Substrate_Dataset.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use  - see that link for citation.)",
+                    "name": "PSP_Kinase_Substrate_Dataset.tabular"
+                }
+            ],
+            "label": "PSP_Kinase_Substrate_Dataset.tabular",
+            "name": "Input dataset",
+            "outputs": [],
+            "position": {
+                "bottom": 1126.8000030517578,
+                "height": 102.30000305175781,
+                "left": 729,
+                "right": 929,
+                "top": 1024.5,
+                "width": 200,
+                "x": 729,
+                "y": 1024.5
+            },
+            "tool_id": null,
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_version": null,
+            "type": "data_input",
+            "uuid": "92f16705-a19c-4fb9-b278-3ae8e11f09d8",
+            "workflow_outputs": []
+        },
+        "5": {
+            "annotation": "Derived from Regulatory_sites.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use  - see that link for citation.)",
+            "content_id": null,
+            "errors": null,
+            "id": 5,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "Derived from Regulatory_sites.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use  - see that link for citation.)",
+                    "name": "PSP_Regulatory_sites.tabular"
+                }
+            ],
+            "label": "PSP_Regulatory_sites.tabular",
+            "name": "Input dataset",
+            "outputs": [],
+            "position": {
+                "bottom": 1251.3999938964844,
+                "height": 81.89999389648438,
+                "left": 745,
+                "right": 945,
+                "top": 1169.5,
+                "width": 200,
+                "x": 745,
+                "y": 1169.5
+            },
+            "tool_id": null,
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_version": null,
+            "type": "data_input",
+            "uuid": "5ab49d93-11e4-4e91-b30b-92269b319879",
+            "workflow_outputs": []
+        },
+        "6": {
+            "annotation": "List of alpha cutoff values for significance testing; text file having no header and a single line for each cutoff value.",
+            "content_id": null,
+            "errors": null,
+            "id": 6,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "List of alpha cutoff values for significance testing; text file having no header and a single line for each cutoff value.",
+                    "name": "alpha_levels.tabular"
+                }
+            ],
+            "label": "alpha_levels.tabular",
+            "name": "Input dataset",
+            "outputs": [],
+            "position": {
+                "bottom": 1501.8999938964844,
+                "height": 81.89999389648438,
+                "left": 727,
+                "right": 927,
+                "top": 1420,
+                "width": 200,
+                "x": 727,
+                "y": 1420
+            },
+            "tool_id": null,
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_version": null,
+            "type": "data_input",
+            "uuid": "481c627c-a4ce-45d7-b659-4f54692aafc7",
+            "workflow_outputs": []
+        },
+        "7": {
+            "annotation": "",
+            "content_id": "mqppep_preproc",
+            "errors": null,
+            "id": 7,
+            "input_connections": {
+                "networkin": {
+                    "id": 2,
+                    "output_name": "output"
+                },
+                "p_sty_motifs": {
+                    "id": 3,
+                    "output_name": "output"
+                },
+                "phosphoSites": {
+                    "id": 0,
+                    "output_name": "output"
+                },
+                "protein_fasta": {
+                    "id": 1,
+                    "output_name": "output"
+                },
+                "psp_kinase_substrate": {
+                    "id": 4,
+                    "output_name": "output"
+                },
+                "psp_regulatory_sites": {
+                    "id": 5,
+                    "output_name": "output"
+                }
+            },
+            "inputs": [],
+            "label": null,
+            "name": "MaxQuant Phosphopeptide Preprocessing",
+            "outputs": [
+                {
+                    "name": "phosphoPepIntensities",
+                    "type": "tabular"
+                },
+                {
+                    "name": "enrichGraph",
+                    "type": "pdf"
+                },
+                {
+                    "name": "locProbCutoffGraph",
+                    "type": "pdf"
+                },
+                {
+                    "name": "enrichGraph_svg",
+                    "type": "svg"
+                },
+                {
+                    "name": "locProbCutoffGraph_svg",
+                    "type": "svg"
+                },
+                {
+                    "name": "filteredData_tabular",
+                    "type": "tabular"
+                },
+                {
+                    "name": "quantData_tabular",
+                    "type": "tabular"
+                },
+                {
+                    "name": "mapped_phophopeptides",
+                    "type": "tabular"
+                },
+                {
+                    "name": "melted_phophopeptide_map",
+                    "type": "tabular"
+                },
+                {
+                    "name": "mqppep_output_sqlite",
+                    "type": "sqlite"
+                },
+                {
+                    "name": "preproc_tab",
+                    "type": "tabular"
+                },
+                {
+                    "name": "preproc_csv",
+                    "type": "csv"
+                },
+                {
+                    "name": "preproc_sqlite",
+                    "type": "sqlite"
+                }
+            ],
+            "position": {
+                "bottom": 1408.7000122070312,
+                "height": 793.2000122070312,
+                "left": 1138.5,
+                "right": 1338.5,
+                "top": 615.5,
+                "width": 200,
+                "x": 1138.5,
+                "y": 615.5
+            },
+            "post_job_actions": {
+                "RenameDatasetActionenrichGraph": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.enrichGraph_pdf"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "enrichGraph"
+                },
+                "RenameDatasetActionenrichGraph_svg": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.enrichGraph_svg"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "enrichGraph_svg"
+                },
+                "RenameDatasetActionfilteredData_tabular": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.filteredData"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "filteredData_tabular"
+                },
+                "RenameDatasetActionlocProbCutoffGraph": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.locProbCutoffGraph_pdf"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "locProbCutoffGraph"
+                },
+                "RenameDatasetActionlocProbCutoffGraph_svg": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.locProbCutoffGraph_svg"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "locProbCutoffGraph_svg"
+                },
+                "RenameDatasetActionmapped_phophopeptides": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.ppep_map"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "mapped_phophopeptides"
+                },
+                "RenameDatasetActionmelted_phophopeptide_map": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.melted"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "melted_phophopeptide_map"
+                },
+                "RenameDatasetActionmqppep_output_sqlite": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.ppep_mapping_sqlite"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "mqppep_output_sqlite"
+                },
+                "RenameDatasetActionphosphoPepIntensities": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.ppep_intensities"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "phosphoPepIntensities"
+                },
+                "RenameDatasetActionpreproc_csv": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.preproc_csv"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "preproc_csv"
+                },
+                "RenameDatasetActionpreproc_sqlite": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.preproc_sqlite"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "preproc_sqlite"
+                },
+                "RenameDatasetActionpreproc_tab": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.preproc_tab"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "preproc_tab"
+                },
+                "RenameDatasetActionquantData_tabular": {
+                    "action_arguments": {
+                        "newname": "#{phosphoSites}.quantData"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "quantData_tabular"
+                }
+            },
+            "tool_id": "mqppep_preproc",
+            "tool_state": "{\"collapseFunc\": \"sum\", \"intervalCol\": \"1\", \"localProbCutoff\": \"0.75\", \"merge_function\": \"sum\", \"networkin\": {\"__class__\": \"ConnectedValue\"}, \"p_sty_motifs\": {\"__class__\": \"ConnectedValue\"}, \"phosphoCol\": \"^Number of Phospho [(]STY[)]$\", \"phosphoSites\": {\"__class__\": \"ConnectedValue\"}, \"protein_fasta\": {\"__class__\": \"ConnectedValue\"}, \"psp_kinase_substrate\": {\"__class__\": \"ConnectedValue\"}, \"psp_regulatory_sites\": {\"__class__\": \"ConnectedValue\"}, \"pst_not_py\": \"true\", \"species\": \"human\", \"startCol\": \"^Intensity[^_]\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "tool_version": null,
+            "type": "tool",
+            "uuid": "1e983dba-edca-4aed-a589-49b1651b4a85",
+            "workflow_outputs": [
+                {
+                    "label": "preproc_csv",
+                    "output_name": "preproc_csv",
+                    "uuid": "8cba5367-b25c-40e0-a324-552225b1aa1d"
+                },
+                {
+                    "label": "locProbCutoffGraph_pdf",
+                    "output_name": "locProbCutoffGraph",
+                    "uuid": "641c5959-dab4-42d1-986d-8e6aaeb74ef6"
+                },
+                {
+                    "label": "melted_phosphopeptide_map",
+                    "output_name": "melted_phophopeptide_map",
+                    "uuid": "878dc817-26a3-4061-9dd4-56e737b3c4f7"
+                },
+                {
+                    "label": "enrichGraph_svg",
+                    "output_name": "enrichGraph_svg",
+                    "uuid": "4492366c-945e-492f-8381-1c97c4da2264"
+                },
+                {
+                    "label": "locProbCutoffGraph_svg",
+                    "output_name": "locProbCutoffGraph_svg",
+                    "uuid": "06faf93c-5f04-4cb3-9e41-58e465f6180e"
+                },
+                {
+                    "label": "filteredData",
+                    "output_name": "filteredData_tabular",
+                    "uuid": "76e2e268-f728-45f0-9973-793fbde0dd0a"
+                },
+                {
+                    "label": "ppep_map",
+                    "output_name": "mapped_phophopeptides",
+                    "uuid": "d0fea028-2ea5-4862-8a92-c2088edfcbe1"
+                },
+                {
+                    "label": "ppep_mapping_sqlite",
+                    "output_name": "mqppep_output_sqlite",
+                    "uuid": "eb996931-c548-4f3b-aaaa-39cc711df516"
+                },
+                {
+                    "label": "preproc_tab",
+                    "output_name": "preproc_tab",
+                    "uuid": "c9410cf1-44a2-4aa6-b3df-06cef74f3a45"
+                },
+                {
+                    "label": "preproc_sqlite",
+                    "output_name": "preproc_sqlite",
+                    "uuid": "4eb22cc3-5879-4625-89c0-e0fddb01a197"
+                },
+                {
+                    "label": "ppep_intensities",
+                    "output_name": "phosphoPepIntensities",
+                    "uuid": "c704fd66-5ac3-4779-ad40-536955cd81e3"
+                },
+                {
+                    "label": "enrichGraph_pdf",
+                    "output_name": "enrichGraph",
+                    "uuid": "5bf2a478-0431-4d32-84a9-7d46aad80ec5"
+                },
+                {
+                    "label": "quantData",
+                    "output_name": "quantData_tabular",
+                    "uuid": "cc922a75-6e72-4e60-add2-4b6ed8f73cdb"
+                }
+            ]
+        },
+        "8": {
+            "annotation": "Perform ANOVA. For imputing missing values, use median of non-missing values from the same treatment group.",
+            "content_id": "mqppep_anova",
+            "errors": null,
+            "id": 8,
+            "input_connections": {
+                "alpha_file": {
+                    "id": 6,
+                    "output_name": "output"
+                },
+                "input_file": {
+                    "id": 7,
+                    "output_name": "preproc_tab"
+                }
+            },
+            "inputs": [],
+            "label": "MaxQuant Phosphopeptide ANOVA group-median imputed",
+            "name": "MaxQuant Phosphopeptide ANOVA",
+            "outputs": [
+                {
+                    "name": "imputed_data_file",
+                    "type": "tabular"
+                },
+                {
+                    "name": "report_file",
+                    "type": "pdf"
+                }
+            ],
+            "position": {
+                "bottom": 1775.6000061035156,
+                "height": 255.60000610351562,
+                "left": 1370,
+                "right": 1570,
+                "top": 1520,
+                "width": 200,
+                "x": 1370,
+                "y": 1520
+            },
+            "post_job_actions": {
+                "RenameDatasetActionimputed_data_file": {
+                    "action_arguments": {
+                        "newname": "#{input_file}.intensities_group-mean-imputed_QN_LT"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "imputed_data_file"
+                },
+                "RenameDatasetActionreport_file": {
+                    "action_arguments": {
+                        "newname": "#{input_file}.intensities_group-mean-imputed_report (download/unzip to view)"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "report_file"
+                }
+            },
+            "tool_id": "mqppep_anova",
+            "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"group-median\", \"__current_case__\": 0}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "tool_version": null,
+            "type": "tool",
+            "uuid": "ffa771c3-c52d-42a4-b78f-a60a39678792",
+            "workflow_outputs": [
+                {
+                    "label": "intensities_group-mean-imputed_QN_LT",
+                    "output_name": "imputed_data_file",
+                    "uuid": "169d677f-0acb-4c56-b057-21f4aaf2b920"
+                },
+                {
+                    "label": "intensities_group-mean-imputed_report",
+                    "output_name": "report_file",
+                    "uuid": "25edae88-3bb6-4ec9-8b98-686fded7ed79"
+                }
+            ]
+        },
+        "9": {
+            "annotation": "Perform ANOVA. For imputing missing values, create random values.",
+            "content_id": "mqppep_anova",
+            "errors": null,
+            "id": 9,
+            "input_connections": {
+                "alpha_file": {
+                    "id": 6,
+                    "output_name": "output"
+                },
+                "input_file": {
+                    "id": 7,
+                    "output_name": "preproc_tab"
+                }
+            },
+            "inputs": [],
+            "label": "MaxQuant Phosphopeptide ANOVA randomly imputed",
+            "name": "MaxQuant Phosphopeptide ANOVA",
+            "outputs": [
+                {
+                    "name": "imputed_data_file",
+                    "type": "tabular"
+                },
+                {
+                    "name": "report_file",
+                    "type": "pdf"
+                }
+            ],
+            "position": {
+                "bottom": 1609.6000061035156,
+                "height": 255.60000610351562,
+                "left": 1617,
+                "right": 1817,
+                "top": 1354,
+                "width": 200,
+                "x": 1617,
+                "y": 1354
+            },
+            "post_job_actions": {
+                "RenameDatasetActionimputed_data_file": {
+                    "action_arguments": {
+                        "newname": "#{input_file}.intensities_randomly-imputed_QN_LT"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "imputed_data_file"
+                },
+                "RenameDatasetActionreport_file": {
+                    "action_arguments": {
+                        "newname": "#{input_file}.intensities_randomly-imputed_report (download/unzip to view)"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "report_file"
+                }
+            },
+            "tool_id": "mqppep_anova",
+            "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"random\", \"__current_case__\": 3, \"meanPercentile\": \"1\", \"sdPercentile\": \"0.2\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "type": "tool",
+            "uuid": "f1f2bdf9-fbc0-4205-b834-9a8af5814dc9",
+            "workflow_outputs": [
+                {
+                    "label": "intensities_randomly-imputed_QN_LT",
+                    "output_name": "imputed_data_file",
+                    "uuid": "d70a3476-fb42-4533-831b-4fcb2bda74fc"
+                },
+                {
+                    "label": "intensities_randomly-imputed_report",
+                    "output_name": "report_file",
+                    "uuid": "d6701a61-357b-4a27-8154-ca41eb16d8a6"
+                }
+            ]
+        }
+    },
+    "tags": [
+        "ppenrich"
+    ],
+    "uuid": "445a0eb0-25c7-44c0-8259-a3346b01cbf3",
+    "version": 3
+}