changeset 15:2c5f1a2fe16a draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 96659062ea07ac43d139746b0d119f1ee020f9cd"
author eschen42
date Sat, 26 Mar 2022 02:27:12 +0000
parents 6679616d0c18
children 2b9119d7d80a
files macros.xml mqppep_anova.R mqppep_anova.xml mqppep_anova_script.Rmd workflow/ppenrich_suite_wf.ga
diffstat 5 files changed, 555 insertions(+), 370 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Tue Mar 22 23:12:18 2022 +0000
+++ b/macros.xml	Sat Mar 26 02:27:12 2022 +0000
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">0.1.5</token>
+    <token name="@TOOL_VERSION@">0.1.6</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <xml name="requirements">
         <requirements>
@@ -16,15 +16,18 @@
             <requirement type="package" version="1.7.1"   >r-optparse</requirement>
             <requirement type="package" version="2.11"    >r-rmarkdown</requirement>
             <!--
-            It would be nice to use conda-forge/texlive-core, but issue 23 blocked PDF-creation.
-            Also, I got pango font errors (output had missing symbols replaced with boxes) unless
-            I specified the build as well as the version, i.e.
-            texlive-core=20210325=h97429d4_0
+            It would be nice to use conda-forge/texlive-core rather than r-tinytex because the
+            former installs texlive when the package is built, but issue 23 blocked PDF-creation.
+            Also, texlive-core gave pango font errors (output had missing symbols replaced
+            with boxes) unless I specified the build as well as the version when building a
+            conda environment, e.g.:  texlive-core=20210325=h97429d4_0
             -->
             <requirement type="package" version="0.37"    >r-tinytex</requirement>
             <requirement type="package" version="3.3.5"   >r-ggplot2</requirement>
             <requirement type="package" version="3.9.10"  >python</requirement>
             <requirement type="package" version="5.26.2"  >perl</requirement>
+            <requirement type="package" version="0.9.4"   >r-latex2exp</requirement>
+            <requirement type="package" version="0.3.7"   >r-vioplot</requirement>
         </requirements>
     </xml>
 </macros>
--- a/mqppep_anova.R	Tue Mar 22 23:12:18 2022 +0000
+++ b/mqppep_anova.R	Sat Mar 26 02:27:12 2022 +0000
@@ -32,7 +32,7 @@
   make_option(
     c("-f", "--firstDataColumn"),
     action = "store",
-    default = "10",
+    default = "^Intensity[^_]",
     type = "character",
     help = "First column of intensity values"
   ),
@@ -110,9 +110,12 @@
 if (! file.exists(args$inputFile)) {
   stop((paste("Input file", args$inputFile, "does not exist")))
 }
-input_file <- args$inputFile
-alpha_file <- args$alphaFile
-first_data_column <- args$firstDataColumn
+input_file             <- args$inputFile
+alpha_file             <- args$alphaFile
+imputed_data_file_name <- args$imputedDataFile
+imp_qn_lt_data_filenm  <- args$imputedQNLTDataFile
+report_file_name       <- args$reportFile
+
 imputation_method <- args$imputationMethod
 print(
   grepl(
@@ -133,42 +136,49 @@
     return(-1)
     }
 
+# read with default values, when applicable
 mean_percentile <- args$meanPercentile
-print("mean_percentile is:")
-cat(str(mean_percentile))
-
-sd_percentile <- args$sdPercentile
-print("sd_percentile is:")
-cat(str(mean_percentile))
-
+sd_percentile   <- args$sdPercentile
+# these values matter only for "random" imputation, so report them only then
+if (imputation_method == "random") {
+  print("mean_percentile is:")
+  cat(str(mean_percentile))
 
-regex_sample_names    <- gsub("^[ \t\n]*", "",
-                         readChar(args$regexSampleNames,  1000)
-                       )
-regex_sample_names    <- gsub("[ \t\n]*$", "",
-                         regex_sample_names
-                       )
-cat(regex_sample_names)
-cat("\n")
+  print("sd_percentile is:")
+  cat(str(sd_percentile))  # report the SD percentile itself, not the mean
+}
 
-regex_sample_grouping <- gsub("^[ \t\n]*", "",
-                           readChar(args$regexSampleGrouping, 1000)
-                         )
-regex_sample_grouping <- gsub("[ \t\n]*$", "",
-                           regex_sample_grouping
-                         )
-cat(regex_sample_grouping)
-cat("\n")
-
-imputed_data_file_name <- args$imputedDataFile
-imp_qn_lt_data_filenm <-  args$imputedQNLTDataFile
-report_file_name <- args$reportFile
-
-print("regex_sample_names is:")
-cat(str(regex_sample_names))
-
-print("regex_sample_grouping is:")
-cat(str(regex_sample_grouping))
+# convert string parameters that are passed in via config files:
+#  - firstDataColumn
+#  - regexSampleNames
+#  - regexSampleGrouping
+# Read up to `limit` chars from config file `fname`; trim whitespace; unescape.
+read_config_file_string <- function(fname, limit) {
+  # strip leading, then trailing, spaces/tabs/newlines
+  result <- gsub("^[ \t\n]*", "", readChar(fname, limit))
+  result <- gsub("[ \t\n]*$", "", result)
+  # tokens emitted by the Galaxy sanitizer and the characters they stand for;
+  #   applied in this order, matched literally (not as regular expressions)
+  unescape <- c(
+    "__lt__" = "<", "__le__" = "<=", "__eq__" = "==", "__ne__" = "!=",
+    "__gt__" = ">", "__ge__" = ">=", "__sq__" = "'", "__dq__" = '"',
+    "__ob__" = "[", "__cb__" = "]"
+  )
+  for (token in names(unescape)) {
+    result <- gsub(token, unescape[[token]], result, fixed = TRUE)
+  }
+  result  # explicit, visible return value (the cleaned string)
+}
+cat(paste0("first_data_column file: ", args$firstDataColumn, "\n"))
+cat(paste0("regex_sample_names file: ", args$regexSampleNames, "\n"))
+cat(paste0("regex_sample_grouping file: ", args$regexSampleGrouping, "\n"))
+nc <- 1000
+regex_sample_names <- read_config_file_string(args$regexSampleNames, nc)
+regex_sample_grouping <- read_config_file_string(args$regexSampleGrouping, nc)
+first_data_column <- read_config_file_string(args$firstDataColumn,  nc)
+cat(paste0("first_data_column: ",     first_data_column,     "\n"))
+cat(paste0("regex_sample_names: ",    regex_sample_names,    "\n"))
+cat(paste0("regex_sample_grouping: ", regex_sample_grouping, "\n"))
 
 # from: https://github.com/molgenis/molgenis-pipelines/wiki/
 #   How-to-source-another_file.R-from-within-your-R-script
--- a/mqppep_anova.xml	Tue Mar 22 23:12:18 2022 +0000
+++ b/mqppep_anova.xml	Sat Mar 26 02:27:12 2022 +0000
@@ -22,7 +22,7 @@
       \${CONDA_PREFIX}/bin/Rscript \$TEMP/mqppep_anova.R
         --inputFile '$input_file'
         --alphaFile '$alpha_file'
-        --firstDataColumn $first_data_column
+        --firstDataColumn $intensity_column_regex_f
         --imputationMethod $imputation.imputation_method
         #if $imputation.imputation_method == "random"
           --meanPercentile '$imputation.meanPercentile'
@@ -45,6 +45,9 @@
       <configfile name="sample_grouping_regex_f">
         $sample_grouping_regex
       </configfile>
+      <configfile name="intensity_column_regex_f">
+        $intensity_column_regex
+      </configfile>
     </configfiles>
     <inputs>
         <param name="input_file" type="data" format="tabular" label="Filtered Phosphopeptide Intensities"
@@ -53,9 +56,9 @@
         <param name="alpha_file" type="data" format="tabular" label="alpha cutoff level"
                help="[alpha_file] List of alpha cutoff values for significance testing; text file having one column and no header"
         />
-        <param name="first_data_column" type="text" value="Intensity"
-               label="First data column"
-               help="[first_data_column] First column having intensity values (integer or PERL-compatible regular expression matching column label)"
+        <param name="intensity_column_regex" type="text" value="^Intensity[^_]"
+               label="Intensity-column pattern"
+               help="[intensity_column_regex] Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)"
         />
         <!-- imputation_method <- c("group-median","median","mean","random")[1] -->
         <conditional name="imputation">
@@ -75,15 +78,15 @@
                        label="Mean percentile for random values"
                        help="[meanPercentile] Percentile center of random values; range [1,99]"
                 />
-                <param name="sdPercentile" type="float" value="0.2"
+                <param name="sdPercentile" type="float" value="1.0"
                        label="Percentile std. dev. for random values"
                        help="[sdPercentile] Standard deviation adjustment-factor for random values; real number.  (1.0 means SD equal to the SD for the entire data set.)"
                 />
             </when>
         </conditional>
         <param name="sample_names_regex" type="text" value="\.\d+[A-Z]$"
-               help="[sample_names_regex] PERL-compatible regular expression extracting sample-names from the the name of a spectrum file (without extension)"
-               label="Sample-extraction regex">
+               help="[sample_names_regex] Pattern extracting sample-names from names of columns that have peptide intensity data (PERL-compatible regular expression)"
+               label="Sample-extraction pattern">
           <sanitizer>
             <valid initial="string.printable">
               <remove value="&apos;"/>
@@ -91,8 +94,8 @@
           </sanitizer>
         </param>
         <param name="sample_grouping_regex" type="text" value="\d+"
-               help="[sample_grouping_regex] PERL-compatible regular expression extracting sample-group from each sample-name (i.e., extracted by previous regex pattern)"
-               label="Group-extraction regex">
+               help="[sample_grouping_regex] Pattern extracting sample-group from the sample-names that are extracted by 'Sample-extraction pattern' (PERL-compatible regular expression)"
+               label="Group-extraction pattern">
           <sanitizer>
             <valid initial="string.printable">
               <remove value="&apos;"/>
@@ -112,7 +115,7 @@
         <test>
             <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/>
             <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/>
-            <param name="first_data_column" value="10"/>
+            <param name="intensity_column_regex" value="^Intensity[^_]"/>
             <param name="imputation_method" value="group-median"/>
             <param name="sample_names_regex" value="\.\d+[A-Z]$"/>
             <param name="sample_grouping_regex" value="\d+"/>
@@ -128,10 +131,10 @@
         <test>
             <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/>
             <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/>
-            <param name="first_data_column" value="10"/>
+            <param name="intensity_column_regex" value="^Intensity[^_]"/>
             <param name="imputation_method" value="random"/>
             <param name="meanPercentile" value="1" />
-            <param name="sdPercentile" value="0.2" />
+            <param name="sdPercentile" value="1.0" />
             <param name="sample_names_regex" value="\.\d+[A-Z]$"/>
             <param name="sample_grouping_regex" value="\d+"/>
             <output name="imp_qn_lt_file">
@@ -167,7 +170,7 @@
 
 **Input parameters**
 
-``first_data_column``
+``intensity_column_regex``
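-  First column of ``input_file`` having intensity values (integer or PERL-compatible regular expression matching column label). Default: **Intensity**
+  Pattern matching columns of ``input_file`` that have peptide intensity data (PERL-compatible regular expression matching column label). Default: ``^Intensity[^_]``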
 
 ``imputation_method``
--- a/mqppep_anova_script.Rmd	Tue Mar 22 23:12:18 2022 +0000
+++ b/mqppep_anova_script.Rmd	Sat Mar 26 02:27:12 2022 +0000
@@ -8,24 +8,24 @@
   latex_document:
     toc: true
 params:
-  inputFile: "test-data/test_input_for_anova.tabular"
   alphaFile: "test-data/alpha_levels.tabular"
-  firstDataColumn: "Intensity"
+  inputFile: "test-data/UT_Phospho_ST_Sites.preproc.tabular"
+  firstDataColumn: "^Intensity[^_]"
   imputationMethod: !r c("group-median", "median", "mean", "random")[4]
   meanPercentile: 1
   sdPercentile: 1.0
   regexSampleNames: "\\.\\d+[A-Z]$"
   regexSampleGrouping: "\\d+"
-  imputedDataFilename: "test-data/imputedDataFilename.txt"
-  imputedQNLTDataFile: "test-data/imputedQNLTDataFile.txt"
+  imputedDataFilename: "test-data/limbo/imputedDataFilename.txt"
+  imputedQNLTDataFile: "test-data/limbo/imputedQNLTDataFile.txt"
   show_toc: true
 ---
 <!--
-  latex_document: default
+  alphaFile: "test-data/alpha_levels.tabular"
   inputFile: "test-data/test_input_for_anova.tabular"
+  inputFile: "test-data/UT_Phospho_ST_Sites.preproc.tabular"
   inputFile: "test-data/density_failure.preproc_tab.tabular"
-  inputFile: "test-data/UT_Phospho_STY_Sites.preproc_tab"
-date: "May 28, 2018; Mar 16, 2022"
+  latex_document: default
 -->
 ```{r setup, include = FALSE}
 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
@@ -44,6 +44,7 @@
   sqrt(const_stripchart_cex * const_stripchart_cex / 2)
 const_stripchart_jitter <- 0.3
 const_write_debug_files <- FALSE
+const_table_anchor <- "tbp"
 
 ### FUNCTIONS
 
@@ -103,7 +104,7 @@
 }
 
 # Use this like print.data.frame, from which it is adapted:
-print_data_frame_latex <-
+data_frame_latex <-
   function(
     x,
     ...,
@@ -116,7 +117,7 @@
     # string with justification of each column
     justification = NULL,
     # TRUE to center on page
-    centered = FALSE,
+    centered = TRUE,
-    # optional capttion
+    # optional caption
     caption = NULL,
     # h(inline); b(bottom); t (top) or p (separate page)
@@ -200,13 +201,10 @@
 
 ```
 
-## Purpose:
+## Purpose
 
 Perform imputation of missing values, quantile normalization, and ANOVA.
 
-<!--
-## Variables to change for each input file
--->
 ```{r include = FALSE}
 # Input Filename
 input_file <- params$inputFile
@@ -214,19 +212,25 @@
 # First data column - ideally, this could be detected via regexSampleNames,
 #   but for now leave it as is.
 first_data_column <- params$firstDataColumn
-fdc_is_integer <- TRUE
-first_data_column <- withCallingHandlers(
-    as.integer(first_data_column)
-  , warning = function(w) fdc_is_integer <<- FALSE
-  )
-if (FALSE == fdc_is_integer) {
-  first_data_column <- params$firstDataColumn
+fdc_is_integer <- is.integer(first_data_column)
+if (fdc_is_integer) {
+  first_data_column <- as.integer(params$firstDataColumn)
 }
 
 # False discovery rate adjustment for ANOVA
 #  Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05
 val_fdr <-
-  read.table(file = params$alphaFile, sep = "\t", header = F, quote = "")[, 1]
+  read.table(file = params$alphaFile, sep = "\t", header = F, quote = "")
+
+if (
+  ncol(val_fdr) != 1 ||
+  !is.numeric(val_fdr[, 1]) ||
+  anyNA(val_fdr[, 1]) ||          # NA would turn the range checks into NA
+  any(val_fdr[, 1] < 0 | val_fdr[, 1] > 1)
+) {
+  stop("alphaFile should be one column of numbers within the range [0.0,1.0]")
+}
+val_fdr <- val_fdr[, 1]
 
 #Imputed Data filename
 imputed_data_filename <- params$imputedDataFilename
@@ -274,32 +278,15 @@
   )
 ```
 
-### Parse column names, sample names, and factor levels from input file
+## Extract Sample Names and Factor Levels
+
+Column names parsed from input file are shown in Table 1; sample names and factor levels, in Table 2.
 
 ```{r echo = FALSE, results = 'asis'}
-# Write column naames as an enumerated list.
-column_name_df <- data.frame(
-  column = seq_len(length(colnames(full_data))),
-  name = colnames(full_data)
-  )
-print_data_frame_latex(
-  x = column_name_df,
-  justification = "l l",
-  centered = TRUE,
-  caption = "Input data column name",
-  anchor = "h"
-  )
 
 data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE)
-cat(
-  sprintf(
-    "\n\nData columns: [%d,%d]\n\n",
-    min(data_column_indices),
-    max(data_column_indices)
-    )
-  )
 
-if (FALSE == fdc_is_integer) {
+if (!fdc_is_integer) {
   if (length(data_column_indices) > 0) {
     first_data_column <- data_column_indices[1]
   } else {
@@ -307,6 +294,30 @@
   }
 }
 
+cat(
+  sprintf(
+    paste(
+      "\n\nPeptide-intensity data for each sample is",
+      "in one of columns %d through %d.\n\n"
+      ),
+    min(data_column_indices),
+    max(data_column_indices)
+    )
+  )
+
+# Write column names as a LaTeX enumerated list.
+column_name_df <- data.frame(
+  column = seq_len(length(colnames(full_data))),
+  name = colnames(full_data)
+  )
+data_frame_latex(
+  x = column_name_df,
+  justification = "l l",
+  centered = TRUE,
+  caption = "Input data column name",
+  anchor = const_table_anchor
+  )
+
 ```
 
 ```{r echo = FALSE, results = 'asis'}
@@ -336,12 +347,12 @@
   sample = sample_name_matches,
   level = sample_factor_levels
   )
-print_data_frame_latex(
+data_frame_latex(
   x = sample_factor_df,
   justification = "c c",
   centered = TRUE,
   caption = "Factor level",
-  anchor = "h"
+  anchor = const_table_anchor
   )
 ```
 ```{r echo = FALSE, results = 'asis'}
@@ -370,18 +381,9 @@
   quant_data_log
 , las = 1
 , col = const_boxplot_fill
+, ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
+, xlab = "Sample"
 )
-# Points
-stripchart(
-  quant_data_log, # Data
-  method = "jitter",          # Random noise
-  jitter = const_stripchart_jitter,
-  pch = 19,                   # Pch symbols
-  cex = const_stripchart_cex, # Size of symbols reduced
-  col = "goldenrod",          # Color of the symbol
-  vertical = TRUE,            # Vertical mode
-  add = TRUE                  # Add it over
-  )
 par(old_par)
 
 
@@ -398,7 +400,8 @@
   ggplot(
     quant_data_log_stack,
     aes(x = values)
-    ) +
+    ) + xlab(latex2exp::TeX("$log_{10}$(peptide intensity)")) +
+    ylab("Probability density") +
     geom_density(
       aes(group = ind, colour = ind),
       na.rm = TRUE
@@ -411,7 +414,10 @@
-### Globally, are peptide intensities are approximately unimodal?
+### Globally, are peptide intensities approximately unimodal?
 
 <!--
-# ref for bquote below particularly and plotting math expressions generally:
+# bquote could be used as an alternative to latex2exp::TeX below particularly
+#   and when plotting math expressions generally, at the expense of mastering
+#   another syntax, which hardly seems worthwhile when I need to use TeX
+#   elsewhere; here's an introduction to bquote:
 #   https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/
 -->
 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5), results = 'asis'}
@@ -420,17 +426,21 @@
 fin <- is.finite(as.numeric(as.matrix(quant_data_log)))
 
 logvalues <- as.numeric(as.matrix(quant_data_log))[fin]
+logvalues_density <- density(logvalues)
 plot(
-  density(logvalues),
-  main = bquote(
-    "Smoothed estimated probability density vs." ~ log[10](intensity)),
-  xlab = bquote(log[10](intensity))
+  x = logvalues_density,
+  main = latex2exp::TeX(
+    "Smoothed estimated probability density vs. $log_{10}$(peptide intensity)"
+    ),
+  xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"),
+  ylab = "Probability density"
   )
 hist(
-  x = as.numeric(as.matrix(quant_data_log))
-, breaks = 100
-, main = bquote("Frequency vs." ~ log[10](intensity))
-, xlab = bquote(log[10](intensity))
+  x = as.numeric(as.matrix(quant_data_log)),
+  xlim = c(min(logvalues_density$x), max(logvalues_density$x)),
+  breaks = 100,
+  main = latex2exp::TeX("Frequency vs. $log_{10}$(peptide intensity)"),
+  xlab = latex2exp::TeX("$log_{10}$(peptide intensity)")
 )
 ```
 
@@ -452,6 +462,7 @@
     density(sds, na.rm = T)
   , main = "Smoothed estimated probability density vs. std. deviation"
   , sub = "(probability estimation made with Gaussian smoothing)"
+  , ylab = "Probability density"
   )
 } else {
   cat(
@@ -510,7 +521,7 @@
 , "group-median" = {
     imputation_method_description <-
       paste("Substitute missing value with",
-        "median peptide intensity for sample group\n"
+        "median peptide intensity for sample group.\n"
         )
     sample_level_integers <- as.integer(sample_factor_levels)
     for (i in seq_len(length(levels(sample_factor_levels)))) {
@@ -524,7 +535,7 @@
 , "median" = {
     imputation_method_description <-
       paste("Substitute missing value with",
-        "median peptide intensity across all sample classes\n"
+        "median peptide intensity across all sample classes.\n"
         )
     quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]]
     good_rows <- !is.na(rowMeans(quant_data_imp))
@@ -532,7 +543,7 @@
 , "mean" = {
     imputation_method_description <-
       paste("Substitute missing value with",
-        "mean peptide intensity across all sample classes\n"
+        "mean peptide intensity across all sample classes.\n"
         )
     quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]]
     good_rows <- !is.na(rowMeans(quant_data_imp))
@@ -544,7 +555,7 @@
     imputation_method_description <-
       paste("Substitute each missing value with random intensity",
         sprintf(
-          "random intensity $N \\sim (%0.2f, %0.2f)$\n",
+          "$N \\sim (%0.2f, %0.2f)$.\n",
           q1, m1
           )
         )
@@ -552,7 +563,6 @@
       100 * mean_percentile))
     cat(sprintf("sd_percentile (from input parameter) is %0.2f\n\n",
       sd_percentile))
-    #ACE cat(sprintf("sd for rnorm is %0.4f\n\n", m1))
     quant_data_imp[ind] <-
       10 ^ rnorm(number_to_impute, mean = q1, sd = m1)
     good_rows <- !is.na(rowMeans(quant_data_imp))
@@ -684,11 +694,20 @@
   write_debug_file(quant_data_imp_log10)
 
   red_dots <- quant_data_imp_log10 * x
-  ylim <- c(
+  count_red <- sum(!is.na(red_dots))
+  count_blue <- sum(!is.na(blue_dots))
+  ylim_save <- ylim <- c(
     min(red_dots, blue_dots, na.rm = TRUE),
     max(red_dots, blue_dots, na.rm = TRUE)
     )
-  # ref: https://r-charts.com/distribution/add-points-boxplot/
+  show_stripchart <-
+    50 > (count_red + count_blue) / length(sample_name_matches)
+  if (show_stripchart) {
+    boxplot_sub <- "Light blue = data before imputation; Red = imputed data"
+  } else {
+    boxplot_sub <- ""
+  }
+
   # Vertical plot
   colnames(blue_dots) <- sample_name_matches
   boxplot(
@@ -697,32 +716,59 @@
     , col = const_boxplot_fill
     , ylim = ylim
     , main = "Peptide intensities before and after imputation"
-    , sub = "Light blue = data before imputation; Red = imputed data"
+    , sub = boxplot_sub
     , xlab = "Sample"
-    , ylab = "log10(peptide intensity)"
+    , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
     )
-  # Points
-  # NA values are not plotted
-  stripchart(
-    blue_dots,                 # Data
-    method = "jitter",          # Random noise
-    jitter = const_stripchart_jitter,
-    pch = 19,                   # Pch symbols
-    cex = const_stripsmall_cex, # Size of symbols reduced
-    col = "lightblue",          # Color of the symbol
-    vertical = TRUE,            # Vertical mode
-    add = TRUE                  # Add it over
-    )
-  stripchart(
-    red_dots,                   # Data
-    method = "jitter",          # Random noise
-    jitter = const_stripchart_jitter,
-    pch = 19,                   # Pch symbols
-    cex = const_stripsmall_cex, # Size of symbols reduced
-    col = "red",                # Color of the symbol
-    vertical = TRUE,            # Vertical mode
-    add = TRUE                  # Add it over
-    )
+
+  if (show_stripchart) {
+    # Points
+    # ref: https://r-charts.com/distribution/add-points-boxplot/
+    # NA values are not plotted
+    stripchart(
+      blue_dots,                 # Data
+      method = "jitter",          # Random noise
+      jitter = const_stripchart_jitter,
+      pch = 19,                   # Pch symbols
+      cex = const_stripsmall_cex, # Size of symbols reduced
+      col = "lightblue",          # Color of the symbol
+      vertical = TRUE,            # Vertical mode
+      add = TRUE                  # Add it over
+      )
+    stripchart(
+      red_dots,                   # Data
+      method = "jitter",          # Random noise
+      jitter = const_stripchart_jitter,
+      pch = 19,                   # Pch symbols
+      cex = const_stripsmall_cex, # Size of symbols reduced
+      col = "red",                # Color of the symbol
+      vertical = TRUE,            # Vertical mode
+      add = TRUE                  # Add it over
+      )
+
+  } else {
+    # violin plot
+    cat("\\leavevmode\n\\quad\n\n\\quad\n\n")
+    vioplot::vioplot(
+      x = lapply(blue_dots, function(x) x[!is.na(x)]),
+      col = "lightblue1",
+      side = "left",
+      plotCentre = "line",
+      ylim = ylim_save,
+      main = "Distributions of observed and imputed data",
+      sub = "Light blue = observed data; Pink = imputed data",
+      xlab = "Sample",
+      ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
+      )
+    vioplot::vioplot(
+      x = lapply(red_dots, function(x) x[!is.na(x)]),
+      col = "lightpink1",
+      side = "right",              # imputed data on the right half
+      plotCentre = "line",
+      add = TRUE                   # overlay on the observed-data violins
+      )
+  }
+
   par(old_par)
 
   # density plot
@@ -738,7 +784,7 @@
           "Black = combined"
           ),
       main = "Density of peptide intensity before and after imputation",
-      xlab = "log10(peptide intensity)",
+      xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"),
       ylab = "Probability density"
     )
     lines(d_original, col = "blue")
@@ -909,18 +955,9 @@
     quant_data_log
   , las = 1
   , col = const_boxplot_fill
+  , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
+  , xlab = "Sample"
   )
-  # Points
-  stripchart(
-    quant_data_log, # Data
-    method = "jitter",          # Random noise
-    jitter = const_stripchart_jitter,
-    pch = 19,                   # Pch symbols
-    cex = const_stripchart_cex, # Size of symbols reduced
-    col = "goldenrod",          # Color of the symbol
-    vertical = TRUE,            # Vertical mode
-    add = TRUE                  # Add it over
-    )
   par(old_par)
 } else {
   cat("There are no peptides to plot\n")
@@ -936,7 +973,8 @@
   ggplot(
     quant_data_log_stack,
     aes(x = values)
-    ) +
+    ) + xlab(latex2exp::TeX("$log_{10}$(peptide intensity)")) +
+    ylab("Probability density") +
     geom_density(
       aes(group = ind, colour = ind),
       na.rm = TRUE
@@ -949,7 +987,7 @@
 cat("\\leavevmode\\newpage\n")
 ```
 
-## Perform ANOVA filters
+## Perform ANOVA Filters
 
 ```{r, echo = FALSE}
 # Make new data frame containing only Phosphopeptides
@@ -1125,24 +1163,13 @@
         main = "Imputed, normalized intensities", # no line plot
         las = 1,
         col = const_boxplot_fill,
-        ylab = expression(log[10](intensity))
+        ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
       )
-      # Points
-      stripchart(
-        filtered_data_filtered,     # Data
-        method = "jitter",          # Random noise
-        jitter = const_stripchart_jitter,
-        pch = 19,                   # Pch symbols
-        cex = const_stripchart_cex, # Size of symbols reduced
-        col = "goldenrod",          # Color of the symbol
-        vertical = TRUE,            # Vertical mode
-        add = TRUE                  # Add it over
-        )
       par(old_par)
     } else {
       cat(sprintf(
         "%s < %0.2f\n\n\n\n\n",
-        "No peptides were found to have cutoff adjusted p-value <",
+        "No peptides were found to have cutoff adjusted p-value",
         cutoff
       ))
     }
@@ -1229,11 +1256,12 @@
           )
         } else {
           if (nrow(m) == 1) {
+            next
+          } else {
             cat(
               sprintf("Heatmap for %d usable peptides whose", nrow(m)),
               sprintf("adjusted p-value < %0.2f\n", cutoff)
             )
-            next
           }
         }
         cat("\n\n\n")
@@ -1263,9 +1291,3 @@
 }
 cat("\\leavevmode\n\n\n")
 ```
-
-<!--
-## Peptide IDs, etc.
-
-See output files.
--->
--- a/workflow/ppenrich_suite_wf.ga	Tue Mar 22 23:12:18 2022 +0000
+++ b/workflow/ppenrich_suite_wf.ga	Sat Mar 26 02:27:12 2022 +0000
@@ -28,27 +28,155 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 346.3999938964844,
-                "height": 81.89999389648438,
-                "left": 495,
-                "right": 695,
-                "top": 264.5,
-                "width": 200,
-                "x": 495,
-                "y": 264.5
+                "bottom": 336.19168853759766,
+                "height": 82.20000457763672,
+                "left": 482.5333557128906,
+                "right": 682.5333709716797,
+                "top": 253.99168395996094,
+                "width": 200.00001525878906,
+                "x": 482.5333557128906,
+                "y": 253.99168395996094
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "21c3c29d-9e8c-4ece-b585-9e68fed7a93f",
+            "uuid": "78170155-4d6c-461b-a289-bcf0196b87db",
             "workflow_outputs": []
         },
         "1": {
+            "annotation": "Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)",
+            "content_id": null,
+            "errors": null,
+            "id": 1,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)",
+                    "name": "Intensity-column pattern"
+                }
+            ],
+            "label": "Intensity-column pattern",
+            "name": "Input parameter",
+            "outputs": [],
+            "position": {
+                "bottom": 443.9083480834961,
+                "height": 82.20000457763672,
+                "left": 510.5917053222656,
+                "right": 710.5917205810547,
+                "top": 361.7083435058594,
+                "width": 200.00001525878906,
+                "x": 510.5917053222656,
+                "y": 361.7083435058594
+            },
+            "tool_id": null,
+            "tool_state": "{\"default\": \"^Intensity[^_]\", \"parameter_type\": \"text\", \"optional\": true}",
+            "tool_version": null,
+            "type": "parameter_input",
+            "uuid": "b1a27840-417c-4e81-9fc3-7d3e3dee6753",
+            "workflow_outputs": []
+        },
+        "2": {
+            "annotation": "Pattern extracting sample-names from names of columns that have peptide intensity data (PERL-compatible regular expression)",
+            "content_id": null,
+            "errors": null,
+            "id": 2,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "Pattern extracting sample-names from names of columns that have peptide intensity data (PERL-compatible regular expression)",
+                    "name": "Sample-extraction pattern"
+                }
+            ],
+            "label": "Sample-extraction pattern",
+            "name": "Input parameter",
+            "outputs": [],
+            "position": {
+                "bottom": 555.9083480834961,
+                "height": 82.20000457763672,
+                "left": 526.6583862304688,
+                "right": 726.6584014892578,
+                "top": 473.7083435058594,
+                "width": 200.00001525878906,
+                "x": 526.6583862304688,
+                "y": 473.7083435058594
+            },
+            "tool_id": null,
+            "tool_state": "{\"default\": \"\\\\.\\\\d+[A-Z]$\", \"parameter_type\": \"text\", \"optional\": true}",
+            "tool_version": null,
+            "type": "parameter_input",
+            "uuid": "05fe662c-b0a9-4ae7-8232-1b0a09261ac7",
+            "workflow_outputs": []
+        },
+        "3": {
+            "annotation": "Pattern extracting sample-group from the sample-names that are extracted by 'Sample-extraction pattern' (PERL-compatible regular expression)",
+            "content_id": null,
+            "errors": null,
+            "id": 3,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "Pattern extracting sample-group from the sample-names that are extracted by 'Sample-extraction pattern' (PERL-compatible regular expression)",
+                    "name": "Group-extraction pattern"
+                }
+            ],
+            "label": "Group-extraction pattern",
+            "name": "Input parameter",
+            "outputs": [],
+            "position": {
+                "bottom": 671.9083786010742,
+                "height": 82.20000457763672,
+                "left": 530.7000122070312,
+                "right": 730.7000274658203,
+                "top": 589.7083740234375,
+                "width": 200.00001525878906,
+                "x": 530.7000122070312,
+                "y": 589.7083740234375
+            },
+            "tool_id": null,
+            "tool_state": "{\"default\": \"\\\\d+\", \"parameter_type\": \"text\", \"optional\": true}",
+            "tool_version": null,
+            "type": "parameter_input",
+            "uuid": "55d4b63b-8863-4ee4-b504-4b436c6e9684",
+            "workflow_outputs": []
+        },
+        "4": {
+            "annotation": "THIS IS pST BY DEFAULT.  Change if your data are enriched for pY.",
+            "content_id": null,
+            "errors": null,
+            "id": 4,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "THIS IS pST BY DEFAULT.  Change if your data are enriched for pY.",
+                    "name": "enrichmentType"
+                }
+            ],
+            "label": "enrichmentType",
+            "name": "Input parameter",
+            "outputs": [],
+            "position": {
+                "bottom": 788.8083648681641,
+                "height": 61.80000305175781,
+                "left": 534.5750122070312,
+                "right": 734.5750274658203,
+                "top": 727.0083618164062,
+                "width": 200.00001525878906,
+                "x": 534.5750122070312,
+                "y": 727.0083618164062
+            },
+            "tool_id": null,
+            "tool_state": "{\"restrictions\": [\"pST\", \"pY\"], \"parameter_type\": \"text\", \"optional\": false}",
+            "tool_version": null,
+            "type": "parameter_input",
+            "uuid": "644c2a42-475d-4757-a58b-46d0d86323d3",
+            "workflow_outputs": []
+        },
+        "5": {
             "annotation": "FASTA file of all human canonical isoforms, derived from Swiss-Prot (e.g., merge of https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot_varsplic.fasta.gz and https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz)",
             "content_id": null,
             "errors": null,
-            "id": 1,
+            "id": 5,
             "input_connections": {},
             "inputs": [
                 {
@@ -60,27 +188,27 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 708.8000030517578,
-                "height": 102.30000305175781,
-                "left": 685,
-                "right": 885,
-                "top": 606.5,
-                "width": 200,
-                "x": 685,
-                "y": 606.5
+                "bottom": 984.6083679199219,
+                "height": 102.60000610351562,
+                "left": 559.5916748046875,
+                "right": 759.5916900634766,
+                "top": 882.0083618164062,
+                "width": 200.00001525878906,
+                "x": 559.5916748046875,
+                "y": 882.0083618164062
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"fasta\"]}",
+            "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": \"\"}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "5da7317c-4def-48f3-8eac-af95bd18b290",
+            "uuid": "c190d96f-c9ee-413b-ba19-75808b0eff55",
             "workflow_outputs": []
         },
-        "2": {
+        "6": {
             "annotation": "Derived from https://networkin.info/download/networkin_human_predictions_3.1.tsv.xz (which is free for non-commercial use - for required citation, see https://networkin.info/)",
             "content_id": null,
             "errors": null,
-            "id": 2,
+            "id": 6,
             "input_connections": {},
             "inputs": [
                 {
@@ -92,27 +220,27 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 853.8000030517578,
-                "height": 102.30000305175781,
-                "left": 696,
-                "right": 896,
-                "top": 751.5,
-                "width": 200,
-                "x": 696,
-                "y": 751.5
+                "bottom": 1115.6083679199219,
+                "height": 102.60000610351562,
+                "left": 576.5916748046875,
+                "right": 776.5916900634766,
+                "top": 1013.0083618164062,
+                "width": 200.00001525878906,
+                "x": 576.5916748046875,
+                "y": 1013.0083618164062
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "2edff8de-4379-45e2-b6b9-6ed4706bbf00",
+            "uuid": "d2783d72-6639-4275-8726-6e32b956aaca",
             "workflow_outputs": []
         },
-        "3": {
+        "7": {
             "annotation": "Derived from http://hprd.org/serine_motifs, http://hprd.org/tyrosine_motifs, and http://pegasus.biochem.mpg.de/phosida/help/motifs.aspx",
             "content_id": null,
             "errors": null,
-            "id": 3,
+            "id": 7,
             "input_connections": {},
             "inputs": [
                 {
@@ -124,27 +252,27 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 977.3999938964844,
-                "height": 81.89999389648438,
-                "left": 708,
-                "right": 908,
-                "top": 895.5,
-                "width": 200,
-                "x": 708,
-                "y": 895.5
+                "bottom": 1224.2084274291992,
+                "height": 82.20000457763672,
+                "left": 593.6000366210938,
+                "right": 793.6000518798828,
+                "top": 1142.0084228515625,
+                "width": 200.00001525878906,
+                "x": 593.6000366210938,
+                "y": 1142.0084228515625
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "86ebaaf2-b050-4eca-a88b-23a4c1af39f5",
+            "uuid": "e8c138eb-fb7d-4b4b-a5cc-25e000f555e1",
             "workflow_outputs": []
         },
-        "4": {
+        "8": {
             "annotation": "Derived from Kinase_Substrate_Dataset.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use  - see that link for citation.)",
             "content_id": null,
             "errors": null,
-            "id": 4,
+            "id": 8,
             "input_connections": {},
             "inputs": [
                 {
@@ -156,27 +284,27 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 1126.8000030517578,
-                "height": 102.30000305175781,
-                "left": 729,
-                "right": 929,
-                "top": 1024.5,
-                "width": 200,
-                "x": 729,
-                "y": 1024.5
+                "bottom": 1354.6084289550781,
+                "height": 102.60000610351562,
+                "left": 593.5916748046875,
+                "right": 793.5916900634766,
+                "top": 1252.0084228515625,
+                "width": 200.00001525878906,
+                "x": 593.5916748046875,
+                "y": 1252.0084228515625
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "92f16705-a19c-4fb9-b278-3ae8e11f09d8",
+            "uuid": "d6aea327-574a-4657-ba6b-13fa973c182a",
             "workflow_outputs": []
         },
-        "5": {
+        "9": {
             "annotation": "Derived from Regulatory_sites.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use  - see that link for citation.)",
             "content_id": null,
             "errors": null,
-            "id": 5,
+            "id": 9,
             "input_connections": {},
             "inputs": [
                 {
@@ -188,27 +316,27 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 1251.3999938964844,
-                "height": 81.89999389648438,
-                "left": 745,
-                "right": 945,
-                "top": 1169.5,
-                "width": 200,
-                "x": 745,
-                "y": 1169.5
+                "bottom": 1464.2084274291992,
+                "height": 82.20000457763672,
+                "left": 594.5916748046875,
+                "right": 794.5916900634766,
+                "top": 1382.0084228515625,
+                "width": 200.00001525878906,
+                "x": 594.5916748046875,
+                "y": 1382.0084228515625
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "5ab49d93-11e4-4e91-b30b-92269b319879",
+            "uuid": "1cda2502-f848-47d0-af07-d9997021b8c4",
             "workflow_outputs": []
         },
-        "6": {
+        "10": {
             "annotation": "List of alpha cutoff values for significance testing; text file having no header and a single line for each cutoff value.",
             "content_id": null,
             "errors": null,
-            "id": 6,
+            "id": 10,
             "input_connections": {},
             "inputs": [
                 {
@@ -220,34 +348,34 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 1501.8999938964844,
-                "height": 81.89999389648438,
-                "left": 727,
-                "right": 927,
-                "top": 1420,
-                "width": 200,
-                "x": 727,
-                "y": 1420
+                "bottom": 1723.7084274291992,
+                "height": 82.20000457763672,
+                "left": 611.558349609375,
+                "right": 811.5583648681641,
+                "top": 1641.5084228515625,
+                "width": 200.00001525878906,
+                "x": 611.558349609375,
+                "y": 1641.5084228515625
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "481c627c-a4ce-45d7-b659-4f54692aafc7",
+            "uuid": "f59256bb-c823-48d6-923e-e54c6d04e155",
             "workflow_outputs": []
         },
-        "7": {
-            "annotation": "",
+        "11": {
+            "annotation": "Transform the output of MaxQuant for phosphoproteome-enriched samples to prepare it for statistical analysis.",
             "content_id": "mqppep_preproc",
             "errors": null,
-            "id": 7,
+            "id": 11,
             "input_connections": {
                 "networkin": {
-                    "id": 2,
+                    "id": 6,
                     "output_name": "output"
                 },
                 "p_sty_motifs": {
-                    "id": 3,
+                    "id": 7,
                     "output_name": "output"
                 },
                 "phosphoSites": {
@@ -255,20 +383,28 @@
                     "output_name": "output"
                 },
                 "protein_fasta": {
-                    "id": 1,
+                    "id": 5,
                     "output_name": "output"
                 },
                 "psp_kinase_substrate": {
+                    "id": 8,
+                    "output_name": "output"
+                },
+                "psp_regulatory_sites": {
+                    "id": 9,
+                    "output_name": "output"
+                },
+                "pst_py_selector": {
                     "id": 4,
                     "output_name": "output"
                 },
-                "psp_regulatory_sites": {
-                    "id": 5,
+                "startCol": {
+                    "id": 1,
                     "output_name": "output"
                 }
             },
             "inputs": [],
-            "label": null,
+            "label": "Preprocess MaxQuant Phospho (STY)Sites",
             "name": "MaxQuant Phosphopeptide Preprocessing",
             "outputs": [
                 {
@@ -325,16 +461,46 @@
                 }
             ],
             "position": {
-                "bottom": 1408.7000122070312,
-                "height": 793.2000122070312,
-                "left": 1138.5,
-                "right": 1338.5,
-                "top": 615.5,
-                "width": 200,
-                "x": 1138.5,
-                "y": 615.5
+                "bottom": 1520.0000610351562,
+                "height": 936.0000610351562,
+                "left": 1257.10009765625,
+                "right": 1457.100112915039,
+                "top": 584,
+                "width": 200.00001525878906,
+                "x": 1257.10009765625,
+                "y": 584
             },
             "post_job_actions": {
+                "HideDatasetActionfilteredData_tabular": {
+                    "action_arguments": {},
+                    "action_type": "HideDatasetAction",
+                    "output_name": "filteredData_tabular"
+                },
+                "HideDatasetActionmapped_phophopeptides": {
+                    "action_arguments": {},
+                    "action_type": "HideDatasetAction",
+                    "output_name": "mapped_phophopeptides"
+                },
+                "HideDatasetActionmelted_phophopeptide_map": {
+                    "action_arguments": {},
+                    "action_type": "HideDatasetAction",
+                    "output_name": "melted_phophopeptide_map"
+                },
+                "HideDatasetActionmqppep_output_sqlite": {
+                    "action_arguments": {},
+                    "action_type": "HideDatasetAction",
+                    "output_name": "mqppep_output_sqlite"
+                },
+                "HideDatasetActionpreproc_csv": {
+                    "action_arguments": {},
+                    "action_type": "HideDatasetAction",
+                    "output_name": "preproc_csv"
+                },
+                "HideDatasetActionquantData_tabular": {
+                    "action_arguments": {},
+                    "action_type": "HideDatasetAction",
+                    "output_name": "quantData_tabular"
+                },
                 "RenameDatasetActionenrichGraph": {
                     "action_arguments": {
                         "newname": "#{phosphoSites}.enrichGraph_pdf"
@@ -428,101 +594,73 @@
                 }
             },
             "tool_id": "mqppep_preproc",
-
-
-
-
-
-
-            "tool_state": "{\"collapseFunc\": \"sum\", \"intervalCol\": \"1\", \"localProbCutoff\": \"0.75\", \"merge_function\": \"sum\", \"networkin\": {\"__class__\": \"ConnectedValue\"}, \"p_sty_motifs\": {\"__class__\": \"ConnectedValue\"}, \"phosphoCol\": \"^Number of Phospho [(]STY[)]$\", \"phosphoSites\": {\"__class__\": \"ConnectedValue\"}, \"protein_fasta\": {\"__class__\": \"ConnectedValue\"}, \"psp_kinase_substrate\": {\"__class__\": \"ConnectedValue\"}, \"psp_regulatory_sites\": {\"__class__\": \"ConnectedValue\"}, \"pst_not_py\": \"true\", \"pst_py_selector\": \"st\", \"species\": \"human\", \"startCol\": \"^Intensity[^_]\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "tool_state": "{\"collapseFunc\": \"sum\", \"intervalCol\": \"1\", \"localProbCutoff\": \"0.75\", \"merge_function\": \"sum\", \"networkin\": {\"__class__\": \"ConnectedValue\"}, \"p_sty_motifs\": {\"__class__\": \"ConnectedValue\"}, \"phosphoCol\": \"^Number of Phospho [(]STY[)]$\", \"phosphoSites\": {\"__class__\": \"ConnectedValue\"}, \"protein_fasta\": {\"__class__\": \"ConnectedValue\"}, \"psp_kinase_substrate\": {\"__class__\": \"ConnectedValue\"}, \"psp_regulatory_sites\": {\"__class__\": \"ConnectedValue\"}, \"pst_py_selector\": {\"__class__\": \"ConnectedValue\"}, \"species\": \"human\", \"startCol\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
             "tool_version": null,
             "type": "tool",
-            "uuid": "1e983dba-edca-4aed-a589-49b1651b4a85",
+            "uuid": "fc558352-b85e-438a-b94f-951f73768dfd",
             "workflow_outputs": [
                 {
-                    "label": "preproc_csv",
-                    "output_name": "preproc_csv",
-                    "uuid": "8cba5367-b25c-40e0-a324-552225b1aa1d"
-                },
-                {
                     "label": "locProbCutoffGraph_pdf",
                     "output_name": "locProbCutoffGraph",
-                    "uuid": "641c5959-dab4-42d1-986d-8e6aaeb74ef6"
-                },
-                {
-                    "label": "melted_phosphopeptide_map",
-                    "output_name": "melted_phophopeptide_map",
-                    "uuid": "878dc817-26a3-4061-9dd4-56e737b3c4f7"
+                    "uuid": "56f5f9b8-ae81-4743-abf8-1510b895eb2e"
                 },
                 {
                     "label": "enrichGraph_svg",
                     "output_name": "enrichGraph_svg",
-                    "uuid": "4492366c-945e-492f-8381-1c97c4da2264"
+                    "uuid": "c4c8da47-9bad-4dc1-9205-374233e6c555"
                 },
                 {
                     "label": "locProbCutoffGraph_svg",
                     "output_name": "locProbCutoffGraph_svg",
-                    "uuid": "06faf93c-5f04-4cb3-9e41-58e465f6180e"
-                },
-                {
-                    "label": "filteredData",
-                    "output_name": "filteredData_tabular",
-                    "uuid": "76e2e268-f728-45f0-9973-793fbde0dd0a"
-                },
-                {
-                    "label": "ppep_map",
-                    "output_name": "mapped_phophopeptides",
-                    "uuid": "d0fea028-2ea5-4862-8a92-c2088edfcbe1"
-                },
-                {
-                    "label": "ppep_mapping_sqlite",
-                    "output_name": "mqppep_output_sqlite",
-                    "uuid": "eb996931-c548-4f3b-aaaa-39cc711df516"
+                    "uuid": "40e1ef7c-a7ff-4ba4-8b8c-08be988d9b90"
                 },
                 {
                     "label": "preproc_tab",
                     "output_name": "preproc_tab",
-                    "uuid": "c9410cf1-44a2-4aa6-b3df-06cef74f3a45"
+                    "uuid": "c53a5f0b-da40-4dc0-8e73-41f518b7844e"
                 },
                 {
                     "label": "preproc_sqlite",
                     "output_name": "preproc_sqlite",
-                    "uuid": "4eb22cc3-5879-4625-89c0-e0fddb01a197"
+                    "uuid": "124ddcad-8fbd-489c-80ce-48cf2bc3c78c"
                 },
                 {
                     "label": "ppep_intensities",
                     "output_name": "phosphoPepIntensities",
-                    "uuid": "c704fd66-5ac3-4779-ad40-536955cd81e3"
+                    "uuid": "25f871f9-6017-4209-ab8b-4551f3d34a79"
                 },
                 {
                     "label": "enrichGraph_pdf",
                     "output_name": "enrichGraph",
-                    "uuid": "5bf2a478-0431-4d32-84a9-7d46aad80ec5"
-                },
-                {
-                    "label": "quantData",
-                    "output_name": "quantData_tabular",
-                    "uuid": "cc922a75-6e72-4e60-add2-4b6ed8f73cdb"
+                    "uuid": "e2d9f99e-23ec-4da8-aa0b-7ce4f59ea713"
                 }
             ]
         },
-        "8": {
+        "12": {
             "annotation": "Perform ANOVA. For imputing missing values, use median of non-missing values from the same treatment group.",
             "content_id": "mqppep_anova",
             "errors": null,
-            "id": 8,
+            "id": 12,
             "input_connections": {
                 "alpha_file": {
-                    "id": 6,
+                    "id": 10,
                     "output_name": "output"
                 },
                 "input_file": {
-                    "id": 7,
+                    "id": 11,
                     "output_name": "preproc_tab"
+                },
+                "sample_grouping_regex": {
+                    "id": 3,
+                    "output_name": "output"
+                },
+                "sample_names_regex": {
+                    "id": 2,
+                    "output_name": "output"
                 }
             },
             "inputs": [],
-            "label": "MaxQuant Phosphopeptide ANOVA group-median imputed",
+            "label": "ANOVA group-median imputed",
             "name": "MaxQuant Phosphopeptide ANOVA",
             "outputs": [
                 {
@@ -539,17 +677,17 @@
                 }
             ],
             "position": {
-                "bottom": 1775.6000061035156,
-                "height": 255.60000610351562,
-                "left": 1370,
-                "right": 1570,
-                "top": 1520,
-                "width": 200,
-                "x": 1370,
-                "y": 1520
+                "bottom": 2134.7084350585938,
+                "height": 347.20001220703125,
+                "left": 948.6083984375,
+                "right": 1148.608413696289,
+                "top": 1787.5084228515625,
+                "width": 200.00001525878906,
+                "x": 948.6083984375,
+                "y": 1787.5084228515625
             },
             "post_job_actions": {
-                "RenameDatasetActionimputed_data_file": {
+                "RenameDatasetActionimp_qn_lt_file": {
                     "action_arguments": {
                         "newname": "#{input_file}.intensities_group-mean-imputed_QN_LT"
                     },
@@ -565,48 +703,56 @@
                 },
                 "RenameDatasetActionreport_file": {
                     "action_arguments": {
-                        "newname": "#{input_file}.intensities_group-mean-imputed_report (download/unzip to view)"
+                        "newname": "#{input_file}.intensities_group-mean-imputed_report"
                     },
                     "action_type": "RenameDatasetAction",
                     "output_name": "report_file"
                 }
             },
             "tool_id": "mqppep_anova",
-            "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"group-median\", \"__current_case__\": 0}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"imputation\": {\"imputation_method\": \"group-median\", \"__current_case__\": 0}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"intensity_column_regex\": \"^Intensity[^_]\", \"sample_grouping_regex\": {\"__class__\": \"ConnectedValue\"}, \"sample_names_regex\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
             "tool_version": null,
             "type": "tool",
-            "uuid": "ffa771c3-c52d-42a4-b78f-a60a39678792",
+            "uuid": "7002fe3a-bd4d-4d1d-9caf-1193606e3368",
             "workflow_outputs": [
                 {
                     "label": "intensities_group-mean-imputed_QN_LT",
                     "output_name": "imp_qn_lt_file",
-                    "uuid": "0e77a2e0-bd7a-4487-aaa6-ead2469509a2"
+                    "uuid": "61a62d74-1026-43ee-80f2-8c3905ebdeb5"
                 },
                 {
                     "label": "intensities_group-mean-imputed",
                     "output_name": "imputed_data_file",
-                    "uuid": "169d677f-0acb-4c56-b057-21f4aaf2b920"
+                    "uuid": "c455ec37-f6c7-4abb-af6b-8ac6bea6d1a5"
                 },
                 {
                     "label": "intensities_group-mean-imputed_report",
                     "output_name": "report_file",
-                    "uuid": "25edae88-3bb6-4ec9-8b98-686fded7ed79"
+                    "uuid": "9e138ec0-1248-4f4f-820f-6a8d05213662"
                 }
             ]
         },
-        "9": {
+        "13": {
             "annotation": "Perform ANOVA. For imputing missing values, create random values.",
             "content_id": "mqppep_anova",
             "errors": null,
-            "id": 9,
+            "id": 13,
             "input_connections": {
                 "alpha_file": {
-                    "id": 6,
+                    "id": 10,
                     "output_name": "output"
                 },
                 "input_file": {
-                    "id": 7,
+                    "id": 11,
                     "output_name": "preproc_tab"
+                },
+                "sample_grouping_regex": {
+                    "id": 3,
+                    "output_name": "output"
+                },
+                "sample_names_regex": {
+                    "id": 2,
+                    "output_name": "output"
                 }
             },
             "inputs": [],
@@ -627,17 +773,17 @@
                 }
             ],
             "position": {
-                "bottom": 1609.6000061035156,
-                "height": 255.60000610351562,
-                "left": 1617,
-                "right": 1817,
-                "top": 1354,
-                "width": 200,
-                "x": 1617,
-                "y": 1354
+                "bottom": 1994.1084289550781,
+                "height": 367.6000061035156,
+                "left": 1319.6083984375,
+                "right": 1519.608413696289,
+                "top": 1626.5084228515625,
+                "width": 200.00001525878906,
+                "x": 1319.6083984375,
+                "y": 1626.5084228515625
             },
             "post_job_actions": {
-                "RenameDatasetActionimputed_data_file": {
+                "RenameDatasetActionimp_qn_lt_file": {
                     "action_arguments": {
                         "newname": "#{input_file}.intensities_randomly-imputed_QN_LT"
                     },
@@ -653,31 +799,32 @@
                 },
                 "RenameDatasetActionreport_file": {
                     "action_arguments": {
-                        "newname": "#{input_file}.intensities_randomly-imputed_report (download/unzip to view)"
+                        "newname": "#{input_file}.intensities_randomly-imputed_report"
                     },
                     "action_type": "RenameDatasetAction",
                     "output_name": "report_file"
                 }
             },
             "tool_id": "mqppep_anova",
-            "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"random\", \"__current_case__\": 3, \"meanPercentile\": \"1\", \"sdPercentile\": \"0.2\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"imputation\": {\"imputation_method\": \"random\", \"__current_case__\": 3, \"meanPercentile\": \"1\", \"sdPercentile\": \"1.0\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"intensity_column_regex\": \"^Intensity[^_]\", \"sample_grouping_regex\": {\"__class__\": \"ConnectedValue\"}, \"sample_names_regex\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "tool_version": null,
             "type": "tool",
-            "uuid": "f1f2bdf9-fbc0-4205-b834-9a8af5814dc9",
+            "uuid": "b239a226-854c-4fbc-8955-cadd968f0704",
             "workflow_outputs": [
                 {
-                    "label": "intensities_randomly-imputed_QN_LT",
+                    "label": "intensities_randomly-imputed",
                     "output_name": "imputed_data_file",
-                    "uuid": "d70a3476-fb42-4533-831b-4fcb2bda74fc"
+                    "uuid": "8e403d6c-8f47-4f78-addd-da8923695135"
                 },
                 {
                     "label": "intensities_randomly-imputed_report",
                     "output_name": "report_file",
-                    "uuid": "d6701a61-357b-4a27-8154-ca41eb16d8a6"
+                    "uuid": "2c54a01e-ca12-4769-b370-cf137fe4b3f4"
                 },
                 {
                     "label": "intensities_randomly-imputed_QN_LT",
                     "output_name": "imp_qn_lt_file",
-                    "uuid": "637faf12-0d52-45b3-a318-40bfc7fcd4cb"
+                    "uuid": "5fa4acc2-b82f-41e8-aedc-6efdc73f3d58"
                 }
             ]
         }
@@ -685,6 +832,6 @@
     "tags": [
         "ppenrich"
     ],
-    "uuid": "445a0eb0-25c7-44c0-8259-a3346b01cbf3",
-    "version": 3
+    "uuid": "23c8a0f0-218f-4ba7-9470-ec826d16243a",
+    "version": 19
 }