Mercurial > repos > eschen42 > mqppep_anova
changeset 15:2c5f1a2fe16a draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 96659062ea07ac43d139746b0d119f1ee020f9cd"
author | eschen42 |
---|---|
date | Sat, 26 Mar 2022 02:27:12 +0000 |
parents | 6679616d0c18 |
children | 2b9119d7d80a |
files | macros.xml mqppep_anova.R mqppep_anova.xml mqppep_anova_script.Rmd workflow/ppenrich_suite_wf.ga |
diffstat | 5 files changed, 555 insertions(+), 370 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Tue Mar 22 23:12:18 2022 +0000 +++ b/macros.xml Sat Mar 26 02:27:12 2022 +0000 @@ -1,5 +1,5 @@ <macros> - <token name="@TOOL_VERSION@">0.1.5</token> + <token name="@TOOL_VERSION@">0.1.6</token> <token name="@VERSION_SUFFIX@">0</token> <xml name="requirements"> <requirements> @@ -16,15 +16,18 @@ <requirement type="package" version="1.7.1" >r-optparse</requirement> <requirement type="package" version="2.11" >r-rmarkdown</requirement> <!-- - It would be nice to use conda-forge/texlive-core, but issue 23 blocked PDF-creation. - Also, I got pango font errors (output had missing symbols replaced with boxes) unless - I specified the build as well as the version, i.e. - texlive-core=20210325=h97429d4_0 + It would be nice to use conda-forge/texlive-core rather than r-tinytex because the + former installs texlive when the package is built, but issue 23 blocked PDF-creation. + Also, texlive-core also gave pango font errors (output had missing symbols replaced + with boxes) unless I specified the build as well as the version when building a + conda environment, e.g.: texlive-core=20210325=h97429d4_0 --> <requirement type="package" version="0.37" >r-tinytex</requirement> <requirement type="package" version="3.3.5" >r-ggplot2</requirement> <requirement type="package" version="3.9.10" >python</requirement> <requirement type="package" version="5.26.2" >perl</requirement> + <requirement type="package" version="0.9.4" >r-latex2exp</requirement> + <requirement type="package" version="0.3.7" >r-vioplot</requirement> </requirements> </xml> </macros>
--- a/mqppep_anova.R Tue Mar 22 23:12:18 2022 +0000 +++ b/mqppep_anova.R Sat Mar 26 02:27:12 2022 +0000 @@ -32,7 +32,7 @@ make_option( c("-f", "--firstDataColumn"), action = "store", - default = "10", + default = "^Intensity[^_]", type = "character", help = "First column of intensity values" ), @@ -110,9 +110,12 @@ if (! file.exists(args$inputFile)) { stop((paste("Input file", args$inputFile, "does not exist"))) } -input_file <- args$inputFile -alpha_file <- args$alphaFile -first_data_column <- args$firstDataColumn +input_file <- args$inputFile +alpha_file <- args$alphaFile +imputed_data_file_name <- args$imputedDataFile +imp_qn_lt_data_filenm <- args$imputedQNLTDataFile +report_file_name <- args$reportFile + imputation_method <- args$imputationMethod print( grepl( @@ -133,42 +136,49 @@ return(-1) } +# read with default values, when applicable mean_percentile <- args$meanPercentile -print("mean_percentile is:") -cat(str(mean_percentile)) - -sd_percentile <- args$sdPercentile -print("sd_percentile is:") -cat(str(mean_percentile)) - +sd_percentile <- args$sdPercentile +# in the case of 'random" these values are ignored by the client script +if (imputation_method == "random") { + print("mean_percentile is:") + cat(str(mean_percentile)) -regex_sample_names <- gsub("^[ \t\n]*", "", - readChar(args$regexSampleNames, 1000) - ) -regex_sample_names <- gsub("[ \t\n]*$", "", - regex_sample_names - ) -cat(regex_sample_names) -cat("\n") + print("sd_percentile is:") + cat(str(mean_percentile)) +} -regex_sample_grouping <- gsub("^[ \t\n]*", "", - readChar(args$regexSampleGrouping, 1000) - ) -regex_sample_grouping <- gsub("[ \t\n]*$", "", - regex_sample_grouping - ) -cat(regex_sample_grouping) -cat("\n") - -imputed_data_file_name <- args$imputedDataFile -imp_qn_lt_data_filenm <- args$imputedQNLTDataFile -report_file_name <- args$reportFile - -print("regex_sample_names is:") -cat(str(regex_sample_names)) - -print("regex_sample_grouping is:") -cat(str(regex_sample_grouping)) +# 
convert string parameters that are passed in via config files: +# - firstDataColumn +# - regexSampleNames +# - regexSampleGrouping +read_config_file_string <- function(fname, limit) { + # eliminate any leading whitespace + result <- gsub("^[ \t\n]*", "", readChar(fname, limit)) + # eliminate any trailing whitespace + result <- gsub("[ \t\n]*$", "", result) + # substitute characters escaped by Galaxy sanitizer + result <- gsub("__lt__", "<", result) + result <- gsub("__le__", "<=", result) + result <- gsub("__eq__", "==", result) + result <- gsub("__ne__", "!=", result) + result <- gsub("__gt__", ">", result) + result <- gsub("__ge__", ">=", result) + result <- gsub("__sq__", "'", result) + result <- gsub("__dq__", '"', result) + result <- gsub("__ob__", "[", result) + result <- gsub("__cb__", "]", result) +} +cat(paste0("first_data_column file: ", args$firstDataColumn, "\n")) +cat(paste0("regex_sample_names file: ", args$regexSampleNames, "\n")) +cat(paste0("regex_sample_grouping file: ", args$regexSampleGrouping, "\n")) +nc <- 1000 +regex_sample_names <- read_config_file_string(args$regexSampleNames, nc) +regex_sample_grouping <- read_config_file_string(args$regexSampleGrouping, nc) +first_data_column <- read_config_file_string(args$firstDataColumn, nc) +cat(paste0("first_data_column: ", first_data_column, "\n")) +cat(paste0("regex_sample_names: ", regex_sample_names, "\n")) +cat(paste0("regex_sample_grouping: ", regex_sample_grouping, "\n")) # from: https://github.com/molgenis/molgenis-pipelines/wiki/ # How-to-source-another_file.R-from-within-your-R-script
--- a/mqppep_anova.xml Tue Mar 22 23:12:18 2022 +0000 +++ b/mqppep_anova.xml Sat Mar 26 02:27:12 2022 +0000 @@ -22,7 +22,7 @@ \${CONDA_PREFIX}/bin/Rscript \$TEMP/mqppep_anova.R --inputFile '$input_file' --alphaFile '$alpha_file' - --firstDataColumn $first_data_column + --firstDataColumn $intensity_column_regex_f --imputationMethod $imputation.imputation_method #if $imputation.imputation_method == "random" --meanPercentile '$imputation.meanPercentile' @@ -45,6 +45,9 @@ <configfile name="sample_grouping_regex_f"> $sample_grouping_regex </configfile> + <configfile name="intensity_column_regex_f"> + $intensity_column_regex + </configfile> </configfiles> <inputs> <param name="input_file" type="data" format="tabular" label="Filtered Phosphopeptide Intensities" @@ -53,9 +56,9 @@ <param name="alpha_file" type="data" format="tabular" label="alpha cutoff level" help="[alpha_file] List of alpha cutoff values for significance testing; text file having one column and no header" /> - <param name="first_data_column" type="text" value="Intensity" - label="First data column" - help="[first_data_column] First column having intensity values (integer or PERL-compatible regular expression matching column label)" + <param name="intensity_column_regex" type="text" value="^Intensity[^_]" + label="Intensity-column pattern" + help="[intensity_column_regex] Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)" /> <!-- imputation_method <- c("group-median","median","mean","random")[1] --> <conditional name="imputation"> @@ -75,15 +78,15 @@ label="Mean percentile for random values" help="[meanPercentile] Percentile center of random values; range [1,99]" /> - <param name="sdPercentile" type="float" value="0.2" + <param name="sdPercentile" type="float" value="1.0" label="Percentile std. dev. for random values" help="[sdPercentile] Standard deviation adjustment-factor for random values; real number. 
(1.0 means SD equal to the SD for the entire data set.)" /> </when> </conditional> <param name="sample_names_regex" type="text" value="\.\d+[A-Z]$" - help="[sample_names_regex] PERL-compatible regular expression extracting sample-names from the the name of a spectrum file (without extension)" - label="Sample-extraction regex"> + help="[sample_names_regex] Pattern extracting sample-names from names of columns that have peptide intensity data (PERL-compatible regular expression)" + label="Sample-extraction pattern"> <sanitizer> <valid initial="string.printable"> <remove value="'"/> @@ -91,8 +94,8 @@ </sanitizer> </param> <param name="sample_grouping_regex" type="text" value="\d+" - help="[sample_grouping_regex] PERL-compatible regular expression extracting sample-group from each sample-name (i.e., extracted by previous regex pattern)" - label="Group-extraction regex"> + help="[sample_grouping_regex] Pattern extracting sample-group from the sample-names that are extracted by 'Sample-extraction pattern' (PERL-compatible regular expression)" + label="Group-extraction pattern"> <sanitizer> <valid initial="string.printable"> <remove value="'"/> @@ -112,7 +115,7 @@ <test> <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/> <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/> - <param name="first_data_column" value="10"/> + <param name="intensity_column_regex" value="^Intensity[^_]"/> <param name="imputation_method" value="group-median"/> <param name="sample_names_regex" value="\.\d+[A-Z]$"/> <param name="sample_grouping_regex" value="\d+"/> @@ -128,10 +131,10 @@ <test> <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/> <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/> - <param name="first_data_column" value="10"/> + <param name="intensity_column_regex" value="^Intensity[^_]"/> <param name="imputation_method" value="random"/> <param name="meanPercentile" value="1" /> - <param 
name="sdPercentile" value="0.2" /> + <param name="sdPercentile" value="1.0" /> <param name="sample_names_regex" value="\.\d+[A-Z]$"/> <param name="sample_grouping_regex" value="\d+"/> <output name="imp_qn_lt_file"> @@ -167,7 +170,7 @@ **Input parameters** -``first_data_column`` +``intensity_column_regex`` First column of ``input_file`` having intensity values (integer or PERL-compatible regular expression matching column label). Default: **Intensity** ``imputation_method``
--- a/mqppep_anova_script.Rmd Tue Mar 22 23:12:18 2022 +0000 +++ b/mqppep_anova_script.Rmd Sat Mar 26 02:27:12 2022 +0000 @@ -8,24 +8,24 @@ latex_document: toc: true params: - inputFile: "test-data/test_input_for_anova.tabular" alphaFile: "test-data/alpha_levels.tabular" - firstDataColumn: "Intensity" + inputFile: "test-data/UT_Phospho_ST_Sites.preproc.tabular" + firstDataColumn: "^Intensity[^_]" imputationMethod: !r c("group-median", "median", "mean", "random")[4] meanPercentile: 1 sdPercentile: 1.0 regexSampleNames: "\\.\\d+[A-Z]$" regexSampleGrouping: "\\d+" - imputedDataFilename: "test-data/imputedDataFilename.txt" - imputedQNLTDataFile: "test-data/imputedQNLTDataFile.txt" + imputedDataFilename: "test-data/limbo/imputedDataFilename.txt" + imputedQNLTDataFile: "test-data/limbo/imputedQNLTDataFile.txt" show_toc: true --- <!-- - latex_document: default + alphaFile: "test-data/alpha_levels.tabular" inputFile: "test-data/test_input_for_anova.tabular" + inputFile: "test-data/UT_Phospho_ST_Sites.preproc.tabular" inputFile: "test-data/density_failure.preproc_tab.tabular" - inputFile: "test-data/UT_Phospho_STY_Sites.preproc_tab" -date: "May 28, 2018; Mar 16, 2022" + latex_document: default --> ```{r setup, include = FALSE} # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 @@ -44,6 +44,7 @@ sqrt(const_stripchart_cex * const_stripchart_cex / 2) const_stripchart_jitter <- 0.3 const_write_debug_files <- FALSE +const_table_anchor <- "tbp" ### FUNCTIONS @@ -103,7 +104,7 @@ } # Use this like print.data.frame, from which it is adapted: -print_data_frame_latex <- +data_frame_latex <- function( x, ..., @@ -116,7 +117,7 @@ # string with justification of each column justification = NULL, # TRUE to center on page - centered = FALSE, + centered = TRUE, # optional capttion caption = NULL, # h(inline); b(bottom); t (top) or p (separate page) @@ -200,13 +201,10 @@ ``` -## Purpose: +## Purpose Perform imputation of missing values, quantile normalization, and 
ANOVA. -<!-- -## Variables to change for each input file ---> ```{r include = FALSE} # Input Filename input_file <- params$inputFile @@ -214,19 +212,25 @@ # First data column - ideally, this could be detected via regexSampleNames, # but for now leave it as is. first_data_column <- params$firstDataColumn -fdc_is_integer <- TRUE -first_data_column <- withCallingHandlers( - as.integer(first_data_column) - , warning = function(w) fdc_is_integer <<- FALSE - ) -if (FALSE == fdc_is_integer) { - first_data_column <- params$firstDataColumn +fdc_is_integer <- is.integer(first_data_column) +if (fdc_is_integer) { + first_data_column <- as.integer(params$firstDataColumn) } # False discovery rate adjustment for ANOVA # Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05 val_fdr <- - read.table(file = params$alphaFile, sep = "\t", header = F, quote = "")[, 1] + read.table(file = params$alphaFile, sep = "\t", header = F, quote = "") + +if ( + ncol(val_fdr) != 1 || + sum(!is.numeric(val_fdr[, 1])) || + sum(val_fdr[, 1] < 0) || + sum(val_fdr[, 1] > 1) +) { + stop("alphaFile should be one column of numbers within the range [0.0,1.0]") +} +val_fdr <- val_fdr[, 1] #Imputed Data filename imputed_data_filename <- params$imputedDataFilename @@ -274,32 +278,15 @@ ) ``` -### Parse column names, sample names, and factor levels from input file +## Extract Sample Names and Factor Levels + +Column names parsed from input file are shown in Table 1; sample names and factor levels, in Table 2. ```{r echo = FALSE, results = 'asis'} -# Write column naames as an enumerated list. 
-column_name_df <- data.frame( - column = seq_len(length(colnames(full_data))), - name = colnames(full_data) - ) -print_data_frame_latex( - x = column_name_df, - justification = "l l", - centered = TRUE, - caption = "Input data column name", - anchor = "h" - ) data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE) -cat( - sprintf( - "\n\nData columns: [%d,%d]\n\n", - min(data_column_indices), - max(data_column_indices) - ) - ) -if (FALSE == fdc_is_integer) { +if (!fdc_is_integer) { if (length(data_column_indices) > 0) { first_data_column <- data_column_indices[1] } else { @@ -307,6 +294,30 @@ } } +cat( + sprintf( + paste( + "\n\nPeptide-intensity data for each sample is", + "in one of columns %d through %d.\n\n" + ), + min(data_column_indices), + max(data_column_indices) + ) + ) + +# Write column names as a LaTeX enumerated list. +column_name_df <- data.frame( + column = seq_len(length(colnames(full_data))), + name = colnames(full_data) + ) +data_frame_latex( + x = column_name_df, + justification = "l l", + centered = TRUE, + caption = "Input data column name", + anchor = const_table_anchor + ) + ``` ```{r echo = FALSE, results = 'asis'} @@ -336,12 +347,12 @@ sample = sample_name_matches, level = sample_factor_levels ) -print_data_frame_latex( +data_frame_latex( x = sample_factor_df, justification = "c c", centered = TRUE, caption = "Factor level", - anchor = "h" + anchor = const_table_anchor ) ``` ```{r echo = FALSE, results = 'asis'} @@ -370,18 +381,9 @@ quant_data_log , las = 1 , col = const_boxplot_fill +, ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") +, xlab = "Sample" ) -# Points -stripchart( - quant_data_log, # Data - method = "jitter", # Random noise - jitter = const_stripchart_jitter, - pch = 19, # Pch symbols - cex = const_stripchart_cex, # Size of symbols reduced - col = "goldenrod", # Color of the symbol - vertical = TRUE, # Vertical mode - add = TRUE # Add it over - ) par(old_par) @@ -398,7 +400,8 @@ ggplot( 
quant_data_log_stack, aes(x = values) - ) + + ) + xlab(latex2exp::TeX("$log_{10}$(peptide intensity)")) + + ylab("Probability density") + geom_density( aes(group = ind, colour = ind), na.rm = TRUE @@ -411,7 +414,10 @@ ### Globally, are peptide intensities are approximately unimodal? <!-- -# ref for bquote below particularly and plotting math expressions generally: +# bquote could be used as an alternative to latex2exp::TeX below particularly +# and when plotting math expressions generally, at the expense of mastering +# another syntax, which hardly seems worthwhile when I need to use TeX +# elsewhere; here's an introduction to bquote: # https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/ --> ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5), results = 'asis'} @@ -420,17 +426,21 @@ fin <- is.finite(as.numeric(as.matrix(quant_data_log))) logvalues <- as.numeric(as.matrix(quant_data_log))[fin] +logvalues_density <- density(logvalues) plot( - density(logvalues), - main = bquote( - "Smoothed estimated probability density vs." ~ log[10](intensity)), - xlab = bquote(log[10](intensity)) + x = logvalues_density, + main = latex2exp::TeX( + "Smoothed estimated probability density vs. $log_{10}$(peptide intensity)" + ), + xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"), + ylab = "Probability density" ) hist( - x = as.numeric(as.matrix(quant_data_log)) -, breaks = 100 -, main = bquote("Frequency vs." ~ log[10](intensity)) -, xlab = bquote(log[10](intensity)) + x = as.numeric(as.matrix(quant_data_log)), + xlim = c(min(logvalues_density$x), max(logvalues_density$x)), + breaks = 100, + main = latex2exp::TeX("Frequency vs. $log_{10}$(peptide intensity)"), + xlab = latex2exp::TeX("$log_{10}$(peptide intensity)") ) ``` @@ -452,6 +462,7 @@ density(sds, na.rm = T) , main = "Smoothed estimated probability density vs. std. 
deviation" , sub = "(probability estimation made with Gaussian smoothing)" + , ylab = "Probability density" ) } else { cat( @@ -510,7 +521,7 @@ , "group-median" = { imputation_method_description <- paste("Substitute missing value with", - "median peptide intensity for sample group\n" + "median peptide intensity for sample group.\n" ) sample_level_integers <- as.integer(sample_factor_levels) for (i in seq_len(length(levels(sample_factor_levels)))) { @@ -524,7 +535,7 @@ , "median" = { imputation_method_description <- paste("Substitute missing value with", - "median peptide intensity across all sample classes\n" + "median peptide intensity across all sample classes.\n" ) quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]] good_rows <- !is.na(rowMeans(quant_data_imp)) @@ -532,7 +543,7 @@ , "mean" = { imputation_method_description <- paste("Substitute missing value with", - "mean peptide intensity across all sample classes\n" + "mean peptide intensity across all sample classes.\n" ) quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]] good_rows <- !is.na(rowMeans(quant_data_imp)) @@ -544,7 +555,7 @@ imputation_method_description <- paste("Substitute each missing value with random intensity", sprintf( - "random intensity $N \\sim (%0.2f, %0.2f)$\n", + "random intensity $N \\sim (%0.2f, %0.2f)$.\n", q1, m1 ) ) @@ -552,7 +563,6 @@ 100 * mean_percentile)) cat(sprintf("sd_percentile (from input parameter) is %0.2f\n\n", sd_percentile)) - #ACE cat(sprintf("sd for rnorm is %0.4f\n\n", m1)) quant_data_imp[ind] <- 10 ^ rnorm(number_to_impute, mean = q1, sd = m1) good_rows <- !is.na(rowMeans(quant_data_imp)) @@ -684,11 +694,20 @@ write_debug_file(quant_data_imp_log10) red_dots <- quant_data_imp_log10 * x - ylim <- c( + count_red <- sum(!is.na(red_dots)) + count_blue <- sum(!is.na(blue_dots)) + ylim_save <- ylim <- c( min(red_dots, blue_dots, na.rm = TRUE), max(red_dots, blue_dots, na.rm = TRUE) ) - # ref: 
https://r-charts.com/distribution/add-points-boxplot/ + show_stripchart <- + 50 > (count_red + count_blue) / length(sample_name_matches) + if (show_stripchart) { + boxplot_sub <- "Light blue = data before imputation; Red = imputed data" + } else { + boxplot_sub <- "" + } + # Vertical plot colnames(blue_dots) <- sample_name_matches boxplot( @@ -697,32 +716,59 @@ , col = const_boxplot_fill , ylim = ylim , main = "Peptide intensities before and after imputation" - , sub = "Light blue = data before imputation; Red = imputed data" + , sub = boxplot_sub , xlab = "Sample" - , ylab = "log10(peptide intensity)" + , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") ) - # Points - # NA values are not plotted - stripchart( - blue_dots, # Data - method = "jitter", # Random noise - jitter = const_stripchart_jitter, - pch = 19, # Pch symbols - cex = const_stripsmall_cex, # Size of symbols reduced - col = "lightblue", # Color of the symbol - vertical = TRUE, # Vertical mode - add = TRUE # Add it over - ) - stripchart( - red_dots, # Data - method = "jitter", # Random noise - jitter = const_stripchart_jitter, - pch = 19, # Pch symbols - cex = const_stripsmall_cex, # Size of symbols reduced - col = "red", # Color of the symbol - vertical = TRUE, # Vertical mode - add = TRUE # Add it over - ) + + if (show_stripchart) { + # Points + # ref: https://r-charts.com/distribution/add-points-boxplot/ + # NA values are not plotted + stripchart( + blue_dots, # Data + method = "jitter", # Random noise + jitter = const_stripchart_jitter, + pch = 19, # Pch symbols + cex = const_stripsmall_cex, # Size of symbols reduced + col = "lightblue", # Color of the symbol + vertical = TRUE, # Vertical mode + add = TRUE # Add it over + ) + stripchart( + red_dots, # Data + method = "jitter", # Random noise + jitter = const_stripchart_jitter, + pch = 19, # Pch symbols + cex = const_stripsmall_cex, # Size of symbols reduced + col = "red", # Color of the symbol + vertical = TRUE, # Vertical mode + add = TRUE 
# Add it over + ) + + } else { + # violin plot + cat("\\leavevmode\n\\quad\n\n\\quad\n\n") + vioplot::vioplot( + x = lapply(blue_dots, function(x) x[!is.na(x)]), + col = "lightblue1", + side = "left", + plotCentre = "line", + ylim = ylim_save, + main = "Distributions of observed and imputed data", + sub = "Light blue = observed data; Pink = imputed data", + xlab = "Sample", + ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") + ) + vioplot::vioplot( + x = lapply(red_dots, function(x) x[!is.na(x)]), + col = "lightpink1", + side = "right", + plotCentre = "line", + add = T + ) + } + par(old_par) # density plot @@ -738,7 +784,7 @@ "Black = combined" ), main = "Density of peptide intensity before and after imputation", - xlab = "log10(peptide intensity)", + xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"), ylab = "Probability density" ) lines(d_original, col = "blue") @@ -909,18 +955,9 @@ quant_data_log , las = 1 , col = const_boxplot_fill + , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") + , xlab = "Sample" ) - # Points - stripchart( - quant_data_log, # Data - method = "jitter", # Random noise - jitter = const_stripchart_jitter, - pch = 19, # Pch symbols - cex = const_stripchart_cex, # Size of symbols reduced - col = "goldenrod", # Color of the symbol - vertical = TRUE, # Vertical mode - add = TRUE # Add it over - ) par(old_par) } else { cat("There are no peptides to plot\n") @@ -936,7 +973,8 @@ ggplot( quant_data_log_stack, aes(x = values) - ) + + ) + xlab(latex2exp::TeX("$log_{10}$(peptide intensity)")) + + ylab("Probability density") + geom_density( aes(group = ind, colour = ind), na.rm = TRUE @@ -949,7 +987,7 @@ cat("\\leavevmode\\newpage\n") ``` -## Perform ANOVA filters +## Perform ANOVA Filters ```{r, echo = FALSE} # Make new data frame containing only Phosphopeptides @@ -1125,24 +1163,13 @@ main = "Imputed, normalized intensities", # no line plot las = 1, col = const_boxplot_fill, - ylab = expression(log[10](intensity)) + ylab = 
latex2exp::TeX("$log_{10}$(peptide intensity)") ) - # Points - stripchart( - filtered_data_filtered, # Data - method = "jitter", # Random noise - jitter = const_stripchart_jitter, - pch = 19, # Pch symbols - cex = const_stripchart_cex, # Size of symbols reduced - col = "goldenrod", # Color of the symbol - vertical = TRUE, # Vertical mode - add = TRUE # Add it over - ) par(old_par) } else { cat(sprintf( "%s < %0.2f\n\n\n\n\n", - "No peptides were found to have cutoff adjusted p-value <", + "No peptides were found to have cutoff adjusted p-value", cutoff )) } @@ -1229,11 +1256,12 @@ ) } else { if (nrow(m) == 1) { + next + } else { cat( sprintf("Heatmap for %d usable peptides whose", nrow(m)), sprintf("adjusted p-value < %0.2f\n", cutoff) ) - next } } cat("\n\n\n") @@ -1263,9 +1291,3 @@ } cat("\\leavevmode\n\n\n") ``` - -<!-- -## Peptide IDs, etc. - -See output files. --->
--- a/workflow/ppenrich_suite_wf.ga Tue Mar 22 23:12:18 2022 +0000 +++ b/workflow/ppenrich_suite_wf.ga Sat Mar 26 02:27:12 2022 +0000 @@ -28,27 +28,155 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 346.3999938964844, - "height": 81.89999389648438, - "left": 495, - "right": 695, - "top": 264.5, - "width": 200, - "x": 495, - "y": 264.5 + "bottom": 336.19168853759766, + "height": 82.20000457763672, + "left": 482.5333557128906, + "right": 682.5333709716797, + "top": 253.99168395996094, + "width": 200.00001525878906, + "x": 482.5333557128906, + "y": 253.99168395996094 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}", "tool_version": null, "type": "data_input", - "uuid": "21c3c29d-9e8c-4ece-b585-9e68fed7a93f", + "uuid": "78170155-4d6c-461b-a289-bcf0196b87db", "workflow_outputs": [] }, "1": { + "annotation": "Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)", + "name": "Intensity-column pattern" + } + ], + "label": "Intensity-column pattern", + "name": "Input parameter", + "outputs": [], + "position": { + "bottom": 443.9083480834961, + "height": 82.20000457763672, + "left": 510.5917053222656, + "right": 710.5917205810547, + "top": 361.7083435058594, + "width": 200.00001525878906, + "x": 510.5917053222656, + "y": 361.7083435058594 + }, + "tool_id": null, + "tool_state": "{\"default\": \"^Intensity[^_]\", \"parameter_type\": \"text\", \"optional\": true}", + "tool_version": null, + "type": "parameter_input", + "uuid": "b1a27840-417c-4e81-9fc3-7d3e3dee6753", + "workflow_outputs": [] + }, + "2": { + "annotation": "Pattern extracting 
sample-names from names of columns that have peptide intensity data (PERL-compatible regular expression)", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "Pattern extracting sample-names from names of columns that have peptide intensity data (PERL-compatible regular expression)", + "name": "Sample-extraction pattern" + } + ], + "label": "Sample-extraction pattern", + "name": "Input parameter", + "outputs": [], + "position": { + "bottom": 555.9083480834961, + "height": 82.20000457763672, + "left": 526.6583862304688, + "right": 726.6584014892578, + "top": 473.7083435058594, + "width": 200.00001525878906, + "x": 526.6583862304688, + "y": 473.7083435058594 + }, + "tool_id": null, + "tool_state": "{\"default\": \"\\\\.\\\\d+[A-Z]$\", \"parameter_type\": \"text\", \"optional\": true}", + "tool_version": null, + "type": "parameter_input", + "uuid": "05fe662c-b0a9-4ae7-8232-1b0a09261ac7", + "workflow_outputs": [] + }, + "3": { + "annotation": "Pattern extracting sample-group from the sample-names that are extracted by 'Sample-extraction pattern' (PERL-compatible regular expression)", + "content_id": null, + "errors": null, + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "Pattern extracting sample-group from the sample-names that are extracted by 'Sample-extraction pattern' (PERL-compatible regular expression)", + "name": "Group-extraction pattern" + } + ], + "label": "Group-extraction pattern", + "name": "Input parameter", + "outputs": [], + "position": { + "bottom": 671.9083786010742, + "height": 82.20000457763672, + "left": 530.7000122070312, + "right": 730.7000274658203, + "top": 589.7083740234375, + "width": 200.00001525878906, + "x": 530.7000122070312, + "y": 589.7083740234375 + }, + "tool_id": null, + "tool_state": "{\"default\": \"\\\\d+\", \"parameter_type\": \"text\", \"optional\": true}", + "tool_version": null, + "type": "parameter_input", + "uuid": 
"55d4b63b-8863-4ee4-b504-4b436c6e9684", + "workflow_outputs": [] + }, + "4": { + "annotation": "THIS IS pST BY DEFAULT. Change if your data are enriched for pY.", + "content_id": null, + "errors": null, + "id": 4, + "input_connections": {}, + "inputs": [ + { + "description": "THIS IS pST BY DEFAULT. Change if your data are enriched for pY.", + "name": "enrichmentType" + } + ], + "label": "enrichmentType", + "name": "Input parameter", + "outputs": [], + "position": { + "bottom": 788.8083648681641, + "height": 61.80000305175781, + "left": 534.5750122070312, + "right": 734.5750274658203, + "top": 727.0083618164062, + "width": 200.00001525878906, + "x": 534.5750122070312, + "y": 727.0083618164062 + }, + "tool_id": null, + "tool_state": "{\"restrictions\": [\"pST\", \"pY\"], \"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "644c2a42-475d-4757-a58b-46d0d86323d3", + "workflow_outputs": [] + }, + "5": { "annotation": "FASTA file of all human canonical isoforms, derived from Swiss-Prot (e.g., merge of https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot_varsplic.fasta.gz and https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz)", "content_id": null, "errors": null, - "id": 1, + "id": 5, "input_connections": {}, "inputs": [ { @@ -60,27 +188,27 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 708.8000030517578, - "height": 102.30000305175781, - "left": 685, - "right": 885, - "top": 606.5, - "width": 200, - "x": 685, - "y": 606.5 + "bottom": 984.6083679199219, + "height": 102.60000610351562, + "left": 559.5916748046875, + "right": 759.5916900634766, + "top": 882.0083618164062, + "width": 200.00001525878906, + "x": 559.5916748046875, + "y": 882.0083618164062 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"fasta\"]}", + "tool_state": "{\"optional\": false, \"format\": 
[\"fasta\"], \"tag\": \"\"}", "tool_version": null, "type": "data_input", - "uuid": "5da7317c-4def-48f3-8eac-af95bd18b290", + "uuid": "c190d96f-c9ee-413b-ba19-75808b0eff55", "workflow_outputs": [] }, - "2": { + "6": { "annotation": "Derived from https://networkin.info/download/networkin_human_predictions_3.1.tsv.xz (which is free for non-commercial use - for required citation, see https://networkin.info/)", "content_id": null, "errors": null, - "id": 2, + "id": 6, "input_connections": {}, "inputs": [ { @@ -92,27 +220,27 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 853.8000030517578, - "height": 102.30000305175781, - "left": 696, - "right": 896, - "top": 751.5, - "width": 200, - "x": 696, - "y": 751.5 + "bottom": 1115.6083679199219, + "height": 102.60000610351562, + "left": 576.5916748046875, + "right": 776.5916900634766, + "top": 1013.0083618164062, + "width": 200.00001525878906, + "x": 576.5916748046875, + "y": 1013.0083618164062 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}", "tool_version": null, "type": "data_input", - "uuid": "2edff8de-4379-45e2-b6b9-6ed4706bbf00", + "uuid": "d2783d72-6639-4275-8726-6e32b956aaca", "workflow_outputs": [] }, - "3": { + "7": { "annotation": "Derived from http://hprd.org/serine_motifs, http://hprd.org/tyrosine_motifs, and http://pegasus.biochem.mpg.de/phosida/help/motifs.aspx", "content_id": null, "errors": null, - "id": 3, + "id": 7, "input_connections": {}, "inputs": [ { @@ -124,27 +252,27 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 977.3999938964844, - "height": 81.89999389648438, - "left": 708, - "right": 908, - "top": 895.5, - "width": 200, - "x": 708, - "y": 895.5 + "bottom": 1224.2084274291992, + "height": 82.20000457763672, + "left": 593.6000366210938, + "right": 793.6000518798828, + "top": 1142.0084228515625, + "width": 200.00001525878906, + "x": 
593.6000366210938, + "y": 1142.0084228515625 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}", "tool_version": null, "type": "data_input", - "uuid": "86ebaaf2-b050-4eca-a88b-23a4c1af39f5", + "uuid": "e8c138eb-fb7d-4b4b-a5cc-25e000f555e1", "workflow_outputs": [] }, - "4": { + "8": { "annotation": "Derived from Kinase_Substrate_Dataset.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use - see that link for citation.)", "content_id": null, "errors": null, - "id": 4, + "id": 8, "input_connections": {}, "inputs": [ { @@ -156,27 +284,27 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 1126.8000030517578, - "height": 102.30000305175781, - "left": 729, - "right": 929, - "top": 1024.5, - "width": 200, - "x": 729, - "y": 1024.5 + "bottom": 1354.6084289550781, + "height": 102.60000610351562, + "left": 593.5916748046875, + "right": 793.5916900634766, + "top": 1252.0084228515625, + "width": 200.00001525878906, + "x": 593.5916748046875, + "y": 1252.0084228515625 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}", "tool_version": null, "type": "data_input", - "uuid": "92f16705-a19c-4fb9-b278-3ae8e11f09d8", + "uuid": "d6aea327-574a-4657-ba6b-13fa973c182a", "workflow_outputs": [] }, - "5": { + "9": { "annotation": "Derived from Regulatory_sites.gz found at https://www.phosphosite.org/staticDownloads (free for non-commercial use - see that link for citation.)", "content_id": null, "errors": null, - "id": 5, + "id": 9, "input_connections": {}, "inputs": [ { @@ -188,27 +316,27 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 1251.3999938964844, - "height": 81.89999389648438, - "left": 745, - "right": 945, - "top": 1169.5, - "width": 200, - "x": 745, - "y": 1169.5 + "bottom": 
1464.2084274291992, + "height": 82.20000457763672, + "left": 594.5916748046875, + "right": 794.5916900634766, + "top": 1382.0084228515625, + "width": 200.00001525878906, + "x": 594.5916748046875, + "y": 1382.0084228515625 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}", "tool_version": null, "type": "data_input", - "uuid": "5ab49d93-11e4-4e91-b30b-92269b319879", + "uuid": "1cda2502-f848-47d0-af07-d9997021b8c4", "workflow_outputs": [] }, - "6": { + "10": { "annotation": "List of alpha cutoff values for significance testing; text file having no header and a single line for each cutoff value.", "content_id": null, "errors": null, - "id": 6, + "id": 10, "input_connections": {}, "inputs": [ { @@ -220,34 +348,34 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 1501.8999938964844, - "height": 81.89999389648438, - "left": 727, - "right": 927, - "top": 1420, - "width": 200, - "x": 727, - "y": 1420 + "bottom": 1723.7084274291992, + "height": 82.20000457763672, + "left": 611.558349609375, + "right": 811.5583648681641, + "top": 1641.5084228515625, + "width": 200.00001525878906, + "x": 611.558349609375, + "y": 1641.5084228515625 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}", "tool_version": null, "type": "data_input", - "uuid": "481c627c-a4ce-45d7-b659-4f54692aafc7", + "uuid": "f59256bb-c823-48d6-923e-e54c6d04e155", "workflow_outputs": [] }, - "7": { - "annotation": "", + "11": { + "annotation": "Transform the output of MaxQuant for phosphoproteome-enriched samples to prepare it for statistical analysis.", "content_id": "mqppep_preproc", "errors": null, - "id": 7, + "id": 11, "input_connections": { "networkin": { - "id": 2, + "id": 6, "output_name": "output" }, "p_sty_motifs": { - "id": 3, + "id": 7, "output_name":
"output" }, "phosphoSites": { @@ -255,20 +383,28 @@ "output_name": "output" }, "protein_fasta": { - "id": 1, + "id": 5, "output_name": "output" }, "psp_kinase_substrate": { + "id": 8, + "output_name": "output" + }, + "psp_regulatory_sites": { + "id": 9, + "output_name": "output" + }, + "pst_py_selector": { "id": 4, "output_name": "output" }, - "psp_regulatory_sites": { - "id": 5, + "startCol": { + "id": 1, "output_name": "output" } }, "inputs": [], - "label": null, + "label": "Preprocess MaxQuant Phospho (STY)Sites", "name": "MaxQuant Phosphopeptide Preprocessing", "outputs": [ { @@ -325,16 +461,46 @@ } ], "position": { - "bottom": 1408.7000122070312, - "height": 793.2000122070312, - "left": 1138.5, - "right": 1338.5, - "top": 615.5, - "width": 200, - "x": 1138.5, - "y": 615.5 + "bottom": 1520.0000610351562, + "height": 936.0000610351562, + "left": 1257.10009765625, + "right": 1457.100112915039, + "top": 584, + "width": 200.00001525878906, + "x": 1257.10009765625, + "y": 584 }, "post_job_actions": { + "HideDatasetActionfilteredData_tabular": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "filteredData_tabular" + }, + "HideDatasetActionmapped_phophopeptides": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "mapped_phophopeptides" + }, + "HideDatasetActionmelted_phophopeptide_map": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "melted_phophopeptide_map" + }, + "HideDatasetActionmqppep_output_sqlite": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "mqppep_output_sqlite" + }, + "HideDatasetActionpreproc_csv": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "preproc_csv" + }, + "HideDatasetActionquantData_tabular": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "quantData_tabular" + }, "RenameDatasetActionenrichGraph": { "action_arguments": { "newname": 
"#{phosphoSites}.enrichGraph_pdf" @@ -428,101 +594,73 @@ } }, "tool_id": "mqppep_preproc", - - - - - - - "tool_state": "{\"collapseFunc\": \"sum\", \"intervalCol\": \"1\", \"localProbCutoff\": \"0.75\", \"merge_function\": \"sum\", \"networkin\": {\"__class__\": \"ConnectedValue\"}, \"p_sty_motifs\": {\"__class__\": \"ConnectedValue\"}, \"phosphoCol\": \"^Number of Phospho [(]STY[)]$\", \"phosphoSites\": {\"__class__\": \"ConnectedValue\"}, \"protein_fasta\": {\"__class__\": \"ConnectedValue\"}, \"psp_kinase_substrate\": {\"__class__\": \"ConnectedValue\"}, \"psp_regulatory_sites\": {\"__class__\": \"ConnectedValue\"}, \"pst_not_py\": \"true\", \"pst_py_selector\": \"st\", \"species\": \"human\", \"startCol\": \"^Intensity[^_]\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_state": "{\"collapseFunc\": \"sum\", \"intervalCol\": \"1\", \"localProbCutoff\": \"0.75\", \"merge_function\": \"sum\", \"networkin\": {\"__class__\": \"ConnectedValue\"}, \"p_sty_motifs\": {\"__class__\": \"ConnectedValue\"}, \"phosphoCol\": \"^Number of Phospho [(]STY[)]$\", \"phosphoSites\": {\"__class__\": \"ConnectedValue\"}, \"protein_fasta\": {\"__class__\": \"ConnectedValue\"}, \"psp_kinase_substrate\": {\"__class__\": \"ConnectedValue\"}, \"psp_regulatory_sites\": {\"__class__\": \"ConnectedValue\"}, \"pst_py_selector\": {\"__class__\": \"ConnectedValue\"}, \"species\": \"human\", \"startCol\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": null, "type": "tool", - "uuid": "1e983dba-edca-4aed-a589-49b1651b4a85", + "uuid": "fc558352-b85e-438a-b94f-951f73768dfd", "workflow_outputs": [ { - "label": "preproc_csv", - "output_name": "preproc_csv", - "uuid": "8cba5367-b25c-40e0-a324-552225b1aa1d" - }, - { "label": "locProbCutoffGraph_pdf", "output_name": "locProbCutoffGraph", - "uuid": "641c5959-dab4-42d1-986d-8e6aaeb74ef6" - }, - { - "label": "melted_phosphopeptide_map", - "output_name": 
"melted_phophopeptide_map", - "uuid": "878dc817-26a3-4061-9dd4-56e737b3c4f7" + "uuid": "56f5f9b8-ae81-4743-abf8-1510b895eb2e" }, { "label": "enrichGraph_svg", "output_name": "enrichGraph_svg", - "uuid": "4492366c-945e-492f-8381-1c97c4da2264" + "uuid": "c4c8da47-9bad-4dc1-9205-374233e6c555" }, { "label": "locProbCutoffGraph_svg", "output_name": "locProbCutoffGraph_svg", - "uuid": "06faf93c-5f04-4cb3-9e41-58e465f6180e" - }, - { - "label": "filteredData", - "output_name": "filteredData_tabular", - "uuid": "76e2e268-f728-45f0-9973-793fbde0dd0a" - }, - { - "label": "ppep_map", - "output_name": "mapped_phophopeptides", - "uuid": "d0fea028-2ea5-4862-8a92-c2088edfcbe1" - }, - { - "label": "ppep_mapping_sqlite", - "output_name": "mqppep_output_sqlite", - "uuid": "eb996931-c548-4f3b-aaaa-39cc711df516" + "uuid": "40e1ef7c-a7ff-4ba4-8b8c-08be988d9b90" }, { "label": "preproc_tab", "output_name": "preproc_tab", - "uuid": "c9410cf1-44a2-4aa6-b3df-06cef74f3a45" + "uuid": "c53a5f0b-da40-4dc0-8e73-41f518b7844e" }, { "label": "preproc_sqlite", "output_name": "preproc_sqlite", - "uuid": "4eb22cc3-5879-4625-89c0-e0fddb01a197" + "uuid": "124ddcad-8fbd-489c-80ce-48cf2bc3c78c" }, { "label": "ppep_intensities", "output_name": "phosphoPepIntensities", - "uuid": "c704fd66-5ac3-4779-ad40-536955cd81e3" + "uuid": "25f871f9-6017-4209-ab8b-4551f3d34a79" }, { "label": "enrichGraph_pdf", "output_name": "enrichGraph", - "uuid": "5bf2a478-0431-4d32-84a9-7d46aad80ec5" - }, - { - "label": "quantData", - "output_name": "quantData_tabular", - "uuid": "cc922a75-6e72-4e60-add2-4b6ed8f73cdb" + "uuid": "e2d9f99e-23ec-4da8-aa0b-7ce4f59ea713" } ] }, - "8": { + "12": { "annotation": "Perform ANOVA. 
For imputing missing values, use median of non-missing values from the same treatment group.", "content_id": "mqppep_anova", "errors": null, - "id": 8, + "id": 12, "input_connections": { "alpha_file": { - "id": 6, + "id": 10, "output_name": "output" }, "input_file": { - "id": 7, + "id": 11, "output_name": "preproc_tab" + }, + "sample_grouping_regex": { + "id": 3, + "output_name": "output" + }, + "sample_names_regex": { + "id": 2, + "output_name": "output" } }, "inputs": [], - "label": "MaxQuant Phosphopeptide ANOVA group-median imputed", + "label": "ANOVA group-median imputed", "name": "MaxQuant Phosphopeptide ANOVA", "outputs": [ { @@ -539,17 +677,17 @@ } ], "position": { - "bottom": 1775.6000061035156, - "height": 255.60000610351562, - "left": 1370, - "right": 1570, - "top": 1520, - "width": 200, - "x": 1370, - "y": 1520 + "bottom": 2134.7084350585938, + "height": 347.20001220703125, + "left": 948.6083984375, + "right": 1148.608413696289, + "top": 1787.5084228515625, + "width": 200.00001525878906, + "x": 948.6083984375, + "y": 1787.5084228515625 }, "post_job_actions": { - "RenameDatasetActionimputed_data_file": { + "RenameDatasetActionimp_qn_lt_file": { "action_arguments": { "newname": "#{input_file}.intensities_group-mean-imputed_QN_LT" }, @@ -565,48 +703,56 @@ }, "RenameDatasetActionreport_file": { "action_arguments": { - "newname": "#{input_file}.intensities_group-mean-imputed_report (download/unzip to view)" + "newname": "#{input_file}.intensities_group-mean-imputed_report" }, "action_type": "RenameDatasetAction", "output_name": "report_file" } }, "tool_id": "mqppep_anova", - "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"group-median\", \"__current_case__\": 0}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", 
+ "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"imputation\": {\"imputation_method\": \"group-median\", \"__current_case__\": 0}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"intensity_column_regex\": \"^Intensity[^_]\", \"sample_grouping_regex\": {\"__class__\": \"ConnectedValue\"}, \"sample_names_regex\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": null, "type": "tool", - "uuid": "ffa771c3-c52d-42a4-b78f-a60a39678792", + "uuid": "7002fe3a-bd4d-4d1d-9caf-1193606e3368", "workflow_outputs": [ { "label": "intensities_group-mean-imputed_QN_LT", "output_name": "imp_qn_lt_file", - "uuid": "0e77a2e0-bd7a-4487-aaa6-ead2469509a2" + "uuid": "61a62d74-1026-43ee-80f2-8c3905ebdeb5" }, { "label": "intensities_group-mean-imputed", "output_name": "imputed_data_file", - "uuid": "169d677f-0acb-4c56-b057-21f4aaf2b920" + "uuid": "c455ec37-f6c7-4abb-af6b-8ac6bea6d1a5" }, { "label": "intensities_group-mean-imputed_report", "output_name": "report_file", - "uuid": "25edae88-3bb6-4ec9-8b98-686fded7ed79" + "uuid": "9e138ec0-1248-4f4f-820f-6a8d05213662" } ] }, - "9": { + "13": { "annotation": "Perform ANOVA. 
For imputing missing values, create random values.", "content_id": "mqppep_anova", "errors": null, - "id": 9, + "id": 13, "input_connections": { "alpha_file": { - "id": 6, + "id": 10, "output_name": "output" }, "input_file": { - "id": 7, + "id": 11, "output_name": "preproc_tab" + }, + "sample_grouping_regex": { + "id": 3, + "output_name": "output" + }, + "sample_names_regex": { + "id": 2, + "output_name": "output" } }, "inputs": [], @@ -627,17 +773,17 @@ } ], "position": { - "bottom": 1609.6000061035156, - "height": 255.60000610351562, - "left": 1617, - "right": 1817, - "top": 1354, - "width": 200, - "x": 1617, - "y": 1354 + "bottom": 1994.1084289550781, + "height": 367.6000061035156, + "left": 1319.6083984375, + "right": 1519.608413696289, + "top": 1626.5084228515625, + "width": 200.00001525878906, + "x": 1319.6083984375, + "y": 1626.5084228515625 }, "post_job_actions": { - "RenameDatasetActionimputed_data_file": { + "RenameDatasetActionimp_qn_lt_file": { "action_arguments": { "newname": "#{input_file}.intensities_randomly-imputed_QN_LT" }, @@ -653,31 +799,32 @@ }, "RenameDatasetActionreport_file": { "action_arguments": { - "newname": "#{input_file}.intensities_randomly-imputed_report (download/unzip to view)" + "newname": "#{input_file}.intensities_randomly-imputed_report" }, "action_type": "RenameDatasetAction", "output_name": "report_file" } }, "tool_id": "mqppep_anova", - "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"random\", \"__current_case__\": 3, \"meanPercentile\": \"1\", \"sdPercentile\": \"0.2\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"imputation\": {\"imputation_method\": \"random\", \"__current_case__\": 3, 
\"meanPercentile\": \"1\", \"sdPercentile\": \"1.0\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"intensity_column_regex\": \"^Intensity[^_]\", \"sample_grouping_regex\": {\"__class__\": \"ConnectedValue\"}, \"sample_names_regex\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, "type": "tool", - "uuid": "f1f2bdf9-fbc0-4205-b834-9a8af5814dc9", + "uuid": "b239a226-854c-4fbc-8955-cadd968f0704", "workflow_outputs": [ { - "label": "intensities_randomly-imputed_QN_LT", + "label": "intensities_randomly-imputed", "output_name": "imputed_data_file", - "uuid": "d70a3476-fb42-4533-831b-4fcb2bda74fc" + "uuid": "8e403d6c-8f47-4f78-addd-da8923695135" }, { "label": "intensities_randomly-imputed_report", "output_name": "report_file", - "uuid": "d6701a61-357b-4a27-8154-ca41eb16d8a6" + "uuid": "2c54a01e-ca12-4769-b370-cf137fe4b3f4" }, { "label": "intensities_randomly-imputed_QN_LT", "output_name": "imp_qn_lt_file", - "uuid": "637faf12-0d52-45b3-a318-40bfc7fcd4cb" + "uuid": "5fa4acc2-b82f-41e8-aedc-6efdc73f3d58" } ] } @@ -685,6 +832,6 @@ "tags": [ "ppenrich" ], - "uuid": "445a0eb0-25c7-44c0-8259-a3346b01cbf3", - "version": 3 + "uuid": "23c8a0f0-218f-4ba7-9470-ec826d16243a", + "version": 19 }