Mercurial > repos > eschen42 > mqppep_anova

--- a/MaxQuantProcessingScript.R	Fri Mar 11 20:04:05 2022 +0000
+++ b/MaxQuantProcessingScript.R	Tue Mar 15 00:35:16 2022 +0000
@@ -695,7 +695,6 @@

 # Write phosphopeptides filtered by enrichment
 # --
-#ACE colnames(quant_data_qc_enrichment)[1] <- "Phosphopeptide"
 write.table(
   quant_data_qc_enrichment,
   file = output_filename,
--- a/PhosphoPeptide_Upstream_Kinase_Mapping.pl	Fri Mar 11 20:04:05 2022 +0000
+++ b/PhosphoPeptide_Upstream_Kinase_Mapping.pl	Tue Mar 15 00:35:16 2022 +0000
@@ -25,7 +25,6 @@
 ###############################################################################################################################

 use strict;
-#ACE use warnings;
 use warnings 'FATAL' => 'all';

 use Getopt::Std;
@@ -67,7 +66,6 @@
 my (@kinases_PhosphoSite, $kinases_PhosphoSite);
 my ($p_sequence_kinase_PhosphoSite, $p_sequence_PhosphoSite, $kinase_PhosphoSite);
 my (%regulatory_sites_PhosphoSite_hash);
-#ACE my %psp_regsite_protein;
 my (%domain, %ON_FUNCTION, %ON_PROCESS, %ON_PROT_INTERACT, %ON_OTHER_INTERACT, %notes, %organism);
 my (%unique_motifs);
 my ($kinase_substrate_NetworKIN_matches, $kinase_motif_matches, $kinase_substrate_PhosphoSite_matches);
@@ -196,8 +194,6 @@

 getopts('i:f:s:n:m:p:r:P:F:o:O:D:hva', \%opts) ;

-#ACE print %opts; #ACE
-#ACE print "\n"; #ACE

 if (exists($opts{'h'})) {
     usage();
@@ -221,15 +217,6 @@
     $fasta_in = $opts{'f'};
     $use_sqlite = 0;
 }
-#ACE  if (exists($opts{'s'}) && -e $opts{'s'}) {
-#ACE      $use_sqlite = 1;
-#ACE      $dbfile = $opts{'s'};
-#ACE  } elsif (!exists($opts{'f'}) || !-e $opts{'f'}) {
-#ACE      die('Neither input FASTA file nor input SQLite file was found');
-#ACE  } else {
-#ACE      $use_sqlite = 0;
-#ACE      $fasta_in = $opts{'f'};
-#ACE  }
 my $species;
 if ((!exists($opts{'s'})) || ($opts{'s'} eq '')) {
     $species = 'human';
@@ -426,8 +413,6 @@
       push (@databases, $x[0]);
       push (@accessions, $x[1]);
       push (@names, $x[2]);
-      #ACE print "names $x[2]\n";
-      #ACE print "--- $_\n";
       pseudo_sed();
       s/$/\t/;
       push (@parsed_fasta, $_);
@@ -439,7 +424,6 @@
       }
       $parsed_fasta[$#accessions] = $parsed_fasta[$#accessions].$x[0];
     }
-    #ACE print "... '$parsed_fasta[$#accessions]'\n";
   }
   close IN1;
   print "Done Reading FASTA file $fasta_in\n";
@@ -614,7 +598,6 @@
   $i = system("\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in --verbose");
 } else {
   $i = system("\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in");
-  #ACE DELETEME $i = system("\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in --verbose");
 }
 if ($i) {
   print "python $dirname/search_ppep.py -u $db_out -p $file_in\n  exited with exit code $i\n";
@@ -634,7 +617,6 @@
 #
 ###############################################################################################################################

-#ACE print OUT "$headers\tFormatted Motifs\tUnique Motifs\tPhospho-site(s)\tAccessions(s)\tName(s)\n";

 print "--- Match the non_p_peptides to the \@sequences array:\n";

@@ -750,7 +732,6 @@
     }

     # Prepare counter and phosphopeptide tracker for next row
-    #ACE print "counter: $counter; phosphpepetide: $current_ppep\n";
     $former_ppep = $current_ppep;
     $counter -= 1;

@@ -822,15 +803,11 @@
     my @matches = @{$matched_sequences{$peptide_to_match}};
     @tmp_motifs_array = ();
     for my $i (0 .. $#matches) {
-        #ACE print "Matching $peptide_to_match to match $i\n";
-        #ACE print "\$sites{\$peptide_to_match}[\$i] $sites{$peptide_to_match}[$i]\n";

         # Find the location of the phospo-site in the sequence(s)
         $tmp_site = 0; my $offset = 0;
         my $tmp_p_peptide = $peptide_to_match;
-        #ACE print "peptide_to_match: $peptide_to_match at position $sites{$peptide_to_match}[$i] in sequence $matched_sequences{$peptide_to_match}[$i]\n";
         $tmp_p_peptide =~ s/#//g; $tmp_p_peptide =~ s/\d//g; $tmp_p_peptide =~ s/\_//g; $tmp_p_peptide =~ s/\.//g;
-        #ACE print "tmp_p_peptide: $tmp_p_peptide\n";

         # Find all phosphorylated residues in the p_peptide
         @p_sites = ();
@@ -870,9 +847,7 @@
             my $arg1 = $matched_sequences{$peptide_to_match}[$i];

             if (($total_length > 0) && (length($arg1) > $arg2 + $total_length - 1)) {
-                #ACE print "\$tmp_motif = substr($arg1, $arg2, $total_length)\n";
                 $tmp_motif = substr($arg1, $arg2, $total_length);
-                #ACE print "tmp_motif = $tmp_motif\ti = $i\tpeptide_to_match = $peptide_to_match\tmatched_sequences{peptide_to_match}[i] = $matched_sequences{$peptide_to_match}[$i]\targ2 = $arg2\targ3 = $total_length\n";

                 # Put the "p" back in front of the appropriate phospho-residue(s).
                 my (@tmp_residues, $tmp_position);
@@ -883,7 +858,6 @@
                     } else {
                         $tmp_position = $desired_residues_L + $p_sites[$m] - $p_sites[0];
                     }
-                    #ACE print "Line 431: p_sites = $p_sites[$m]\ttmp_position = $tmp_position\ttmp_motif = $tmp_motif\n";
                     if ($tmp_position < length($tmp_motif) + 1) {
                         push (@tmp_residues, substr($tmp_motif, $tmp_position, 1));
                         if ($tmp_residues[$m] eq "S") {substr($tmp_motif, $tmp_position, 1, "s");}
@@ -1032,9 +1006,7 @@
         $x[$i] =~ s/\r//g; $x[$i]  =~ s/\n//g; $x[$i]  =~ s/\"//g;
         }
     if ($line != 0) {
-        #ACE FUE if (($species eq $species) && ($species eq $species)) {
         if (($species eq $x[3]) && ($species eq $x[8])) {
-            #ACE print "KIN_ORGANISM is '$x[3]' and SUB_ORGANISM is '$x[8]', line: $line\n";
             if (exists ($kinases_PhosphoSite -> {$x[0]})) {
                 #do nothing
             }
@@ -1154,7 +1126,6 @@
 open (IN4, "$PSP_Regulatory_Sites_in") or die "I couldn't find $PSP_Regulatory_Sites_in\n";
 print "Reading the PhosphoSite regulatory site data:  $PSP_Regulatory_Sites_in\n";

-#ACE $i = system("head -n 4 $PSP_Regulatory_Sites_in");

 $line = -1;
 while (<IN4>) {
@@ -1177,7 +1148,6 @@
         $x[$i] =~ s/\r//g; $x[$i]  =~ s/\n//g; $x[$i]  =~ s/\"//g;
     }
     my $found_GENE=0;
-    #ACE print STDERR "line $line: $_\n";
     if ( (not exists($x[0])) ) {
         next;
     }
@@ -1197,19 +1167,13 @@
         }
     }
     elsif ($line != 0) {
-        #ACE print "PSPReg $line: $_\n" if ($x[9] eq 'KGQKYFDsGDYNMAK');
-        #ACE FUE if ($species ne $species) {
         if ($species ne $x[6]) {
             # Do nothing - this record was filtered out by the species filter
-            #ACE print "PSP_regsite line rejected: $line\n";
         }
         elsif (!exists($regulatory_sites_PhosphoSite_hash{$x[9]})) {
-            #ACE print "testing \$domain{\$x[9]} for \$regulatory_sites_PhosphoSite_hash{$x[9]}\n" if ($x[9] eq 'KGQKYFDsGDYNMAK'); #ACE
             if (!defined $domain{$x[9]} || $domain{$x[9]} eq "") {
-                #ACE print "adding found \$regulatory_sites_PhosphoSite_hash{$x[9]}\n" if ($x[9] eq 'KGQKYFDsGDYNMAK'); #ACE
                 $regulatory_sites_PhosphoSite_hash{$x[9]} = $x[9];
                 $domain{$x[9]} = $x[10];
-                #ACE $psp_regsite_protein{$x[9]} = $x[1];
                 $ON_FUNCTION{$x[9]} = $x[11];
                 $ON_PROCESS{$x[9]} = $x[12];
                 $ON_PROT_INTERACT{$x[9]} = $x[13];
@@ -1226,12 +1190,9 @@
                   }
                 else {
                   # do nothing
-                  #ACE print "WARNING line $line - no domain or 7aa for:  GENE $x[0]   PROTEIN $x[1]   PROT_TYPE $x[2]   ACC_ID $x[3]   GENE_ID $x[4]   HU_CHR_LOC $x[5]   ORGANISM $x[6]   MOD_RSD $x[7]   SITE_GRP_ID $x[8]   SITE_+/-7_AA $x[9]   DOMAIN $x[10]\n";
-                  #ACE print "$_\n";
                   }
             }
             else {
-                #ACE print "Checking $domain{$x[9]} =~ /$x[10]/\n";
                 if ($domain{$x[9]} =~ /$x[10]/) {
                   # do nothing
                   }
@@ -1453,10 +1414,8 @@
         if (($number_pY == 1) || ($number_pSTY == 1)) {
             my $seq_plus5aa = "";
             my $seq_plus7aa = "";
-            #ACE print "tmp_motif is $tmp_motif before replacement\n";
             $formatted_sequence = &replace_pSpTpY($tmp_motif, $phospho_type);
             print "       a #pY $number_pY; #pSTY $number_pSTY; matching formatted motif $formatted_sequence for peptide  $peptide at " . format_localtime_iso8601() ."\n" if ($verbose);
-            #ACE print "formatted_sequence is $formatted_sequence after replacement\n";
             if ($phospho_type eq 'y') {
                 $seq_plus5aa = (split(/(\w{0,5}y\w{0,5})/, $formatted_sequence))[1];
                 $seq_plus7aa = (split(/(\w{0,7}y\w{0,7})/, $formatted_sequence))[1];
@@ -1474,14 +1433,10 @@
                     print "Error writing tuple ($ppep_id_lut{$peptide},$seq_plus7aa) for peptide $peptide to ppep_regsite_LUT: $ppep_regsite_LUT_stmth->errstr\n";
                 }
             }
-            #ACE print "seq_plus5aa is $seq_plus5aa \n";
-            #ACE print "seq_plus7aa is $seq_plus7aa \n";
             for my $i (0 .. $#kinases_observed) {
                 if (defined $seq_plus5aa) {
                     my $tmp = $seq_plus5aa."_".$kinases_observed[$i];    #eg, should be PGRPLsSYGMD_PKCalpha
                     if (exists($p_sequence_kinase -> {$tmp})) {
-                        #ACE print($tmp."\t");
-                        #ACE print(($p_sequence_kinase -> {$tmp})."\n"); #ACE
                         $kinase_substrate_NetworKIN_matches{$peptide}{$kinases_observed[$i]} = "X"; #ACE
                     }
                 }
@@ -1489,7 +1444,6 @@
             for my $i (0 .. $#motif_sequence) {
                 if ($peptide =~ /$motif_sequence[$i]/) {
                     $kinase_motif_matches{$peptide}{$motif_sequence[$i]} = "X";
-                    #ACE print "\$kinase_motif_matches{$peptide}{$motif_sequence[$i]} = 'X'; $motif_type{$motif_sequence[$i]}\n"; #ACE
                 }
             }
             for my $i (0 .. $#kinases_PhosphoSite) {
@@ -1500,12 +1454,9 @@
                     }
                 }
             }
-            #ACE print "checking for existence of \$regulatory_sites_PhosphoSite_hash{$seq_plus7aa}\n"; #ACE
             if (exists($regulatory_sites_PhosphoSite_hash{$seq_plus7aa})) {
-                #ACE print "found regulatory_sites_PhosphoSite_hash{$seq_plus7aa}\n"; #ACE
                 $seq_plus7aa_2{$peptide} = $seq_plus7aa;
                 $domain_2{$peptide} = $domain{$seq_plus7aa};
-                #ACE $psp_regsite_protein_2{$peptide} = $psp_regsite_protein{$seq_plus7aa};
                 $ON_FUNCTION_2{$peptide} = $ON_FUNCTION{$seq_plus7aa};
                 $ON_PROCESS_2{$peptide} = $ON_PROCESS{$seq_plus7aa};
                 $ON_PROT_INTERACT_2{$peptide} = $ON_PROT_INTERACT{$seq_plus7aa};
@@ -1513,12 +1464,10 @@
                 $notes_2{$peptide} = $notes{$seq_plus7aa};
                 $organism_2{$peptide} = $organism{$seq_plus7aa};
             } else {
-                #ACE print "c not found \$regulatory_sites_PhosphoSite_hash{{$seq_plus7aa}\n"; #ACE
             }
         }
         elsif (($number_pY > 1) || ($number_pSTY > 1)) {  #eg, if $x[4] is 1308-[ VIYFQAIEEVpYpYDHLRSAAKKR ]-1329 and $number_pY == 2
             $formatted_sequence = $tmp_motif;
-            #ACE print "formatted_sequence is $formatted_sequence \n";
             $seq_plus5aa = "";
             $seq_plus7aa = "";
             #Create the sequences with only one phosphorylation site
@@ -1546,14 +1495,11 @@

             my @formatted_sequences;
             for my $k (0 .. $#sites) {
-                #ACE print "pSTY_sequences[k] is $pSTY_sequences[$k] before replacement\n";
                 $formatted_sequences[$k] = &replace_pSpTpY($pSTY_sequences[$k], $phospho_type);
-                #ACE print "formatted_sequences[k] is $formatted_sequences[$k] \n";
             }

             for my $k (0 .. $#formatted_sequences) {
                 print "       b #pY $number_pY; #pSTY $number_pSTY; matching formatted motif $formatted_sequences[$k] for peptide  $peptide at " . format_localtime_iso8601() ."\n" if ($verbose);
-                #ACE print "formatted_sequences[k] for phosphotype $phospho_type is $formatted_sequences[$k] \n";
                 if ($phospho_type eq 'y') {
                     $seq_plus5aa = (split(/(\w{0,5}y\w{0,5})/, $formatted_sequences[$k]))[1];
                     $seq_plus7aa = (split(/(\w{0,7}y\w{0,7})/, $formatted_sequences[$k]))[1];
@@ -1562,21 +1508,15 @@
                     $seq_plus5aa = (split(/(\w{0,5}(s|t|y)\w{0,5})/, $formatted_sequences[$k]))[1];
                     $seq_plus7aa = (split(/(\w{0,7}(s|t|y)\w{0,7})/, $formatted_sequences[$k]))[1];
                 }
-                #ACE print "seq_plus5aa is $seq_plus5aa \n";
-                #ACE print "seq_plus7aa is $seq_plus7aa \n";
                 for my $i (0 .. $#kinases_observed) {
                     my $tmp = $seq_plus5aa."_".$kinases_observed[$i];    #eg, should look like REEILsEMKKV_PKCalpha
-                    #ACE print "seq_plus5aa._.kinases_observed[i] is $tmp\n"; #ACE
                     if (exists($p_sequence_kinase -> {$tmp})) {
                         $kinase_substrate_NetworKIN_matches{$peptide}{$kinases_observed[$i]} = "X";
-                        #ACE print "$tmp matched\n";
                     }
                 }
                 $pSTY_sequence = $formatted_sequences[$k];
-                #ACE print "trying pSTY_sequence $pSTY_sequence \n";
                 for my $i (0 .. $#motif_sequence) {
                     if ($pSTY_sequence =~ /$motif_sequence[$i]/) {
-                        #ACE print "match for pSTY_sequence $pSTY_sequence was $motif_sequence[$i]\n";
                         $kinase_motif_matches{$peptide}{$motif_sequence[$i]} = "X";
                     }
                 }
@@ -1585,11 +1525,9 @@
                     #print "seq_plus7aa._.kinases_PhosphoSite[i] is $tmp";
                     if (exists($p_sequence_kinase_PhosphoSite -> {$tmp})) {
                         $kinase_substrate_PhosphoSite_matches{$peptide}{$kinases_PhosphoSite[$i]} = "X";
-                    #ACE print "$tmp matched \n";
                     }
                 }
                 if (exists($regulatory_sites_PhosphoSite -> {$seq_plus7aa})) {
-                    #ACE print "ACE processing seq_plus7aa '$domain{$seq_plus7aa}'\n"; #ACE
                     $seq_plus7aa_2{$peptide} = $seq_plus7aa;

                     # $domain
@@ -1603,16 +1541,6 @@
                         $domain_2{$peptide} = $domain_2{$peptide}." / ".$domain{$seq_plus7aa};
                     }

-                    #ACE # $psp_regsite_protein
-                    #ACE if ($psp_regsite_protein_2{$peptide} eq "") {
-                    #ACE     $psp_regsite_protein_2{$peptide} = $psp_regsite_protein{$seq_plus7aa};
-                    #ACE }
-                    #ACE elsif ($psp_regsite_protein{$seq_plus7aa} eq "") {
-                    #ACE     # do nothing
-                    #ACE }
-                    #ACE else {
-                    #ACE     $psp_regsite_protein_2{$peptide} = $psp_regsite_protein_2{$peptide}." / ".$psp_regsite_protein{$seq_plus7aa};
-                    #ACE }

                     # $ON_FUNCTION_2
                     if ($ON_FUNCTION_2{$peptide} eq "") {
@@ -1682,7 +1610,6 @@
                     }
                     $organism_2{$peptide} = $organism{$seq_plus7aa};
                 } else {
-                    #ACE print "d not found \$regulatory_sites_PhosphoSite_hash{{$seq_plus7aa}}\n";
                 } # if (exists($regulatory_sites_PhosphoSite -> {$seq_plus7aa}))
             } # for my $k (0 .. $#formatted_sequences)
         } # if/else number of phosphosites
@@ -1716,7 +1643,6 @@

 foreach my $peptide (keys %data) {
     if (exists($domain_2{$peptide}) and (defined $domain_2{$peptide}) and (not $domain_2{$peptide} eq "") ) {
-        #ACE print "writing domain $domain_2{$peptide} for regulatory site(s) $seq_plus7aa_2{$peptide}\n"; #ACE
         $psp_regulatory_site_stmth->bind_param(1, $domain_2{$peptide});
         $psp_regulatory_site_stmth->bind_param(2, $ON_FUNCTION_2{$peptide});
         $psp_regulatory_site_stmth->bind_param(3, $ON_PROCESS_2{$peptide});
@@ -1728,7 +1654,6 @@
         if (not $psp_regulatory_site_stmth->execute()) {
             print "Error writing PSP_Regulatory_site for one regulatory site with peptide '$domain_2{$peptide}': $psp_regulatory_site_stmth->errstr\n";
         } else {
-            #ACE print "added domain for $domain_2{$peptide}\n";
         }
     } elsif (exists($domain_2{$peptide}) and (not defined $domain_2{$peptide})) {
         print "\$domain_2{$peptide} is undefined\n";  #ACE
@@ -1989,7 +1914,6 @@
                                         my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1;        # added 12.05.2012 for Justin's data
                                         print OUT "$foobar";
                                         print OUT "$tmp_site_for_printing\t";
-                                        #ACE print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing\t";
                                         $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing";
                                         $there_were_sites = 1;
                                     }
@@ -2049,7 +1973,6 @@
     # begin store-to-SQLite "ppep_metadata" table
     # ---
     for $i (1..14) {
-        #ACE print "\$ppep_metadata_stmth->bind_param($i, " . $ppep_metadata[$i-1] . ")\n";
         $ppep_metadata_stmth->bind_param($i, $ppep_metadata[$i-1]);
     }
     if (not $ppep_metadata_stmth->execute()) {
@@ -2074,7 +1997,6 @@
         $ppep_intensity_stmth->bind_param( 1, $ppep_id                     );
         $ppep_intensity_stmth->bind_param( 2, $sample_id_lut{$samples[$i]} );
         $ppep_intensity_stmth->bind_param( 3, $intense                     );
-        #ACE print "insert ($peptide, $samples[$i], $intense)\n";
         if (not $ppep_intensity_stmth->execute()) {
             print "Error writing tuple ($peptide,$samples[$i],$intense): $ppep_intensity_stmth->errstr\n";
         }
@@ -2103,13 +2025,11 @@
         }
         else { print OUT "\t";}
     }
-    #ACE my %wrote_motif = {};
     my %wrote_motif;
     my $motif_parts_0;
     for my $i (0 .. $#motif_sequence) {
         if (exists($kinase_motif_matches{$peptide}{$motif_sequence[$i]})) {
             print OUT "X\t";
-            #ACE my @motif_parts = split(/ motif /, $motif_type{$motif_sequence[$i]});
             $motif_parts_0 = $motif_type{$motif_sequence[$i]}." ".$motif_sequence[$i];
             my $key = "$peptide\t$gene_names\t$motif_parts_0";
             if (!exists($wrote_motif{$key})) {
--- a/macros.xml	Fri Mar 11 20:04:05 2022 +0000
+++ b/macros.xml	Tue Mar 15 00:35:16 2022 +0000
@@ -1,4 +1,30 @@
 <macros>
-    <token name="@TOOL_VERSION@">0.1.0</token>
+    <token name="@TOOL_VERSION@">0.1.1</token>
     <token name="@VERSION_SUFFIX@">0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="0.3.3"   >openblas</requirement>
+            <requirement type="package" version="0.4.0"   >r-sass</requirement>
+            <requirement type="package" version="1.14.2"  >r-data.table</requirement>
+            <requirement type="package" version="1.22.2"  >numpy</requirement>
+            <requirement type="package" version="1.4.0"   >pyahocorasick</requirement>
+            <requirement type="package" version="1.4.0"   >r-stringr</requirement>
+            <requirement type="package" version="1.4.1"   >pandas</requirement>
+            <requirement type="package" version="1.56.0"  >bioconductor-preprocesscore</requirement>
+            <requirement type="package" version="1.64"    >perl-dbd-sqlite</requirement>
+            <requirement type="package" version="1.7.1"   >r-optparse</requirement>
+            <requirement type="package" version="1.7.1"   >r-optparse</requirement>
+            <requirement type="package" version="2.11"    >r-rmarkdown</requirement>
+            <!--
+            It would be nice to use conda-forge/texlive-core, but issue 23 blocked PDF-creation.
+            Also, I got pango font errors (output had missing symbols replaced with boxes) unless
+            I specified the build as well as the version, i.e.
+            texlive-core=20210325=h97429d4_0
+            -->
+            <requirement type="package"                   >r-tinytex</requirement>
+            <requirement type="package" version="3.3.5"   >r-ggplot2</requirement>
+            <requirement type="package" version="3.9.10"  >python</requirement>
+            <requirement type="package" version="5.26.2"  >perl</requirement>
+        </requirements>
+    </xml>
 </macros>
--- a/mqppep_anova.R	Fri Mar 11 20:04:05 2022 +0000
+++ b/mqppep_anova.R	Tue Mar 15 00:35:16 2022 +0000
@@ -196,9 +196,12 @@
 #  from run to run
 set.seed(28571)

+
+library(tinytex)
+tinytex::install_tinytex()
 rmarkdown::render(
   input = paste(script_dir, "mqppep_anova_script.Rmd", sep = "/")
-, output_format = rmarkdown::html_document(pandoc_args = "--self-contained")
+, output_format = rmarkdown::pdf_document()
 , output_file = report_file_name
 , params = rmarkdown_params
 )
--- a/mqppep_anova.xml	Fri Mar 11 20:04:05 2022 +0000
+++ b/mqppep_anova.xml	Tue Mar 15 00:35:16 2022 +0000
@@ -3,24 +3,23 @@
     <macros>
         <import>macros.xml</import>
     </macros>
-    <requirements>
-        <requirement type="package" version="1.7.1"   >r-optparse</requirement>
-        <requirement type="package" version="1.4.0"   >r-stringr</requirement>
-        <requirement type="package" version="1.14.2"  >r-data.table</requirement>
-        <requirement type="package" version="3.3.5"   >r-ggplot2</requirement>
-        <requirement type="package" version="1.56.0"  >bioconductor-preprocesscore</requirement>
-        <requirement type="package" version="0.3.3"   >openblas</requirement>
-        <requirement type="package" version="2.11"    >r-rmarkdown</requirement>
-        <requirement type="package" version="0.4.0"   >r-sass</requirement>
-        <requirement type="package" version="20210325">texlive-core</requirement>
-
-    </requirements>
-    <!-- Rscript -e 'rmarkdown::render("QuantDataProcessingScript.Rmd")' -->
+    <expand macro="requirements"/>
+    <!--
+      The weird invocation used here is because knitr and install_tinytex
+      both need access to a writeable directory, but most directories in a
+      biocontainer are read-only, so this builds a pseudo-home under /tmp
+    -->
     <command detect_errors="exit_code"><![CDATA[
-      cd /tmp;
+      export OLD_PWD=\$(dirname \$(pwd));
+      export HOME=/tmp\${OLD_PWD};
+      mkdir -p \$HOME/bin;
+      mkdir -p \$HOME/tmp;
+      export TEMP=\$HOME/tmp;
+      export TMPDIR=\$TEMP;
+      cd \$TEMP;
       cp '$__tool_directory__/mqppep_anova_script.Rmd' . || exit 0;
       cp '$__tool_directory__/mqppep_anova.R' . || exit 0;
-      \${CONDA_PREFIX}/bin/Rscript /tmp/mqppep_anova.R
+      \${CONDA_PREFIX}/bin/Rscript \$TEMP/mqppep_anova.R
       --inputFile '$input_file'
       --alphaFile $alpha_file
       --firstDataColumn $first_data_column
@@ -33,8 +32,8 @@
       --regexSampleGrouping $sample_grouping_regex_f
       --imputedDataFile $imputed_data_file
       --reportFile $report_file;
-      rm mqppep_anova_script.Rmd;
-      rm mqppep_anova.R
+      cd \${OLD_PWD};
+      rm -rf \$HOME
     ]]></command>
     <configfiles>
       <configfile name="sample_names_regex_f">
@@ -99,8 +98,11 @@
         </param>
     </inputs>
     <outputs>
-        <data name="imputed_data_file" format="tabular" label="${input_file.name}.intensities_${imputation.imputation_method}-imputed_QN_LT" ></data>
-        <data name="report_file" format="html" label="${input_file.name}.report (download/unzip to view)" ></data>
+        <data name="imputed_data_file" format="tabular" label="${input_file.name}.${imputation.imputation_method}-imputed_QN_LT_intensities" ></data>
+        <!--
+        <data name="report_file" format="html" label="${input_file.name}.${imputation.imputation_method}-imputed_report (download/unzip to view)" ></data>
+        -->
+        <data name="report_file" format="pdf" label="${input_file.name}.${imputation.imputation_method}-imputed_report" ></data>
     </outputs>
     <tests>
         <test>
@@ -124,6 +126,8 @@
             <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/>
             <param name="first_data_column" value="10"/>
             <param name="imputation_method" value="random"/>
+            <param name="meanPercentile" value="1" />
+            <param name="sdPercentile" value="0.2" />
             <param name="sample_names_regex" value="\.\d+[A-Z]$"/>
             <param name="sample_grouping_regex" value="\d+"/>
             <output name="imputed_data_file">
@@ -192,14 +196,7 @@
   Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, quantile-normalized (**QN**), and log10-transformed (**LT**), in tabular format.

 ``report_file``
-  (download/unzip to view) Summary report for normalization, imputation, and ANOVA.
-  This dataset is displayed in Galaxy as having a datatype of ``html`` in Galaxy,
-  but it is in fact a zipfile; the zip file contains
-  an HTML file.  Please download and unzip it locally to view the report.
-  Ideally this report would be a PDF, but there is an issue
-  `(linked here)
-  <https://github.com/conda-forge/texlive-core-feedstock/issues/19>`_.
-  that needs to be resolved first.
+  Summary report for normalization, imputation, and ANOVA, in PDF format.

 **Authors**
--- a/mqppep_anova_script.Rmd	Fri Mar 11 20:04:05 2022 +0000
+++ b/mqppep_anova_script.Rmd	Tue Mar 15 00:35:16 2022 +0000
@@ -1,24 +1,32 @@
 ---
-title: "Quant Data Processing Script"
+title: "MaxQuant Phospho-Proteomic Enrichment Pipeline ANOVA"
 author: "Larry Cheng; Art Eschenlauer"
 date: "May 28, 2018; Nov 16, 2021"
 output:
-  html_document: default
   pdf_document: default
 params:
-  inputFile: "Upstream_Map_pST_outputfile_STEP4.txt"
-  alphaFile: "alpha_levels.txt"
+  inputFile: "test-data/test_input_for_anova.tabular"
+  alphaFile: "test-data/alpha_levels.tabular"
   firstDataColumn: "Intensity"
-  imputationMethod: !r c("group-median","median","mean","random")[4]
+  imputationMethod: !r c("group-median", "median", "mean", "random")[1]
   meanPercentile: 1
   sdPercentile: 0.2
   regexSampleNames: "\\.(\\d+)[A-Z]$"
   regexSampleGrouping: "(\\d+)"
   imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt"
 ---
-```{r setup, include=FALSE}
+```{r setup, include = FALSE}
 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
-knitr::opts_chunk$set(echo = FALSE, fig.dim=c(9,10))
+knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10))
+
+### FUNCTIONS
+
+#ANOVA filter function
+anova_func <- function(x, grouping_factor) {
+  x_aov <- aov(as.numeric(x) ~ grouping_factor)
+  pvalue <- summary(x_aov)[[1]][["Pr(>F)"]][1]
+  pvalue
+}
 ```

 ## Purpose:
@@ -28,156 +36,175 @@
 ## Variables to change for each input file
 -->
 ```{r include = FALSE}
-#Input Filename
-inputFile <- params$inputFile
+# Input Filename
+input_file <- params$inputFile

-#First data column - ideally, this could be detected via regexSampleNames, but for now leave it as is.
-firstDataColumn <- params$firstDataColumn
-FDC_is_integer <- TRUE
-firstDataColumn <- withCallingHandlers(
-    as.integer(firstDataColumn)
-  , warning = function(w) FDC_is_integer <<- FALSE
+# First data column - ideally, this could be detected via regexSampleNames,
+#   but for now leave it as is.
+first_data_column <- params$firstDataColumn
+fdc_is_integer <- TRUE
+first_data_column <- withCallingHandlers(
+    as.integer(first_data_column)
+  , warning = function(w) fdc_is_integer <<- FALSE
   )
-if (FALSE == FDC_is_integer) {
-  firstDataColumn <- params$firstDataColumn
+if (FALSE == fdc_is_integer) {
+  first_data_column <- params$firstDataColumn
 }

-#False discovery rate adjustment for ANOVA (Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05)
-valFDR <- read.table(file = params$alphaFile, sep = "\t", header=F, quote="")[,1]
+# False discovery rate adjustment for ANOVA
+#  Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05
+val_fdr <-
+  read.table(file = params$alphaFile, sep = "\t", header = F, quote = "")[, 1]

 #Imputed Data filename
-imputedDataFilename <- params$imputedDataFilename
+imputed_data_filename <- params$imputedDataFilename

 #ANOVA data filename
 ```

-```{r include = FALSE}
-#Imputation method, should be one of c("random","group-median","median","mean")
-imputationMethod <- params$imputationMethod
+```{r echo = FALSE}
+# Imputation method, should be one of
+#   "random", "group-median", "median", or "mean"
+imputation_method <- params$imputationMethod

-#Selection of percentile of logvalue data to set the mean for random number generation when using random imputation
-meanPercentile <- params$meanPercentile / 100.0
+# Selection of percentile of logvalue data to set the mean for random number
+#   generation when using random imputation
+mean_percentile <- params$meanPercentile / 100.0

-#deviation adjustment-factor for random values; real number.
-sdPercentile <- params$sdPercentile
+# deviation adjustment-factor for random values; real number.
+sd_percentile <- params$sdPercentile
+
+# Regular expression of Sample Names, e.g., "\\.(\\d+)[A-Z]$"
+regex_sample_names <- params$regexSampleNames

-#Regular expression of Sample Names, e.g., "\\.(\\d+)[A-Z]$"
-regexSampleNames <- params$regexSampleNames
-
-#Regular expression to extract Sample Grouping from Sample Name (if error occurs, compare sampleNumbers and tempMatches to see if groupings/pairs line up)
-# e.g., "(\\d+)"
-regexSampleGrouping <- params$regexSampleGrouping
+# Regular expression to extract Sample Grouping from Sample Name;
+#   if error occurs, compare sample_factor_levels and temp_matches
+#   to see if groupings/pairs line up
+#   e.g., "(\\d+)"
+regex_sample_grouping <- params$regexSampleGrouping

 ```

-
-```{r include = FALSE}
-### FUNCTIONS
-
-#ANOVA filter function
-anovaFunc <- function(x, groupingFactor) {
-  x.aov = aov(as.numeric(x) ~ groupingFactor)
-  pvalue = summary(x.aov)[[1]][["Pr(>F)"]][1]
-  pvalue
-}
-```
-
-
-
-### Checking that log-transformed sample distributions are similar:
-```{r echo=FALSE}
+```{r echo = FALSE}
+### READ DATA

 library(data.table)

 # read.table reads a file in table format and creates a data frame from it.
-#   - note that `quote=""` means that quotation marks are treated literally.
-fullData <- read.table(file = inputFile, sep = "\t", header=T, quote="", check.names=FALSE)
-print(colnames(fullData))
-#head(fullData)
+#   - note that `quote = ""` means that quotation marks are treated literally.
+full_data <- read.table(
+  file = input_file,
+  sep = "\t",
+  header = T,
+  quote = "",
+  check.names = FALSE
+  )
+```
+
+### Column names from input file

-if (FALSE == FDC_is_integer) {
-  dataColumnIndices <- grep(firstDataColumn, names(fullData), perl=TRUE)
-  str(dataColumnIndices)
-  if (length(dataColumnIndices) > 0) {
-    firstDataColumn <- dataColumnIndices[1]
+```{r echo = FALSE, results = 'markup'}
+print(colnames(full_data))
+data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE)
+cat(sprintf("First data column:  %d\n", min(data_column_indices)))
+cat(sprintf("Last data column:   %d\n", max(data_column_indices)))
+```
+
+```{r echo = FALSE, results = 'asis'}
+cat("\\newpage\n")
+```
+
+### Checking that log-transformed sample distributions are similar:
+
+```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}
+
+if (FALSE == fdc_is_integer) {
+
+  if (length(data_column_indices) > 0) {
+    first_data_column <- data_column_indices[1]
   } else {
-    stop(paste("failed to convert firstDataColumn:", firstDataColumn))
+    stop(paste("failed to convert firstDataColumn:", first_data_column))
   }
 }
-
-quantData0 <- fullData[firstDataColumn:length(fullData)]
-quantData <- fullData[firstDataColumn:length(fullData)]
-quantData[quantData==0] <- NA  #replace 0 with NA
-quantDataLog <- log10(quantData)
+
+quant_data0 <- full_data[first_data_column:length(full_data)]
+quant_data <- full_data[first_data_column:length(full_data)]
+quant_data[quant_data == 0] <- NA  #replace 0 with NA
+quant_data_log <- log10(quant_data)

-rownames(quantDataLog) <- fullData$Phosphopeptide
-
-summary(quantDataLog)
+rownames(quant_data_log) <- full_data$Phosphopeptide

-#data visualization
+# data visualization
 old_par <- par(
-  mai=par("mai") + c(0.5,0,0,0)
+  mai = par("mai") + c(0.5, 0, 0, 0)
 )
 boxplot(
-  quantDataLog
-, las=2
+  quant_data_log
+, las = 2
 )
 par(old_par)

-quantDataLog_stack <- stack(quantDataLog)
+
+
+cat("\\newline\n")
+cat("\\newline\n")
+
 ```

-```{r echo = FALSE, fig.align="left", fig.dim=c(9,5)}
+```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4), warning = FALSE}
+quant_data_log_stack <- stack(quant_data_log)
 library(ggplot2)
-ggplot(quantDataLog_stack, aes(x=values)) + geom_density(aes(group=ind, colour=ind))
+ggplot(
+  quant_data_log_stack,
+  aes(x = values)) + geom_density(aes(group = ind, colour = ind))
 ```

 ### Globally, are phosphopeptide intensities are approximately unimodal?
-```{r echo = FALSE,fig.align="left", fig.dim=c(9,5)}

-# ref for bquote particularly and plotting math expressions generally:
+<!--
+# ref for bquote below particularly and plotting math expressions generally:
 #   https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/
+-->
+```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5)}

-#identify the location of missing values
-fin <- is.finite(as.numeric(as.matrix(quantDataLog)))
+# identify the location of missing values
+fin <- is.finite(as.numeric(as.matrix(quant_data_log)))

-logvalues <- as.numeric(as.matrix(quantDataLog))[fin]
+logvalues <- as.numeric(as.matrix(quant_data_log))[fin]
 plot(
-  density(logvalues)
-, main = bquote("Smoothed estimated probability density vs." ~ log[10](intensity))
-, xlab = bquote(log[10](intensity))
-)
+  density(logvalues),
+  main = bquote(
+    "Smoothed estimated probability density vs." ~ log[10](intensity)),
+  xlab = bquote(log[10](intensity))
+  )
 hist(
-  x = as.numeric(as.matrix(quantDataLog))
+  x = as.numeric(as.matrix(quant_data_log))
 , breaks = 100
 , main = bquote("Frequency vs." ~ log[10](intensity))
 , xlab = bquote(log[10](intensity))
 )
 ```

-<!--
-## Impute missing values
--->
-
 ### Distribution of standard deviations of phosphopeptides, ignoring missing values:

-```{r echo = FALSE, fig.align="left", fig.dim=c(9,5)}
-#determine quantile
-q1 <- quantile(logvalues, probs = meanPercentile)[1]
+```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5)}
+# determine quantile
+q1 <- quantile(logvalues, probs = mean_percentile)[1]

-#determine standard deviation of quantile to impute
+# determine standard deviation of quantile to impute
 sd_finite <- function(x) {
   ok <- is.finite(x)
-  sd(x[ok]) * sdPercentile
+  sd(x[ok]) * sd_percentile
 }
-sds <- apply(quantDataLog, 1, sd_finite) # 1 = row of matrix (ie, phosphopeptide)
+# 1 = row of matrix (ie, phosphopeptide)
+sds <- apply(quant_data_log, 1, sd_finite)
 plot(
-  density(sds, na.rm=T)
-, main="Smoothed estimated probability density vs. std. deviation"
-, sub="(probability estimation made with Gaussian smoothing)"
+  density(sds, na.rm = T)
+, main = "Smoothed estimated probability density vs. std. deviation"
+, sub = "(probability estimation made with Gaussian smoothing)"
 )

-m1 <- median(sds, na.rm=T) #sd to be used is the median sd
+m1 <- median(sds, na.rm = T) #sd to be used is the median sd

 ```

@@ -186,102 +213,116 @@
 <!--
 The number of missing values are:
 -->
-```{r echo=FALSE}
+```{r echo = FALSE}
 #Determine number of cells to impute
-temp <- quantData[is.na(quantData)]
+temp <- quant_data[is.na(quant_data)]

 #Determine number of values to impute
-NoToImpute <- length(temp)
+number_to_impute <- length(temp)
 ```

 <!--
 % of values that are missing:
 -->
-```{r echo=FALSE}
-pct_missing_values <- length(temp)/(length(logvalues)+length(temp)) * 100
+```{r echo = FALSE}
+pct_missing_values <- length(temp) / (length(logvalues) + length(temp)) * 100
 ```

 <!--
 First few rows of data before imputation:
 -->
-## Impute missing values
+```{r echo = FALSE, results = 'asis'}
+cat("\\newpage\n")
+```
+
+## Parse sample names
+
+Parse the names of the samples to deduce the factor level for each sample:
+
 ```{r echo = FALSE}

-#ACE start segment: trt-median based imputation
 # prep for trt-median based imputation

-# Assuming that regexSampleNames <- "\\.(\\d+)[A-Z]$"
-#   get factors -> group runs (samples) by ignoring terminal [A-Z] in sample names
-# regexpr(pattern, text, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)
-m <- regexpr(regexSampleNames, names(quantData), perl=TRUE)
-tempMatches <- regmatches(names(quantData), m)
+# Assuming that regex_sample_names <- "\\.(\\d+)[A-Z]$"
+#   get factors ->
+#      group runs (samples) by ignoring terminal [A-Z] in sample names
+
+m <- regexpr(regex_sample_names, names(quant_data), perl = TRUE)
+temp_matches <- regmatches(names(quant_data), m)
 print("Extracted sample names")
-print(tempMatches)
-m2 <- regexpr(regexSampleGrouping, tempMatches, perl=TRUE)
-sampleNumbers <- as.factor(regmatches(tempMatches, m2))
+print(temp_matches)
+m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE)
+sample_factor_levels <- as.factor(regmatches(temp_matches, m2))
 print("Factor levels")
-print(sampleNumbers)
+print(sample_factor_levels)

 ```
+## Impute missing values
+
 ```{r echo = FALSE}

-#ACE hack begin
 #Determine number of cells to impute
-cat(
-  sprintf("Before imputation, there are:\n %d peptides\n %d missing values (%2.0f%s)"
-  , sum(rep.int(TRUE, nrow(quantData)))
-  , sum(is.na(quantData))
-  , pct_missing_values
-  , "%"
-  )
+cat("Before imputation,",
+  sprintf(
+    "there are:\n  %d peptides\n  %d missing values (%2.0f%s)",
+    sum(rep.int(TRUE, nrow(quant_data))),
+    sum(is.na(quant_data)),
+    pct_missing_values,
+    "%"
+    )
 )
-#ACE hack end

 ```
 ```{r echo = FALSE}

 #Impute data
-quantDataImputed <- quantData
+quant_data_imp <- quant_data

 # Identify which values are missing and need to be imputed
-ind <- which(is.na(quantDataImputed), arr.ind=TRUE)
+ind <- which(is.na(quant_data_imp), arr.ind = TRUE)

 ```
 ```{r echo = FALSE}

 # Apply imputation
 switch(
-  imputationMethod
-, "group-median"={
-    cat("Imputation method: substitute missing value with median peptide-intensity for sample-group\n")
-    #goodRows <- rep.int(TRUE, nrow(quantDataImputed))
-    sampleLevelIntegers <- as.integer(sampleNumbers)
-    for (i in 1:length(levels(sampleNumbers))) {
-      levelCols <- i == sampleLevelIntegers
-      ind <- which(is.na(quantDataImputed[,levelCols]), arr.ind=TRUE)
-      quantDataImputed[ind,levelCols] <- apply(quantDataImputed[,levelCols], 1, median, na.rm=T)[ind[,1]]
+  imputation_method
+, "group-median" = {
+    cat("Imputation method:\n   substitute missing value",
+      "with median peptide-intensity for sample-group\n")
+
+    sample_level_integers <- as.integer(sample_factor_levels)
+    for (i in seq_len(length(levels(sample_factor_levels)))) {
+      level_cols <- i == sample_level_integers
+      ind <- which(is.na(quant_data_imp[, level_cols]), arr.ind = TRUE)
+      quant_data_imp[ind, level_cols] <-
+        apply(quant_data_imp[, level_cols], 1, median, na.rm = T)[ind[, 1]]
     }
-    goodRows <- !is.na(rowMeans(quantDataImputed))
+    good_rows <- !is.na(rowMeans(quant_data_imp))
   }
-, "median"={
-    cat("Imputation method: substitute missing value with median peptide-intensity across all sample classes\n")
-    quantDataImputed[ind] <- apply(quantDataImputed, 1, median, na.rm=T)[ind[,1]]
-    goodRows <- !is.na(rowMeans(quantDataImputed))
+, "median" = {
+    cat("Imputation method:\n   substitute missing value with",
+      "median peptide-intensity across all sample classes\n")
+    quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]]
+    good_rows <- !is.na(rowMeans(quant_data_imp))
   }
-, "mean"={
-    cat("Imputation method: substitute missing value with mean peptide-intensity across all sample classes\n")
-    quantDataImputed[ind] <- apply(quantDataImputed, 1, mean, na.rm=T)[ind[,1]]
-    goodRows <- !is.na(rowMeans(quantDataImputed))
+, "mean" = {
+    cat("Imputation method:\n   substitute missing value with",
+      "mean peptide-intensity across all sample classes\n")
+    quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]]
+    good_rows <- !is.na(rowMeans(quant_data_imp))
   }
-, "random"={
+, "random" = {
     cat(
+      "Imputation method:\n   substitute missing value with\n  ",
       sprintf(
-        "Imputation method: substitute missing value with random intensity N ~ (%0.2f, %0.2f)\n"
+        "random intensity N ~ (%0.2f, %0.2f)\n"
       , q1, m1
       )
     )
-    quantDataImputed[is.na(quantDataImputed)] <- 10^rnorm(NoToImpute, mean= q1, sd = m1)
-    goodRows <- !is.na(rowMeans(quantDataImputed))
+    quant_data_imp[is.na(quant_data_imp)] <-
+      10 ^ rnorm(number_to_impute, mean = q1, sd = m1)
+    good_rows <- !is.na(rowMeans(quant_data_imp))
   }
 )

@@ -289,13 +330,16 @@
 ```{r echo = FALSE}

 #Determine number of cells to impute
-temp <- quantDataImputed[is.na(quantDataImputed)]
-cat(
+temp <- quant_data_imp[is.na(quant_data_imp)]
+cat("After imputation, there are:",
   sprintf(
-    "After imputation, there are:\n  %d missing values\n  %d usable peptides\n  %d peptides with too many missing values for further analysis"
-  , sum(is.na(quantDataImputed[goodRows,]))
-  , sum(goodRows)
-  , sum(!goodRows)
+    "\n  %d missing values\n  %d usable peptides analysis"
+  , sum(is.na(quant_data_imp[good_rows, ]))
+  , sum(good_rows)
+  ),
+  sprintf(
+    "\n  %d peptides with too many missing values for further analysis"
+  , sum(!good_rows)
   )
 )
 ```
@@ -303,22 +347,29 @@


 # Zap rows where imputation was ineffective
-fullData         <- fullData        [goodRows, ]
-quantData        <- quantData       [goodRows, ]
-quantDataImputed <- quantDataImputed[goodRows, ]
+full_data         <- full_data        [good_rows, ]
+quant_data        <- quant_data       [good_rows, ]
+quant_data_imp <- quant_data_imp[good_rows, ]

 ```
 ```{r echo = FALSE}

-d_combined <- (density(as.numeric(as.matrix(log10(quantDataImputed)))))
-d_original <- density(as.numeric(as.matrix(log10(quantDataImputed[!is.na(quantData)]))))
+d_combined <- (density(as.numeric(as.matrix(
+  log10(quant_data_imp)
+))))
+d_original <-
+  density(as.numeric(as.matrix(
+    log10(quant_data_imp[!is.na(quant_data)]))))

 ```
 ```{r echo = FALSE}

-if (sum(is.na(quantData)) > 0) {
+if (sum(is.na(quant_data)) > 0) {
   # There ARE missing values
-  d_imputed <- (density(as.numeric(as.matrix(log10(quantDataImputed[is.na(quantData)])))))
+  d_imputed <-
+    (density(as.numeric(as.matrix(
+      log10(quant_data_imp[is.na(quant_data)])
+    ))))
 } else {
   # There are NO missing values
   d_imputed <- d_combined
@@ -326,29 +377,28 @@

 ```

-<!-- ```{r echo = FALSE, fig.cap = "Blue =  Data before imputation; Red = Imputed data"} -->
-```{r echo = FALSE, fig.dim=c(9,5)}
+```{r echo = FALSE, fig.dim = c(9, 5)}
 ylim <- c(0, max(d_combined$y, d_original$y, d_imputed$y))
 plot(
-  d_combined
-, ylim = ylim
-, sub = "Blue = data before imputation; Red = imputed data"
-, main = "Density vs. log10(intensity) before and after imputation"
+  d_combined,
+  ylim = ylim,
+  sub = "Blue = data before imputation; Red = imputed data",
+  main = "Density vs. log10(intensity) before and after imputation"
 )
-lines(d_original, col="blue")
-lines(d_imputed, col="red")
+lines(d_original, col = "blue")
+lines(d_imputed, col = "red")
 ```

 ## Perform Quantile Normalization
-```{r echo=FALSE}
-library(preprocessCore)
+
+<!--
 # Apply quantile normalization using preprocessCore::normalize.quantiles
 # ---
 # tool repository: http://bioconductor.org/packages/release/bioc/html/preprocessCore.html
 #   except this: https://support.bioconductor.org/p/122925/#9135989
 #   says to install it like this:
 #     ```
-#     BiocManager::install("preprocessCore", configure.args="--disable-threading", force = TRUE,lib=.libPaths()[1])
+#     BiocManager::install("preprocessCore", configure.args="--disable-threading", force = TRUE, lib=.libPaths()[1])
 #     ```
 # conda installation (necessary because of a bug in recent openblas):
 #   conda install bioconductor-preprocesscore openblas=0.3.3
@@ -360,7 +410,7 @@
 #   Using a normalization based upon quantiles, this function normalizes a matrix of probe level intensities.
 #
 # Usage:
-#   normalize.quantiles(x,copy=TRUE, keep.names=FALSE)
+#   normalize.quantiles(x, copy = TRUE, keep.names = FALSE)
 #
 # Arguments:
 #
@@ -397,261 +447,355 @@
 #       and Variance. Bioinformatics 19(2), pp 185-193. DOI 10.1093/bioinformatics/19.2.185
 #       http://bmbolstad.com/misc/normalize/normalize.html
 # ...
+-->
+```{r echo = FALSE}
+library(preprocessCore)

 if (TRUE) {
-  quantDataImputed.qn <- normalize.quantiles(as.matrix(quantDataImputed))
+  quant_data_imp_qn <- normalize.quantiles(as.matrix(quant_data_imp))
 } else {
-  quantDataImputed.qn <- as.matrix(quantDataImputed)
+  quant_data_imp_qn <- as.matrix(quant_data_imp)
 }

-quantDataImputed.qn = as.data.frame(quantDataImputed.qn)
-names(quantDataImputed.qn) = names(quantDataImputed)
-quantDataImputed_QN_log <- log10(quantDataImputed.qn)
+quant_data_imp_qn <- as.data.frame(quant_data_imp_qn)
+names(quant_data_imp_qn) <- names(quant_data_imp)
+quant_data_imp_qn_log <- log10(quant_data_imp_qn)

-rownames(quantDataImputed_QN_log) <- fullData[,1]
+rownames(quant_data_imp_qn_log) <- full_data[, 1]

-quantDataImputed.qn.LS = t(scale(t(log10(quantDataImputed.qn))))
-anyNaN <- function (x) {
+quant_data_imp_qn_ls <- t(scale(t(log10(quant_data_imp_qn))))
+any_nan <- function(x) {
   !any(x == "NaN")
 }
-sel = apply(quantDataImputed.qn.LS, 1, anyNaN)
-quantDataImputed.qn.LS2 <- quantDataImputed.qn.LS[which(sel),]
-quantDataImputed.qn.LS2 = as.data.frame(quantDataImputed.qn.LS2)
+sel <- apply(quant_data_imp_qn_ls, 1, any_nan)
+quant_data_imp_qn_ls2 <- quant_data_imp_qn_ls[which(sel), ]
+quant_data_imp_qn_ls2 <- as.data.frame(quant_data_imp_qn_ls2)

 #output quantile normalized data
-dataTableImputed_QN_LT <- cbind(fullData[1:9], quantDataImputed_QN_log)
-write.table(dataTableImputed_QN_LT, file = paste(paste(strsplit(imputedDataFilename, ".txt"),"QN_LT",sep="_"),".txt",sep=""), sep = "\t", col.names=TRUE, row.names=FALSE)
+data_table_imp_qn_lt <- cbind(full_data[1:9], quant_data_imp_qn_log)
+write.table(
+  data_table_imp_qn_lt,
+  file = paste(paste(
+    strsplit(imputed_data_filename, ".txt"), "QN_LT", sep = "_"
+  ), ".txt", sep = ""),
+  sep = "\t",
+  col.names = TRUE,
+  row.names = FALSE
+)

 ```

 <!-- ACE insertion begin -->
 ### Checking that normalized, imputed, log-transformed sample distributions are similar:

-```{r echo=FALSE}
-#library(data.table)
+```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}
+

-#Save unimputed quantDataLog for plotting below
-unimputedQuantDataLog <- quantDataLog
+# Save unimputed quant_data_log for plotting below
+unimputed_quant_data_log <- quant_data_log

-#Log10 transform (after preparing for zero values, which should never happen...)
-quantDataImputed.qn[quantDataImputed.qn == 0] <- .000000001
-quantDataLog <- log10(quantDataImputed.qn)
+# log10 transform (after preparing for zero values,
+#   which should never happen...)
+quant_data_imp_qn[quant_data_imp_qn == 0] <- .000000001
+quant_data_log <- log10(quant_data_imp_qn)

-summary(quantDataLog)
+# Output quantile-normalized log-transformed dataset
+#   with imputed, normalized data

-#Output quantile-normalized log-transformed dataset with imputed, normalized data
-
-dataTableImputed <- cbind(fullData[1:9], quantDataLog)
+data_table_imputed <- cbind(full_data[1:9], quant_data_log)
 write.table(
-    dataTableImputed
-  , file=imputedDataFilename
-  , sep="\t"
-  , col.names=TRUE
-  , row.names=FALSE
-  , quote=FALSE
+    data_table_imputed
+  , file = imputed_data_filename
+  , sep = "\t"
+  , col.names = TRUE
+  , row.names = FALSE
+  , quote = FALSE
   )


-#data visualization
+# data visualization
 old_par <- par(
-  mai=par("mai") + c(0.5,0,0,0)
-, oma=par("oma") + c(0.5,0,0,0)
+  mai = par("mai") + c(0.5, 0, 0, 0)
+, oma = par("oma") + c(0.5, 0, 0, 0)
 )
 boxplot(
-  quantDataLog
-, las=2
+  quant_data_log
+, las = 2
 )
 par(old_par)
+
+
+
+cat("\\newline\n")
+cat("\\newline\n")
+
 ```

-```{r echo=FALSE, fig.dim=c(9,5)}
-quantDataLog_stack <- stack(quantDataLog)
-ggplot(quantDataLog_stack, aes(x=values)) + geom_density(aes(group=ind, colour=ind))
+```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4)}
+quant_data_log_stack <- stack(quant_data_log)
+ggplot(
+  quant_data_log_stack,
+  aes(x = values)
+  ) + geom_density(aes(group = ind, colour = ind))
 ```

 ## Perform ANOVA filters

-```{r,echo=FALSE}
-#Make new data frame containing only Phosphopeptides to connect preANOVA to ANOVA (connect_df)
+(see following pages)
+
+```{r, echo = FALSE}
+# Make new data frame containing only Phosphopeptides
+#   to connect preANOVA to ANOVA (connect_df)
 connect_df <- data.frame(
-    dataTableImputed_QN_LT$Phosphopeptide
-  , dataTableImputed_QN_LT[,firstDataColumn]
+    data_table_imp_qn_lt$Phosphopeptide
+  , data_table_imp_qn_lt[, first_data_column]
   )
-colnames(connect_df) <- c("Phosphopeptide","Intensity")
+colnames(connect_df) <- c("Phosphopeptide", "Intensity")
 ```

-```{r echo=FALSE, fig.dim=c(9,10)}
-# Get factors -> group replicates (as indicated by terminal letter) by the preceding digits
-#   For example, group .1A .1B .1C into group 1; .2A .2B .2C, into group 2; etc..
-m <- regexpr(regexSampleNames, names(quantDataImputed_QN_log), perl=TRUE)
-#ACE str(m)
-tempMatches <- regmatches(names(quantDataImputed_QN_log), m)
-#ACE str(tempMatches)
-numSamples <- length(tempMatches)
-#ACE str(numSamples)
-m2 <- regexpr(regexSampleGrouping, tempMatches, perl=TRUE)
-#ACE str(m2)
-#ACE str(regmatches(tempMatches, m2))
-sampleNumbers <- as.factor(regmatches(tempMatches, m2))
-#ACE str(sampleNumbers)
+```{r echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
+# Get factors -> group replicates (as indicated by terminal letter)
+#   by the preceding digits;
+#   e.g., group .1A .1B .1C into group 1; .2A .2B .2C, into group 2; etc..
+m <-
+  regexpr(regex_sample_names, names(quant_data_imp_qn_log), perl = TRUE)
+
+temp_matches <- regmatches(names(quant_data_imp_qn_log), m)
+
+number_of_samples <- length(temp_matches)

-if (length(levels(sampleNumbers))<2) {
-  cat("ERROR!!!! Cannot perform ANOVA analysis because it requires two or more factor levels\n")
+m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE)
+
+
+sample_factor_levels <- as.factor(regmatches(temp_matches, m2))
+
+
+if (length(levels(sample_factor_levels)) < 2) {
+  cat(
+    "ERROR!!!! Cannot perform ANOVA analysis",
+    "because it requires two or more factor levels\n"
+  )
   cat("Unparsed sample names are:\n")
-  print(names(quantDataImputed_QN_log))
-  cat(sprintf("Parsing rule for SampleNames is '%s'\n", regexSampleNames))
+  print(names(quant_data_imp_qn_log))
+  cat(sprintf("Parsing rule for SampleNames is '%s'\n", regex_sample_names))
   cat("Parsed names are:\n")
-  print(tempMatches)
-  cat(sprintf("Parsing rule for SampleGrouping is '%s'\n", regexSampleGrouping))
+  print(temp_matches)
+  cat(sprintf(
+    "Parsing rule for SampleGrouping is '%s'\n",
+    regex_sample_grouping
+  ))
   cat("Sample group assignments are:\n")
-  print(regmatches(tempMatches, m2))
+  print(regmatches(temp_matches, m2))
 } else {
-  pValueData.anovaPs <- apply(quantDataImputed_QN_log, 1, anovaFunc, groupingFactor=sampleNumbers)
+  p_value_data_anova_ps <-
+    apply(
+      quant_data_imp_qn_log,
+      1,
+      anova_func,
+      grouping_factor = sample_factor_levels
+      )

-  pValueData.anovaPs.FDR <- p.adjust(pValueData.anovaPs, method="fdr")
-  pValueData <- data.frame(
-    phosphopeptide = fullData[,1]
-  , rawANOVAp = pValueData.anovaPs
-  , FDRadjustedANOVAp = pValueData.anovaPs.FDR
+  p_value_data_anova_ps_fdr <-
+    p.adjust(p_value_data_anova_ps, method = "fdr")
+  p_value_data <- data.frame(
+    phosphopeptide = full_data[, 1]
+    ,
+    raw_anova_p = p_value_data_anova_ps
+    ,
+    fdr_adjusted_anova_p = p_value_data_anova_ps_fdr
   )
-  #ACE rownames(pValueData) <- fullData[,1]
-  # output ANOVA file to constructed filename,
+
+  # output ANOVA file to constructed filename,
   #   e.g.    "Outputfile_pST_ANOVA_STEP5.txt"
   #   becomes "Outpufile_pST_ANOVA_STEP5_FDR0.05.txt"

-  #Re-output quantile-normalized log-transformed dataset with imputed, normalized data to include p-values
+  # Re-output quantile-normalized log-transformed dataset
+  #   with imputed, normalized data to include p-values

-  dataTableImputed <- cbind(fullData[1:9], pValueData[,2:3], quantDataLog)
+  data_table_imputed <-
+    cbind(full_data[1:9], p_value_data[, 2:3], quant_data_log)
   write.table(
-      dataTableImputed
-    , file=imputedDataFilename
-    , sep="\t"
-    , col.names=TRUE
-    , row.names=FALSE
-    , quote=FALSE
+    data_table_imputed,
+    file = imputed_data_filename,
+    sep = "\t",
+    col.names = TRUE,
+    row.names = FALSE,
+    quote = FALSE
     )


-  pValueData <- pValueData[order(pValueData$FDRadjustedANOVAp),]
+  p_value_data <-
+    p_value_data[order(p_value_data$fdr_adjusted_anova_p), ]
+
+  cutoff <- val_fdr[1]
+  for (cutoff in val_fdr) {
+    #loop through FDR cutoffs

-  cutoff <- valFDR[1]
-  for (cutoff in valFDR){ #loop through FDR cutoffs
-
-    filtered_p <- pValueData[which(pValueData$FDRadjustedANOVAp < cutoff),, drop = FALSE]
-    filteredData.filtered <- quantDataImputed_QN_log[rownames(filtered_p),, drop = FALSE]
-    filteredData.filtered <- filteredData.filtered[order(filtered_p$FDRadjustedANOVAp),, drop = FALSE]
+    filtered_p <-
+      p_value_data[
+        which(p_value_data$fdr_adjusted_anova_p < cutoff),
+        ,
+        drop = FALSE
+        ]
+    filtered_data_filtered <-
+      quant_data_imp_qn_log[
+        rownames(filtered_p),
+        ,
+        drop = FALSE
+        ]
+    filtered_data_filtered <-
+      filtered_data_filtered[
+        order(filtered_p$fdr_adjusted_anova_p),
+        ,
+        drop = FALSE
+        ]

     # <!-- ACE insertion start -->
     old_oma <- par("oma")
     old_par <- par(
-      mai=(par("mai") + c(0.7,0,0,0)) * c(1,1,0.3,1)
-    , oma=old_oma * c(1,1,0.3,1)
-    , cex.main=0.9
-    , cex.axis=0.7
-    )
-
-    if (nrow(filteredData.filtered) > 0) {
+      mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1),
+      oma = old_oma * c(1, 1, 0.3, 1),
+      cex.main = 0.9,
+      cex.axis = 0.7
+      )
+
+    cat("\\newpage\n")
+    if (nrow(filtered_data_filtered) > 0) {
+      cat(sprintf(
+        "Intensities for peptides whose adjusted p-value < %0.2f\n",
+        cutoff
+      ))
+      cat("\\newline\n")
+      cat("\\newline\n")
+
       boxplot(
-        filteredData.filtered
-      , main = sprintf("Imputed, normalized intensities where adjusted p-value < %0.2f", cutoff)
-      # no line plot , main = ""
-      , las = 2
-      # , ylim = c(5.5,10)
-      , ylab = expression(log[10](intensity))
+        filtered_data_filtered,
+        main = "Imputed, normalized intensities", # no line plot
+        las = 2,
+        ylab = expression(log[10](intensity))
       )
     } else {
-      cat(sprintf("No peptides were found to have cutoff adjusted p-value < %0.2f\n", cutoff))
+      cat(sprintf(
+        "No peptides were found to have cutoff adjusted p-value < %0.2f\n",
+        cutoff
+      ))
     }
     par(old_par)
-
-    #Add Phosphopeptide column to ANOVA filtered table
-    ANOVA.filtered_merge <- merge(
+
+    if (nrow(filtered_data_filtered) > 0) {
+      #Add Phosphopeptide column to anova_filtered table
+      anova_filtered_merge <- merge(
         x = connect_df
-      , y = filteredData.filtered
-      , by.x="Intensity"
-      , by.y=1
+        ,
+        y = filtered_data_filtered
+        ,
+        by.x = "Intensity"
+        ,
+        by.y = 1
       )
-    ANOVA.filtered_merge.order <- rownames(filtered_p)
-
-    ANOVA.filtered_merge.format <- sapply(
-      X = filtered_p$FDRadjustedANOVAp
-    , FUN = function(x) {
-        if (x > 0.0001)
-          paste0("(%0.",1+ceiling(-log10(x)),"f) %s")
-        else
-          paste0("(%0.4e) %s")
+      anova_filtered_merge_order <- rownames(filtered_p)
+
+      anova_filtered_merge_format <- sapply(
+        X = filtered_p$fdr_adjusted_anova_p
+        ,
+        FUN = function(x) {
+          if (x > 0.0001)
+            paste0("(%0.", 1 + ceiling(-log10(x)), "f) %s")
+          else
+            paste0("(%0.4e) %s")
         }
-    )
-
-    #ANOVA.filtered_merge.format <- paste0("(%0.",1+ceiling(-log10(filtered_p$FDRadjustedANOVAp)),"f) %s")
-
-    ANOVA.filtered <- data.table(
-        ANOVA.filtered_merge$Phosphopeptide
-      , ANOVA.filtered_merge$Intensity
-      , ANOVA.filtered_merge[, 2:numSamples+1]
-      )
-    colnames(ANOVA.filtered) <- c("Phosphopeptide", colnames(filteredData.filtered))
-
-    # merge qualitative columns into the ANOVA data
-    output_table <- data.frame(ANOVA.filtered$Phosphopeptide)
-    output_table <- merge(
-        x = output_table
-      , y = dataTableImputed_QN_LT
-      , by.x = "ANOVA.filtered.Phosphopeptide"
-      , by.y="Phosphopeptide"
       )

-    #Produce heatmap to visualize significance and the effect of imputation
-    m <- as.matrix(unimputedQuantDataLog[ANOVA.filtered_merge.order,])
-    if (nrow(m) > 0) {
-      rownames_m <- rownames(m)
-      rownames(m) <- sapply(
-          X = 1:nrow(m)
-        , FUN = function(i) {
+
+
+      anova_filtered <- data.table(
+        anova_filtered_merge$Phosphopeptide
+        ,
+        anova_filtered_merge$Intensity
+        ,
+        anova_filtered_merge[, 2:number_of_samples + 1]
+      )
+      colnames(anova_filtered) <-
+        c("Phosphopeptide", colnames(filtered_data_filtered))
+
+      # merge qualitative columns into the ANOVA data
+      output_table <- data.frame(anova_filtered$Phosphopeptide)
+      output_table <- merge(
+        x = output_table
+        ,
+        y = data_table_imp_qn_lt
+        ,
+        by.x = "anova_filtered.Phosphopeptide"
+        ,
+        by.y = "Phosphopeptide"
+      )
+
+      #Produce heatmap to visualize significance and the effect of imputation
+      m <-
+        as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ])
+      if (nrow(m) > 0) {
+        rownames_m <- rownames(m)
+        rownames(m) <- sapply(
+          X = seq_len(nrow(m))
+          ,
+          FUN = function(i) {
             sprintf(
-              ANOVA.filtered_merge.format[i]
-            , filtered_p$FDRadjustedANOVAp[i]
-            , rownames_m[i]
+              anova_filtered_merge_format[i]
+              ,
+              filtered_p$fdr_adjusted_anova_p[i]
+              ,
+              rownames_m[i]
             )
           }
         )
-      margins <- c(
-        max(nchar(colnames(m))) * 10 / 16 # col
-      , max(nchar(rownames(m))) * 5 / 16 # row
-      )
-      how_many_peptides <- min(50, nrow(m))
+        margins <- c(max(nchar(colnames(m))) * 10 / 16 # col
+                     , max(nchar(rownames(m))) * 5 / 16 # row
+                     )
+                     how_many_peptides <- min(50, nrow(m))

-      op <- par("cex.main")
-      try(
-        if (nrow(m) > 1) {
-          par(cex.main=0.6)
-          heatmap(
-            m[how_many_peptides:1,]
-          , Rowv = NA
-          , Colv = NA
-          , cexRow = 0.7
-          , cexCol = 0.8
-          , scale="row"
-          , margins = margins
-          , main = "Heatmap of unimputed, unnormalized intensities"
-          , xlab = ""
-          # , main = bquote(
-          #     .( how_many_peptides )
-          #       ~ " peptides with adjusted p-value <"
-          #       ~ .(sprintf("%0.2f", cutoff))
-          #     )
-          )
-        }
-      )
-      #ACE fig_dim knitr::opts_chunk$set(fig.dim = fig_dim)
-      par(op)
+                     cat("\\newpage\n")
+                     if (nrow(m) > 50) {
+                       cat("Heatmap for the 50 most-significant peptides",
+                         sprintf(
+                           "whose adjusted p-value < %0.2f\n",
+                           cutoff)
+                       )
+                     } else {
+                       cat("Heatmap for peptides whose",
+                         sprintf("adjusted p-value < %0.2f\n",
+                         cutoff)
+                       )
+                     }
+                     cat("\\newline\n")
+                     cat("\\newline\n")
+                     op <- par("cex.main")
+                     try(
+                       if (nrow(m) > 1) {
+                         par(cex.main = 0.6)
+                         heatmap(
+                           m[how_many_peptides:1, ],
+                           Rowv = NA,
+                           Colv = NA,
+                           cexRow = 0.7,
+                           cexCol = 0.8,
+                           scale = "row",
+                           margins = margins,
+                           main =
+                             "Heatmap of unimputed, unnormalized intensities",
+                           xlab = ""
+                           )
+                       }
+                     )
+                     par(op)
+      }
     }
-
   }
 }
 ```

+<!--
 ## Peptide IDs, etc.

 See output files.
+-->
--- a/repository_dependencies.xml	Fri Mar 11 20:04:05 2022 +0000
+++ b/repository_dependencies.xml	Tue Mar 15 00:35:16 2022 +0000
@@ -1,5 +1,5 @@
 <?xml version="1.0" ?>
 <repositories description="Suite for preprocessing and ANOVA of MaxQuant results using LC-MS proteomics data from phosphoproteomic enrichment.">
-    <repository name="mqppep_preproc" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="b91809a18dbe"/>
-    <repository name="mqppep_anova" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="d4d531006735"/>
+    <repository name="mqppep_preproc" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="42daf70d4ed4"/>
+    <repository name="mqppep_anova" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="922d309640db"/>
 </repositories>
\ No newline at end of file
--- a/workflow/ppenrich_suite_wf.ga	Fri Mar 11 20:04:05 2022 +0000
+++ b/workflow/ppenrich_suite_wf.ga	Tue Mar 15 00:35:16 2022 +0000
@@ -28,20 +28,20 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 257.06666564941406,
-                "height": 81.39999389648438,
-                "left": 339.95001220703125,
-                "right": 539.9500122070312,
-                "top": 175.6666717529297,
+                "bottom": -36.30000305175781,
+                "height": 82.19999694824219,
+                "left": 150,
+                "right": 350,
+                "top": -118.5,
                 "width": 200,
-                "x": 339.95001220703125,
-                "y": 175.6666717529297
+                "x": 150,
+                "y": -118.5
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "002d55e6-29a5-426d-9248-70ec33424b15",
+            "uuid": "f4273d40-f2b8-4ad0-8bcc-91e72bd25fe1",
             "workflow_outputs": []
         },
         "1": {
@@ -60,20 +60,20 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 411.4666748046875,
-                "height": 101.79998779296875,
-                "left": 379.95001220703125,
-                "right": 579.9500122070312,
-                "top": 309.66668701171875,
+                "bottom": 278.1000061035156,
+                "height": 102.60000610351562,
+                "left": 376,
+                "right": 576,
+                "top": 175.5,
                 "width": 200,
-                "x": 379.95001220703125,
-                "y": 309.66668701171875
+                "x": 376,
+                "y": 175.5
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": null}",
+            "tool_state": "{\"optional\": false, \"format\": [\"fasta\"]}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "8f079dcc-1843-47cd-b4dc-1830e4466430",
+            "uuid": "cb31b0ac-cacc-42ee-bd42-f42d0bdae128",
             "workflow_outputs": []
         },
         "2": {
@@ -92,20 +92,20 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 573.4666748046875,
-                "height": 101.79998779296875,
-                "left": 418.95001220703125,
-                "right": 618.9500122070312,
-                "top": 471.66668701171875,
+                "bottom": 423.1000061035156,
+                "height": 102.60000610351562,
+                "left": 387,
+                "right": 587,
+                "top": 320.5,
                 "width": 200,
-                "x": 418.95001220703125,
-                "y": 471.66668701171875
+                "x": 387,
+                "y": 320.5
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "dc894a94-97a3-40ff-811e-01b30d498478",
+            "uuid": "e6ec01b8-ff1a-4c90-a064-b40c5cad75bb",
             "workflow_outputs": []
         },
         "3": {
@@ -124,20 +124,20 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 726.0666809082031,
-                "height": 81.39999389648438,
-                "left": 459.95001220703125,
-                "right": 659.9500122070312,
-                "top": 644.6666870117188,
+                "bottom": 546.6999969482422,
+                "height": 82.19999694824219,
+                "left": 399,
+                "right": 599,
+                "top": 464.5,
                 "width": 200,
-                "x": 459.95001220703125,
-                "y": 644.6666870117188
+                "x": 399,
+                "y": 464.5
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "6fc936ad-0b52-484f-a051-73c1776fdeb0",
+            "uuid": "2c59056a-c1b4-4a20-a194-991d56c8b6c2",
             "workflow_outputs": []
         },
         "4": {
@@ -156,20 +156,20 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 894.4666748046875,
-                "height": 101.79998779296875,
-                "left": 503.95001220703125,
-                "right": 703.9500122070312,
-                "top": 792.6666870117188,
+                "bottom": 696.1000061035156,
+                "height": 102.60000610351562,
+                "left": 420,
+                "right": 620,
+                "top": 593.5,
                 "width": 200,
-                "x": 503.95001220703125,
-                "y": 792.6666870117188
+                "x": 420,
+                "y": 593.5
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "22b77482-2339-4b45-8fc6-d39f7175131b",
+            "uuid": "987a5891-15f1-4f70-89a8-386447f0bf24",
             "workflow_outputs": []
         },
         "5": {
@@ -188,20 +188,20 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 1041.0666809082031,
-                "height": 81.39999389648438,
-                "left": 535.9500122070312,
-                "right": 735.9500122070312,
-                "top": 959.6666870117188,
+                "bottom": 820.6999969482422,
+                "height": 82.19999694824219,
+                "left": 436,
+                "right": 636,
+                "top": 738.5,
                 "width": 200,
-                "x": 535.9500122070312,
-                "y": 959.6666870117188
+                "x": 436,
+                "y": 738.5
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "3d97a902-1408-403c-b82e-ddb6ca6a7d47",
+            "uuid": "964d8d21-b063-411a-aee8-372a0d0dfba3",
             "workflow_outputs": []
         },
         "6": {
@@ -220,20 +220,20 @@
             "name": "Input dataset",
             "outputs": [],
             "position": {
-                "bottom": 1210.5666198730469,
-                "height": 81.39999389648438,
-                "left": 562.9500122070312,
-                "right": 762.9500122070312,
-                "top": 1129.1666259765625,
+                "bottom": 1071.1999969482422,
+                "height": 82.19999694824219,
+                "left": 418,
+                "right": 618,
+                "top": 989,
                 "width": 200,
-                "x": 562.9500122070312,
-                "y": 1129.1666259765625
+                "x": 418,
+                "y": 989
             },
             "tool_id": null,
-            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}",
+            "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}",
             "tool_version": null,
             "type": "data_input",
-            "uuid": "7b5eab97-7dad-4b0e-81eb-22aac39dd5b6",
+            "uuid": "42577db7-d5e5-4f39-b3ad-d0648abb9df3",
             "workflow_outputs": []
         },
         "7": {
@@ -267,7 +267,32 @@
                     "output_name": "output"
                 }
             },
-            "inputs": [],
+            "inputs": [
+                {
+                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
+                    "name": "networkin"
+                },
+                {
+                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
+                    "name": "p_sty_motifs"
+                },
+                {
+                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
+                    "name": "phosphoSites"
+                },
+                {
+                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
+                    "name": "protein_fasta"
+                },
+                {
+                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
+                    "name": "psp_kinase_substrate"
+                },
+                {
+                    "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing",
+                    "name": "psp_regulatory_sites"
+                }
+            ],
             "label": null,
             "name": "MaxQuant Phosphopeptide Preprocessing",
             "outputs": [
@@ -325,14 +350,14 @@
                 }
             ],
             "position": {
-                "bottom": 1186.6000366210938,
-                "height": 812.933349609375,
-                "left": 945.4500122070312,
-                "right": 1145.4500122070312,
-                "top": 373.66668701171875,
+                "bottom": 964.0999755859375,
+                "height": 793.5999755859375,
+                "left": 826.5,
+                "right": 1026.5,
+                "top": 170.5,
                 "width": 200,
-                "x": 945.4500122070312,
-                "y": 373.66668701171875
+                "x": 826.5,
+                "y": 170.5
             },
             "post_job_actions": {
                 "RenameDatasetActionenrichGraph": {
@@ -428,75 +453,75 @@
                 }
             },
             "tool_id": "mqppep_preproc",
-            "tool_state": "{\"collapseFunc\": \"sum\", \"enriched\": \"ST\", \"intervalCol\": \"1\", \"localProbCutoff\": \"0.75\", \"merge_function\": \"sum\", \"networkin\": {\"__class__\": \"ConnectedValue\"}, \"p_sty_motifs\": {\"__class__\": \"ConnectedValue\"}, \"phosphoCol\": \"^Number of Phospho [(]STY[)]$\", \"phosphoSites\": {\"__class__\": \"ConnectedValue\"}, \"phospho_type\": \"sty\", \"protein_fasta\": {\"__class__\": \"ConnectedValue\"}, \"psp_kinase_substrate\": {\"__class__\": \"ConnectedValue\"}, \"psp_regulatory_sites\": {\"__class__\": \"ConnectedValue\"}, \"species\": \"human\", \"startCol\": \"^Intensity[^_]\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
-            "tool_version": "0.1.0+galaxy0",
+            "tool_state": "{\"collapseFunc\": \"sum\", \"intervalCol\": \"1\", \"localProbCutoff\": \"0.75\", \"merge_function\": \"sum\", \"networkin\": {\"__class__\": \"RuntimeValue\"}, \"p_sty_motifs\": {\"__class__\": \"RuntimeValue\"}, \"phosphoCol\": \"^Number of Phospho [(]STY[)]$\", \"phosphoSites\": {\"__class__\": \"RuntimeValue\"}, \"protein_fasta\": {\"__class__\": \"RuntimeValue\"}, \"psp_kinase_substrate\": {\"__class__\": \"RuntimeValue\"}, \"psp_regulatory_sites\": {\"__class__\": \"RuntimeValue\"}, \"pst_not_py\": \"true\", \"species\": \"human\", \"startCol\": \"^Intensity[^_]\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "tool_version": null,
             "type": "tool",
-            "uuid": "235b1a2e-ccc0-4c91-bb91-bbf4d272c870",
+            "uuid": "886043ce-8d9b-474e-b970-4fe9ee6a74fa",
             "workflow_outputs": [
                 {
                     "label": "ppep_intensities",
                     "output_name": "phosphoPepIntensities",
-                    "uuid": "92fd4e27-5d4b-4e9f-b3ad-6bdad53bb93d"
+                    "uuid": "e19a64d1-edee-4119-a72e-456af7a6c056"
                 },
                 {
                     "label": "enrichGraph_pdf",
                     "output_name": "enrichGraph",
-                    "uuid": "4c1d5590-f8ba-421c-858c-4c026691b52e"
+                    "uuid": "7e9936d9-9617-4df4-9133-7a04f8d05d26"
                 },
                 {
                     "label": "locProbCutoffGraph_pdf",
                     "output_name": "locProbCutoffGraph",
-                    "uuid": "66a79534-6372-4937-bcf2-8644be985eea"
+                    "uuid": "5656cba7-25e2-4362-ae92-1ddac67dee07"
                 },
                 {
                     "label": "enrichGraph_svg",
                     "output_name": "enrichGraph_svg",
-                    "uuid": "5e713d9c-1868-423b-be9a-25c0486e1472"
+                    "uuid": "ca13a22e-a41b-481c-ab87-1f97bbf768e9"
                 },
                 {
                     "label": "locProbCutoffGraph_svg",
                     "output_name": "locProbCutoffGraph_svg",
-                    "uuid": "4621ea21-ae90-4547-a68f-30dfc7857368"
+                    "uuid": "fc7a11f5-30d8-4409-878a-d3b70366711c"
                 },
                 {
                     "label": "filteredData",
                     "output_name": "filteredData_tabular",
-                    "uuid": "bb26d0fb-6f19-43c7-80ef-1cf81aa09ee8"
+                    "uuid": "aab49fc5-a3cf-4479-ac23-8e9272dadf28"
                 },
                 {
                     "label": "quantData",
                     "output_name": "quantData_tabular",
-                    "uuid": "20efe04f-2700-4af0-92c6-0830a42d8e75"
+                    "uuid": "23940202-403e-4256-916b-92539db07cdb"
                 },
                 {
                     "label": "ppep_map",
                     "output_name": "mapped_phophopeptides",
-                    "uuid": "037e2b97-8fc8-436d-bcc3-af5ee685b752"
+                    "uuid": "08ad13d4-c103-4f18-92cc-2c3b58565981"
                 },
                 {
                     "label": "melted_phosphopeptide_map",
                     "output_name": "melted_phophopeptide_map",
-                    "uuid": "c3e5de84-2659-45eb-81a6-edef6037d8aa"
+                    "uuid": "77cecaeb-8f7c-482e-b78a-e4809b194eb7"
                 },
                 {
                     "label": "ppep_mapping_sqlite",
                     "output_name": "mqppep_output_sqlite",
-                    "uuid": "a1a4f827-1f1f-4175-ae51-c238f9e1f248"
+                    "uuid": "8e53e05a-a47c-4b97-87e4-ebab133ccaea"
                 },
                 {
                     "label": "preproc_tab",
                     "output_name": "preproc_tab",
-                    "uuid": "b22b4b56-9395-4f6d-945e-0089e8897069"
+                    "uuid": "530a8140-9eba-4c87-a76b-4922febc12e7"
                 },
                 {
                     "label": "preproc_csv",
                     "output_name": "preproc_csv",
-                    "uuid": "54be90f9-1158-4686-af42-43d021088300"
+                    "uuid": "c5f22f05-0bf7-48cf-adc0-c2beffe33169"
                 },
                 {
                     "label": "preproc_sqlite",
                     "output_name": "preproc_sqlite",
-                    "uuid": "33663f9c-b718-4bdd-acc9-087c76bea678"
+                    "uuid": "53424150-7673-40af-ad60-0b4035e0c302"
                 }
             ]
         },
@@ -529,14 +554,14 @@
                 }
             ],
             "position": {
-                "bottom": 1488.0999603271484,
-                "height": 254.93333435058594,
-                "left": 1202.949951171875,
-                "right": 1402.949951171875,
-                "top": 1233.1666259765625,
+                "bottom": 1349,
+                "height": 256,
+                "left": 1058,
+                "right": 1258,
+                "top": 1093,
                 "width": 200,
-                "x": 1202.949951171875,
-                "y": 1233.1666259765625
+                "x": 1058,
+                "y": 1093
             },
             "post_job_actions": {
                 "RenameDatasetActionimputed_data_file": {
@@ -556,19 +581,19 @@
             },
             "tool_id": "mqppep_anova",
             "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"group-median\", \"__current_case__\": 0}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
-            "tool_version": "0.1.0+galaxy0",
+            "tool_version": null,
             "type": "tool",
-            "uuid": "2257286b-6f9a-45c1-90a3-bf5b972959d5",
+            "uuid": "a3cb902d-8ef6-4f84-bed3-80b2b20d1916",
             "workflow_outputs": [
                 {
                     "label": "intensities_group-mean-imputed_QN_LT",
                     "output_name": "imputed_data_file",
-                    "uuid": "8e7317c6-95e9-4454-b4d7-31b4de6167a8"
+                    "uuid": "ef19dcd3-8f3e-4fc4-829e-dae6719ff1cc"
                 },
                 {
                     "label": "intensities_group-mean-imputed_report",
                     "output_name": "report_file",
-                    "uuid": "dfe9b34e-1f3e-4971-8382-41178104e253"
+                    "uuid": "26bb93b0-bc11-4455-a280-241253b21981"
                 }
             ]
         },
@@ -601,14 +626,14 @@
                 }
             ],
             "position": {
-                "bottom": 1325.0999603271484,
-                "height": 254.93333435058594,
-                "left": 1452.949951171875,
-                "right": 1652.949951171875,
-                "top": 1070.1666259765625,
+                "bottom": 1186,
+                "height": 256,
+                "left": 1308,
+                "right": 1508,
+                "top": 930,
                 "width": 200,
-                "x": 1452.949951171875,
-                "y": 1070.1666259765625
+                "x": 1308,
+                "y": 930
             },
             "post_job_actions": {
                 "RenameDatasetActionimputed_data_file": {
@@ -628,19 +653,19 @@
             },
             "tool_id": "mqppep_anova",
             "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"random\", \"__current_case__\": 3, \"meanPercentile\": \"1\", \"sdPercentile\": \"0.2\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
-            "tool_version": "0.1.0+galaxy0",
+            "tool_version": null,
             "type": "tool",
-            "uuid": "9516971c-8532-4797-8bf9-4655ff104dbd",
+            "uuid": "217d92af-f6d6-4fd3-a78a-090d8afd3ae0",
             "workflow_outputs": [
                 {
                     "label": "intensities_randomly-imputed_QN_LT",
                     "output_name": "imputed_data_file",
-                    "uuid": "8ceda029-d5fd-4d75-a2b3-ac582bb137c3"
+                    "uuid": "925d734f-f9d8-49e8-aebb-c8d7598d45b2"
                 },
                 {
                     "label": "intensities_randomly-imputed_report",
                     "output_name": "report_file",
-                    "uuid": "84bedf25-c15b-4cc7-97e0-92f746e89f9c"
+                    "uuid": "4ab5f1b1-d04e-4634-8765-265122bc1064"
                 }
             ]
         }
@@ -648,6 +673,6 @@
     "tags": [
         "ppenrich"
     ],
-    "uuid": "ac7bf2d1-89fe-4bf6-920a-d5508842d3f9",
-    "version": 7
+    "uuid": "c54c2b2e-8080-445c-bc3e-43950c89d4e4",
+    "version": 3
 }
\ No newline at end of file