Mercurial > repos > eschen42 > mqppep_anova
changeset 7:d728198f1ba5 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9a0fa6d0f9aadc069a5551a54da6daf307885637"
author | eschen42 |
---|---|
date | Tue, 15 Mar 2022 00:35:16 +0000 |
parents | 922d309640db |
children | 2cdb2280771c |
files | MaxQuantProcessingScript.R PhosphoPeptide_Upstream_Kinase_Mapping.pl macros.xml mqppep_anova.R mqppep_anova.xml mqppep_anova_script.Rmd repository_dependencies.xml workflow/ppenrich_suite_wf.ga |
diffstat | 8 files changed, 683 insertions(+), 569 deletions(-) [+] |
line wrap: on
line diff
--- a/MaxQuantProcessingScript.R Fri Mar 11 20:04:05 2022 +0000 +++ b/MaxQuantProcessingScript.R Tue Mar 15 00:35:16 2022 +0000 @@ -695,7 +695,6 @@ # Write phosphopeptides filtered by enrichment # -- -#ACE colnames(quant_data_qc_enrichment)[1] <- "Phosphopeptide" write.table( quant_data_qc_enrichment, file = output_filename,
--- a/PhosphoPeptide_Upstream_Kinase_Mapping.pl Fri Mar 11 20:04:05 2022 +0000 +++ b/PhosphoPeptide_Upstream_Kinase_Mapping.pl Tue Mar 15 00:35:16 2022 +0000 @@ -25,7 +25,6 @@ ############################################################################################################################### use strict; -#ACE use warnings; use warnings 'FATAL' => 'all'; use Getopt::Std; @@ -67,7 +66,6 @@ my (@kinases_PhosphoSite, $kinases_PhosphoSite); my ($p_sequence_kinase_PhosphoSite, $p_sequence_PhosphoSite, $kinase_PhosphoSite); my (%regulatory_sites_PhosphoSite_hash); -#ACE my %psp_regsite_protein; my (%domain, %ON_FUNCTION, %ON_PROCESS, %ON_PROT_INTERACT, %ON_OTHER_INTERACT, %notes, %organism); my (%unique_motifs); my ($kinase_substrate_NetworKIN_matches, $kinase_motif_matches, $kinase_substrate_PhosphoSite_matches); @@ -196,8 +194,6 @@ getopts('i:f:s:n:m:p:r:P:F:o:O:D:hva', \%opts) ; -#ACE print %opts; #ACE -#ACE print "\n"; #ACE if (exists($opts{'h'})) { usage(); @@ -221,15 +217,6 @@ $fasta_in = $opts{'f'}; $use_sqlite = 0; } -#ACE if (exists($opts{'s'}) && -e $opts{'s'}) { -#ACE $use_sqlite = 1; -#ACE $dbfile = $opts{'s'}; -#ACE } elsif (!exists($opts{'f'}) || !-e $opts{'f'}) { -#ACE die('Neither input FASTA file nor input SQLite file was found'); -#ACE } else { -#ACE $use_sqlite = 0; -#ACE $fasta_in = $opts{'f'}; -#ACE } my $species; if ((!exists($opts{'s'})) || ($opts{'s'} eq '')) { $species = 'human'; @@ -426,8 +413,6 @@ push (@databases, $x[0]); push (@accessions, $x[1]); push (@names, $x[2]); - #ACE print "names $x[2]\n"; - #ACE print "--- $_\n"; pseudo_sed(); s/$/\t/; push (@parsed_fasta, $_); @@ -439,7 +424,6 @@ } $parsed_fasta[$#accessions] = $parsed_fasta[$#accessions].$x[0]; } - #ACE print "... '$parsed_fasta[$#accessions]'\n"; } close IN1; print "Done Reading FASTA file $fasta_in\n"; @@ -614,7 +598,6 @@ $i = system("\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in --verbose"); } else { $i = system("\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in"); - #ACE DELETEME $i = system("\$CONDA_PREFIX/bin/python $dirname/search_ppep.py -u $db_out -p $file_in --verbose"); } if ($i) { print "python $dirname/search_ppep.py -u $db_out -p $file_in\n exited with exit code $i\n"; @@ -634,7 +617,6 @@ # ############################################################################################################################### -#ACE print OUT "$headers\tFormatted Motifs\tUnique Motifs\tPhospho-site(s)\tAccessions(s)\tName(s)\n"; print "--- Match the non_p_peptides to the \@sequences array:\n"; @@ -750,7 +732,6 @@ } # Prepare counter and phosphopeptide tracker for next row - #ACE print "counter: $counter; phosphpepetide: $current_ppep\n"; $former_ppep = $current_ppep; $counter -= 1; @@ -822,15 +803,11 @@ my @matches = @{$matched_sequences{$peptide_to_match}}; @tmp_motifs_array = (); for my $i (0 .. $#matches) { - #ACE print "Matching $peptide_to_match to match $i\n"; - #ACE print "\$sites{\$peptide_to_match}[\$i] $sites{$peptide_to_match}[$i]\n"; # Find the location of the phospo-site in the sequence(s) $tmp_site = 0; my $offset = 0; my $tmp_p_peptide = $peptide_to_match; - #ACE print "peptide_to_match: $peptide_to_match at position $sites{$peptide_to_match}[$i] in sequence $matched_sequences{$peptide_to_match}[$i]\n"; $tmp_p_peptide =~ s/#//g; $tmp_p_peptide =~ s/\d//g; $tmp_p_peptide =~ s/\_//g; $tmp_p_peptide =~ s/\.//g; - #ACE print "tmp_p_peptide: $tmp_p_peptide\n"; # Find all phosphorylated residues in the p_peptide @p_sites = (); @@ -870,9 +847,7 @@ my $arg1 = $matched_sequences{$peptide_to_match}[$i]; if (($total_length > 0) && (length($arg1) > $arg2 + $total_length - 1)) { - #ACE print "\$tmp_motif = substr($arg1, $arg2, $total_length)\n"; $tmp_motif = substr($arg1, $arg2, $total_length); - #ACE print "tmp_motif = $tmp_motif\ti = $i\tpeptide_to_match = $peptide_to_match\tmatched_sequences{peptide_to_match}[i] = $matched_sequences{$peptide_to_match}[$i]\targ2 = $arg2\targ3 = $total_length\n"; # Put the "p" back in front of the appropriate phospho-residue(s). my (@tmp_residues, $tmp_position); @@ -883,7 +858,6 @@ } else { $tmp_position = $desired_residues_L + $p_sites[$m] - $p_sites[0]; } - #ACE print "Line 431: p_sites = $p_sites[$m]\ttmp_position = $tmp_position\ttmp_motif = $tmp_motif\n"; if ($tmp_position < length($tmp_motif) + 1) { push (@tmp_residues, substr($tmp_motif, $tmp_position, 1)); if ($tmp_residues[$m] eq "S") {substr($tmp_motif, $tmp_position, 1, "s");} @@ -1032,9 +1006,7 @@ $x[$i] =~ s/\r//g; $x[$i] =~ s/\n//g; $x[$i] =~ s/\"//g; } if ($line != 0) { - #ACE FUE if (($species eq $species) && ($species eq $species)) { if (($species eq $x[3]) && ($species eq $x[8])) { - #ACE print "KIN_ORGANISM is '$x[3]' and SUB_ORGANISM is '$x[8]', line: $line\n"; if (exists ($kinases_PhosphoSite -> {$x[0]})) { #do nothing } @@ -1154,7 +1126,6 @@ open (IN4, "$PSP_Regulatory_Sites_in") or die "I couldn't find $PSP_Regulatory_Sites_in\n"; print "Reading the PhosphoSite regulatory site data: $PSP_Regulatory_Sites_in\n"; -#ACE $i = system("head -n 4 $PSP_Regulatory_Sites_in"); $line = -1; while (<IN4>) { @@ -1177,7 +1148,6 @@ $x[$i] =~ s/\r//g; $x[$i] =~ s/\n//g; $x[$i] =~ s/\"//g; } my $found_GENE=0; - #ACE print STDERR "line $line: $_\n"; if ( (not exists($x[0])) ) { next; } @@ -1197,19 +1167,13 @@ } } elsif ($line != 0) { - #ACE print "PSPReg $line: $_\n" if ($x[9] eq 'KGQKYFDsGDYNMAK'); - #ACE FUE if ($species ne $species) { if ($species ne $x[6]) { # Do nothing - this record was filtered out by the species filter - #ACE print "PSP_regsite line rejected: $line\n"; } elsif (!exists($regulatory_sites_PhosphoSite_hash{$x[9]})) { - #ACE print "testing \$domain{\$x[9]} for \$regulatory_sites_PhosphoSite_hash{$x[9]}\n" if ($x[9] eq 'KGQKYFDsGDYNMAK'); #ACE if (!defined $domain{$x[9]} || $domain{$x[9]} eq "") { - #ACE print "adding found \$regulatory_sites_PhosphoSite_hash{$x[9]}\n" if ($x[9] eq 'KGQKYFDsGDYNMAK'); #ACE $regulatory_sites_PhosphoSite_hash{$x[9]} = $x[9]; $domain{$x[9]} = $x[10]; - #ACE $psp_regsite_protein{$x[9]} = $x[1]; $ON_FUNCTION{$x[9]} = $x[11]; $ON_PROCESS{$x[9]} = $x[12]; $ON_PROT_INTERACT{$x[9]} = $x[13]; @@ -1226,12 +1190,9 @@ } else { # do nothing - #ACE print "WARNING line $line - no domain or 7aa for: GENE $x[0] PROTEIN $x[1] PROT_TYPE $x[2] ACC_ID $x[3] GENE_ID $x[4] HU_CHR_LOC $x[5] ORGANISM $x[6] MOD_RSD $x[7] SITE_GRP_ID $x[8] SITE_+/-7_AA $x[9] DOMAIN $x[10]\n"; - #ACE print "$_\n"; } } else { - #ACE print "Checking $domain{$x[9]} =~ /$x[10]/\n"; if ($domain{$x[9]} =~ /$x[10]/) { # do nothing } @@ -1453,10 +1414,8 @@ if (($number_pY == 1) || ($number_pSTY == 1)) { my $seq_plus5aa = ""; my $seq_plus7aa = ""; - #ACE print "tmp_motif is $tmp_motif before replacement\n"; $formatted_sequence = &replace_pSpTpY($tmp_motif, $phospho_type); print " a #pY $number_pY; #pSTY $number_pSTY; matching formatted motif $formatted_sequence for peptide $peptide at " . format_localtime_iso8601() ."\n" if ($verbose); - #ACE print "formatted_sequence is $formatted_sequence after replacement\n"; if ($phospho_type eq 'y') { $seq_plus5aa = (split(/(\w{0,5}y\w{0,5})/, $formatted_sequence))[1]; $seq_plus7aa = (split(/(\w{0,7}y\w{0,7})/, $formatted_sequence))[1]; @@ -1474,14 +1433,10 @@ print "Error writing tuple ($ppep_id_lut{$peptide},$seq_plus7aa) for peptide $peptide to ppep_regsite_LUT: $ppep_regsite_LUT_stmth->errstr\n"; } } - #ACE print "seq_plus5aa is $seq_plus5aa \n"; - #ACE print "seq_plus7aa is $seq_plus7aa \n"; for my $i (0 .. $#kinases_observed) { if (defined $seq_plus5aa) { my $tmp = $seq_plus5aa."_".$kinases_observed[$i]; #eg, should be PGRPLsSYGMD_PKCalpha if (exists($p_sequence_kinase -> {$tmp})) { - #ACE print($tmp."\t"); - #ACE print(($p_sequence_kinase -> {$tmp})."\n"); #ACE $kinase_substrate_NetworKIN_matches{$peptide}{$kinases_observed[$i]} = "X"; #ACE } } @@ -1489,7 +1444,6 @@ for my $i (0 .. $#motif_sequence) { if ($peptide =~ /$motif_sequence[$i]/) { $kinase_motif_matches{$peptide}{$motif_sequence[$i]} = "X"; - #ACE print "\$kinase_motif_matches{$peptide}{$motif_sequence[$i]} = 'X'; $motif_type{$motif_sequence[$i]}\n"; #ACE } } for my $i (0 .. $#kinases_PhosphoSite) { @@ -1500,12 +1454,9 @@ } } } - #ACE print "checking for existence of \$regulatory_sites_PhosphoSite_hash{$seq_plus7aa}\n"; #ACE if (exists($regulatory_sites_PhosphoSite_hash{$seq_plus7aa})) { - #ACE print "found regulatory_sites_PhosphoSite_hash{$seq_plus7aa}\n"; #ACE $seq_plus7aa_2{$peptide} = $seq_plus7aa; $domain_2{$peptide} = $domain{$seq_plus7aa}; - #ACE $psp_regsite_protein_2{$peptide} = $psp_regsite_protein{$seq_plus7aa}; $ON_FUNCTION_2{$peptide} = $ON_FUNCTION{$seq_plus7aa}; $ON_PROCESS_2{$peptide} = $ON_PROCESS{$seq_plus7aa}; $ON_PROT_INTERACT_2{$peptide} = $ON_PROT_INTERACT{$seq_plus7aa}; @@ -1513,12 +1464,10 @@ $notes_2{$peptide} = $notes{$seq_plus7aa}; $organism_2{$peptide} = $organism{$seq_plus7aa}; } else { - #ACE print "c not found \$regulatory_sites_PhosphoSite_hash{{$seq_plus7aa}\n"; #ACE } } elsif (($number_pY > 1) || ($number_pSTY > 1)) { #eg, if $x[4] is 1308-[ VIYFQAIEEVpYpYDHLRSAAKKR ]-1329 and $number_pY == 2 $formatted_sequence = $tmp_motif; - #ACE print "formatted_sequence is $formatted_sequence \n"; $seq_plus5aa = ""; $seq_plus7aa = ""; #Create the sequences with only one phosphorylation site @@ -1546,14 +1495,11 @@ my @formatted_sequences; for my $k (0 .. $#sites) { - #ACE print "pSTY_sequences[k] is $pSTY_sequences[$k] before replacement\n"; $formatted_sequences[$k] = &replace_pSpTpY($pSTY_sequences[$k], $phospho_type); - #ACE print "formatted_sequences[k] is $formatted_sequences[$k] \n"; } for my $k (0 .. $#formatted_sequences) { print " b #pY $number_pY; #pSTY $number_pSTY; matching formatted motif $formatted_sequences[$k] for peptide $peptide at " . format_localtime_iso8601() ."\n" if ($verbose); - #ACE print "formatted_sequences[k] for phosphotype $phospho_type is $formatted_sequences[$k] \n"; if ($phospho_type eq 'y') { $seq_plus5aa = (split(/(\w{0,5}y\w{0,5})/, $formatted_sequences[$k]))[1]; $seq_plus7aa = (split(/(\w{0,7}y\w{0,7})/, $formatted_sequences[$k]))[1]; @@ -1562,21 +1508,15 @@ $seq_plus5aa = (split(/(\w{0,5}(s|t|y)\w{0,5})/, $formatted_sequences[$k]))[1]; $seq_plus7aa = (split(/(\w{0,7}(s|t|y)\w{0,7})/, $formatted_sequences[$k]))[1]; } - #ACE print "seq_plus5aa is $seq_plus5aa \n"; - #ACE print "seq_plus7aa is $seq_plus7aa \n"; for my $i (0 .. $#kinases_observed) { my $tmp = $seq_plus5aa."_".$kinases_observed[$i]; #eg, should look like REEILsEMKKV_PKCalpha - #ACE print "seq_plus5aa._.kinases_observed[i] is $tmp\n"; #ACE if (exists($p_sequence_kinase -> {$tmp})) { $kinase_substrate_NetworKIN_matches{$peptide}{$kinases_observed[$i]} = "X"; - #ACE print "$tmp matched\n"; } } $pSTY_sequence = $formatted_sequences[$k]; - #ACE print "trying pSTY_sequence $pSTY_sequence \n"; for my $i (0 .. $#motif_sequence) { if ($pSTY_sequence =~ /$motif_sequence[$i]/) { - #ACE print "match for pSTY_sequence $pSTY_sequence was $motif_sequence[$i]\n"; $kinase_motif_matches{$peptide}{$motif_sequence[$i]} = "X"; } } @@ -1585,11 +1525,9 @@ #print "seq_plus7aa._.kinases_PhosphoSite[i] is $tmp"; if (exists($p_sequence_kinase_PhosphoSite -> {$tmp})) { $kinase_substrate_PhosphoSite_matches{$peptide}{$kinases_PhosphoSite[$i]} = "X"; - #ACE print "$tmp matched \n"; } } if (exists($regulatory_sites_PhosphoSite -> {$seq_plus7aa})) { - #ACE print "ACE processing seq_plus7aa '$domain{$seq_plus7aa}'\n"; #ACE $seq_plus7aa_2{$peptide} = $seq_plus7aa; # $domain @@ -1603,16 +1541,6 @@ $domain_2{$peptide} = $domain_2{$peptide}." / ".$domain{$seq_plus7aa}; } - #ACE # $psp_regsite_protein - #ACE if ($psp_regsite_protein_2{$peptide} eq "") { - #ACE $psp_regsite_protein_2{$peptide} = $psp_regsite_protein{$seq_plus7aa}; - #ACE } - #ACE elsif ($psp_regsite_protein{$seq_plus7aa} eq "") { - #ACE # do nothing - #ACE } - #ACE else { - #ACE $psp_regsite_protein_2{$peptide} = $psp_regsite_protein_2{$peptide}." / ".$psp_regsite_protein{$seq_plus7aa}; - #ACE } # $ON_FUNCTION_2 if ($ON_FUNCTION_2{$peptide} eq "") { @@ -1682,7 +1610,6 @@ } $organism_2{$peptide} = $organism{$seq_plus7aa}; } else { - #ACE print "d not found \$regulatory_sites_PhosphoSite_hash{{$seq_plus7aa}}\n"; } # if (exists($regulatory_sites_PhosphoSite -> {$seq_plus7aa})) } # for my $k (0 .. $#formatted_sequences) } # if/else number of phosphosites @@ -1716,7 +1643,6 @@ foreach my $peptide (keys %data) { if (exists($domain_2{$peptide}) and (defined $domain_2{$peptide}) and (not $domain_2{$peptide} eq "") ) { - #ACE print "writing domain $domain_2{$peptide} for regulatory site(s) $seq_plus7aa_2{$peptide}\n"; #ACE $psp_regulatory_site_stmth->bind_param(1, $domain_2{$peptide}); $psp_regulatory_site_stmth->bind_param(2, $ON_FUNCTION_2{$peptide}); $psp_regulatory_site_stmth->bind_param(3, $ON_PROCESS_2{$peptide}); @@ -1728,7 +1654,6 @@ if (not $psp_regulatory_site_stmth->execute()) { print "Error writing PSP_Regulatory_site for one regulatory site with peptide '$domain_2{$peptide}': $psp_regulatory_site_stmth->errstr\n"; } else { - #ACE print "added domain for $domain_2{$peptide}\n"; } } elsif (exists($domain_2{$peptide}) and (not defined $domain_2{$peptide})) { print "\$domain_2{$peptide} is undefined\n"; #ACE @@ -1989,7 +1914,6 @@ my $tmp_site_for_printing = $p_residues{$peptide}{$i}[$j] + 1; # added 12.05.2012 for Justin's data print OUT "$foobar"; print OUT "$tmp_site_for_printing\t"; - #ACE print OUT "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing\t"; $tmp_for_insert .= "p$residues{$peptide}{$i}[$j]$tmp_site_for_printing"; $there_were_sites = 1; } @@ -2049,7 +1973,6 @@ # begin store-to-SQLite "ppep_metadata" table # --- for $i (1..14) { - #ACE print "\$ppep_metadata_stmth->bind_param($i, " . $ppep_metadata[$i-1] . ")\n"; $ppep_metadata_stmth->bind_param($i, $ppep_metadata[$i-1]); } if (not $ppep_metadata_stmth->execute()) { @@ -2074,7 +1997,6 @@ $ppep_intensity_stmth->bind_param( 1, $ppep_id ); $ppep_intensity_stmth->bind_param( 2, $sample_id_lut{$samples[$i]} ); $ppep_intensity_stmth->bind_param( 3, $intense ); - #ACE print "insert ($peptide, $samples[$i], $intense)\n"; if (not $ppep_intensity_stmth->execute()) { print "Error writing tuple ($peptide,$samples[$i],$intense): $ppep_intensity_stmth->errstr\n"; } @@ -2103,13 +2025,11 @@ } else { print OUT "\t";} } - #ACE my %wrote_motif = {}; my %wrote_motif; my $motif_parts_0; for my $i (0 .. $#motif_sequence) { if (exists($kinase_motif_matches{$peptide}{$motif_sequence[$i]})) { print OUT "X\t"; - #ACE my @motif_parts = split(/ motif /, $motif_type{$motif_sequence[$i]}); $motif_parts_0 = $motif_type{$motif_sequence[$i]}." ".$motif_sequence[$i]; my $key = "$peptide\t$gene_names\t$motif_parts_0"; if (!exists($wrote_motif{$key})) {
--- a/macros.xml Fri Mar 11 20:04:05 2022 +0000 +++ b/macros.xml Tue Mar 15 00:35:16 2022 +0000 @@ -1,4 +1,30 @@ <macros> - <token name="@TOOL_VERSION@">0.1.0</token> + <token name="@TOOL_VERSION@">0.1.1</token> <token name="@VERSION_SUFFIX@">0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="0.3.3" >openblas</requirement> + <requirement type="package" version="0.4.0" >r-sass</requirement> + <requirement type="package" version="1.14.2" >r-data.table</requirement> + <requirement type="package" version="1.22.2" >numpy</requirement> + <requirement type="package" version="1.4.0" >pyahocorasick</requirement> + <requirement type="package" version="1.4.0" >r-stringr</requirement> + <requirement type="package" version="1.4.1" >pandas</requirement> + <requirement type="package" version="1.56.0" >bioconductor-preprocesscore</requirement> + <requirement type="package" version="1.64" >perl-dbd-sqlite</requirement> + <requirement type="package" version="1.7.1" >r-optparse</requirement> + <requirement type="package" version="1.7.1" >r-optparse</requirement> + <requirement type="package" version="2.11" >r-rmarkdown</requirement> + <!-- + It would be nice to use conda-forge/texlive-core, but issue 23 blocked PDF-creation. + Also, I got pango font errors (output had missing symbols replaced with boxes) unless + I specified the build as well as the version, i.e. + texlive-core=20210325=h97429d4_0 + --> + <requirement type="package" >r-tinytex</requirement> + <requirement type="package" version="3.3.5" >r-ggplot2</requirement> + <requirement type="package" version="3.9.10" >python</requirement> + <requirement type="package" version="5.26.2" >perl</requirement> + </requirements> + </xml> </macros>
--- a/mqppep_anova.R Fri Mar 11 20:04:05 2022 +0000 +++ b/mqppep_anova.R Tue Mar 15 00:35:16 2022 +0000 @@ -196,9 +196,12 @@ # from run to run set.seed(28571) + +library(tinytex) +tinytex::install_tinytex() rmarkdown::render( input = paste(script_dir, "mqppep_anova_script.Rmd", sep = "/") -, output_format = rmarkdown::html_document(pandoc_args = "--self-contained") +, output_format = rmarkdown::pdf_document() , output_file = report_file_name , params = rmarkdown_params )
--- a/mqppep_anova.xml Fri Mar 11 20:04:05 2022 +0000 +++ b/mqppep_anova.xml Tue Mar 15 00:35:16 2022 +0000 @@ -3,24 +3,23 @@ <macros> <import>macros.xml</import> </macros> - <requirements> - <requirement type="package" version="1.7.1" >r-optparse</requirement> - <requirement type="package" version="1.4.0" >r-stringr</requirement> - <requirement type="package" version="1.14.2" >r-data.table</requirement> - <requirement type="package" version="3.3.5" >r-ggplot2</requirement> - <requirement type="package" version="1.56.0" >bioconductor-preprocesscore</requirement> - <requirement type="package" version="0.3.3" >openblas</requirement> - <requirement type="package" version="2.11" >r-rmarkdown</requirement> - <requirement type="package" version="0.4.0" >r-sass</requirement> - <requirement type="package" version="20210325">texlive-core</requirement> - - </requirements> - <!-- Rscript -e 'rmarkdown::render("QuantDataProcessingScript.Rmd")' --> + <expand macro="requirements"/> + <!-- + The weird invocation used here is because knitr and install_tinytex + both need access to a writeable directory, but most directories in a + biocontainer are read-only, so this builds a pseudo-home under /tmp + --> <command detect_errors="exit_code"><![CDATA[ - cd /tmp; + export OLD_PWD=\$(dirname \$(pwd)); + export HOME=/tmp\${OLD_PWD}; + mkdir -p \$HOME/bin; + mkdir -p \$HOME/tmp; + export TEMP=\$HOME/tmp; + export TMPDIR=\$TEMP; + cd \$TEMP; cp '$__tool_directory__/mqppep_anova_script.Rmd' . || exit 0; cp '$__tool_directory__/mqppep_anova.R' . || exit 0; - \${CONDA_PREFIX}/bin/Rscript /tmp/mqppep_anova.R + \${CONDA_PREFIX}/bin/Rscript \$TEMP/mqppep_anova.R --inputFile '$input_file' --alphaFile $alpha_file --firstDataColumn $first_data_column @@ -33,8 +32,8 @@ --regexSampleGrouping $sample_grouping_regex_f --imputedDataFile $imputed_data_file --reportFile $report_file; - rm mqppep_anova_script.Rmd; - rm mqppep_anova.R + cd \${OLD_PWD}; + rm -rf \$HOME ]]></command> <configfiles> <configfile name="sample_names_regex_f"> @@ -99,8 +98,11 @@ </param> </inputs> <outputs> - <data name="imputed_data_file" format="tabular" label="${input_file.name}.intensities_${imputation.imputation_method}-imputed_QN_LT" ></data> - <data name="report_file" format="html" label="${input_file.name}.report (download/unzip to view)" ></data> + <data name="imputed_data_file" format="tabular" label="${input_file.name}.${imputation.imputation_method}-imputed_QN_LT_intensities" ></data> + <!-- + <data name="report_file" format="html" label="${input_file.name}.${imputation.imputation_method}-imputed_report (download/unzip to view)" ></data> + --> + <data name="report_file" format="pdf" label="${input_file.name}.${imputation.imputation_method}-imputed_report" ></data> </outputs> <tests> <test> @@ -124,6 +126,8 @@ <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/> <param name="first_data_column" value="10"/> <param name="imputation_method" value="random"/> + <param name="meanPercentile" value="1" /> + <param name="sdPercentile" value="0.2" /> <param name="sample_names_regex" value="\.\d+[A-Z]$"/> <param name="sample_grouping_regex" value="\d+"/> <output name="imputed_data_file"> @@ -192,14 +196,7 @@ Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, quantile-normalized (**QN**), and log10-transformed (**LT**), in tabular format. ``report_file`` - (download/unzip to view) Summary report for normalization, imputation, and ANOVA. - This dataset is displayed in Galaxy as having a datatype of ``html`` in Galaxy, - but it is in fact a zipfile; the zip file contains - an HTML file. Please download and unzip it locally to view the report. - Ideally this report would be a PDF, but there is an issue - `(linked here) - <https://github.com/conda-forge/texlive-core-feedstock/issues/19>`_. - that needs to be resolved first. + Summary report for normalization, imputation, and ANOVA, in PDF format. **Authors**
--- a/mqppep_anova_script.Rmd Fri Mar 11 20:04:05 2022 +0000 +++ b/mqppep_anova_script.Rmd Tue Mar 15 00:35:16 2022 +0000 @@ -1,24 +1,32 @@ --- -title: "Quant Data Processing Script" +title: "MaxQuant Phospho-Proteomic Enrichment Pipeline ANOVA" author: "Larry Cheng; Art Eschenlauer" date: "May 28, 2018; Nov 16, 2021" output: - html_document: default pdf_document: default params: - inputFile: "Upstream_Map_pST_outputfile_STEP4.txt" - alphaFile: "alpha_levels.txt" + inputFile: "test-data/test_input_for_anova.tabular" + alphaFile: "test-data/alpha_levels.tabular" firstDataColumn: "Intensity" - imputationMethod: !r c("group-median","median","mean","random")[4] + imputationMethod: !r c("group-median", "median", "mean", "random")[1] meanPercentile: 1 sdPercentile: 0.2 regexSampleNames: "\\.(\\d+)[A-Z]$" regexSampleGrouping: "(\\d+)" imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt" --- -```{r setup, include=FALSE} +```{r setup, include = FALSE} # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 -knitr::opts_chunk$set(echo = FALSE, fig.dim=c(9,10)) +knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) + +### FUNCTIONS + +#ANOVA filter function +anova_func <- function(x, grouping_factor) { + x_aov <- aov(as.numeric(x) ~ grouping_factor) + pvalue <- summary(x_aov)[[1]][["Pr(>F)"]][1] + pvalue +} ``` ## Purpose: @@ -28,156 +36,175 @@ ## Variables to change for each input file --> ```{r include = FALSE} -#Input Filename -inputFile <- params$inputFile +# Input Filename +input_file <- params$inputFile -#First data column - ideally, this could be detected via regexSampleNames, but for now leave it as is. -firstDataColumn <- params$firstDataColumn -FDC_is_integer <- TRUE -firstDataColumn <- withCallingHandlers( - as.integer(firstDataColumn) - , warning = function(w) FDC_is_integer <<- FALSE +# First data column - ideally, this could be detected via regexSampleNames, +# but for now leave it as is. +first_data_column <- params$firstDataColumn +fdc_is_integer <- TRUE +first_data_column <- withCallingHandlers( + as.integer(first_data_column) + , warning = function(w) fdc_is_integer <<- FALSE ) -if (FALSE == FDC_is_integer) { - firstDataColumn <- params$firstDataColumn +if (FALSE == fdc_is_integer) { + first_data_column <- params$firstDataColumn } -#False discovery rate adjustment for ANOVA (Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05) -valFDR <- read.table(file = params$alphaFile, sep = "\t", header=F, quote="")[,1] +# False discovery rate adjustment for ANOVA +# Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05 +val_fdr <- + read.table(file = params$alphaFile, sep = "\t", header = F, quote = "")[, 1] #Imputed Data filename -imputedDataFilename <- params$imputedDataFilename +imputed_data_filename <- params$imputedDataFilename #ANOVA data filename ``` -```{r include = FALSE} -#Imputation method, should be one of c("random","group-median","median","mean") -imputationMethod <- params$imputationMethod +```{r echo = FALSE} +# Imputation method, should be one of +# "random", "group-median", "median", or "mean" +imputation_method <- params$imputationMethod -#Selection of percentile of logvalue data to set the mean for random number generation when using random imputation -meanPercentile <- params$meanPercentile / 100.0 +# Selection of percentile of logvalue data to set the mean for random number +# generation when using random imputation +mean_percentile <- params$meanPercentile / 100.0 -#deviation adjustment-factor for random values; real number. -sdPercentile <- params$sdPercentile +# deviation adjustment-factor for random values; real number. +sd_percentile <- params$sdPercentile + +# Regular expression of Sample Names, e.g., "\\.(\\d+)[A-Z]$" +regex_sample_names <- params$regexSampleNames -#Regular expression of Sample Names, e.g., "\\.(\\d+)[A-Z]$" -regexSampleNames <- params$regexSampleNames - -#Regular expression to extract Sample Grouping from Sample Name (if error occurs, compare sampleNumbers and tempMatches to see if groupings/pairs line up) -# e.g., "(\\d+)" -regexSampleGrouping <- params$regexSampleGrouping +# Regular expression to extract Sample Grouping from Sample Name; +# if error occurs, compare sample_factor_levels and temp_matches +# to see if groupings/pairs line up +# e.g., "(\\d+)" +regex_sample_grouping <- params$regexSampleGrouping ``` - -```{r include = FALSE} -### FUNCTIONS - -#ANOVA filter function -anovaFunc <- function(x, groupingFactor) { - x.aov = aov(as.numeric(x) ~ groupingFactor) - pvalue = summary(x.aov)[[1]][["Pr(>F)"]][1] - pvalue -} -``` - - - -### Checking that log-transformed sample distributions are similar: -```{r echo=FALSE} +```{r echo = FALSE} +### READ DATA library(data.table) # read.table reads a file in table format and creates a data frame from it. -# - note that `quote=""` means that quotation marks are treated literally. -fullData <- read.table(file = inputFile, sep = "\t", header=T, quote="", check.names=FALSE) -print(colnames(fullData)) -#head(fullData) +# - note that `quote = ""` means that quotation marks are treated literally. +full_data <- read.table( + file = input_file, + sep = "\t", + header = T, + quote = "", + check.names = FALSE + ) +``` + +### Column names from input file -if (FALSE == FDC_is_integer) { - dataColumnIndices <- grep(firstDataColumn, names(fullData), perl=TRUE) - str(dataColumnIndices) - if (length(dataColumnIndices) > 0) { - firstDataColumn <- dataColumnIndices[1] +```{r echo = FALSE, results = 'markup'} +print(colnames(full_data)) +data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE) +cat(sprintf("First data column: %d\n", min(data_column_indices))) +cat(sprintf("Last data column: %d\n", max(data_column_indices))) +``` + +```{r echo = FALSE, results = 'asis'} +cat("\\newpage\n") +``` + +### Checking that log-transformed sample distributions are similar: + +```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'} + +if (FALSE == fdc_is_integer) { + + if (length(data_column_indices) > 0) { + first_data_column <- data_column_indices[1] } else { - stop(paste("failed to convert firstDataColumn:", firstDataColumn)) + stop(paste("failed to convert firstDataColumn:", first_data_column)) } } - -quantData0 <- fullData[firstDataColumn:length(fullData)] -quantData <- fullData[firstDataColumn:length(fullData)] -quantData[quantData==0] <- NA #replace 0 with NA -quantDataLog <- log10(quantData) + +quant_data0 <- full_data[first_data_column:length(full_data)] +quant_data <- full_data[first_data_column:length(full_data)] +quant_data[quant_data == 0] <- NA #replace 0 with NA +quant_data_log <- log10(quant_data) -rownames(quantDataLog) <- fullData$Phosphopeptide - -summary(quantDataLog) +rownames(quant_data_log) <- full_data$Phosphopeptide -#data visualization +# data visualization old_par <- par( - mai=par("mai") + c(0.5,0,0,0) + mai = par("mai") + c(0.5, 0, 0, 0) ) boxplot( - quantDataLog -, las=2 + quant_data_log +, las = 2 ) par(old_par) -quantDataLog_stack <- stack(quantDataLog) + + +cat("\\newline\n") +cat("\\newline\n") + ``` -```{r echo = FALSE, fig.align="left", fig.dim=c(9,5)} +```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4), warning = FALSE} +quant_data_log_stack <- stack(quant_data_log) library(ggplot2) -ggplot(quantDataLog_stack, aes(x=values)) + geom_density(aes(group=ind, colour=ind)) +ggplot( + quant_data_log_stack, + aes(x = values)) + geom_density(aes(group = ind, colour = ind)) ``` ### Globally, are phosphopeptide intensities are approximately unimodal? -```{r echo = FALSE,fig.align="left", fig.dim=c(9,5)} -# ref for bquote particularly and plotting math expressions generally: +<!-- +# ref for bquote below particularly and plotting math expressions generally: # https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/ +--> +```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5)} -#identify the location of missing values -fin <- is.finite(as.numeric(as.matrix(quantDataLog))) +# identify the location of missing values +fin <- is.finite(as.numeric(as.matrix(quant_data_log))) -logvalues <- as.numeric(as.matrix(quantDataLog))[fin] +logvalues <- as.numeric(as.matrix(quant_data_log))[fin] plot( - density(logvalues) -, main = bquote("Smoothed estimated probability density vs." ~ log[10](intensity)) -, xlab = bquote(log[10](intensity)) -) + density(logvalues), + main = bquote( + "Smoothed estimated probability density vs." ~ log[10](intensity)), + xlab = bquote(log[10](intensity)) + ) hist( - x = as.numeric(as.matrix(quantDataLog)) + x = as.numeric(as.matrix(quant_data_log)) , breaks = 100 , main = bquote("Frequency vs." ~ log[10](intensity)) , xlab = bquote(log[10](intensity)) ) ``` -<!-- -## Impute missing values ---> - ### Distribution of standard deviations of phosphopeptides, ignoring missing values: -```{r echo = FALSE, fig.align="left", fig.dim=c(9,5)} -#determine quantile -q1 <- quantile(logvalues, probs = meanPercentile)[1] +```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5)} +# determine quantile +q1 <- quantile(logvalues, probs = mean_percentile)[1] -#determine standard deviation of quantile to impute +# determine standard deviation of quantile to impute sd_finite <- function(x) { ok <- is.finite(x) - sd(x[ok]) * sdPercentile + sd(x[ok]) * sd_percentile } -sds <- apply(quantDataLog, 1, sd_finite) # 1 = row of matrix (ie, phosphopeptide) +# 1 = row of matrix (ie, phosphopeptide) +sds <- apply(quant_data_log, 1, sd_finite) plot( - density(sds, na.rm=T) -, main="Smoothed estimated probability density vs. std. deviation" -, sub="(probability estimation made with Gaussian smoothing)" + density(sds, na.rm = T) +, main = "Smoothed estimated probability density vs. std. deviation" +, sub = "(probability estimation made with Gaussian smoothing)" ) -m1 <- median(sds, na.rm=T) #sd to be used is the median sd +m1 <- median(sds, na.rm = T) #sd to be used is the median sd ``` @@ -186,102 +213,116 @@ <!-- The number of missing values are: --> -```{r echo=FALSE} +```{r echo = FALSE} #Determine number of cells to impute -temp <- quantData[is.na(quantData)] +temp <- quant_data[is.na(quant_data)] #Determine number of values to impute -NoToImpute <- length(temp) +number_to_impute <- length(temp) ``` <!-- % of values that are missing: --> -```{r echo=FALSE} -pct_missing_values <- length(temp)/(length(logvalues)+length(temp)) * 100 +```{r echo = FALSE} +pct_missing_values <- length(temp) / (length(logvalues) + length(temp)) * 100 ``` <!-- First few rows of data before imputation: --> -## Impute missing values +```{r echo = FALSE, results = 'asis'} +cat("\\newpage\n") +``` + +## Parse sample names + +Parse the names of the samples to deduce the factor level for each sample: + ```{r echo = FALSE} -#ACE start segment: trt-median based imputation # prep for trt-median based imputation -# Assuming that regexSampleNames <- "\\.(\\d+)[A-Z]$" -# get factors -> group runs (samples) by ignoring terminal [A-Z] in sample names -# regexpr(pattern, text, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE) -m <- regexpr(regexSampleNames, names(quantData), perl=TRUE) -tempMatches <- regmatches(names(quantData), m) +# Assuming that regex_sample_names <- "\\.(\\d+)[A-Z]$" +# get factors -> +# group runs (samples) by ignoring terminal [A-Z] in sample names + +m <- regexpr(regex_sample_names, names(quant_data), perl = TRUE) +temp_matches <- regmatches(names(quant_data), m) print("Extracted sample names") -print(tempMatches) -m2 <- regexpr(regexSampleGrouping, tempMatches, perl=TRUE) -sampleNumbers <- as.factor(regmatches(tempMatches, m2)) +print(temp_matches) +m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE) +sample_factor_levels <- as.factor(regmatches(temp_matches, m2)) print("Factor levels") -print(sampleNumbers) +print(sample_factor_levels) ``` +## Impute missing values + ```{r echo = FALSE} -#ACE hack begin #Determine number of cells to impute -cat( - sprintf("Before imputation, there are:\n %d peptides\n %d missing values (%2.0f%s)" - , sum(rep.int(TRUE, nrow(quantData))) - , sum(is.na(quantData)) - , pct_missing_values - , "%" - ) +cat("Before imputation,", + sprintf( + "there are:\n %d peptides\n %d missing values (%2.0f%s)", + sum(rep.int(TRUE, nrow(quant_data))), + sum(is.na(quant_data)), + pct_missing_values, + "%" + ) ) -#ACE hack end ``` ```{r echo = FALSE} #Impute data -quantDataImputed <- quantData +quant_data_imp <- quant_data # Identify which values are missing and need to be imputed -ind <- which(is.na(quantDataImputed), arr.ind=TRUE) +ind <- which(is.na(quant_data_imp), arr.ind = TRUE) ``` ```{r echo = FALSE} # Apply imputation switch( - imputationMethod -, "group-median"={ - cat("Imputation method: substitute missing value with median peptide-intensity for sample-group\n") - #goodRows <- rep.int(TRUE, nrow(quantDataImputed)) - sampleLevelIntegers <- as.integer(sampleNumbers) - for (i in 1:length(levels(sampleNumbers))) { - levelCols <- i == sampleLevelIntegers - ind <- which(is.na(quantDataImputed[,levelCols]), arr.ind=TRUE) - quantDataImputed[ind,levelCols] <- apply(quantDataImputed[,levelCols], 1, median, na.rm=T)[ind[,1]] + imputation_method +, "group-median" = { + cat("Imputation method:\n substitute missing value", + "with median peptide-intensity for sample-group\n") + + sample_level_integers <- as.integer(sample_factor_levels) + for (i in seq_len(length(levels(sample_factor_levels)))) { + level_cols <- i == sample_level_integers + ind <- which(is.na(quant_data_imp[, level_cols]), arr.ind = TRUE) + quant_data_imp[ind, level_cols] <- + apply(quant_data_imp[, level_cols], 1, median, na.rm = T)[ind[, 1]] } - goodRows <- !is.na(rowMeans(quantDataImputed)) + good_rows <- !is.na(rowMeans(quant_data_imp)) } -, "median"={ - cat("Imputation method: substitute missing value with median peptide-intensity across all sample classes\n") - quantDataImputed[ind] <- apply(quantDataImputed, 1, median, na.rm=T)[ind[,1]] - goodRows <- !is.na(rowMeans(quantDataImputed)) +, "median" = { + cat("Imputation method:\n substitute missing value with", + "median peptide-intensity across all sample classes\n") + quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]] + good_rows <- !is.na(rowMeans(quant_data_imp)) } -, "mean"={ - cat("Imputation method: substitute missing value with mean peptide-intensity across all sample classes\n") - quantDataImputed[ind] <- apply(quantDataImputed, 1, mean, na.rm=T)[ind[,1]] - goodRows <- !is.na(rowMeans(quantDataImputed)) +, "mean" = { + cat("Imputation method:\n substitute missing value with", + "mean peptide-intensity across all sample classes\n") + quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]] + good_rows <- !is.na(rowMeans(quant_data_imp)) } -, "random"={ +, "random" = { cat( + "Imputation method:\n substitute missing value with\n ", sprintf( - "Imputation method: substitute missing value with random intensity N ~ (%0.2f, %0.2f)\n" + "random intensity N ~ (%0.2f, %0.2f)\n" , q1, m1 ) ) - quantDataImputed[is.na(quantDataImputed)] <- 10^rnorm(NoToImpute, mean= q1, sd = m1) - goodRows <- !is.na(rowMeans(quantDataImputed)) + quant_data_imp[is.na(quant_data_imp)] <- + 10 ^ rnorm(number_to_impute, mean = q1, sd = m1) + good_rows <- !is.na(rowMeans(quant_data_imp)) } ) @@ -289,13 +330,16 @@ ```{r echo = FALSE} #Determine number of cells to impute -temp <- quantDataImputed[is.na(quantDataImputed)] -cat( +temp <- quant_data_imp[is.na(quant_data_imp)] +cat("After imputation, there are:", sprintf( - "After imputation, there are:\n %d missing values\n %d usable peptides\n %d peptides with too many missing values for further analysis" - , sum(is.na(quantDataImputed[goodRows,])) - , sum(goodRows) - , sum(!goodRows) + "\n %d missing values\n %d usable peptides analysis" + , sum(is.na(quant_data_imp[good_rows, ])) + , sum(good_rows) + ), + sprintf( + "\n %d peptides with too many missing values for further analysis" + , sum(!good_rows) ) ) ``` @@ -303,22 +347,29 @@ # Zap rows where imputation was ineffective -fullData <- fullData [goodRows, ] -quantData <- quantData [goodRows, ] -quantDataImputed <- quantDataImputed[goodRows, ] +full_data <- full_data [good_rows, ] +quant_data <- quant_data [good_rows, ] +quant_data_imp <- quant_data_imp[good_rows, ] ``` ```{r echo = FALSE} -d_combined <- (density(as.numeric(as.matrix(log10(quantDataImputed))))) -d_original <- density(as.numeric(as.matrix(log10(quantDataImputed[!is.na(quantData)])))) +d_combined <- (density(as.numeric(as.matrix( + log10(quant_data_imp) +)))) +d_original <- + density(as.numeric(as.matrix( + log10(quant_data_imp[!is.na(quant_data)])))) ``` ```{r echo = FALSE} -if (sum(is.na(quantData)) > 0) { +if (sum(is.na(quant_data)) > 0) { # There ARE missing values - d_imputed <- (density(as.numeric(as.matrix(log10(quantDataImputed[is.na(quantData)]))))) + d_imputed <- + (density(as.numeric(as.matrix( + log10(quant_data_imp[is.na(quant_data)]) + )))) } else { # There are NO missing values d_imputed <- d_combined @@ -326,29 +377,28 @@ ``` -<!-- ```{r echo = FALSE, fig.cap = "Blue = Data before imputation; Red = Imputed data"} --> -```{r echo = FALSE, fig.dim=c(9,5)} +```{r echo = FALSE, fig.dim = c(9, 5)} ylim <- c(0, max(d_combined$y, d_original$y, d_imputed$y)) plot( - d_combined -, ylim = ylim -, sub = "Blue = data before imputation; Red = imputed data" -, main = "Density vs. log10(intensity) before and after imputation" + d_combined, + ylim = ylim, + sub = "Blue = data before imputation; Red = imputed data", + main = "Density vs. log10(intensity) before and after imputation" ) -lines(d_original, col="blue") -lines(d_imputed, col="red") +lines(d_original, col = "blue") +lines(d_imputed, col = "red") ``` ## Perform Quantile Normalization -```{r echo=FALSE} -library(preprocessCore) + +<!-- # Apply quantile normalization using preprocessCore::normalize.quantiles # --- # tool repository: http://bioconductor.org/packages/release/bioc/html/preprocessCore.html # except this: https://support.bioconductor.org/p/122925/#9135989 # says to install it like this: # ``` -# BiocManager::install("preprocessCore", configure.args="--disable-threading", force = TRUE,lib=.libPaths()[1]) +# BiocManager::install("preprocessCore", configure.args="--disable-threading", force = TRUE, lib=.libPaths()[1]) # ``` # conda installation (necessary because of a bug in recent openblas): # conda install bioconductor-preprocesscore openblas=0.3.3 @@ -360,7 +410,7 @@ # Using a normalization based upon quantiles, this function normalizes a matrix of probe level intensities. # # Usage: -# normalize.quantiles(x,copy=TRUE, keep.names=FALSE) +# normalize.quantiles(x, copy = TRUE, keep.names = FALSE) # # Arguments: # @@ -397,261 +447,355 @@ # and Variance. Bioinformatics 19(2), pp 185-193. DOI 10.1093/bioinformatics/19.2.185 # http://bmbolstad.com/misc/normalize/normalize.html # ... +--> +```{r echo = FALSE} +library(preprocessCore) if (TRUE) { - quantDataImputed.qn <- normalize.quantiles(as.matrix(quantDataImputed)) + quant_data_imp_qn <- normalize.quantiles(as.matrix(quant_data_imp)) } else { - quantDataImputed.qn <- as.matrix(quantDataImputed) + quant_data_imp_qn <- as.matrix(quant_data_imp) } -quantDataImputed.qn = as.data.frame(quantDataImputed.qn) -names(quantDataImputed.qn) = names(quantDataImputed) -quantDataImputed_QN_log <- log10(quantDataImputed.qn) +quant_data_imp_qn <- as.data.frame(quant_data_imp_qn) +names(quant_data_imp_qn) <- names(quant_data_imp) +quant_data_imp_qn_log <- log10(quant_data_imp_qn) -rownames(quantDataImputed_QN_log) <- fullData[,1] +rownames(quant_data_imp_qn_log) <- full_data[, 1] -quantDataImputed.qn.LS = t(scale(t(log10(quantDataImputed.qn)))) -anyNaN <- function (x) { +quant_data_imp_qn_ls <- t(scale(t(log10(quant_data_imp_qn)))) +any_nan <- function(x) { !any(x == "NaN") } -sel = apply(quantDataImputed.qn.LS, 1, anyNaN) -quantDataImputed.qn.LS2 <- quantDataImputed.qn.LS[which(sel),] -quantDataImputed.qn.LS2 = as.data.frame(quantDataImputed.qn.LS2) +sel <- apply(quant_data_imp_qn_ls, 1, any_nan) +quant_data_imp_qn_ls2 <- quant_data_imp_qn_ls[which(sel), ] +quant_data_imp_qn_ls2 <- as.data.frame(quant_data_imp_qn_ls2) #output quantile normalized data -dataTableImputed_QN_LT <- cbind(fullData[1:9], quantDataImputed_QN_log) -write.table(dataTableImputed_QN_LT, file = paste(paste(strsplit(imputedDataFilename, ".txt"),"QN_LT",sep="_"),".txt",sep=""), sep = "\t", col.names=TRUE, row.names=FALSE) +data_table_imp_qn_lt <- cbind(full_data[1:9], quant_data_imp_qn_log) +write.table( + data_table_imp_qn_lt, + file = paste(paste( + strsplit(imputed_data_filename, ".txt"), "QN_LT", sep = "_" + ), ".txt", sep = ""), + sep = "\t", + col.names = TRUE, + row.names = FALSE +) ``` <!-- ACE insertion begin --> ### Checking that normalized, imputed, log-transformed sample distributions are similar: -```{r echo=FALSE} -#library(data.table) +```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'} + -#Save unimputed quantDataLog for plotting below -unimputedQuantDataLog <- quantDataLog +# Save unimputed quant_data_log for plotting below +unimputed_quant_data_log <- quant_data_log -#Log10 transform (after preparing for zero values, which should never happen...) -quantDataImputed.qn[quantDataImputed.qn == 0] <- .000000001 -quantDataLog <- log10(quantDataImputed.qn) +# log10 transform (after preparing for zero values, +# which should never happen...) +quant_data_imp_qn[quant_data_imp_qn == 0] <- .000000001 +quant_data_log <- log10(quant_data_imp_qn) -summary(quantDataLog) +# Output quantile-normalized log-transformed dataset +# with imputed, normalized data -#Output quantile-normalized log-transformed dataset with imputed, normalized data - -dataTableImputed <- cbind(fullData[1:9], quantDataLog) +data_table_imputed <- cbind(full_data[1:9], quant_data_log) write.table( - dataTableImputed - , file=imputedDataFilename - , sep="\t" - , col.names=TRUE - , row.names=FALSE - , quote=FALSE + data_table_imputed + , file = imputed_data_filename + , sep = "\t" + , col.names = TRUE + , row.names = FALSE + , quote = FALSE ) -#data visualization +# data visualization old_par <- par( - mai=par("mai") + c(0.5,0,0,0) -, oma=par("oma") + c(0.5,0,0,0) + mai = par("mai") + c(0.5, 0, 0, 0) +, oma = par("oma") + c(0.5, 0, 0, 0) ) boxplot( - quantDataLog -, las=2 + quant_data_log +, las = 2 ) par(old_par) + + + +cat("\\newline\n") +cat("\\newline\n") + ``` -```{r echo=FALSE, fig.dim=c(9,5)} -quantDataLog_stack <- stack(quantDataLog) -ggplot(quantDataLog_stack, aes(x=values)) + geom_density(aes(group=ind, colour=ind)) +```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4)} +quant_data_log_stack <- stack(quant_data_log) +ggplot( + quant_data_log_stack, + aes(x = values) + ) + geom_density(aes(group = ind, colour = ind)) ``` ## Perform ANOVA filters -```{r,echo=FALSE} -#Make new data frame containing only Phosphopeptides to connect preANOVA to ANOVA (connect_df) +(see following pages) + +```{r, echo = FALSE} +# Make new data frame containing only Phosphopeptides +# to connect preANOVA to ANOVA (connect_df) connect_df <- data.frame( - dataTableImputed_QN_LT$Phosphopeptide - , dataTableImputed_QN_LT[,firstDataColumn] + data_table_imp_qn_lt$Phosphopeptide + , data_table_imp_qn_lt[, first_data_column] ) -colnames(connect_df) <- c("Phosphopeptide","Intensity") +colnames(connect_df) <- c("Phosphopeptide", "Intensity") ``` -```{r echo=FALSE, fig.dim=c(9,10)} -# Get factors -> group replicates (as indicated by terminal letter) by the preceding digits -# For example, group .1A .1B .1C into group 1; .2A .2B .2C, into group 2; etc.. -m <- regexpr(regexSampleNames, names(quantDataImputed_QN_log), perl=TRUE) -#ACE str(m) -tempMatches <- regmatches(names(quantDataImputed_QN_log), m) -#ACE str(tempMatches) -numSamples <- length(tempMatches) -#ACE str(numSamples) -m2 <- regexpr(regexSampleGrouping, tempMatches, perl=TRUE) -#ACE str(m2) -#ACE str(regmatches(tempMatches, m2)) -sampleNumbers <- as.factor(regmatches(tempMatches, m2)) -#ACE str(sampleNumbers) +```{r echo = FALSE, fig.dim = c(9, 10), results = 'asis'} +# Get factors -> group replicates (as indicated by terminal letter) +# by the preceding digits; +# e.g., group .1A .1B .1C into group 1; .2A .2B .2C, into group 2; etc.. +m <- + regexpr(regex_sample_names, names(quant_data_imp_qn_log), perl = TRUE) + +temp_matches <- regmatches(names(quant_data_imp_qn_log), m) + +number_of_samples <- length(temp_matches) -if (length(levels(sampleNumbers))<2) { - cat("ERROR!!!! Cannot perform ANOVA analysis because it requires two or more factor levels\n") +m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE) + + +sample_factor_levels <- as.factor(regmatches(temp_matches, m2)) + + +if (length(levels(sample_factor_levels)) < 2) { + cat( + "ERROR!!!! Cannot perform ANOVA analysis", + "because it requires two or more factor levels\n" + ) cat("Unparsed sample names are:\n") - print(names(quantDataImputed_QN_log)) - cat(sprintf("Parsing rule for SampleNames is '%s'\n", regexSampleNames)) + print(names(quant_data_imp_qn_log)) + cat(sprintf("Parsing rule for SampleNames is '%s'\n", regex_sample_names)) cat("Parsed names are:\n") - print(tempMatches) - cat(sprintf("Parsing rule for SampleGrouping is '%s'\n", regexSampleGrouping)) + print(temp_matches) + cat(sprintf( + "Parsing rule for SampleGrouping is '%s'\n", + regex_sample_grouping + )) cat("Sample group assignments are:\n") - print(regmatches(tempMatches, m2)) + print(regmatches(temp_matches, m2)) } else { - pValueData.anovaPs <- apply(quantDataImputed_QN_log, 1, anovaFunc, groupingFactor=sampleNumbers) + p_value_data_anova_ps <- + apply( + quant_data_imp_qn_log, + 1, + anova_func, + grouping_factor = sample_factor_levels + ) - pValueData.anovaPs.FDR <- p.adjust(pValueData.anovaPs, method="fdr") - pValueData <- data.frame( - phosphopeptide = fullData[,1] - , rawANOVAp = pValueData.anovaPs - , FDRadjustedANOVAp = pValueData.anovaPs.FDR + p_value_data_anova_ps_fdr <- + p.adjust(p_value_data_anova_ps, method = "fdr") + p_value_data <- data.frame( + phosphopeptide = full_data[, 1] + , + raw_anova_p = p_value_data_anova_ps + , + fdr_adjusted_anova_p = p_value_data_anova_ps_fdr ) - #ACE rownames(pValueData) <- fullData[,1] - # output ANOVA file to constructed filename, + + # output ANOVA file to constructed filename, # e.g. "Outputfile_pST_ANOVA_STEP5.txt" # becomes "Outpufile_pST_ANOVA_STEP5_FDR0.05.txt" - #Re-output quantile-normalized log-transformed dataset with imputed, normalized data to include p-values + # Re-output quantile-normalized log-transformed dataset + # with imputed, normalized data to include p-values - dataTableImputed <- cbind(fullData[1:9], pValueData[,2:3], quantDataLog) + data_table_imputed <- + cbind(full_data[1:9], p_value_data[, 2:3], quant_data_log) write.table( - dataTableImputed - , file=imputedDataFilename - , sep="\t" - , col.names=TRUE - , row.names=FALSE - , quote=FALSE + data_table_imputed, + file = imputed_data_filename, + sep = "\t", + col.names = TRUE, + row.names = FALSE, + quote = FALSE ) - pValueData <- pValueData[order(pValueData$FDRadjustedANOVAp),] + p_value_data <- + p_value_data[order(p_value_data$fdr_adjusted_anova_p), ] + + cutoff <- val_fdr[1] + for (cutoff in val_fdr) { + #loop through FDR cutoffs - cutoff <- valFDR[1] - for (cutoff in valFDR){ #loop through FDR cutoffs - - filtered_p <- pValueData[which(pValueData$FDRadjustedANOVAp < cutoff),, drop = FALSE] - filteredData.filtered <- quantDataImputed_QN_log[rownames(filtered_p),, drop = FALSE] - filteredData.filtered <- filteredData.filtered[order(filtered_p$FDRadjustedANOVAp),, drop = FALSE] + filtered_p <- + p_value_data[ + which(p_value_data$fdr_adjusted_anova_p < cutoff), + , + drop = FALSE + ] + filtered_data_filtered <- + quant_data_imp_qn_log[ + rownames(filtered_p), + , + drop = FALSE + ] + filtered_data_filtered <- + filtered_data_filtered[ + order(filtered_p$fdr_adjusted_anova_p), + , + drop = FALSE + ] # <!-- ACE insertion start --> old_oma <- par("oma") old_par <- par( - mai=(par("mai") + c(0.7,0,0,0)) * c(1,1,0.3,1) - , oma=old_oma * c(1,1,0.3,1) - , cex.main=0.9 - , cex.axis=0.7 - ) - - if (nrow(filteredData.filtered) > 0) { + mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1), + oma = old_oma * c(1, 1, 0.3, 1), + cex.main = 0.9, + cex.axis = 0.7 + ) + + cat("\\newpage\n") + if (nrow(filtered_data_filtered) > 0) { + cat(sprintf( + "Intensities for peptides whose adjusted p-value < %0.2f\n", + cutoff + )) + cat("\\newline\n") + cat("\\newline\n") + boxplot( - filteredData.filtered - , main = sprintf("Imputed, normalized intensities where adjusted p-value < %0.2f", cutoff) - # no line plot , main = "" - , las = 2 - # , ylim = c(5.5,10) - , ylab = expression(log[10](intensity)) + filtered_data_filtered, + main = "Imputed, normalized intensities", # no line plot + las = 2, + ylab = expression(log[10](intensity)) ) } else { - cat(sprintf("No peptides were found to have cutoff adjusted p-value < %0.2f\n", cutoff)) + cat(sprintf( + "No peptides were found to have cutoff adjusted p-value < %0.2f\n", + cutoff + )) } par(old_par) - - #Add Phosphopeptide column to ANOVA filtered table - ANOVA.filtered_merge <- merge( + + if (nrow(filtered_data_filtered) > 0) { + #Add Phosphopeptide column to anova_filtered table + anova_filtered_merge <- merge( x = connect_df - , y = filteredData.filtered - , by.x="Intensity" - , by.y=1 + , + y = filtered_data_filtered + , + by.x = "Intensity" + , + by.y = 1 ) - ANOVA.filtered_merge.order <- rownames(filtered_p) - - ANOVA.filtered_merge.format <- sapply( - X = filtered_p$FDRadjustedANOVAp - , FUN = function(x) { - if (x > 0.0001) - paste0("(%0.",1+ceiling(-log10(x)),"f) %s") - else - paste0("(%0.4e) %s") + anova_filtered_merge_order <- rownames(filtered_p) + + anova_filtered_merge_format <- sapply( + X = filtered_p$fdr_adjusted_anova_p + , + FUN = function(x) { + if (x > 0.0001) + paste0("(%0.", 1 + ceiling(-log10(x)), "f) %s") + else + paste0("(%0.4e) %s") } - ) - - #ANOVA.filtered_merge.format <- paste0("(%0.",1+ceiling(-log10(filtered_p$FDRadjustedANOVAp)),"f) %s") - - ANOVA.filtered <- data.table( - ANOVA.filtered_merge$Phosphopeptide - , ANOVA.filtered_merge$Intensity - , ANOVA.filtered_merge[, 2:numSamples+1] - ) - colnames(ANOVA.filtered) <- c("Phosphopeptide", colnames(filteredData.filtered)) - - # merge qualitative columns into the ANOVA data - output_table <- data.frame(ANOVA.filtered$Phosphopeptide) - output_table <- merge( - x = output_table - , y = dataTableImputed_QN_LT - , by.x = "ANOVA.filtered.Phosphopeptide" - , by.y="Phosphopeptide" ) - #Produce heatmap to visualize significance and the effect of imputation - m <- as.matrix(unimputedQuantDataLog[ANOVA.filtered_merge.order,]) - if (nrow(m) > 0) { - rownames_m <- rownames(m) - rownames(m) <- sapply( - X = 1:nrow(m) - , FUN = function(i) { + + + anova_filtered <- data.table( + anova_filtered_merge$Phosphopeptide + , + anova_filtered_merge$Intensity + , + anova_filtered_merge[, 2:number_of_samples + 1] + ) + colnames(anova_filtered) <- + c("Phosphopeptide", colnames(filtered_data_filtered)) + + # merge qualitative columns into the ANOVA data + output_table <- data.frame(anova_filtered$Phosphopeptide) + output_table <- merge( + x = output_table + , + y = data_table_imp_qn_lt + , + by.x = "anova_filtered.Phosphopeptide" + , + by.y = "Phosphopeptide" + ) + + #Produce heatmap to visualize significance and the effect of imputation + m <- + as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ]) + if (nrow(m) > 0) { + rownames_m <- rownames(m) + rownames(m) <- sapply( + X = seq_len(nrow(m)) + , + FUN = function(i) { sprintf( - ANOVA.filtered_merge.format[i] - , filtered_p$FDRadjustedANOVAp[i] - , rownames_m[i] + anova_filtered_merge_format[i] + , + filtered_p$fdr_adjusted_anova_p[i] + , + rownames_m[i] ) } ) - margins <- c( - max(nchar(colnames(m))) * 10 / 16 # col - , max(nchar(rownames(m))) * 5 / 16 # row - ) - how_many_peptides <- min(50, nrow(m)) + margins <- c(max(nchar(colnames(m))) * 10 / 16 # col + , max(nchar(rownames(m))) * 5 / 16 # row + ) + how_many_peptides <- min(50, nrow(m)) - op <- par("cex.main") - try( - if (nrow(m) > 1) { - par(cex.main=0.6) - heatmap( - m[how_many_peptides:1,] - , Rowv = NA - , Colv = NA - , cexRow = 0.7 - , cexCol = 0.8 - , scale="row" - , margins = margins - , main = "Heatmap of unimputed, unnormalized intensities" - , xlab = "" - # , main = bquote( - # .( how_many_peptides ) - # ~ " peptides with adjusted p-value <" - # ~ .(sprintf("%0.2f", cutoff)) - # ) - ) - } - ) - #ACE fig_dim knitr::opts_chunk$set(fig.dim = fig_dim) - par(op) + cat("\\newpage\n") + if (nrow(m) > 50) { + cat("Heatmap for the 50 most-significant peptides", + sprintf( + "whose adjusted p-value < %0.2f\n", + cutoff) + ) + } else { + cat("Heatmap for peptides whose", + sprintf("adjusted p-value < %0.2f\n", + cutoff) + ) + } + cat("\\newline\n") + cat("\\newline\n") + op <- par("cex.main") + try( + if (nrow(m) > 1) { + par(cex.main = 0.6) + heatmap( + m[how_many_peptides:1, ], + Rowv = NA, + Colv = NA, + cexRow = 0.7, + cexCol = 0.8, + scale = "row", + margins = margins, + main = + "Heatmap of unimputed, unnormalized intensities", + xlab = "" + ) + } + ) + par(op) + } } - } } ``` +<!-- ## Peptide IDs, etc. See output files. +-->
--- a/repository_dependencies.xml Fri Mar 11 20:04:05 2022 +0000 +++ b/repository_dependencies.xml Tue Mar 15 00:35:16 2022 +0000 @@ -1,5 +1,5 @@ <?xml version="1.0" ?> <repositories description="Suite for preprocessing and ANOVA of MaxQuant results using LC-MS proteomics data from phosphoproteomic enrichment."> - <repository name="mqppep_preproc" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="b91809a18dbe"/> - <repository name="mqppep_anova" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="d4d531006735"/> + <repository name="mqppep_preproc" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="42daf70d4ed4"/> + <repository name="mqppep_anova" owner="eschen42" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="922d309640db"/> </repositories> \ No newline at end of file
--- a/workflow/ppenrich_suite_wf.ga Fri Mar 11 20:04:05 2022 +0000 +++ b/workflow/ppenrich_suite_wf.ga Tue Mar 15 00:35:16 2022 +0000 @@ -28,20 +28,20 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 257.06666564941406, - "height": 81.39999389648438, - "left": 339.95001220703125, - "right": 539.9500122070312, - "top": 175.6666717529297, + "bottom": -36.30000305175781, + "height": 82.19999694824219, + "left": 150, + "right": 350, + "top": -118.5, "width": 200, - "x": 339.95001220703125, - "y": 175.6666717529297 + "x": 150, + "y": -118.5 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", "tool_version": null, "type": "data_input", - "uuid": "002d55e6-29a5-426d-9248-70ec33424b15", + "uuid": "f4273d40-f2b8-4ad0-8bcc-91e72bd25fe1", "workflow_outputs": [] }, "1": { @@ -60,20 +60,20 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 411.4666748046875, - "height": 101.79998779296875, - "left": 379.95001220703125, - "right": 579.9500122070312, - "top": 309.66668701171875, + "bottom": 278.1000061035156, + "height": 102.60000610351562, + "left": 376, + "right": 576, + "top": 175.5, "width": 200, - "x": 379.95001220703125, - "y": 309.66668701171875 + "x": 376, + "y": 175.5 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": null}", + "tool_state": "{\"optional\": false, \"format\": [\"fasta\"]}", "tool_version": null, "type": "data_input", - "uuid": "8f079dcc-1843-47cd-b4dc-1830e4466430", + "uuid": "cb31b0ac-cacc-42ee-bd42-f42d0bdae128", "workflow_outputs": [] }, "2": { @@ -92,20 +92,20 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 573.4666748046875, - "height": 101.79998779296875, - "left": 418.95001220703125, - "right": 618.9500122070312, - "top": 471.66668701171875, + "bottom": 423.1000061035156, + "height": 102.60000610351562, + "left": 387, + "right": 587, + "top": 320.5, "width": 200, - "x": 418.95001220703125, - "y": 471.66668701171875 + "x": 387, + "y": 320.5 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", "tool_version": null, "type": "data_input", - "uuid": "dc894a94-97a3-40ff-811e-01b30d498478", + "uuid": "e6ec01b8-ff1a-4c90-a064-b40c5cad75bb", "workflow_outputs": [] }, "3": { @@ -124,20 +124,20 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 726.0666809082031, - "height": 81.39999389648438, - "left": 459.95001220703125, - "right": 659.9500122070312, - "top": 644.6666870117188, + "bottom": 546.6999969482422, + "height": 82.19999694824219, + "left": 399, + "right": 599, + "top": 464.5, "width": 200, - "x": 459.95001220703125, - "y": 644.6666870117188 + "x": 399, + "y": 464.5 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", "tool_version": null, "type": "data_input", - "uuid": "6fc936ad-0b52-484f-a051-73c1776fdeb0", + "uuid": "2c59056a-c1b4-4a20-a194-991d56c8b6c2", "workflow_outputs": [] }, "4": { @@ -156,20 +156,20 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 894.4666748046875, - "height": 101.79998779296875, - "left": 503.95001220703125, - "right": 703.9500122070312, - "top": 792.6666870117188, + "bottom": 696.1000061035156, + "height": 102.60000610351562, + "left": 420, + "right": 620, + "top": 593.5, "width": 200, - "x": 503.95001220703125, - "y": 792.6666870117188 + "x": 420, + "y": 593.5 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", "tool_version": null, "type": "data_input", - "uuid": "22b77482-2339-4b45-8fc6-d39f7175131b", + "uuid": "987a5891-15f1-4f70-89a8-386447f0bf24", "workflow_outputs": [] }, "5": { @@ -188,20 +188,20 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 1041.0666809082031, - "height": 81.39999389648438, - "left": 535.9500122070312, - "right": 735.9500122070312, - "top": 959.6666870117188, + "bottom": 820.6999969482422, + "height": 82.19999694824219, + "left": 436, + "right": 636, + "top": 738.5, "width": 200, - "x": 535.9500122070312, - "y": 959.6666870117188 + "x": 436, + "y": 738.5 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", "tool_version": null, "type": "data_input", - "uuid": "3d97a902-1408-403c-b82e-ddb6ca6a7d47", + "uuid": "964d8d21-b063-411a-aee8-372a0d0dfba3", "workflow_outputs": [] }, "6": { @@ -220,20 +220,20 @@ "name": "Input dataset", "outputs": [], "position": { - "bottom": 1210.5666198730469, - "height": 81.39999389648438, - "left": 562.9500122070312, - "right": 762.9500122070312, - "top": 1129.1666259765625, + "bottom": 1071.1999969482422, + "height": 82.19999694824219, + "left": 418, + "right": 618, + "top": 989, "width": 200, - "x": 562.9500122070312, - "y": 1129.1666259765625 + "x": 418, + "y": 989 }, "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}", + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"]}", "tool_version": null, "type": "data_input", - "uuid": "7b5eab97-7dad-4b0e-81eb-22aac39dd5b6", + "uuid": "42577db7-d5e5-4f39-b3ad-d0648abb9df3", "workflow_outputs": [] }, "7": { @@ -267,7 +267,32 @@ "output_name": "output" } }, - "inputs": [], + "inputs": [ + { + "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing", + "name": "networkin" + }, + { + "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing", + "name": "p_sty_motifs" + }, + { + "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing", + "name": "phosphoSites" + }, + { + "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing", + "name": "protein_fasta" + }, + { + "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing", + "name": "psp_kinase_substrate" + }, + { + "description": "runtime parameter for tool MaxQuant Phosphopeptide Preprocessing", + "name": "psp_regulatory_sites" + } + ], "label": null, "name": "MaxQuant Phosphopeptide Preprocessing", "outputs": [ @@ -325,14 +350,14 @@ } ], "position": { - "bottom": 1186.6000366210938, - "height": 812.933349609375, - "left": 945.4500122070312, - "right": 1145.4500122070312, - "top": 373.66668701171875, + "bottom": 964.0999755859375, + "height": 793.5999755859375, + "left": 826.5, + "right": 1026.5, + "top": 170.5, "width": 200, - "x": 945.4500122070312, - "y": 373.66668701171875 + "x": 826.5, + "y": 170.5 }, "post_job_actions": { "RenameDatasetActionenrichGraph": { @@ -428,75 +453,75 @@ } }, "tool_id": "mqppep_preproc", - "tool_state": "{\"collapseFunc\": \"sum\", \"enriched\": \"ST\", \"intervalCol\": \"1\", \"localProbCutoff\": \"0.75\", \"merge_function\": \"sum\", \"networkin\": {\"__class__\": \"ConnectedValue\"}, \"p_sty_motifs\": {\"__class__\": \"ConnectedValue\"}, \"phosphoCol\": \"^Number of Phospho [(]STY[)]$\", \"phosphoSites\": {\"__class__\": \"ConnectedValue\"}, \"phospho_type\": \"sty\", \"protein_fasta\": {\"__class__\": \"ConnectedValue\"}, \"psp_kinase_substrate\": {\"__class__\": \"ConnectedValue\"}, \"psp_regulatory_sites\": {\"__class__\": \"ConnectedValue\"}, \"species\": \"human\", \"startCol\": \"^Intensity[^_]\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", - "tool_version": "0.1.0+galaxy0", + "tool_state": "{\"collapseFunc\": \"sum\", \"intervalCol\": \"1\", \"localProbCutoff\": \"0.75\", \"merge_function\": \"sum\", \"networkin\": {\"__class__\": \"RuntimeValue\"}, \"p_sty_motifs\": {\"__class__\": \"RuntimeValue\"}, \"phosphoCol\": \"^Number of Phospho [(]STY[)]$\", \"phosphoSites\": {\"__class__\": \"RuntimeValue\"}, \"protein_fasta\": {\"__class__\": \"RuntimeValue\"}, \"psp_kinase_substrate\": {\"__class__\": \"RuntimeValue\"}, \"psp_regulatory_sites\": {\"__class__\": \"RuntimeValue\"}, \"pst_not_py\": \"true\", \"species\": \"human\", \"startCol\": \"^Intensity[^_]\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, "type": "tool", - "uuid": "235b1a2e-ccc0-4c91-bb91-bbf4d272c870", + "uuid": "886043ce-8d9b-474e-b970-4fe9ee6a74fa", "workflow_outputs": [ { "label": "ppep_intensities", "output_name": "phosphoPepIntensities", - "uuid": "92fd4e27-5d4b-4e9f-b3ad-6bdad53bb93d" + "uuid": "e19a64d1-edee-4119-a72e-456af7a6c056" }, { "label": "enrichGraph_pdf", "output_name": "enrichGraph", - "uuid": "4c1d5590-f8ba-421c-858c-4c026691b52e" + "uuid": "7e9936d9-9617-4df4-9133-7a04f8d05d26" }, { "label": "locProbCutoffGraph_pdf", "output_name": "locProbCutoffGraph", - "uuid": "66a79534-6372-4937-bcf2-8644be985eea" + "uuid": "5656cba7-25e2-4362-ae92-1ddac67dee07" }, { "label": "enrichGraph_svg", "output_name": "enrichGraph_svg", - "uuid": "5e713d9c-1868-423b-be9a-25c0486e1472" + "uuid": "ca13a22e-a41b-481c-ab87-1f97bbf768e9" }, { "label": "locProbCutoffGraph_svg", "output_name": "locProbCutoffGraph_svg", - "uuid": "4621ea21-ae90-4547-a68f-30dfc7857368" + "uuid": "fc7a11f5-30d8-4409-878a-d3b70366711c" }, { "label": "filteredData", "output_name": "filteredData_tabular", - "uuid": "bb26d0fb-6f19-43c7-80ef-1cf81aa09ee8" + "uuid": "aab49fc5-a3cf-4479-ac23-8e9272dadf28" }, { "label": "quantData", "output_name": "quantData_tabular", - "uuid": "20efe04f-2700-4af0-92c6-0830a42d8e75" + "uuid": "23940202-403e-4256-916b-92539db07cdb" }, { "label": "ppep_map", "output_name": "mapped_phophopeptides", - "uuid": "037e2b97-8fc8-436d-bcc3-af5ee685b752" + "uuid": "08ad13d4-c103-4f18-92cc-2c3b58565981" }, { "label": "melted_phosphopeptide_map", "output_name": "melted_phophopeptide_map", - "uuid": "c3e5de84-2659-45eb-81a6-edef6037d8aa" + "uuid": "77cecaeb-8f7c-482e-b78a-e4809b194eb7" }, { "label": "ppep_mapping_sqlite", "output_name": "mqppep_output_sqlite", - "uuid": "a1a4f827-1f1f-4175-ae51-c238f9e1f248" + "uuid": "8e53e05a-a47c-4b97-87e4-ebab133ccaea" }, { "label": "preproc_tab", "output_name": "preproc_tab", - "uuid": "b22b4b56-9395-4f6d-945e-0089e8897069" + "uuid": "530a8140-9eba-4c87-a76b-4922febc12e7" }, { "label": "preproc_csv", "output_name": "preproc_csv", - "uuid": "54be90f9-1158-4686-af42-43d021088300" + "uuid": "c5f22f05-0bf7-48cf-adc0-c2beffe33169" }, { "label": "preproc_sqlite", "output_name": "preproc_sqlite", - "uuid": "33663f9c-b718-4bdd-acc9-087c76bea678" + "uuid": "53424150-7673-40af-ad60-0b4035e0c302" } ] }, @@ -529,14 +554,14 @@ } ], "position": { - "bottom": 1488.0999603271484, - "height": 254.93333435058594, - "left": 1202.949951171875, - "right": 1402.949951171875, - "top": 1233.1666259765625, + "bottom": 1349, + "height": 256, + "left": 1058, + "right": 1258, + "top": 1093, "width": 200, - "x": 1202.949951171875, - "y": 1233.1666259765625 + "x": 1058, + "y": 1093 }, "post_job_actions": { "RenameDatasetActionimputed_data_file": { @@ -556,19 +581,19 @@ }, "tool_id": "mqppep_anova", "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"group-median\", \"__current_case__\": 0}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", - "tool_version": "0.1.0+galaxy0", + "tool_version": null, "type": "tool", - "uuid": "2257286b-6f9a-45c1-90a3-bf5b972959d5", + "uuid": "a3cb902d-8ef6-4f84-bed3-80b2b20d1916", "workflow_outputs": [ { "label": "intensities_group-mean-imputed_QN_LT", "output_name": "imputed_data_file", - "uuid": "8e7317c6-95e9-4454-b4d7-31b4de6167a8" + "uuid": "ef19dcd3-8f3e-4fc4-829e-dae6719ff1cc" }, { "label": "intensities_group-mean-imputed_report", "output_name": "report_file", - "uuid": "dfe9b34e-1f3e-4971-8382-41178104e253" + "uuid": "26bb93b0-bc11-4455-a280-241253b21981" } ] }, @@ -601,14 +626,14 @@ } ], "position": { - "bottom": 1325.0999603271484, - "height": 254.93333435058594, - "left": 1452.949951171875, - "right": 1652.949951171875, - "top": 1070.1666259765625, + "bottom": 1186, + "height": 256, + "left": 1308, + "right": 1508, + "top": 930, "width": 200, - "x": 1452.949951171875, - "y": 1070.1666259765625 + "x": 1308, + "y": 930 }, "post_job_actions": { "RenameDatasetActionimputed_data_file": { @@ -628,19 +653,19 @@ }, "tool_id": "mqppep_anova", "tool_state": "{\"alpha_file\": {\"__class__\": \"ConnectedValue\"}, \"first_data_column\": \"Intensity\", \"imputation\": {\"imputation_method\": \"random\", \"__current_case__\": 3, \"meanPercentile\": \"1\", \"sdPercentile\": \"0.2\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"sample_grouping_regex\": \"(\\\\d+)\", \"sample_names_regex\": \"\\\\.(\\\\d+)[A-Z]$\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", - "tool_version": "0.1.0+galaxy0", + "tool_version": null, "type": "tool", - "uuid": "9516971c-8532-4797-8bf9-4655ff104dbd", + "uuid": "217d92af-f6d6-4fd3-a78a-090d8afd3ae0", "workflow_outputs": [ { "label": "intensities_randomly-imputed_QN_LT", "output_name": "imputed_data_file", - "uuid": "8ceda029-d5fd-4d75-a2b3-ac582bb137c3" + "uuid": "925d734f-f9d8-49e8-aebb-c8d7598d45b2" }, { "label": "intensities_randomly-imputed_report", "output_name": "report_file", - "uuid": "84bedf25-c15b-4cc7-97e0-92f746e89f9c" + "uuid": "4ab5f1b1-d04e-4634-8765-265122bc1064" } ] } @@ -648,6 +673,6 @@ "tags": [ "ppenrich" ], - "uuid": "ac7bf2d1-89fe-4bf6-920a-d5508842d3f9", - "version": 7 + "uuid": "c54c2b2e-8080-445c-bc3e-43950c89d4e4", + "version": 3 } \ No newline at end of file