annotate MaxQuantProcessingScript.R @ 0:c1403d18c189 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
author eschen42
date Mon, 07 Mar 2022 19:05:01 +0000
parents
children d4d531006735
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
1 #!/usr/bin/env Rscript
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
2
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
3 # This is the implementation for the
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
4 # "MaxQuant Phosphopeptide Localization Probability Cutoff"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
5 # Galaxy tool (mqppep_lclztn_filter)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
6 # It is adapted from the MaxQuant Processing Script written by Larry Cheng.
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
7
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
8 # libraries
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
9 library(optparse)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
10 library(data.table)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
11 library(stringr)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
12 library(ggplot2)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
13 #library(PTXQC)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
14 #require(PTXQC)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
15 #require(methods)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
16
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
17 # title: "MaxQuant Processing Script"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
18 # author: "Larry Cheng"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
19 # date: "February 19, 2018"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
20 #
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
21 # # MaxQuant Processing Script
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
22 # Takes MaxQuant Phospho (STY)sites.txt file as input and performs the following (in order):
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
23 # 1) Runs the Proteomics Quality Control software
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
24 # 2) Remove contaminant and reverse sequence rows
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
25 # 3) Filters rows based on localization probability
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
26 # 4) Extract the quantitative data
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
27 # 5) Sequences phosphopeptides
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
28 # 6) Merges multiply phosphorylated peptides
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
29 # 7) Filters out phosphopeptides based on enrichment
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
30 # The output file contains the phosphopeptide (first column) and the quantitative values for each sample
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
31 #
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
32 # ## Revision History
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
33 # Rev. 2022-02-10 :wrap for inclusion in Galaxy
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
34 # Rev. 2018-02-19 :break up analysis script into "MaxQuant Processing Script" and "Phosphopeptide Processing Script"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
35 # Rev. 2017-12-12 :added PTXQC
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
36 # added additional plots and table outputs for quality control
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
37 # allowed for more than 2 samples to be grouped together (up to 26 (eg, 1A, 1B, 1C, etc))regexSampleNames <-
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
38 # "\\.(\\d+)[A-Z]$"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
39 # converted from .r to .rmd file to knit report for quality control
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
40 # Rev. 2016-09-11 :automated the FDR cutoffs; removed the option to data impute multiple times
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
41 # Rev. 2016-09-09 :added filter to eliminate contaminant and reverse sequence rows
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
42 # Rev. 2016-09-01 :moved the collapse step from after ANOVA filter to prior to preANOVA file output
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
43 # Rev. 2016-08-22 :changed regexpression to regexSampleNames <- "\\.(\\d+)[AB]$" so that it looks at the end of string
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
44 # Rev. 2016-08-05 :Removed vestigial line (ppeptides <- ....)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
45 # Rev. 2016-07-03 :Removed row names from the write.table() output for ANOVA and PreANOVA
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
46 # Rev. 2016-06-25 :Set default Localization Probability cutoff to 0.75
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
47 # Rev. 2016-06-23 :fixed a bug in filtering for pY enrichment by resetting the row numbers afterwards
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
48 # Rev. 2016-06-21 :test18 + standardized the regexpression in protocol
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
49
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
50
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
51 ### FUNCTION DECLARATIONS begin ----------------------------------------------
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
52
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
53 # Read first line of file at filePath
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
54 # adapted from: https://stackoverflow.com/a/35761217/15509512
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
55 readFirstLine <- function(filepath) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
56 con = file(filepath, "r")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
57 line = readLines(con, n = 1)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
58 close(con)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
59 return(line)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
60 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
61
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
62 # Move columns to the end of dataframe
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
63 # - data: the dataframe
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
64 # - move: a vector of column names, each of which is an element of names(data)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
65 movetolast <- function(data, move) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
66 data[c(setdiff(names(data), move), move)]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
67 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
68
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
69 # Generate phosphopeptide and build list when applied
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
70 phosphopeptide_func <- function(df) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
71
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
72 #generate peptide sequence and list of phosphopositions
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
73 phosphoprobsequence <- strsplit(as.character(df["Phospho (STY) Score diffs"]), "")[[1]]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
74 output <- vector()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
75 phosphopeptide <- ""
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
76 counter <- 0 #keep track of position in peptide
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
77 phosphopositions <- vector() #keep track of phosphorylation positions in peptide
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
78 score_diff <- ""
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
79 for (chara in phosphoprobsequence){
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
80 #build peptide sequence
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
81 if (!(chara == " " | chara == "(" | chara == ")" | chara =="." | chara =="-" | chara == "0" | chara == "1" | chara == "2" | chara == "3" | chara =="4" | chara == "5" | chara == "6" | chara == "7" | chara =="8" | chara =="9")) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
82 phosphopeptide <- paste(phosphopeptide,chara,sep="")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
83 counter <- counter + 1
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
84 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
85 #generate score_diff
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
86 if (chara == "-" | chara =="." | chara == "0" | chara == "1" | chara == "2" | chara == "3" | chara =="4" | chara == "5" | chara == "6" | chara == "7" | chara =="8" | chara =="9"){
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
87 score_diff <- paste(score_diff,chara,sep="")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
88 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
89 #evaluate score_diff
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
90 if (chara == ")" ){
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
91 score_diff <- as.numeric(score_diff)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
92 #only consider a phosphoresidue if score_diff > 0
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
93 if (score_diff > 0) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
94 phosphopositions <- append(phosphopositions, counter)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
95 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
96 score_diff <- ""
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
97 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
98 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
99
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
100 #generate phosphopeptide sequence (ie, peptide sequence with "p"'s)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
101 counter <- 1
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
102 phosphoposition_correction1 <- -1 #used to correct phosphosposition as "p"'s are inserted into the phosphopeptide string
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
103 phosphoposition_correction2 <- 0 #used to correct phosphosposition as "p"'s are inserted into the phosphopeptide string
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
104 while (counter <= length(phosphopositions) ) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
105 phosphopeptide <- paste(substr(phosphopeptide,0,phosphopositions[counter]+phosphoposition_correction1),"p",substr(phosphopeptide,phosphopositions[counter]+phosphoposition_correction2,nchar(phosphopeptide)),sep="")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
106 counter <- counter + 1
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
107 phosphoposition_correction1 <- phosphoposition_correction1 + 1
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
108 phosphoposition_correction2 <- phosphoposition_correction2 + 1
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
109 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
110
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
111 #building phosphopeptide list
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
112 output <- append(output,phosphopeptide)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
113 return(output)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
114 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
115
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
116 ### FUNCTION DECLARATIONS end ------------------------------------------------
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
117
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
118
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
119 ### EXTRACT ARGUMENTS begin --------------------------------------------------
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
120
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
121 # parse options
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
122 option_list <- list(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
123 make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
124 c("-i", "--input"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
125 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
126 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
127 help = "A MaxQuant Phospho (STY)Sites.txt"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
128 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
129 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
130 c("-o", "--output"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
131 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
132 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
133 help = "path to output file"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
134 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
135 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
136 c("-E", "--enrichGraph"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
137 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
138 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
139 help = "path to enrichment graph PDF"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
140 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
141 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
142 c("-F", "--enrichGraph_svg"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
143 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
144 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
145 help = "path to enrichment graph SVG"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
146 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
147 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
148 c("-L", "--locProbCutoffGraph"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
149 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
150 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
151 help = "path to location-proability cutoff graph PDF"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
152 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
153 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
154 c("-M", "--locProbCutoffGraph_svg"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
155 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
156 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
157 help = "path to location-proability cutoff graph SVG"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
158 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
159 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
160 c("-e", "--enriched"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
161 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
162 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
163 help = "pY or pST enriched samples (ie, 'Y' or 'ST')"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
164 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
165 # default = "^Number of Phospho [(]STY[)]$",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
166 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
167 c("-p", "--phosphoCol"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
168 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
169 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
170 help = "PERL-compatible regular expression matching header of column having number of 'Phospho (STY)'"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
171 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
172 # default = "^Intensity[^_]",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
173 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
174 c("-s", "--startCol"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
175 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
176 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
177 help = "PERL-compatible regular expression matching column header having first sample intensity"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
178 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
179 # default = 1,
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
180 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
181 c("-I", "--intervalCol"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
182 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
183 type = "integer",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
184 help = "Column interval between the Intensities of samples (eg, 1 if subsequent column; 2 if every other column"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
185 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
186 # default = 0.75,
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
187 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
188 c("-l", "--localProbCutoff"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
189 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
190 type = "double",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
191 help = "Localization Probability Cutoff"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
192 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
193 # default = "sum",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
194 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
195 c("-f", "--collapse_func"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
196 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
197 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
198 help = "merge identical phosphopeptides by ('sum' or 'average') the intensities"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
199 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
200 # default = "filteredData.txt",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
201 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
202 c("-r", "--filtered_data"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
203 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
204 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
205 help = "filteredData.txt"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
206 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
207 # default = "quantData.txt",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
208 , make_option(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
209 c("-q", "--quant_data"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
210 action = "store",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
211 type = "character",
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
212 help = "quantData.txt"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
213 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
214 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
215 args <- parse_args(OptionParser(option_list=option_list))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
216 # Check parameter values
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
217
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
218 ### EXTRACT ARGUMENTS end ----------------------------------------------------
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
219
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
220
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
221 ### EXTRACT PARAMETERS from arguments begin ----------------------------------
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
222
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
223 if (! file.exists(args$input)) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
224 stop((paste("File", args$input, "does not exist")))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
225 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
226
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
227 phosphoColPattern <- "^Number of Phospho [(][STY][STY]*[)]$"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
228 startColPattern <- "^Intensity[^_]"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
229 phosphoColPattern <- readFirstLine(args$phosphoCol)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
230 startColPattern <- readFirstLine(args$startCol)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
231
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
232 sink(getConnection(2))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
233 #ACE print(paste("phosphoColPattern", phosphoColPattern))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
234 #ACE print(paste("startColPattern", startColPattern))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
235
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
236 inputFilename <- args$input
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
237 filteredFilename <- args$filtered_data
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
238 quantFilename <- args$quant_data
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
239 intervalCol <- as.integer(args$intervalCol)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
240
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
241 firstLine <- readFirstLine(inputFilename)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
242 columnHeaders <- unlist(strsplit(x=firstLine, split=c('\t'), fixed=TRUE))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
243 sink(getConnection(2))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
244 #ACE print("columnHeaders")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
245 #ACE print(columnHeaders)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
246 sink()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
247
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
248
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
249 intensityHeaderCols <- grep(pattern=startColPattern, x=columnHeaders, perl=TRUE)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
250 if ( length(intensityHeaderCols) == 0) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
251 err_msg <- paste("Found no intensity columns matching pattern:", startColPattern)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
252 # Divert output to stderr
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
253 sink(getConnection(2))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
254 print(err_msg)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
255 sink()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
256 stop(err_msg)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
257 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
258
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
259
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
260 phosphoCol <- grep(pattern=phosphoColPattern, x=columnHeaders, perl=TRUE)[1]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
261 if (is.na(phosphoCol)) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
262 err_msg <- paste("Found no 'number of phospho sites' columns matching pattern:", phosphoColPattern)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
263 # Divert output to stderr
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
264 sink(getConnection(2))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
265 print(err_msg)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
266 sink()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
267 stop(err_msg)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
268 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
269
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
270
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
271 i_count <- 0
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
272 this_column <- 1
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
273 last_value <- intensityHeaderCols[1]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
274 intensityCols <- c(last_value)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
275
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
276 while ( length(intensityHeaderCols) >= intervalCol * i_count ) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
277 i_count <- 1 + i_count
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
278 this_column <- intervalCol + this_column
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
279 if ( last_value + intervalCol != intensityHeaderCols[this_column] ) break
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
280 last_value <- intensityHeaderCols[this_column]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
281 if (length(intensityHeaderCols) < intervalCol * i_count) break
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
282 intensityCols <- c(intensityCols, intensityHeaderCols[this_column])
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
283 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
284
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
285 startCol <- intensityCols[1]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
286 numSamples <- i_count
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
287
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
288 outputfilename <- args$output
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
289 enrichGraphFilename <- args$enrichGraph
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
290 locProbCutoffGraphFilename <- args$locProbCutoffGraph
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
291 enrichGraphFilename_svg <- args$enrichGraph_svg
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
292 locProbCutoffGraphFilename_svg <- args$locProbCutoffGraph_svg
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
293
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
294 localProbCutoff <- args$localProbCutoff
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
295 enriched <- args$enriched
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
296 collapse_FUN <- args$collapse_func
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
297
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
298 ### EXTRACT PARAMETERS from arguments end ------------------------------------
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
299
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
300
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
301 # Proteomics Quality Control for MaxQuant Results
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
302 # (Bielow C et al. J Proteome Res. 2016 PMID: 26653327)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
303 # is run by the Galaxy MaxQuant wrapper and need not be invoked here.
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
304
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
305
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
306 # Read data, filtering out contaminants, reverse sequences, and localization probability
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
307 # ---
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
308 fullData <- read.table(file = inputFilename, sep ="\t", header=T, quote="")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
309
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
310 #Filter out contaminant rows and reverse rows
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
311 filteredData <- subset(fullData,!grepl("CON__", Proteins))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
312 filteredData <- subset(filteredData,!grepl("_MYCOPLASMA", Proteins))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
313 filteredData <- subset(filteredData,!grepl("CONTAMINANT_", Proteins))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
314 filteredData <- subset(filteredData,!grepl("REV__", Protein)) #since REV__ rows are blank in the first column (Proteins)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
315 write.table(filteredData, file = filteredFilename, sep = "\t", quote=FALSE, col.names=TRUE, row.names=FALSE)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
316 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
317
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
318
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
319 # Filter out data with localization probability below localProbCutoff
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
320 # ---
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
321 #Data filtered by localization probability
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
322 locProbFilteredData <- filteredData[filteredData$Localization.prob>=localProbCutoff,]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
323 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
324
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
325
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
326 # Localization probability -- visualize locprob cutoff
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
327 # ---
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
328 locProbGraphData <- data.frame(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
329 group = c(paste(">",toString(localProbCutoff),sep=""), paste("<",toString(localProbCutoff),sep="")),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
330 value = c(nrow(locProbFilteredData)/nrow(filteredData)*100, (nrow(filteredData)-nrow(locProbFilteredData))/nrow(filteredData)*100)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
331 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
332 gigi <-
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
333 ggplot(locProbGraphData, aes(x = "", y = value, fill = group)) +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
334 geom_bar(width = 0.5, stat = "identity", color = "black") +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
335 labs(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
336 x = NULL
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
337 , y = "percent"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
338 , title = "Phosphopeptides partitioned by localization-probability cutoff"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
339 ) +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
340 scale_fill_discrete(name = "phosphopeptide\nlocalization-\nprobability") +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
341 theme_minimal() +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
342 theme(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
343 legend.position = "right"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
344 , legend.title=element_text()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
345 , plot.title = element_text(hjust = 0.5)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
346 , plot.subtitle = element_text(hjust = 0.5)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
347 , plot.title.position = "plot"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
348 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
349 pdf(locProbCutoffGraphFilename)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
350 print(gigi)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
351 dev.off()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
352 svg(locProbCutoffGraphFilename_svg)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
353 print(gigi)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
354 dev.off()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
355 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
356
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
357
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
358 # Extract quantitative values from filtered data
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
359 # ---
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
360 quantData <- locProbFilteredData[,seq(from=startCol, by=intervalCol, length.out=numSamples)]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
361 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
362
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
363
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
364 # Generate Phosphopeptide Sequence
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
365 # for latest version of MaxQuant (Version 1.5.3.30)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
366 # ---
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
367 dataTable <- data.frame(locProbFilteredData[,1:8],locProbFilteredData[,phosphoCol],locProbFilteredData[,phosphoCol+1],locProbFilteredData[,phosphoCol+2],locProbFilteredData[,phosphoCol+3],locProbFilteredData[,phosphoCol+4],locProbFilteredData[,phosphoCol+5],locProbFilteredData[,phosphoCol+6],locProbFilteredData[,phosphoCol+7],quantData)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
368 colnames(dataTable) <- c("Proteins","Positions within proteins", "Leading proteins", "Protein", "Protein names", "Gene names", "Fasta headers", "Localization prob", "Number of Phospho (STY)", "Amino Acid", "Sequence window","Modification window", "Peptide window coverage", "Phospho (STY) Probabilities", "Phospho (STY) Score diffs", "Position in peptide", colnames(quantData))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
369 # 'phosphopeptide_func' generates a phosphopeptide sequence for each row of data.
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
370 # for the 'apply' function: MARGIN 1 == rows, 2 == columns, c(1,2) = both
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
371 dataTable$Phosphopeptide <- apply(X=dataTable, MARGIN=1, FUN=phosphopeptide_func)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
372 # Move the quant data columns to the right end of the data.frame
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
373 dataTable <- movetolast(dataTable,c(colnames(quantData)))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
374 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
375
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
376
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
377 # Write quantitative values for debugging purposes
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
378 # ---
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
379 quantWrite <- cbind( dataTable[,"Sequence window"], quantData )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
380 colnames(quantWrite)[1] <- "Sequence.Window"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
381 write.table(quantWrite, file = quantFilename, sep = "\t", quote=FALSE, col.names=TRUE, row.names=FALSE)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
382 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
383
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
384
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
385 # Make new data frame containing only Phosphopeptides to be mapped to quant data (merge_df)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
386 # ---
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
387 dataTable <- setDT(dataTable, keep.rownames=TRUE) #row name will be used to map
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
388 merge_df <- data.frame(as.integer(dataTable$rn), dataTable$Phosphopeptide) #row index to merge data frames
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
389 colnames(merge_df) <- c("rn", "Phosphopeptide")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
390 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
391
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
392
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
393 # Add Phosphopeptide column to quant columns for quality control checking
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
394 # ---
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
395 quantData_qc <- as.data.frame(quantData)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
396 setDT(quantData_qc, keep.rownames=TRUE) #will use to match rowname to data
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
397 quantData_qc$rn <- as.integer(quantData_qc$rn)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
398 quantData_qc <- merge(merge_df,quantData_qc, by="rn")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
399 quantData_qc$rn <- NULL #remove rn column
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
400 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
401
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
402
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
403 # Collapse multiphosphorylated peptides
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
404 # ---
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
405 quantData_qc_collapsed <- data.table(quantData_qc, key = "Phosphopeptide")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
406 quantData_qc_collapsed <- aggregate(. ~ Phosphopeptide,quantData_qc, FUN= collapse_FUN)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
407 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
408
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
409
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
410 # Compute (as string) % of phosphopeptides that are multiphosphorylated (for use in next step)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
411 # ---
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
412 pct_multiphos <- (nrow(quantData_qc) - nrow(quantData_qc_collapsed)) / (2 * nrow(quantData_qc))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
413 pct_multiphos <- sprintf("%0.1f%s", 100 * pct_multiphos, "%")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
414 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
415
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
416
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
417 # Compute and visualize breakdown of pY, pS, and pT before enrichment filter
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
418 # ---
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
419 pY_data <- quantData_qc_collapsed[str_detect(quantData_qc_collapsed$Phosphopeptide, "pY"),]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
420 pS_data <- quantData_qc_collapsed[str_detect(quantData_qc_collapsed$Phosphopeptide, "pS"),]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
421 pT_data <- quantData_qc_collapsed[str_detect(quantData_qc_collapsed$Phosphopeptide, "pT"),]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
422
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
423 pY_num <- nrow(pY_data)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
424 pS_num <- nrow(pS_data)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
425 pT_num <- nrow(pT_data)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
426
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
427 # Visualize enrichment
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
428 enrichGraphData <- data.frame(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
429 group = c("pY", "pS", "pT"),
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
430 value = c(pY_num, pS_num, pT_num)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
431 )
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
432
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
433 enrichGraphData <- enrichGraphData[enrichGraphData$value > 0,]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
434
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
435 # Plot pie chart with legend
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
436 # start: https://stackoverflow.com/a/62522478/15509512
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
437 # refine: https://www.statology.org/ggplot-pie-chart/
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
438 # colors: https://colorbrewer2.org/#type=diverging&scheme=BrBG&n=8
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
439 slices <- enrichGraphData$value
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
440 phosphoresidue <- enrichGraphData$group
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
441 pct <- round(100 * slices / sum(slices))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
442 lbls <- paste(enrichGraphData$group,"\n",pct, "%\n(", slices, ")", sep="")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
443 slc_ctr <- c()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
444 run_tot <- 0
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
445 for (p in pct) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
446 slc_ctr <- c(slc_ctr, run_tot + p/2.0)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
447 run_tot <- run_tot + p
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
448 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
449 lbl_y <- 100 - slc_ctr
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
450 df <- data.frame(slices, pct, lbls, phosphoresidue = factor(phosphoresidue, levels = phosphoresidue))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
451 gigi <- ggplot(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
452 df
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
453 , aes(x = 1, y = pct, fill = phosphoresidue)) +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
454 geom_col(position = "stack", orientation = "x") +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
455 geom_text(aes(x = 1, y = lbl_y, label = lbls), col = "black") +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
456 coord_polar(theta = "y", direction = -1) +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
457 labs(
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
458 x = NULL
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
459 , y = NULL
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
460 , title = "Percentages (and counts) of phosphosites, by type of residue"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
461 , caption = sprintf("Roughly %s of peptides have multiple phosphosites.", pct_multiphos)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
462 ) +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
463 labs(x = NULL, y = NULL, fill = NULL) +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
464 theme_classic() +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
465 theme( legend.position="right"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
466 , axis.line = element_blank()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
467 , axis.text = element_blank()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
468 , axis.ticks = element_blank()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
469 , plot.title = element_text(hjust = 0.5)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
470 , plot.subtitle = element_text(hjust = 0.5)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
471 , plot.caption = element_text(hjust = 0.5)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
472 , plot.title.position = "plot"
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
473 ) +
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
474 scale_fill_manual(breaks = phosphoresidue, values=c("#c7eae5", "#f6e8c3", "#dfc27d"))
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
475
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
476 pdf(enrichGraphFilename)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
477 print(gigi)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
478 dev.off()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
479 svg(enrichGraphFilename_svg)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
480 print(gigi)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
481 dev.off()
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
482 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
483
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
484
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
485 # Filter phosphopeptides by enrichment
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
486 # --
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
487 if (enriched == "Y"){
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
488 quantData_qc_enrichment <- quantData_qc_collapsed[str_detect(quantData_qc_collapsed$Phosphopeptide, "pY"),]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
489 } else if ( enriched == "ST" ) {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
490 quantData_qc_enrichment <- quantData_qc_collapsed[str_detect(quantData_qc_collapsed$Phosphopeptide, "pS") | str_detect(quantData_qc_collapsed$Phosphopeptide, "pT"),]
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
491 } else {
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
492 print("Error in enriched variable. Set to either 'Y' or 'ST'")
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
493 }
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
494 # ...
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
495
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
496
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
497 # Write phosphopeptides filtered by enrichment
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
498 # --
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
499 write.table(quantData_qc_enrichment, file=outputfilename, sep="\t", quote = FALSE, row.names = FALSE)
c1403d18c189 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff changeset
500 # ...