# HG changeset patch
# User galaxyp
# Date 1493746555 14400
# Node ID 525f1318bb55118c32b4f4b405b2612d6531616a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/psm_zscore commit 6ea2ae47fb74320afbb88a92e3fe40b5f8f86ad1

diff -r 000000000000 -r 525f1318bb55 PSM_Z_Score.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/PSM_Z_Score.R Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,84 @@
+###############################################################################
+# PSM_Z_Score.R
+#
+# Adds a z-score column (precursor_z) for the precursor m/z error to a PSM
+# report and prints a global precursor "confidence" estimate.
+#
+# Parameters arrive on the command line as R assignments and must define:
+#   psm_report, output_psm_report, z_cutoff, ppm_tolerance
+#
+# Created: 2017-04-12
+###############################################################################
+
+args <- commandArgs(TRUE)  # trailing command-line arguments
+# SECURITY: each argument is parsed and evaluated as R code, so any valid R
+# expression passed on the command line runs here. Only supply trusted input.
+eval(parse(text = args))
+
+##### Validate parameters
+# Fail before any file is read or written instead of part-way through the run.
+required <- c("psm_report", "output_psm_report", "z_cutoff", "ppm_tolerance")
+is_defined <- vapply(required, exists, logical(1), envir = globalenv())
+if (!all(is_defined)) {
+  stop("Missing required parameter(s): ",
+       paste(required[!is_defined], collapse = ", "), call. = FALSE)
+}
+stopifnot(
+  is.character(psm_report), length(psm_report) == 1L,
+  is.character(output_psm_report), length(output_psm_report) == 1L,
+  is.numeric(z_cutoff), length(z_cutoff) == 1L,
+  is.numeric(ppm_tolerance), length(ppm_tolerance) == 1L
+)
+
+##### Support functions
+# Standard score of `v` for mean `mu` and standard deviation `sigma`;
+# vectorised over `v`.
+calc_z <- function(v = NULL, mu = NULL, sigma = NULL) {
+  (v - mu) / sigma
+}
+
+##### Load data
+# quote = "" and comment.char = "" stop quote characters and '#' inside text
+# fields from merging or truncating rows.
+data <- read.table(psm_report,
+                   header = TRUE,
+                   blank.lines.skip = TRUE,
+                   fill = TRUE,
+                   sep = "\t",
+                   quote = "",
+                   comment.char = "")
+
+##### Local "confidence"
+# read.table() rewrites the header "Precursor m/z Error [ppm]" to this name.
+ppm_col <- "Precursor.m.z.Error..ppm."
+if (!ppm_col %in% names(data)) {
+  stop("Column '", ppm_col, "' not found in the PSM report", call. = FALSE)
+}
+values <- data[[ppm_col]]
+if (!is.numeric(values) && !all(is.na(values))) {
+  stop("Column '", ppm_col, "' is not numeric", call. = FALSE)
+}
+# fill = TRUE pads short rows with NA; drop NA from the statistics so one
+# missing value does not turn every z-score into NA.
+mu <- mean(values, na.rm = TRUE)
+sigma <- sd(values, na.rm = TRUE)
+precursor_z <- calc_z(v = values, mu = mu, sigma = sigma)
+data$precursor_z <- precursor_z
+write.table(data, file = output_psm_report, quote = FALSE, sep = "\t",
+            row.names = FALSE)
+
+##### Global "confidence"
+ppm_min <- -ppm_tolerance
+ppm_max <- ppm_tolerance
+zmin <- calc_z(v = ppm_min, mu = mu, sigma = sigma)
+zmax <- calc_z(v = ppm_max, mu = mu, sigma = sigma)
+# Part of the z window [zmin, zmax] outside [-z_cutoff, z_cutoff], assuming
+# the cutoff lies inside the window.
+area <- (-z_cutoff - zmin) + (zmax - z_cutoff)
+n_false_hit <- sum(abs(precursor_z) > z_cutoff, na.rm = TRUE)
+n <- sum(!is.na(values))
+prop_area <- area / (zmax - zmin)
+global_precursor_conf <- n_false_hit / (n * prop_area)
+
+print(global_precursor_conf)
+
diff -r 000000000000 -r 525f1318bb55 psm_zscore.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/psm_zscore.xml Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,41 @@
+ + Add a zscore column to a PSM report + + R + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r 525f1318bb55 test-data/PSM_report_in.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/PSM_report_in.tsv Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,10 @@
+ Protein(s) Sequence AAs Before AAs After Position Modified Sequence Variable Modifications Fixed Modifications Spectrum File Spectrum Title Spectrum Scan Number RT m/z Measured Charge Identification Charge Theoretical Mass Isotope Number Precursor m/z Error [ppm] Localization Confidence Probabilistic PTM score D-score Confidence [%] Validation
+1 NP_001419 IDKLMIEMDGTENK EK SK 90 NH2-IDKLMIEMDGTENK-COOH Oxidation of M(8, 5) Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09529.09529.2 9529 -1.0 834.894104 2+ 2+ 1667.7746235619102 0 -0.5800106672224822 Oxidation of M (5: Very Confident, 8: Very Confident) Oxidation of M (5: 100.0, 8: 100.0) Oxidation of M (5: 100.0, 8: 100.0) 100.0 Doubtful
+2 
NP_001419 LMIEMDGTENK DK SK 93 NH2-LMIEMDGTENK-COOH Oxidation of M(5) Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09211.09211.2 9211 -1.0 648.794495 2+ 2+ 1295.5737389273904 0 0.5380281170676847 Oxidation of M (5: Very Confident) Oxidation of M (2: 0.008579572548232045, 5: 99.99142042745177) Oxidation of M (5: 25.657894736842103) 100.0 Doubtful +3 NP_001419 IDKLMIEMDGTENK EK SK 90 NH2-IDKLMIEMDGTENK-COOH Oxidation of M(5, 8) Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09410.09410.3 9410 -1.0 556.932617 3+ 3+ 1667.7746235619102 0 0.836749235076534 Oxidation of M (5: Very Confident, 8: Very Confident) Oxidation of M (5: 100.0, 8: 100.0) Oxidation of M (5: 100.0, 8: 100.0) 100.0 Doubtful +4 NP_001419 LNVTEQEKIDK KK LM 82 NH2-LNVTEQEKIDK-COOH Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.07277.07277.3 7277 -1.0 439.573456 3+ 3+ 1315.6983451935503 0 0.1466619056362385 100.0 Doubtful +5 NP_001188412, NP_001419 IGAEVYHNLK MR; MR NV; NV 91; 184 NH2-IGAEVYHNLK-COOH Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.08920.08920.2 8920 -1.0 572.311768 2+ 2+ 1142.6084079903403 0 0.5024152539610399 100.0 Doubtful +6 NP_001419 GNPTVEVDLFTSK SR GL 16 NH2-GNPTVEVDLFTSK-COOH Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.19124.19124.2 19124 -1.0 703.861816 2+ 2+ 1405.7089098772503 0 0.12018633626694622 100.0 Doubtful +7 NP_001419 LMIEMDGTENK DK SK 93 NH2-LMIEMDGTENK-COOH Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.14908.14908.2 14908 -1.0 640.796692 2+ 2+ 1279.5788243078305 0 0.005273548940125693 100.0 Doubtful +8 NP_001419 LMIEMDGTENKSK DK FG 93 NH2-LMIEMDGTENKSK-COOH Oxidation of M(5, 2) Mascot formatted MGF of data 27.mgf 
tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.05568.05568.3 5568 -1.0 509.906097 3+ 3+ 1526.6956449652205 0 0.5338465156052076 Oxidation of M (2: Very Confident, 5: Very Confident) Oxidation of M (2: 100.0, 5: 100.0) Oxidation of M (2: 100.0, 5: 100.0) 100.0 Doubtful +9 NP_001419 GNPTVEVDLFTSK SR GL 16 NH2-GNPTVEVDLFTSK-COOH Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.16695.16695.2 16695 -1.0 703.861694 2+ 2+ 1405.7089098772503 0 -0.05314316081446145 100.0 Doubtful