# HG changeset patch
# User galaxyp
# Date 1493746555 14400
# Node ID 525f1318bb55118c32b4f4b405b2612d6531616a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/psm_zscore commit 6ea2ae47fb74320afbb88a92e3fe40b5f8f86ad1
diff -r 000000000000 -r 525f1318bb55 PSM_Z_Score.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/PSM_Z_Score.R Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,57 @@
+###############################################################################
+# #
+# PSM_Z_Score.R #
+# #
+# Adds a precursor m/z error z-score column to a PSM report; parameters are passed as R code #
+# #
+# Created: 2017-04-12 #
+# #
+###############################################################################
+
+args <- commandArgs(trailingOnly = TRUE)  # trailing (user-supplied) arguments only
+# WARNING: unsafe - the arguments are executed as R code, so any valid R can run.
+eval(parse(text = args))
+
+#sprintf("z_cutoff is %s and ppm_tolerance is %s", z_cutoff, ppm_tolerance)
+#sprintf("Finally, the PSM report file is %s", psm_report)
+#sprintf("Oh, and the output file name is %s", output_psm_report)
+
+##### Support functions
+calc_z <- function(v=NULL, mu=NULL, sigma=NULL) {  # z-score of v for mean mu, sd sigma
+  (v - mu) / sigma  # vectorised over v; last expression is the return value
+}
+
+##### Load Data
+# Read the tab-separated PSM report. quote = "" and comment.char = "" keep
+# apostrophes, double quotes and '#' inside fields literal instead of letting
+# them swallow or truncate rows (which fill = TRUE would then silently pad).
+data <- read.table(psm_report, header = TRUE, sep = "\t",
+                   quote = "", comment.char = "",
+                   blank.lines.skip = TRUE, fill = TRUE)
+
+##### Local "confidence"
+# Z-score each PSM's precursor m/z error against the report's own mean and sd.
+values <- data[["Precursor.m.z.Error..ppm."]]  # header as mangled by read.table
+if (!(is.numeric(values) || is.logical(values))) {
+  stop("PSM report has no numeric 'Precursor m/z Error [ppm]' column", call. = FALSE)
+}
+# na.rm = TRUE: a single NA (e.g. a short row padded by fill = TRUE) would otherwise make every z-score NA.
+mu <- mean(values, na.rm = TRUE)
+sigma <- sd(values, na.rm = TRUE)
+precursor_z <- calc_z(v=values, mu=mu, sigma=sigma)  # NA inputs stay NA
+data$precursor_z <- precursor_z
+write.table(data, file=output_psm_report, quote=FALSE, sep="\t", row.names=FALSE)
+
+##### Global "confidence"
+# Express the +/- ppm_tolerance window in z units.
+zmin <- calc_z(v=-ppm_tolerance, mu=mu, sigma=sigma)
+zmax <- calc_z(v=+ppm_tolerance, mu=mu, sigma=sigma)
+# Width of the window lying beyond +/- z_cutoff, then as a fraction of the whole window.
+area <- (-z_cutoff - zmin) + (zmax - z_cutoff)
+propArea <- area / (zmax - zmin)
+# Count only PSMs that have a ppm error, so NA rows affect neither term of the ratio.
+nFalseHit <- sum(abs(precursor_z) > z_cutoff, na.rm = TRUE)
+n <- sum(!is.na(values))
+global_precursor_conf <- nFalseHit / (n * propArea)
+global_precursor_conf  # top-level expression: prints the value
+
diff -r 000000000000 -r 525f1318bb55 psm_zscore.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/psm_zscore.xml Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,41 @@
+
+ Add a zscore column to a PSM report
+
+ R
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 525f1318bb55 test-data/PSM_report_in.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/PSM_report_in.tsv Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,10 @@
+ Protein(s) Sequence AAs Before AAs After Position Modified Sequence Variable Modifications Fixed Modifications Spectrum File Spectrum Title Spectrum Scan Number RT m/z Measured Charge Identification Charge Theoretical Mass Isotope Number Precursor m/z Error [ppm] Localization Confidence Probabilistic PTM score D-score Confidence [%] Validation
+1 NP_001419 IDKLMIEMDGTENK EK SK 90 NH2-IDKLMIEMDGTENK-COOH Oxidation of M(8, 5) Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09529.09529.2 9529 -1.0 834.894104 2+ 2+ 1667.7746235619102 0 -0.5800106672224822 Oxidation of M (5: Very Confident, 8: Very Confident) Oxidation of M (5: 100.0, 8: 100.0) Oxidation of M (5: 100.0, 8: 100.0) 100.0 Doubtful
+2 NP_001419 LMIEMDGTENK DK SK 93 NH2-LMIEMDGTENK-COOH Oxidation of M(5) Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09211.09211.2 9211 -1.0 648.794495 2+ 2+ 1295.5737389273904 0 0.5380281170676847 Oxidation of M (5: Very Confident) Oxidation of M (2: 0.008579572548232045, 5: 99.99142042745177) Oxidation of M (5: 25.657894736842103) 100.0 Doubtful
+3 NP_001419 IDKLMIEMDGTENK EK SK 90 NH2-IDKLMIEMDGTENK-COOH Oxidation of M(5, 8) Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09410.09410.3 9410 -1.0 556.932617 3+ 3+ 1667.7746235619102 0 0.836749235076534 Oxidation of M (5: Very Confident, 8: Very Confident) Oxidation of M (5: 100.0, 8: 100.0) Oxidation of M (5: 100.0, 8: 100.0) 100.0 Doubtful
+4 NP_001419 LNVTEQEKIDK KK LM 82 NH2-LNVTEQEKIDK-COOH Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.07277.07277.3 7277 -1.0 439.573456 3+ 3+ 1315.6983451935503 0 0.1466619056362385 100.0 Doubtful
+5 NP_001188412, NP_001419 IGAEVYHNLK MR; MR NV; NV 91; 184 NH2-IGAEVYHNLK-COOH Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.08920.08920.2 8920 -1.0 572.311768 2+ 2+ 1142.6084079903403 0 0.5024152539610399 100.0 Doubtful
+6 NP_001419 GNPTVEVDLFTSK SR GL 16 NH2-GNPTVEVDLFTSK-COOH Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.19124.19124.2 19124 -1.0 703.861816 2+ 2+ 1405.7089098772503 0 0.12018633626694622 100.0 Doubtful
+7 NP_001419 LMIEMDGTENK DK SK 93 NH2-LMIEMDGTENK-COOH Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.14908.14908.2 14908 -1.0 640.796692 2+ 2+ 1279.5788243078305 0 0.005273548940125693 100.0 Doubtful
+8 NP_001419 LMIEMDGTENKSK DK FG 93 NH2-LMIEMDGTENKSK-COOH Oxidation of M(5, 2) Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.05568.05568.3 5568 -1.0 509.906097 3+ 3+ 1526.6956449652205 0 0.5338465156052076 Oxidation of M (2: Very Confident, 5: Very Confident) Oxidation of M (2: 100.0, 5: 100.0) Oxidation of M (2: 100.0, 5: 100.0) 100.0 Doubtful
+9 NP_001419 GNPTVEVDLFTSK SR GL 16 NH2-GNPTVEVDLFTSK-COOH Mascot formatted MGF of data 27.mgf tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.16695.16695.2 16695 -1.0 703.861694 2+ 2+ 1405.7089098772503 0 -0.05314316081446145 100.0 Doubtful