# HG changeset patch
# User galaxyp
# Date 1493746555 14400
# Node ID 525f1318bb55118c32b4f4b405b2612d6531616a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/psm_zscore commit 6ea2ae47fb74320afbb88a92e3fe40b5f8f86ad1
diff -r 000000000000 -r 525f1318bb55 PSM_Z_Score.R
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PSM_Z_Score.R	Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,57 @@
+###############################################################################
+#                                                                             #
+# PSM_Z_Score.R                                                               #
+#                                                                             #
+# Adds a precursor m/z error z-score column to a PSM report                   #
+#                                                                             #
+# Created: 2017-04-12                                                         #
+#                                                                             #
+###############################################################################
+
+args <- commandArgs(TRUE) # Trailing command-line arguments, one string each
+eval(parse(text=args))    # Parses every argument as R code and evaluates it
+# NOTE: This is extremely unsafe programming - any valid R code can run here
+
+# The rest of this script reads four variables that it never assigns itself,
+# so the arguments evaluated above are expected to define all of them:
+# psm_report, output_psm_report, z_cutoff and ppm_tolerance.
+
+##### Support functions: calc_z() is the standard score (v - mu) / sigma
+calc_z <- function(v=NULL, mu=NULL, sigma=NULL) {
+  (v - mu) / sigma
+}
+
+##### Load Data
+# Read the tab-separated PSM report named by psm_report. Quoting and comment
+# parsing are switched off so ', " and # inside a field stay literal text.
+data <- read.table(psm_report, header = TRUE, sep = "\t",
+                   quote = "", comment.char = "",
+                   blank.lines.skip = TRUE, fill = TRUE)
+# check.names is left at its default, so headers become syntactic R names.
+
+##### Local "confidence"
+# Z-score every precursor m/z error against the mean and standard deviation
+# of the whole report and write the report back out with that extra column.
+values <- data$Precursor.m.z.Error..ppm.
+# na.rm = TRUE: a single missing error would otherwise make mu, sigma and
+# therefore every z-score NA; rows without an error simply keep an NA z-score.
+mu    <- mean(values, na.rm = TRUE)
+sigma <- sd(values, na.rm = TRUE)
+precursor_z      <- calc_z(v=values, mu=mu, sigma=sigma)
+data$precursor_z <- precursor_z
+write.table(data, file=output_psm_report, quote=FALSE, sep="\t", row.names=FALSE)
+
+##### Global "confidence"
+# Express the +/- ppm_tolerance window in z units, take the share of it lying
+# beyond +/- z_cutoff, and relate the PSMs found out there to n * that share.
+zmin    <- calc_z(v=-ppm_tolerance, mu=mu, sigma=sigma)
+zmax    <- calc_z(v=ppm_tolerance, mu=mu, sigma=sigma)
+area    <- (-z_cutoff - zmin) + (zmax - z_cutoff)
+# Rows with a missing error are left out of both the hit count and n.
+nFalseHit <- sum(abs(precursor_z) > z_cutoff, na.rm = TRUE)
+n         <- sum(!is.na(values))
+propArea  <- area / (zmax - zmin)
+global_precursor_conf <- nFalseHit / (n * propArea)
+
+global_precursor_conf
+
diff -r 000000000000 -r 525f1318bb55 psm_zscore.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/psm_zscore.xml	Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,41 @@
+
+    Add a zscore column to a PSM report
+    
+        R
+    
+    
+        
+    
+    
+    
+        
+        
+        
+    
+    
+        
+    
+    
+        
+            
+            
+            
+            
+        
+    
+    
+
diff -r 000000000000 -r 525f1318bb55 test-data/PSM_report_in.tsv
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/PSM_report_in.tsv	Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,10 @@
+	Protein(s)	Sequence	AAs Before	AAs After	Position	Modified Sequence	Variable Modifications	Fixed Modifications	Spectrum File	Spectrum Title	Spectrum Scan Number	RT	m/z	Measured Charge	Identification Charge	Theoretical Mass	Isotope Number	Precursor m/z Error [ppm]	Localization Confidence	Probabilistic PTM score	D-score	Confidence [%]	Validation
+1	NP_001419	IDKLMIEMDGTENK	EK	SK	90	NH2-IDKLMIEMDGTENK-COOH	Oxidation of M(8, 5)		Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09529.09529.2	9529	-1.0	834.894104	2+	2+	1667.7746235619102	0	-0.5800106672224822	Oxidation of M (5: Very Confident, 8: Very Confident)	Oxidation of M (5: 100.0, 8: 100.0)	Oxidation of M (5: 100.0, 8: 100.0)	100.0	Doubtful
+2	NP_001419	LMIEMDGTENK	DK	SK	93	NH2-LMIEMDGTENK-COOH	Oxidation of M(5)		Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09211.09211.2	9211	-1.0	648.794495	2+	2+	1295.5737389273904	0	0.5380281170676847	Oxidation of M (5: Very Confident)	Oxidation of M (2: 0.008579572548232045, 5: 99.99142042745177)	Oxidation of M (5: 25.657894736842103)	100.0	Doubtful
+3	NP_001419	IDKLMIEMDGTENK	EK	SK	90	NH2-IDKLMIEMDGTENK-COOH	Oxidation of M(5, 8)		Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09410.09410.3	9410	-1.0	556.932617	3+	3+	1667.7746235619102	0	0.836749235076534	Oxidation of M (5: Very Confident, 8: Very Confident)	Oxidation of M (5: 100.0, 8: 100.0)	Oxidation of M (5: 100.0, 8: 100.0)	100.0	Doubtful
+4	NP_001419	LNVTEQEKIDK	KK	LM	82	NH2-LNVTEQEKIDK-COOH			Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.07277.07277.3	7277	-1.0	439.573456	3+	3+	1315.6983451935503	0	0.1466619056362385				100.0	Doubtful
+5	NP_001188412, NP_001419	IGAEVYHNLK	MR; MR	NV; NV	91; 184	NH2-IGAEVYHNLK-COOH			Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.08920.08920.2	8920	-1.0	572.311768	2+	2+	1142.6084079903403	0	0.5024152539610399				100.0	Doubtful
+6	NP_001419	GNPTVEVDLFTSK	SR	GL	16	NH2-GNPTVEVDLFTSK-COOH			Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.19124.19124.2	19124	-1.0	703.861816	2+	2+	1405.7089098772503	0	0.12018633626694622				100.0	Doubtful
+7	NP_001419	LMIEMDGTENK	DK	SK	93	NH2-LMIEMDGTENK-COOH			Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.14908.14908.2	14908	-1.0	640.796692	2+	2+	1279.5788243078305	0	0.005273548940125693				100.0	Doubtful
+8	NP_001419	LMIEMDGTENKSK	DK	FG	93	NH2-LMIEMDGTENKSK-COOH	Oxidation of M(5, 2)		Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.05568.05568.3	5568	-1.0	509.906097	3+	3+	1526.6956449652205	0	0.5338465156052076	Oxidation of M (2: Very Confident, 5: Very Confident)	Oxidation of M (2: 100.0, 5: 100.0)	Oxidation of M (2: 100.0, 5: 100.0)	100.0	Doubtful
+9	NP_001419	GNPTVEVDLFTSK	SR	GL	16	NH2-GNPTVEVDLFTSK-COOH			Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.16695.16695.2	16695	-1.0	703.861694	2+	2+	1405.7089098772503	0	-0.05314316081446145				100.0	Doubtful