changeset 0:525f1318bb55 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/psm_zscore commit 6ea2ae47fb74320afbb88a92e3fe40b5f8f86ad1
author galaxyp
date Tue, 02 May 2017 13:35:55 -0400
parents
children
files PSM_Z_Score.R psm_zscore.xml test-data/PSM_report_in.tsv
diffstat 3 files changed, 108 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PSM_Z_Score.R	Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,57 @@
+###############################################################################
+#                                                                             #
+# PSM_Z_Score.R                                                               #
+#                                                                             #
+# I am using this to test the scripting language and passing parameters       #
+#                                                                             #
+# Created: 2017-04-12                                                         #
+#                                                                             #
+###############################################################################
+
+args <- commandArgs(TRUE) # Trailing args, e.g. z_cutoff=2 psm_report="in.tsv"
+eval(parse(text=args))    # Runs each as R code to define those variables
+# NOTE: This is extremely unsafe programming - any valid R code can run here
+
+#sprintf("z_cutoff is %s and ppm_tolerance is %s", z_cutoff, ppm_tolerance)
+#sprintf("Finally, the PSM report file is %s", psm_report)
+#sprintf("Oh, and the output file name is %s", output_psm_report)
+
+##### Support functions
+# calc_z: standardise v as (v - mu) / sigma; element-wise when v is a vector.
+# The script passes the column mean as mu and its standard deviation as sigma.
+calc_z <- function(v = NULL, mu = NULL, sigma = NULL) (v - mu) / sigma
+
+##### Load Data
+# quote = "" and comment.char = "" keep quotes, apostrophes and '#' inside
+# fields as literal text; read.table's defaults treat them as syntax.
+data <- read.table(psm_report,
+                   header = TRUE, blank.lines.skip = TRUE, fill = TRUE,
+                   sep = "\t", quote = "", comment.char = "")
+
+##### Local "confidence"
+# read.table rewrites the "Precursor m/z Error [ppm]" header to this name.
+values <- data$Precursor.m.z.Error..ppm.
+if (!is.numeric(values)) {  # also catches a missing column (NULL)
+  stop("PSM report needs a numeric 'Precursor m/z Error [ppm]' column")
+}
+# fill = TRUE pads short rows with NA, so drop NAs when estimating mu and
+# sigma; otherwise a single incomplete row makes every z-score NA.
+mu    <- mean(values, na.rm = TRUE)
+sigma <- sd(values, na.rm = TRUE)
+precursor_z      <- calc_z(v = values, mu = mu, sigma = sigma)
+data$precursor_z <- precursor_z
+write.table(data, file = output_psm_report, quote = FALSE, sep = "\t",
+            row.names = FALSE)
+
+##### Global "confidence"
+# PSMs observed beyond +/- z_cutoff divided by n times the share of the
+# +/- ppm_tolerance window (in z units) that lies beyond the cutoff.
+zmin <- calc_z(v = -ppm_tolerance, mu = mu, sigma = sigma)
+zmax <- calc_z(v = +ppm_tolerance, mu = mu, sigma = sigma)
+area <- (-z_cutoff - zmin) + (zmax - z_cutoff)
+nFalseHit <- sum(abs(precursor_z) > z_cutoff, na.rm = TRUE)
+n         <- sum(!is.na(values))  # rows that actually carry an error value
+propArea  <- area / (zmax - zmin)
+global_precursor_conf <- nFalseHit / (n * propArea)
+print(global_precursor_conf)  # prints "[1] <value>", as the bare name did
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/psm_zscore.xml	Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,41 @@
+<tool id="psm_zscore" name="PSM z-score" version="0.1.0">
+    <description>Add a zscore column to a PSM report</description>
+    <requirements>
+        <requirement type="package" version="3.2.1">R</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command><![CDATA[
+        set -o pipefail; echo "Global FDR: ";
+        Rscript '${__tool_directory__}/PSM_Z_Score.R'
+        z_cutoff=$z_cutoff 
+        ppm_tolerance=$ppm_tolerance 
+        psm_report='"$psm_report"' 
+        output_psm_report='"$zscored_psm_report"' | awk '{print \$2}'
+    ]]></command> <!-- pipefail: a failing Rscript must not be masked by awk's exit status -->
+    <inputs>
+        <param name="psm_report" type="data" format="tabular" label="PSM report"/>
+        <param name="z_cutoff" type="float" value="2" min="0.1" max="10" label="Z-value cutoff"/>
+        <param name="ppm_tolerance" type="integer" value="10" label="ppm tolerance" 
+         help="The ppm tolerance value used when generating the PSM report"/>
+    </inputs>
+    <outputs>
+        <data name="zscored_psm_report" format="tabular" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="psm_report" ftype="tabular" value="PSM_report_in.tsv"/>
+            <param name="z_cutoff" value="2"/>
+            <param name="ppm_tolerance" value="0"/>
+            <output name="zscored_psm_report">
+                <assert_contents>
+                    <has_text_matching expression="1\tNP_001419\t.*\t\d+\.\d+"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
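+        Adds a precursor_z column to a PSM report, holding each row's "Precursor m/z Error [ppm]" value standardised by the mean and standard deviation of that column, and prints a "Global FDR" value to standard output.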
+    ]]></help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/PSM_report_in.tsv	Tue May 02 13:35:55 2017 -0400
@@ -0,0 +1,10 @@
+	Protein(s)	Sequence	AAs Before	AAs After	Position	Modified Sequence	Variable Modifications	Fixed Modifications	Spectrum File	Spectrum Title	Spectrum Scan Number	RT	m/z	Measured Charge	Identification Charge	Theoretical Mass	Isotope Number	Precursor m/z Error [ppm]	Localization Confidence	Probabilistic PTM score	D-score	Confidence [%]	Validation
+1	NP_001419	IDKLMIEMDGTENK	EK	SK	90	NH2-IDKLM<ox>IEM<ox>DGTENK-COOH	Oxidation of M(8, 5)		Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09529.09529.2	9529	-1.0	834.894104	2+	2+	1667.7746235619102	0	-0.5800106672224822	Oxidation of M (5: Very Confident, 8: Very Confident)	Oxidation of M (5: 100.0, 8: 100.0)	Oxidation of M (5: 100.0, 8: 100.0)	100.0	Doubtful
+2	NP_001419	LMIEMDGTENK	DK	SK	93	NH2-LMIEM<ox>DGTENK-COOH	Oxidation of M(5)		Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09211.09211.2	9211	-1.0	648.794495	2+	2+	1295.5737389273904	0	0.5380281170676847	Oxidation of M (5: Very Confident)	Oxidation of M (2: 0.008579572548232045, 5: 99.99142042745177)	Oxidation of M (5: 25.657894736842103)	100.0	Doubtful
+3	NP_001419	IDKLMIEMDGTENK	EK	SK	90	NH2-IDKLM<ox>IEM<ox>DGTENK-COOH	Oxidation of M(5, 8)		Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.09410.09410.3	9410	-1.0	556.932617	3+	3+	1667.7746235619102	0	0.836749235076534	Oxidation of M (5: Very Confident, 8: Very Confident)	Oxidation of M (5: 100.0, 8: 100.0)	Oxidation of M (5: 100.0, 8: 100.0)	100.0	Doubtful
+4	NP_001419	LNVTEQEKIDK	KK	LM	82	NH2-LNVTEQEKIDK-COOH			Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.07277.07277.3	7277	-1.0	439.573456	3+	3+	1315.6983451935503	0	0.1466619056362385				100.0	Doubtful
+5	NP_001188412, NP_001419	IGAEVYHNLK	MR; MR	NV; NV	91; 184	NH2-IGAEVYHNLK-COOH			Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.08920.08920.2	8920	-1.0	572.311768	2+	2+	1142.6084079903403	0	0.5024152539610399				100.0	Doubtful
+6	NP_001419	GNPTVEVDLFTSK	SR	GL	16	NH2-GNPTVEVDLFTSK-COOH			Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.19124.19124.2	19124	-1.0	703.861816	2+	2+	1405.7089098772503	0	0.12018633626694622				100.0	Doubtful
+7	NP_001419	LMIEMDGTENK	DK	SK	93	NH2-LMIEMDGTENK-COOH			Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.14908.14908.2	14908	-1.0	640.796692	2+	2+	1279.5788243078305	0	0.005273548940125693				100.0	Doubtful
+8	NP_001419	LMIEMDGTENKSK	DK	FG	93	NH2-LM<ox>IEM<ox>DGTENKSK-COOH	Oxidation of M(5, 2)		Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.05568.05568.3	5568	-1.0	509.906097	3+	3+	1526.6956449652205	0	0.5338465156052076	Oxidation of M (2: Very Confident, 5: Very Confident)	Oxidation of M (2: 100.0, 5: 100.0)	Oxidation of M (2: 100.0, 5: 100.0)	100.0	Doubtful
+9	NP_001419	GNPTVEVDLFTSK	SR	GL	16	NH2-GNPTVEVDLFTSK-COOH			Mascot formatted MGF of data 27.mgf	tgriffin_cguerrer_20150128_RP_MCF7_ingeldigest_band48kb_replicate3.16695.16695.2	16695	-1.0	703.861694	2+	2+	1405.7089098772503	0	-0.05314316081446145				100.0	Doubtful