annotate Testtest/GCMS-test_spectrum.R @ 0:40de28c7d3fb draft

Uploaded
author melpetera
date Thu, 23 Nov 2017 08:50:14 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
1 # author: Pauline Ribeyre
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
2
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
3
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
4 #############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
5 # find ions #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
6 #############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
7
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
8 is_ion_present <- function(GC_results, rt, mz_list) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
9 # Looks for a ion in the pseudospectra.
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
10 #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
11 # Args:
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
12 # GC_results: result of a test by runGC().
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
13 # rt: retention time of the ion to look for.
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
14 # mz_list: list of the ion's isotopes' m/z.
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
15 #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
16 # Returns:
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
17 # The number of times the ion was found in the pseudospectra.
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
18
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
19 delta_rt <- ions_delta_rt
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
20 delta_mz <- ions_delta_mz
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
21
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
22 res = 0
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
23
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
24 pseudospectra <- GC_results$PseudoSpectra
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
25 for (peak in pseudospectra) { # check all the peaks
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
26 if (peak$rt >= rt - delta_rt && peak$rt <= rt + delta_rt) { # check if the retention time matches
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
27
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
28 ps <- peak$pspectrum
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
29 found_ion <- rep(NA, length(mz_list))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
30
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
31 for (i in 1:length(mz_list)) { # check the presence of all the ions given in parameter
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
32
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
33 ion <- mz_list[i][[1]]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
34 found_ion[i] <- sum((ps[,"mz"] >= ion - delta_mz) & (ps[,"mz"] <= ion + delta_mz))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
35
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
36 } # end for
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
37
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
38 if (length(which(found_ion > 1)) > 0) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
39 stop("The ion was found more than 1 time in the same pseudospectrum.")
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
40 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
41
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
42 if (sum(found_ion) == length(mz_list)) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
43 res <- res + 1
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
44 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
45
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
46 } # end if
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
47 } # end for
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
48
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
49 return (res)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
50
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
51 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
52
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
53
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
54 ##############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
55 # duplicates #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
56 ##############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
57
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
58 count_duplicates_function <- function(GC_results) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
59 # Counts the number of ions that have been found in more than one pseudospectra (slow).
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
60 #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
61 # Args:
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
62 # GC_results: result of a test by runGC().
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
63 #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
64 # Returns:
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
65 # The number of duplicates.
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
66
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
67 delta_rt <- duplicates_delta_rt
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
68 delta_mz <- duplicates_delta_mz
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
69
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
70 pseudospectra <- GC_results$PseudoSpectra
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
71
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
72 # order by pseudospectrum rt
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
73 rts <- sapply(pseudospectra, function(x) x$rt)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
74 rts_order <- order(rts)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
75
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
76 duplicates <- NULL # record all the duplicates we find
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
77 already_counted <- NULL # record which duplicates have already been counted
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
78
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
79 for (pspec in 1:length(rts)) { # for each pseudospectrum
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
80
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
81 i <- rts_order[pspec]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
82 this_rt <- rts[i]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
83
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
84 # number of following pseudospectra to consider
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
85 k <- 0
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
86 while (pspec + k + 1 < length(rts_order) && rts[rts_order[pspec + k + 1]] < this_rt + delta_rt) { # check if the retention time matches
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
87 k <- k + 1
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
88 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
89
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
90 mzs <- lapply(pseudospectra[rts_order[pspec:(pspec + k)]], function(x) x$pspectrum)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
91 mzs_sorted <- NULL
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
92 for (j in 1:(k + 1)) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
93 mzs_sorted <- rbind(mzs_sorted, t(sapply(1:length(mzs[[j]][,1]), function(n) c(rts[rts_order[pspec + j - 1]], mzs[[j]][,1][n]))))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
94 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
95 mzs_sorted <- mzs_sorted[order(mzs_sorted[,2]),] # column 1: rt ; column 2: m/z
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
96
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
97 i <- 1
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
98 while (i <= nrow(mzs_sorted)) { # for each m/z
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
99
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
100 j <- 1
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
101 this_already_counted <- nrow(merge(data.frame(t(mzs_sorted[i,])), data.frame(already_counted))) > 0
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
102
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
103 if (is.null(already_counted) || !this_already_counted) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
104 this_mz <- mzs_sorted[i,2]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
105 already_counted <- rbind(already_counted, mzs_sorted[i,])
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
106 same_ion <- 1 # count how many times this m/z appears
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
107
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
108 while (j <= nrow(mzs_sorted) - i && mzs_sorted[i+j,2] >= this_mz - delta_mz && mzs_sorted[i+j,2] <= this_mz + delta_mz) { # check if the m/z matches
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
109 this_already_counted <- nrow(merge(data.frame(t(mzs_sorted[i+j,])), data.frame(already_counted))) > 0
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
110
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
111 if (!this_already_counted) { # do not count a duplicate more than once
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
112 same_ion <- same_ion + 1
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
113 already_counted <- rbind(already_counted, mzs_sorted[i+j,]) # record which duplicates have already been counted
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
114 this_mz <- mzs_sorted[i+j,2]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
115 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
116
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
117 j <- j + 1
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
118 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
119
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
120 # first/min m/z, last/max m/z, rt where we started, number of duplicates
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
121 if (same_ion > 1)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
122 duplicates <- rbind(duplicates, c(mzs_sorted[i,2], this_mz, mzs_sorted[i,1], same_ion))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
123 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
124 i <- i + j
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
125 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
126
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
127 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
128
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
129 if (is.null(duplicates))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
130 duplicates <- c(0, 0, 0, 0)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
131
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
132 return (duplicates)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
133
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
134 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
135