Mercurial > repos > prog > lcmsmatching
annotate massdb-helper.R @ 2:9a0288561ba3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 476a081c0da66822f4e77070f5ce59d9f14511f4-dirty
| author | prog |
|---|---|
| date | Thu, 02 Mar 2017 11:09:37 -0500 |
| parents | 45e985cd8e9e |
| children |
| rev | line source |
|---|---|
|
1
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
#' Reduce a spectrum to two columns: m/z and relative intensity (percent).
#'
#' @param spec A spectrum as a data.frame or matrix, one row per peak.
#'   With exactly two columns they are taken positionally (m/z first,
#'   intensity second). With any other number of columns the m/z column must
#'   be named `BIODB.PEAK.MZ` and the intensity column must carry one of the
#'   names listed in `BIODB.GROUP.INTENSITY`; the first listed name that is
#'   present is used.
#' @return `NA_real_` when `spec` is empty (zero length or zero rows).
#'   Otherwise a two-column object of the same kind as `spec`, with columns
#'   named `BIODB.PEAK.MZ` and `BIODB.PEAK.RELATIVE.INTENSITY`, the second
#'   one rescaled so that its maximum is 100.
simplifySpectrum <- function(spec) {
  # length() is 0 for NULL and for tables without columns.
  if (length(spec) == 0) {
    return(NA_real_)
  }
  if (nrow(spec) == 0) {
    return(NA_real_)
  }

  if (ncol(spec) != 2) {
    available <- colnames(spec)

    # Fail with an explicit message instead of relying on a bare subsetting
    # expression to raise an "undefined column" error.
    if (!(BIODB.PEAK.MZ %in% available)) {
      stop("No m/z column found, it should be named ", BIODB.PEAK.MZ)
    }

    # Keep every candidate name that is present, in priority order. Looking
    # only at the first candidate would reject spectra that provide one of
    # the other accepted intensity columns.
    candidates <-
      BIODB.GROUP.INTENSITY[BIODB.GROUP.INTENSITY %in% available]
    if (length(candidates) == 0) {
      stop(
        "No intensity column found, if there are more than 2 columns, ",
        "columns should be named: ",
        paste0(BIODB.GROUP.INTENSITY, collapse = ", ")
      )
    }

    # drop = FALSE keeps a one-row matrix two-dimensional so that the
    # column indexing below still works.
    spec <- spec[, c(BIODB.PEAK.MZ, candidates[1]), drop = FALSE]
  }

  # Normalise the intensities to a percentage of the most intense peak.
  # NOTE(review): an NA intensity makes every value NA and an all-zero
  # spectrum yields NaN; confirm whether callers need that handled.
  intensity <- as.numeric(spec[, 2])
  spec[, 2] <- intensity * 100 / max(intensity)
  colnames(spec) <- c(BIODB.PEAK.MZ, BIODB.PEAK.RELATIVE.INTENSITY)
  spec
}
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
24 |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
25 |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
26 |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
#' Compute the similarity between two spectra with a pluggable function.
#'
#' Both spectra are first passed through `simplifySpectrum()`, so they are
#' compared as (m/z, relative intensity) tables on the same scale.
#'
#' @param spec1,spec2 Spectra accepted by `simplifySpectrum()`.
#' @param npmin Minimum number of matched peaks required; with fewer matches
#'   the similarity is reported as 0.
#' @param fun Function, or name of a function, handed to `do.call()`. It is
#'   called with `params` plus the arguments `mz1`, `mz2`, `int1`, `int2`
#'   and must return a list with elements `matched` and `measure`.
#' @param params Named list of extra arguments for `fun`.
#' @return A list with `matched` (as returned by `fun`, or `numeric(0)` when
#'   one spectrum is empty) and `similarity` (the `measure` returned by
#'   `fun`, or 0 when a spectrum is empty or too few peaks matched).
calcDistance <-
  function(spec1,
           spec2,
           npmin = 2,
           fun = c("wcosine"),
           params = list()) {
    spec1 <- simplifySpectrum(spec1)
    spec2 <- simplifySpectrum(spec2)

    # simplifySpectrum() signals an empty spectrum with a single NA.
    # Calling is.na() on a whole table yields a matrix, which `||` rejects
    # (error since R 4.3), so test for the sentinel explicitly.
    is_empty_spectrum <- function(s) {
      is.atomic(s) && length(s) == 1L && is.na(s)
    }
    if (is_empty_spectrum(spec1) || is_empty_spectrum(spec2)) {
      return(list(matched = numeric(0), similarity = 0))
    }

    params$mz1 <- as.numeric(spec1[, BIODB.PEAK.MZ])
    params$mz2 <- as.numeric(spec2[, BIODB.PEAK.MZ])
    params$int1 <- as.numeric(spec1[, BIODB.PEAK.RELATIVE.INTENSITY])
    params$int2 <- as.numeric(spec2[, BIODB.PEAK.RELATIVE.INTENSITY])

    res <- do.call(fun, args = params)

    # Entries different from -1 are counted as matched peaks
    # (presumably -1 marks an unmatched peak; confirm against `fun`).
    n_matched <- sum(res$matched != -1)
    similarity <- if (n_matched < npmin) 0 else res$measure

    list(matched = res$matched, similarity = similarity)
  }
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
49 |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
50 |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
51 |
|
45e985cd8e9e
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff
changeset
|
#' Compare one spectrum against every spectrum of a library.
#'
#' The returned `similarity` and `matched` elements are NOT sorted: they
#' follow the order of `libspec`. Use `ord` to rank them.
#'
#' @param spec Query spectrum (data.frame or matrix, one row per peak).
#' @param libspec List of library spectra, each accepted by `calcDistance()`.
#' @param npmin Minimum number of matched peaks, forwarded to
#'   `calcDistance()`.
#' @param fun Distance function (or its name), forwarded to `calcDistance()`.
#' @param params Named list of extra arguments for `fun`.
#' @param decreasing Passed to `order()`: TRUE ranks the highest similarity
#'   first.
#' @return `NULL` when `libspec` or `spec` is empty. Otherwise a list with
#'   `ord` (indices of `libspec` ranked by similarity), `matched` (list of
#'   the `matched` element of each comparison) and `similarity` (numeric
#'   vector, one value per library spectrum).
compareSpectra <-
  function(spec,
           libspec,
           npmin = 2,
           fun = BIODB.MSMS.DIST.WCOSINE,
           params = list(),
           decreasing = TRUE) {
    if (length(libspec) == 0) {
      return(NULL)
    }
    # length() guards against a NULL query, for which nrow() is NULL.
    if (length(spec) == 0 || nrow(spec) == 0) {
      return(NULL)
    }

    # Each library spectrum is supplied as `spec2`, the first argument of
    # calcDistance() that is not given by name. `npmin` has to be passed
    # explicitly, otherwise calcDistance() silently uses its own default.
    comparisons <- lapply(
      libspec,
      calcDistance,
      spec1 = spec,
      npmin = npmin,
      params = params,
      fun = fun
    )

    similarity <- vapply(
      comparisons,
      function(one) one[["similarity"]],
      numeric(1)
    )
    matched <- lapply(comparisons, function(one) one[["matched"]])

    list(
      ord = order(similarity, decreasing = decreasing),
      matched = matched,
      similarity = similarity
    )
  }
