annotate KeggEntry.R @ 5:18254e8d1b72 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 947b8707b06176a4801de64a71c8771617311ffb-dirty
author prog
date Thu, 16 Mar 2017 08:15:11 -0400
parents 45e985cd8e9e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
1 #####################
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
2 # CLASS DECLARATION #
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
3 #####################
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
4
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
5 KeggEntry <- methods::setRefClass("KeggEntry", contains = 'BiodbEntry')
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
6
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
7 ###########
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
8 # FACTORY #
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
9 ###########
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
10
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
11 createKeggEntryFromTxt <- function(contents, drop = TRUE) {
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
12
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
13 entries <- list()
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
14
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
15 # Define fields regex
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
16 regex <- character()
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
17 regex[[BIODB.NAME]] <- "^NAME\\s+([^,;]+)"
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
18 regex[[BIODB.CHEBI.ID]] <- "^\\s+ChEBI:\\s+(\\S+)"
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
19 regex[[BIODB.LIPIDMAPS.ID]] <- "^\\s+LIPIDMAPS:\\s+(\\S+)"
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
20
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
21 for (text in contents) {
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
22
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
23 # Create instance
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
24 entry <- KeggEntry$new()
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
25
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
26 lines <- strsplit(text, "\n")
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
27 for (s in lines[[1]]) {
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
28
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
29 # Test generic regex
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
30 parsed <- FALSE
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
31 for (field in names(regex)) {
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
32 g <- stringr::str_match(s, regex[[field]])
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
33 if ( ! is.na(g[1,1])) {
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
34 entry$setField(field, g[1,2])
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
35 parsed <- TRUE
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
36 break
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
37 }
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
38 }
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
39 if (parsed)
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
40 next
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
41
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
42 # ACCESSION
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
43 {
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
44 # ENZYME ID
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
45 g <- stringr::str_match(s, "^ENTRY\\s+EC\\s+(\\S+)")
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
46 if ( ! is.na(g[1,1])){
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
47 entry$setField(BIODB.ACCESSION, paste('ec', g[1,2], sep = ':'))
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
48
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
49 # ENTRY ID
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
50 }else {
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
51 g <- stringr::str_match(s, "^ENTRY\\s+(\\S+)\\s+Compound")
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
52 if ( ! is.na(g[1,1])){
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
53 entry$setField(BIODB.ACCESSION, paste('cpd', g[1,2], sep = ':'))
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
54
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
55 # OTHER ID
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
56 }else {
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
57 g <- stringr::str_match(s, "^ENTRY\\s+(\\S+)")
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
58 if ( ! is.na(g[1,1]))
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
59 entry$setField(BIODB.ACCESSION, g[1,2])
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
60 }
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
61 }
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
62
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
63 # ORGANISM
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
64 g <- stringr::str_match(s, "^ORGANISM\\s+(\\S+)")
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
65 if ( ! is.na(g[1,1]))
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
66 entry$setField(BIODB.ACCESSION, paste(g[1,2], entry$getField(BIODB.ACCESSION), sep = ':'))
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
67 }
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
68 }
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
69
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
70 entries <- c(entries, entry)
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
71 }
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
72
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
73 # Replace elements with no accession id by NULL
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
74 entries <- lapply(entries, function(x) if (is.na(x$getField(BIODB.ACCESSION))) NULL else x)
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
75
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
76 # If the input was a single element, then output a single object
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
77 if (drop && length(contents) == 1)
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
78 entries <- entries[[1]]
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
79
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
80 return(entries)
45e985cd8e9e planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
prog
parents:
diff changeset
81 }