comparison KeggEntry.R @ 1:45e985cd8e9e draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
author prog
date Tue, 31 Jan 2017 05:27:24 -0500
parents
children
comparison
equal deleted inserted replaced
0:3afe41d3e9e7 1:45e985cd8e9e
1 #####################
2 # CLASS DECLARATION #
3 #####################
4
# Reference class representing one KEGG entry. No fields or methods are
# declared in this statement; everything is inherited from 'BiodbEntry'.
KeggEntry <- methods::setRefClass(
    Class = "KeggEntry",
    contains = "BiodbEntry"
)
6
7 ###########
8 # FACTORY #
9 ###########
10
# Build KeggEntry objects from the text of KEGG entries.
#
# contents  Character vector (or list of strings), one text per entry. The
#           lines of each text are separated by "\n".
# drop      If TRUE and `contents` holds exactly one element, the single
#           result is returned directly instead of inside a list.
#
# Returns a list with one element per input text, in input order. Each element
# is a KeggEntry instance, or NULL when no accession could be read from the
# corresponding text. When `drop` applies, that single element (possibly NULL)
# is returned on its own.
createKeggEntryFromTxt <- function(contents, drop = TRUE) {

    # Fields read with a single capture group: field name -> regex
    regex <- character()
    regex[[BIODB.NAME]] <- "^NAME\\s+([^,;]+)"
    regex[[BIODB.CHEBI.ID]] <- "^\\s+ChEBI:\\s+(\\S+)"
    regex[[BIODB.LIPIDMAPS.ID]] <- "^\\s+LIPIDMAPS:\\s+(\\S+)"

    # Value of the capture group of `pattern` in `line`, or NA when the line
    # does not match. Every pattern used below has exactly one mandatory
    # group, so NA is equivalent to "no match".
    capture <- function(line, pattern) stringr::str_match(line, pattern)[1, 2]

    # Preallocate the result instead of growing it with c() on each iteration
    entries <- vector("list", length(contents))
    index <- 0L

    for (text in contents) {

        index <- index + 1L

        # Create instance
        entry <- KeggEntry$new()

        for (s in strsplit(text, "\n")[[1]]) {

            # Generic fields: the first regex that matches the line wins
            parsed <- FALSE
            for (field in names(regex)) {
                value <- capture(s, regex[[field]])
                if ( ! is.na(value)) {
                    entry$setField(field, value)
                    parsed <- TRUE
                    break
                }
            }
            if (parsed)
                next

            # ACCESSION, from most to least specific form of the ENTRY line.
            # A line starting with ENTRY cannot also be an ORGANISM line, so
            # it is safe to move on to the next line once one form matched.

            # ENZYME ID
            id <- capture(s, "^ENTRY\\s+EC\\s+(\\S+)")
            if ( ! is.na(id)) {
                entry$setField(BIODB.ACCESSION, paste('ec', id, sep = ':'))
                next
            }

            # ENTRY ID
            id <- capture(s, "^ENTRY\\s+(\\S+)\\s+Compound")
            if ( ! is.na(id)) {
                entry$setField(BIODB.ACCESSION, paste('cpd', id, sep = ':'))
                next
            }

            # OTHER ID
            id <- capture(s, "^ENTRY\\s+(\\S+)")
            if ( ! is.na(id)) {
                entry$setField(BIODB.ACCESSION, id)
                next
            }

            # ORGANISM: prefix the accession with the organism code
            organism <- capture(s, "^ORGANISM\\s+(\\S+)")
            if ( ! is.na(organism)) {
                accession <- entry$getField(BIODB.ACCESSION)
                # Only prefix an accession that is actually set. Pasting onto
                # a missing value would store the literal string "<org>:NA",
                # which would then slip through the missing-accession filter
                # below.
                if (length(accession) == 1L && ! is.na(accession))
                    entry$setField(BIODB.ACCESSION, paste(organism, accession, sep = ':'))
            }
        }

        entries[[index]] <- entry
    }

    # Replace elements with no accession id by NULL
    entries <- lapply(entries, function(x) if (is.na(x$getField(BIODB.ACCESSION))) NULL else x)

    # If the input was a single element, then output a single object
    if (drop && length(contents) == 1)
        entries <- entries[[1]]

    entries
}