comparison biodb-common.R @ 1:45e985cd8e9e draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
author prog
date Tue, 31 Jan 2017 05:27:24 -0500
parents 3afe41d3e9e7
children
comparison
equal deleted inserted replaced
0:3afe41d3e9e7 1:45e985cd8e9e
1 if ( ! exists('RBIODB.COMPOUND')) { # Do not load again if already loaded 1 if ( ! exists('BIODB.XML')) {
2
3 ###############
4 # CACHE MODES #
5 ###############
6
7 BIODB.CACHE.READ.ONLY <- 'read-only'
8 BIODB.CACHE.READ.WRITE <- 'read-write'
9 BIODB.CACHE.WRITE.ONLY <- 'write-only'
10
11 #######################
12 # ENTRY CONTENT TYPES #
13 #######################
14
15 BIODB.HTML <- 'html'
16 BIODB.TXT <- 'txt'
17 BIODB.XML <- 'xml'
18 BIODB.CSV <- 'csv'
19 BIODB.DATAFRAME <- 'dataframe'
20 BIODB.JSON <- 'json'
2 21
3 ############# 22 #############
4 # CONSTANTS # 23 # DATABASES #
5 ############# 24 #############
6 25
7 # Entry types 26 BIODB.CHEBI <- 'chebi'
8 RBIODB.COMPOUND <- 'compound' 27 BIODB.KEGG <- 'kegg'
9 RBIODB.SPECTRUM <- 'spectrum' 28 BIODB.PUBCHEMCOMP <- 'pubchemcomp' # Compound database
10 29 BIODB.PUBCHEMSUB <- 'pubchemsub' # Substance database
11 # Entry content types 30 BIODB.HMDB <- 'hmdb'
12 RBIODB.HTML <- 'html' 31 BIODB.CHEMSPIDER <- 'chemspider'
13 RBIODB.TXT <- 'txt' 32 BIODB.ENZYME <- 'enzyme'
14 RBIODB.XML <- 'xml' 33 BIODB.LIPIDMAPS <- 'lipidmaps'
15 RBIODB.CSV <- 'csv' 34 BIODB.MIRBASE <- 'mirbase'
16 RBIODB.ANY <- 'any' 35 BIODB.NCBIGENE <- 'ncbigene'
17 36 BIODB.NCBICCDS <- 'ncbiccds'
18 # Class names 37 BIODB.UNIPROT <- 'uniprot'
19 RBIODB.CHEBI <- 'chebi' 38 BIODB.MASSBANK <- 'massbank'
20 RBIODB.KEGG <- 'kegg' 39 BIODB.MASSFILEDB <- 'massfiledb'
21 RBIODB.PUBCHEM <- 'pubchem' 40 BIODB.PEAKFOREST <- 'peakforest'
22 RBIODB.HMDB <- 'hmdb' 41
23 RBIODB.CHEMSPIDER <- 'chemspider' 42 BIODB.DATABASES <- c(BIODB.CHEBI, BIODB.KEGG, BIODB.PUBCHEMCOMP, BIODB.PUBCHEMSUB, BIODB.HMDB, BIODB.CHEMSPIDER, BIODB.ENZYME, BIODB.LIPIDMAPS, BIODB.MIRBASE, BIODB.NCBIGENE, BIODB.NCBICCDS, BIODB.UNIPROT, BIODB.MASSBANK, BIODB.MASSFILEDB, BIODB.PEAKFOREST)
24 RBIODB.ENZYME <- 'enzyme' 43
25 RBIODB.LIPIDMAPS <- 'lipidmaps' 44 ##########
26 RBIODB.MIRBASE <- 'mirbase' 45 # FIELDS #
27 RBIODB.NCBIGENE <- 'ncbigene' 46 ##########
28 RBIODB.NCBICCDS <- 'ncbiccds' 47
29 RBIODB.UNIPROT <- 'uniprot' 48 BIODB.ACCESSION <- 'accession'
30 RBIODB.MASSBANK <- 'massbank' 49 BIODB.DESCRIPTION <- 'description'
31 50 BIODB.PROTEIN.DESCRIPTION <- 'protdesc'
32 # Fields 51 BIODB.NAME <- 'name'
33 RBIODB.COMPOUND <- 'compound' 52 BIODB.COMP.IUPAC.NAME.ALLOWED <- 'comp.iupac.name.allowed'
34 RBIODB.ACCESSION <- 'accession' 53 BIODB.COMP.IUPAC.NAME.TRAD <- 'comp.iupac.name.trad'
35 RBIODB.DESCRIPTION <- 'description' 54 BIODB.COMP.IUPAC.NAME.SYST <- 'comp.iupac.name.syst'
36 RBIODB.PROTEIN.DESCRIPTION <- 'protdesc' 55 BIODB.COMP.IUPAC.NAME.PREF <- 'comp.iupac.name.pref'
37 RBIODB.NAME <- 'name' 56 BIODB.COMP.IUPAC.NAME.CAS <- 'comp.iupac.name.cas'
38 RBIODB.FULLNAMES <- 'fullnames' 57 BIODB.FULLNAMES <- 'fullnames'
39 RBIODB.SYNONYMS <- 'synonyms' 58 BIODB.SYNONYMS <- 'synonyms'
40 RBIODB.SYMBOL <- 'symbol' 59 BIODB.SYMBOL <- 'symbol'
41 RBIODB.GENE.SYMBOLS <- 'genesymbols' 60 BIODB.GENE.SYMBOLS <- 'genesymbols'
42 RBIODB.CHEBI.ID <- 'chebiid' 61 BIODB.CHEBI.ID <- 'chebiid'
43 RBIODB.LIPIDMAPS.ID <- 'lipidmapsid' 62 BIODB.LIPIDMAPS.ID <- 'lipidmapsid'
44 RBIODB.KEGG.ID <- 'keggid' 63 BIODB.KEGG.ID <- 'keggid'
45 RBIODB.HMDB.ID <- 'hmdbid' 64 BIODB.HMDB.ID <- 'hmdbid'
46 RBIODB.ENZYME.ID <- 'enzymeid' 65 BIODB.ENZYME.ID <- 'enzymeid'
47 RBIODB.NCBI.CCDS.ID <- 'ncbiccdsid' 66 BIODB.NCBI.CCDS.ID <- 'ncbiccdsid'
48 RBIODB.NCBI.GENE.ID <- 'ncbigeneid' 67 BIODB.NCBI.GENE.ID <- 'ncbigeneid'
49 RBIODB.PUBCHEM.ID <- 'pubchemid' 68 BIODB.PUBCHEMCOMP.ID <- 'pubchemcompid'
50 RBIODB.UNIPROT.ID <- 'uniprotid' 69 BIODB.PUBCHEMSUB.ID <- 'pubchemsubid'
51 RBIODB.INCHI <- 'inchi' 70 BIODB.CHEMSPIDER.ID <- 'chemspiderid'
52 RBIODB.INCHIKEY <- 'inchikey' 71 BIODB.UNIPROT.ID <- 'uniprotid'
53 RBIODB.MSDEV <- 'msdev' 72 BIODB.CAS.ID <- 'casid'
54 RBIODB.MSDEVTYPE <- 'msdevtype' 73 BIODB.PEAKFOREST.ID <- 'peakforestid'
55 RBIODB.MSTYPE <- 'mstype' 74 BIODB.SMILES <- 'smiles'
56 RBIODB.MSMODE <- 'msmode' 75 BIODB.INCHI <- 'inchi'
57 RBIODB.MSPRECMZ <- 'msprecmz' # numeric 76 BIODB.INCHIKEY <- 'inchikey'
58 RBIODB.MSPRECANNOT <- 'msprecannot' 77 BIODB.MSDEV <- 'msdev'
59 RBIODB.FORMULA <- 'formula' 78 BIODB.MSDEVTYPE <- 'msdevtype'
60 RBIODB.SUPER.CLASS <- 'superclass' 79 BIODB.MSTYPE <- 'mstype'
61 RBIODB.MASS <- 'mass' 80 BIODB.MSMODE <- 'msmode'
62 RBIODB.AVERAGE.MASS <- 'averagemass' 81 BIODB.MSPRECMZ <- 'msprecmz' # numeric
63 RBIODB.MONOISOTOPIC.MASS <- 'monoisotopicmass' 82 BIODB.MSPRECANNOT <- 'msprecannot'
64 RBIODB.SEQUENCE <- 'sequence' 83 BIODB.FORMULA <- 'formula'
65 RBIODB.LOCATION <- 'location' 84 BIODB.SUPER.CLASS <- 'superclass'
66 RBIODB.LENGTH <- 'length' 85 BIODB.MASS <- 'mass'
67 RBIODB.NB.PEAKS <- 'nbpeaks' 86 BIODB.AVERAGE.MASS <- 'averagemass'
68 RBIODB.NB.PEAKS <- 'nbpeaks' 87 BIODB.MONOISOTOPIC.MASS <- 'monoisotopicmass'
69 RBIODB.PEAKS <- 'peaks' 88 BIODB.SEQUENCE <- 'sequence'
89 BIODB.LOCATION <- 'location'
90 BIODB.LENGTH <- 'length'
91 BIODB.NB.PEAKS <- 'nbpeaks'
92 BIODB.PEAKS <- 'peaks'
93 BIODB.COMPOUNDS <- 'compounds'
94 BIODB.NB.COMPOUNDS <- 'nbcompounds'
95 BIODB.COMPOUND.ID <- 'compoundid'
96 BIODB.COMPOUND.MASS <- 'compoundmass'
97 BIODB.COMPOUND.COMP <- 'compoundcomp'
98 BIODB.CHROM.COL <- 'chromcol' # Chromatographic column
99 BIODB.CHROM.COL.RT <- 'chromcolrt' # Retention time measured on chromatographic column
100 BIODB.ID <- 'id'
101 BIODB.TITLE <- 'title'
102 BIODB.PEAK.MZ <- 'mz'
103 BIODB.PEAK.RT <- 'rt'
104 BIODB.PEAK.MZEXP <- 'mzexp'
105 BIODB.PEAK.MZTHEO <- 'mztheo'
106 BIODB.PEAK.FORMULA <- 'formula'
107 BIODB.PEAK.FORMULA.COUNT <- 'formula.count'
108 BIODB.PEAK.COMP <- 'peakcomp' # Peak composition
109 BIODB.PEAK.ATTR <- 'peakattr' # Peak attribution
110 BIODB.PEAK.MASS <- 'mass'
111 # BIODB.PEAK.ATTR <- 'attr'
112 BIODB.PEAK.ERROR.PPM <- 'error.ppm'
113 BIODB.PEAK.INTENSITY <- 'intensity'
114 BIODB.PEAK.RELATIVE.INTENSITY <- 'relative.intensity'
70 115
71 # Mode values 116 # Mode values
72 RBIODB.MSMODE.NEG <- 'neg' 117 BIODB.MSMODE.NEG <- 'neg'
73 RBIODB.MSMODE.POS <- 'pos' 118 BIODB.MSMODE.POS <- 'pos'
74 119
75 # Cardinalities 120 # Tolerance values
76 RBIODB.CARD.ONE <- '1' 121 BIODB.TOL <- 'mztol'
77 RBIODB.CARD.MANY <- '*' 122 BIODB.MZTOLUNIT.PPM <- 'ppm'
78 123 BIODB.MZTOLUNIT.PLAIN <- 'plain' # same as mz: mass-to-charge ratio
79 # Field attributes 124 BIODB.MZTOLUNIT.VALS <- c(BIODB.MZTOLUNIT.PPM, BIODB.MZTOLUNIT.PLAIN)
80 RBIODB.FIELDS <- data.frame(matrix(c( 125
81 # FIELD NAME CLASS CARDINALITY 126 ########################
82 RBIODB.COMPOUND, 'BiodEntry', RBIODB.CARD.ONE, 127 # MS-MS MEASURE VALUES #
83 RBIODB.ACCESSION, 'character', RBIODB.CARD.ONE, 128 ########################
84 RBIODB.DESCRIPTION, 'character', RBIODB.CARD.ONE, 129
85 RBIODB.NAME, 'character', RBIODB.CARD.ONE, 130 BIODB.MSMS.DIST.COS <- "cosine"
86 RBIODB.FULLNAMES, 'character', RBIODB.CARD.MANY, 131 BIODB.MSMS.DIST.WCOSINE <- "wcosine"
87 RBIODB.SYNONYMS, 'character', RBIODB.CARD.MANY, 132 BIODB.MSMS.DIST.PKERNEL <- "pkernel"
88 RBIODB.PROTEIN.DESCRIPTION, 'character', RBIODB.CARD.ONE, 133 BIODB.MSMS.DIST <- c(BIODB.MSMS.DIST.COS, BIODB.MSMS.DIST.WCOSINE, BIODB.MSMS.DIST.PKERNEL)
89 RBIODB.SYMBOL, 'character', RBIODB.CARD.ONE, 134
90 RBIODB.GENE.SYMBOLS, 'character', RBIODB.CARD.MANY, 135
91 RBIODB.CHEBI.ID, 'character', RBIODB.CARD.ONE, 136 #################
92 RBIODB.LIPIDMAPS.ID, 'character', RBIODB.CARD.ONE, 137 # CARDINALITIES #
93 RBIODB.KEGG.ID, 'character', RBIODB.CARD.ONE, 138 #################
94 RBIODB.HMDB.ID, 'character', RBIODB.CARD.ONE, 139
95 RBIODB.ENZYME.ID, 'character', RBIODB.CARD.ONE, 140 BIODB.CARD.ONE <- '1'
96 RBIODB.PUBCHEM.ID, 'character', RBIODB.CARD.ONE, 141 BIODB.CARD.MANY <- '*'
97 RBIODB.UNIPROT.ID, 'character', RBIODB.CARD.ONE, 142
98 RBIODB.NCBI.CCDS.ID, 'character', RBIODB.CARD.ONE, 143 #####################
99 RBIODB.NCBI.GENE.ID, 'character', RBIODB.CARD.ONE, 144 #INTENSITY NOTATIONS#
100 RBIODB.INCHI, 'character', RBIODB.CARD.ONE, 145 #####################
101 RBIODB.INCHIKEY, 'character', RBIODB.CARD.ONE, 146
102 RBIODB.MSDEV, 'character', RBIODB.CARD.ONE, 147 BIODB.GROUP.INTENSITY<-c(BIODB.PEAK.INTENSITY,BIODB.PEAK.RELATIVE.INTENSITY)
103 RBIODB.MSDEVTYPE, 'character', RBIODB.CARD.ONE, 148
104 RBIODB.MSTYPE, 'character', RBIODB.CARD.ONE, 149 ##########################
105 RBIODB.MSMODE, 'character', RBIODB.CARD.ONE, 150 # ENTRY FIELD ATTRIBUTES #
106 RBIODB.MSPRECMZ, 'double', RBIODB.CARD.ONE, 151 ##########################
107 RBIODB.MSPRECANNOT, 'character', RBIODB.CARD.ONE, 152 # FIELD NAME CLASS CARDINALITY TYPE
108 RBIODB.FORMULA, 'character', RBIODB.CARD.ONE, 153 BIODB.FIELDS <- data.frame(matrix(c(
109 RBIODB.SUPER.CLASS, 'character', RBIODB.CARD.ONE, 154 BIODB.ACCESSION, 'character', BIODB.CARD.ONE, 'none',
110 RBIODB.MASS, 'double', RBIODB.CARD.ONE, 155 BIODB.DESCRIPTION, 'character', BIODB.CARD.ONE, 'none',
111 RBIODB.AVERAGE.MASS, 'double', RBIODB.CARD.ONE, 156 BIODB.NAME, 'character', BIODB.CARD.ONE, 'name',
112 RBIODB.MONOISOTOPIC.MASS, 'double', RBIODB.CARD.ONE, 157 BIODB.COMP.IUPAC.NAME.ALLOWED, 'character', BIODB.CARD.ONE, 'name',
113 RBIODB.SEQUENCE, 'character', RBIODB.CARD.ONE, 158 BIODB.COMP.IUPAC.NAME.TRAD, 'character', BIODB.CARD.ONE, 'name',
114 RBIODB.LENGTH, 'integer', RBIODB.CARD.ONE, 159 BIODB.COMP.IUPAC.NAME.SYST, 'character', BIODB.CARD.ONE, 'name',
115 RBIODB.LOCATION, 'character', RBIODB.CARD.ONE, 160 BIODB.COMP.IUPAC.NAME.PREF, 'character', BIODB.CARD.ONE, 'name',
116 RBIODB.NB.PEAKS, 'integer', RBIODB.CARD.ONE, 161 BIODB.COMP.IUPAC.NAME.CAS, 'character', BIODB.CARD.ONE, 'name',
117 RBIODB.PEAKS, 'data.frame', RBIODB.CARD.ONE 162 BIODB.FULLNAMES, 'character', BIODB.CARD.MANY, 'name',
118 ), byrow = TRUE, ncol = 3), stringsAsFactors = FALSE) 163 BIODB.SYNONYMS, 'character', BIODB.CARD.MANY, 'name',
119 colnames(RBIODB.FIELDS) <- c('name', 'class', 'cardinality') 164 BIODB.PROTEIN.DESCRIPTION, 'character', BIODB.CARD.ONE, 'none',
120 165 BIODB.SYMBOL, 'character', BIODB.CARD.ONE, 'none',
121 # How to compute a missing field ? 166 BIODB.GENE.SYMBOLS, 'character', BIODB.CARD.MANY, 'none',
122 RBIODB.FIELD.COMPUTING <- list() 167 BIODB.NB.COMPOUNDS, 'integer', BIODB.CARD.ONE, 'none',
123 RBIODB.FIELD.COMPUTING[[RBIODB.INCHI]] <- c(RBIODB.CHEBI) 168 BIODB.COMPOUNDS, 'object', BIODB.CARD.MANY, 'none',
124 RBIODB.FIELD.COMPUTING[[RBIODB.INCHIKEY]] <- c(RBIODB.CHEBI) 169 BIODB.CHEBI.ID, 'character', BIODB.CARD.ONE, 'none',
125 RBIODB.FIELD.COMPUTING[[RBIODB.SEQUENCE]] <- c(RBIODB.NCBICCDS) 170 BIODB.LIPIDMAPS.ID, 'character', BIODB.CARD.ONE, 'none',
126 171 BIODB.KEGG.ID, 'character', BIODB.CARD.ONE, 'none',
127 # Peaks data frame columns 172 BIODB.HMDB.ID, 'character', BIODB.CARD.ONE, 'none',
128 RBIODB.PEAK.MZ <- 'mz' 173 BIODB.ENZYME.ID, 'character', BIODB.CARD.ONE, 'none',
129 RBIODB.PEAK.FORMULA <- 'formula' 174 BIODB.PUBCHEMCOMP.ID, 'character', BIODB.CARD.ONE, 'none',
130 RBIODB.PEAK.FORMULA.COUNT <- 'formula.count' 175 BIODB.PUBCHEMSUB.ID, 'character', BIODB.CARD.ONE, 'none',
131 RBIODB.PEAK.MASS <- 'mass' 176 BIODB.PEAKFOREST.ID, 'character', BIODB.CARD.ONE, 'none',
132 RBIODB.PEAK.ERROR.PPM <- 'error.ppm' 177 BIODB.UNIPROT.ID, 'character', BIODB.CARD.ONE, 'none',
133 RBIODB.PEAK.INTENSITY <- 'intensity' 178 BIODB.NCBI.CCDS.ID, 'character', BIODB.CARD.ONE, 'none',
134 RBIODB.PEAK.RELATIVE.INTENSITY <- 'relative.intensity' 179 BIODB.NCBI.GENE.ID, 'character', BIODB.CARD.ONE, 'none',
135 RBIODB.PEAK.DF.EXAMPLE <- data.frame(mz = double(), int = double(), rel.int = integer(), formula = character(), formula.count <- integer(), mass = double(), error = double(), stringsAsFactors = FALSE) 180 BIODB.INCHI, 'character', BIODB.CARD.ONE, 'none',
136 colnames(RBIODB.PEAK.DF.EXAMPLE) <- c(RBIODB.PEAK.MZ, RBIODB.PEAK.INTENSITY, RBIODB.PEAK.RELATIVE.INTENSITY, RBIODB.PEAK.FORMULA, RBIODB.PEAK.FORMULA.COUNT, RBIODB.PEAK.MASS, RBIODB.PEAK.ERROR.PPM) 181 BIODB.INCHIKEY, 'character', BIODB.CARD.ONE, 'none',
182 BIODB.MSDEV, 'character', BIODB.CARD.ONE, 'none',
183 BIODB.MSDEVTYPE, 'character', BIODB.CARD.ONE, 'none',
184 BIODB.MSTYPE, 'character', BIODB.CARD.ONE, 'none',
185 BIODB.MSMODE, 'character', BIODB.CARD.ONE, 'none',
186 BIODB.MSPRECMZ, 'double', BIODB.CARD.ONE, 'none',
187 BIODB.PEAK.MZTHEO, 'double', BIODB.CARD.ONE, 'none',
188 BIODB.MSPRECANNOT, 'character', BIODB.CARD.ONE, 'none',
189 BIODB.FORMULA, 'character', BIODB.CARD.ONE, 'none',
190 BIODB.SUPER.CLASS, 'character', BIODB.CARD.ONE, 'none',
191 BIODB.MASS, 'double', BIODB.CARD.ONE, 'none',
192 BIODB.AVERAGE.MASS, 'double', BIODB.CARD.ONE, 'none',
193 BIODB.MONOISOTOPIC.MASS, 'double', BIODB.CARD.ONE, 'none',
194 BIODB.SEQUENCE, 'character', BIODB.CARD.ONE, 'none',
195 BIODB.LENGTH, 'integer', BIODB.CARD.ONE, 'none',
196 BIODB.LOCATION, 'character', BIODB.CARD.ONE, 'none',
197 BIODB.NB.PEAKS, 'integer', BIODB.CARD.ONE, 'none',
198 BIODB.PEAKS, 'data.frame', BIODB.CARD.ONE, 'none',
199 BIODB.SMILES, 'character', BIODB.CARD.ONE, 'none',
200 BIODB.CHEMSPIDER.ID, 'character', BIODB.CARD.ONE, 'none',
201 BIODB.CAS.ID, 'character', BIODB.CARD.ONE, 'none'
202 ), byrow = TRUE, ncol = 4), stringsAsFactors = FALSE)
203 colnames(BIODB.FIELDS) <- c('name', 'class', 'cardinality', 'type')
204
205 #########################
206 # GET DATABASE ID FIELD #
207 #########################
208
# Return the name of the entry field that stores the ID for a database.
#
# database  A database identifier, compared against BIODB.DATABASES.
#
# Returns NA_character_ when `database` is not listed in BIODB.DATABASES.
# Otherwise returns the database identifier suffixed with 'id'.
# Stops with an error if that field name is not declared in BIODB.FIELDS.
biodb.get.database.id.field <- function(database) {

  # Unknown database: there is no ID field to report
  if ( ! database %in% BIODB.DATABASES)
    return(NA_character_)

  # ID fields are named after the database, e.g. 'chebi' -> 'chebiid'
  field.name <- paste0(database, 'id')
  declared.fields <- BIODB.FIELDS[['name']]
  if ( ! field.name %in% declared.fields)
    stop(paste0('No ID field defined for database ', database, '.'))

  field.name
}
221
222 #####################
223 # COMPUTABLE FIELDS #
224 #####################
225
226 BIODB.FIELD.COMPUTING <- list()
227 BIODB.FIELD.COMPUTING[[BIODB.INCHI]] <- c(BIODB.CHEBI)
228 BIODB.FIELD.COMPUTING[[BIODB.INCHIKEY]] <- c(BIODB.CHEBI)
229 BIODB.FIELD.COMPUTING[[BIODB.SEQUENCE]] <- c(BIODB.NCBICCDS)
230
231 ####################
232 # PEAKS DATA FRAME #
233 ####################
234
# Example: an empty (zero-row) peaks data frame.
# The placeholder column names given to data.frame() are replaced right after
# by the BIODB.PEAK.* constants.
# Every column is passed as a named argument with `=`. Using `<-` there would
# perform an assignment and create a stray `formula.count` variable in the
# enclosing environment.
BIODB.PEAK.DF.EXAMPLE <- data.frame(mz = double(), int = double(), rel.int = integer(), formula = character(), formula.count = integer(), mass = double(), error = double(), stringsAsFactors = FALSE)
colnames(BIODB.PEAK.DF.EXAMPLE) <- c(BIODB.PEAK.MZ, BIODB.PEAK.INTENSITY, BIODB.PEAK.RELATIVE.INTENSITY, BIODB.PEAK.FORMULA, BIODB.PEAK.FORMULA.COUNT, BIODB.PEAK.MASS, BIODB.PEAK.ERROR.PPM)
137 238
138 ################# 239 #################
139 # GET ENTRY URL # 240 # GET ENTRY URL #
140 ################# 241 #################
141 242
142 # TODO Allow choosing between the jp and eu servers 243 # TODO Allow choosing between the jp and eu servers
143 RBIODB.MASSBANK.JP.WS.URL <- "http://www.massbank.jp/api/services/MassBankAPI/getRecordInfo" 244 BIODB.MASSBANK.JP.WS.URL <- "http://www.massbank.jp/api/services/MassBankAPI/"
144 RBIODB.MASSBANK.EU.WS.URL <- "http://massbank.eu/api/services/MassBankAPI/getRecordInfo" 245 BIODB.MASSBANK.EU.WS.URL <- "http://massbank.eu/api/services/MassBankAPI/"
145 246
# Build the URL for fetching one or several entries from a database.
#
# class         Database identifier (one of the names used in the switch below).
# accession     Character vector of entry accession ids. Only massbank,
#               chemspider, pubchemcomp, pubchemsub and peakforest accept more
#               than one id.
# content.type  Requested content type ('html', 'txt', 'xml', 'csv', 'json').
# base.url      Optional replacement for the MassBank web service base URL.
# token         Optional access token (used by chemspider and peakforest).
#
# Returns the URL(s) as a character vector, or NULL when no URL is defined for
# the class / content type pair. Stops with an error when several accession ids
# are given for a class that does not support it.
.do.get.entry.url <- function(class, accession, content.type = BIODB.HTML, base.url = NA_character_, token = NA_character_) {

  # Only certain databases can handle multiple accession ids
  if ( ! class %in% c(BIODB.MASSBANK, BIODB.CHEMSPIDER, BIODB.PUBCHEMCOMP, BIODB.PUBCHEMSUB, BIODB.PEAKFOREST) && length(accession) > 1)
    stop(paste0("Cannot build a URL for getting multiple entries for class ", class, "."))

  # Get URL
  url <- switch(class,
    chebi = if (content.type == BIODB.HTML) paste0('https://www.ebi.ac.uk/chebi/searchId.do?chebiId=', accession) else NULL,
    chemspider = {
      # The token parameter is appended only when a token was supplied
      token.param <- if (is.na(token)) '' else paste('&token', token, sep = '=')
      switch(content.type,
        html = paste0('http://www.chemspider.com/Chemical-Structure.', accession, '.html'),
        xml = paste0('http://www.chemspider.com/MassSpecAPI.asmx/GetExtendedCompoundInfoArray?', paste(paste0('CSIDs=', accession), collapse = '&'), token.param),
        NULL)
    },
    enzyme = if (content.type == BIODB.TXT) paste0('http://enzyme.expasy.org/EC/', accession, '.txt') else NULL,
    hmdb = switch(content.type,
      xml = paste0('http://www.hmdb.ca/metabolites/', accession, '.xml'),
      html = paste0('http://www.hmdb.ca/metabolites/', accession),
      NULL),
    kegg = switch(content.type,
      txt = paste0('http://rest.kegg.jp/get/', accession),
      html = paste0('http://www.genome.jp/dbget-bin/www_bget?cpd:', accession),
      NULL),
    lipidmaps = if (content.type == BIODB.CSV) paste0('http://www.lipidmaps.org/data/LMSDRecord.php?Mode=File&LMID=', accession, '&OutputType=CSV&OutputQuote=No') else NULL,
    massbank = if (content.type == BIODB.TXT) paste0((if (is.na(base.url)) BIODB.MASSBANK.EU.WS.URL else base.url), 'getRecordInfo?ids=', paste(accession, collapse = ',')) else NULL,
    mirbase = if (content.type == BIODB.HTML) paste0('http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=', accession) else NULL,
    pubchemcomp = switch(content.type,
      xml = paste0('https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/', paste(accession, collapse = ','), '/XML'),
      html = paste0('http://pubchem.ncbi.nlm.nih.gov/compound/', accession),
      NULL),
    pubchemsub = switch(content.type,
      xml = paste0('https://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/sid/', paste(accession, collapse = ','), '/XML'),
      html = paste0('http://pubchem.ncbi.nlm.nih.gov/substance/', accession),
      NULL),
    ncbigene = if (content.type == BIODB.XML) paste0('https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=', accession, '&rettype=xml&retmode=text') else NULL,
    ncbiccds = if (content.type == BIODB.HTML) paste0('https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&GO=MainBrowse&DATA=', accession) else NULL,
    uniprot = if (content.type == BIODB.XML) paste0('http://www.uniprot.org/uniprot/', accession, '.xml') else NULL,
    peakforest = {
      # Omit the token parameter when no token was supplied, instead of
      # emitting a literal 'token=NA'
      token.param <- if (is.na(token)) '' else paste0('?token=', token)
      switch(content.type,
        html = paste0('https://peakforest.org/home?PFs=', accession),
        # Ids are joined with commas into a single request: `collapse`, not
        # `sep`, is what joins the elements of one vector
        json = paste0('https://peakforest-alpha.inra.fr/rest/spectra/lcms/ids/', paste(accession, collapse = ','), token.param),
        NULL)
    }
  )

  return(url)
}
295
# Build the URL for fetching entries, optionally limited to a maximum length.
#
# class         Database identifier, passed on to .do.get.entry.url().
# accession     Vector of accession ids.
# content.type  Requested content type, passed on to .do.get.entry.url().
# max.length    Maximum number of characters allowed for the URL. 0 means no
#               limit.
# base.url      Passed on to .do.get.entry.url().
# token         Passed on to .do.get.entry.url().
#
# Returns NULL when `accession` is empty or when .do.get.entry.url() yields no
# URL. When max.length is 0, returns the URL itself. Otherwise returns a list
# with the URL (`url`) and the number of leading accession ids it covers (`n`).
get.entry.url <- function(class, accession, content.type = BIODB.HTML, max.length = 0, base.url = NA_character_, token = NA_character_) {

  # Nothing to fetch
  if (length(accession) == 0)
    return(NULL)

  build.url <- function(ids) .do.get.entry.url(class, ids, content.type = content.type, base.url = base.url, token = token)

  # all() keeps the condition scalar even if several URLs are returned
  fits <- function(u) all(nchar(u) <= max.length)

  full.url <- build.url(accession)

  # No length limit requested
  if (max.length == 0)
    return(full.url)

  # No URL exists for this class / content type: nothing to measure
  if (is.null(full.url))
    return(NULL)

  if (fits(full.url))
    return(list(url = full.url, n = length(accession)))

  # Find max size URL by bisection on the number of leading accession ids.
  # `hi` always designates a count whose URL is too long.
  lo <- 1L
  hi <- length(accession)
  while (lo < hi) {
    mid <- (lo + hi) %/% 2L
    if (mid != lo && fits(build.url(accession[seq_len(mid)])))
      lo <- mid
    else
      hi <- mid
  }

  # If even a single id does not fit, its (too long) URL is still returned
  list(url = build.url(accession[seq_len(lo)]), n = lo)
}
320
321 #################
322 # PRINT MESSAGE #
323 #################
324
# Message levels: each level is an index into BIODB.LEVEL.NAMES.
BIODB.DEBUG <- 1
BIODB.LEVEL.NAMES <- c('DEBUG')

# Write a message to the standard error stream.
#
# msg    The text of the message.
# level  Index of the level name inside BIODB.LEVEL.NAMES.
# class  Optional label appended to the level name; skipped when NA.
#
# The line written has the form "<LEVEL>[, <class>]: <msg>".
.print.msg <- function(msg, level = BIODB.DEBUG, class = NA_character_) {
  prefix <- BIODB.LEVEL.NAMES[[level]]
  if ( ! is.na(class))
    prefix <- paste0(prefix, ", ", class)
  cat(paste0(prefix, ": ", msg, "\n"), file = stderr())
}
331
332 #####################
333 # BIODB GET ENV VAR #
334 #####################
335
# Read a BIODB_* environment variable.
#
# v  Character vector of name parts. They are upper-cased and joined with '_'
#    behind the 'BIODB' prefix, e.g. c('cache', 'dir') -> 'BIODB_CACHE_DIR'.
#
# Returns the value of the variable, or NA_character_ when it is not set.
.biodb.get.env.var <- function(v) {

  # Make env var name
  var.name <- paste(c('BIODB', toupper(v)), collapse = '_')

  # Query that single variable; `unset` supplies the value for a missing one
  Sys.getenv(var.name, unset = NA_character_)
}
181 } 350 }