comparison biodb-common.R @ 0:3afe41d3e9e7 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit bb4d3e23d99828bfee16d31d794c49a17313ec2f
author prog
date Mon, 11 Jul 2016 09:12:03 -0400
parents
children 45e985cd8e9e
comparison
equal deleted inserted replaced
-1:000000000000 0:3afe41d3e9e7
if ( ! exists('RBIODB.COMPOUND')) { # Do not load again if already loaded

  #############
  # CONSTANTS #
  #############

  # Entry types
  RBIODB.COMPOUND <- 'compound'
  RBIODB.SPECTRUM <- 'spectrum'

  # Entry content types
  RBIODB.HTML <- 'html'
  RBIODB.TXT  <- 'txt'
  RBIODB.XML  <- 'xml'
  RBIODB.CSV  <- 'csv'
  RBIODB.ANY  <- 'any' # Accept whatever content type the database provides.

  # Class names (these are the values accepted by the `class` argument of
  # get.entry.url()).
  RBIODB.CHEBI      <- 'chebi'
  RBIODB.KEGG       <- 'kegg'
  RBIODB.PUBCHEM    <- 'pubchem'
  RBIODB.HMDB       <- 'hmdb'
  RBIODB.CHEMSPIDER <- 'chemspider'
  RBIODB.ENZYME     <- 'enzyme'
  RBIODB.LIPIDMAPS  <- 'lipidmaps'
  RBIODB.MIRBASE    <- 'mirbase'
  RBIODB.NCBIGENE   <- 'ncbigene'
  RBIODB.NCBICCDS   <- 'ncbiccds'
  RBIODB.UNIPROT    <- 'uniprot'
  RBIODB.MASSBANK   <- 'massbank'

  # Fields
  # RBIODB.COMPOUND (defined above with the entry types) is also used as a
  # field name; it is not assigned a second time here.
  RBIODB.ACCESSION           <- 'accession'
  RBIODB.DESCRIPTION         <- 'description'
  RBIODB.PROTEIN.DESCRIPTION <- 'protdesc'
  RBIODB.NAME                <- 'name'
  RBIODB.FULLNAMES           <- 'fullnames'
  RBIODB.SYNONYMS            <- 'synonyms'
  RBIODB.SYMBOL              <- 'symbol'
  RBIODB.GENE.SYMBOLS        <- 'genesymbols'
  RBIODB.CHEBI.ID            <- 'chebiid'
  RBIODB.LIPIDMAPS.ID        <- 'lipidmapsid'
  RBIODB.KEGG.ID             <- 'keggid'
  RBIODB.HMDB.ID             <- 'hmdbid'
  RBIODB.ENZYME.ID           <- 'enzymeid'
  RBIODB.NCBI.CCDS.ID        <- 'ncbiccdsid'
  RBIODB.NCBI.GENE.ID        <- 'ncbigeneid'
  RBIODB.PUBCHEM.ID          <- 'pubchemid'
  RBIODB.UNIPROT.ID          <- 'uniprotid'
  RBIODB.INCHI               <- 'inchi'
  RBIODB.INCHIKEY            <- 'inchikey'
  RBIODB.MSDEV               <- 'msdev'
  RBIODB.MSDEVTYPE           <- 'msdevtype'
  RBIODB.MSTYPE              <- 'mstype'
  RBIODB.MSMODE              <- 'msmode'
  RBIODB.MSPRECMZ            <- 'msprecmz' # numeric
  RBIODB.MSPRECANNOT         <- 'msprecannot'
  RBIODB.FORMULA             <- 'formula'
  RBIODB.SUPER.CLASS         <- 'superclass'
  RBIODB.MASS                <- 'mass'
  RBIODB.AVERAGE.MASS        <- 'averagemass'
  RBIODB.MONOISOTOPIC.MASS   <- 'monoisotopicmass'
  RBIODB.SEQUENCE            <- 'sequence'
  RBIODB.LOCATION            <- 'location'
  RBIODB.LENGTH              <- 'length'
  RBIODB.NB.PEAKS            <- 'nbpeaks'
  RBIODB.PEAKS               <- 'peaks'

  # Mode values
  RBIODB.MSMODE.NEG <- 'neg'
  RBIODB.MSMODE.POS <- 'pos'

  # Cardinalities
  RBIODB.CARD.ONE  <- '1'
  RBIODB.CARD.MANY <- '*'

  # Field attributes
  # One row per field: its name, the class of its value and its cardinality.
  # The flat character vector is folded row by row into a 3-column matrix, so
  # every row below must list exactly three values.
  # NOTE(review): 'BiodEntry' may be a misspelling of an entry class name
  # defined elsewhere -- confirm before relying on it.
  RBIODB.FIELDS <- data.frame(matrix(c(
    # FIELD NAME                CLASS         CARDINALITY
    RBIODB.COMPOUND,            'BiodEntry',  RBIODB.CARD.ONE,
    RBIODB.ACCESSION,           'character',  RBIODB.CARD.ONE,
    RBIODB.DESCRIPTION,         'character',  RBIODB.CARD.ONE,
    RBIODB.NAME,                'character',  RBIODB.CARD.ONE,
    RBIODB.FULLNAMES,           'character',  RBIODB.CARD.MANY,
    RBIODB.SYNONYMS,            'character',  RBIODB.CARD.MANY,
    RBIODB.PROTEIN.DESCRIPTION, 'character',  RBIODB.CARD.ONE,
    RBIODB.SYMBOL,              'character',  RBIODB.CARD.ONE,
    RBIODB.GENE.SYMBOLS,        'character',  RBIODB.CARD.MANY,
    RBIODB.CHEBI.ID,            'character',  RBIODB.CARD.ONE,
    RBIODB.LIPIDMAPS.ID,        'character',  RBIODB.CARD.ONE,
    RBIODB.KEGG.ID,             'character',  RBIODB.CARD.ONE,
    RBIODB.HMDB.ID,             'character',  RBIODB.CARD.ONE,
    RBIODB.ENZYME.ID,           'character',  RBIODB.CARD.ONE,
    RBIODB.PUBCHEM.ID,          'character',  RBIODB.CARD.ONE,
    RBIODB.UNIPROT.ID,          'character',  RBIODB.CARD.ONE,
    RBIODB.NCBI.CCDS.ID,        'character',  RBIODB.CARD.ONE,
    RBIODB.NCBI.GENE.ID,        'character',  RBIODB.CARD.ONE,
    RBIODB.INCHI,               'character',  RBIODB.CARD.ONE,
    RBIODB.INCHIKEY,            'character',  RBIODB.CARD.ONE,
    RBIODB.MSDEV,               'character',  RBIODB.CARD.ONE,
    RBIODB.MSDEVTYPE,           'character',  RBIODB.CARD.ONE,
    RBIODB.MSTYPE,              'character',  RBIODB.CARD.ONE,
    RBIODB.MSMODE,              'character',  RBIODB.CARD.ONE,
    RBIODB.MSPRECMZ,            'double',     RBIODB.CARD.ONE,
    RBIODB.MSPRECANNOT,         'character',  RBIODB.CARD.ONE,
    RBIODB.FORMULA,             'character',  RBIODB.CARD.ONE,
    RBIODB.SUPER.CLASS,         'character',  RBIODB.CARD.ONE,
    RBIODB.MASS,                'double',     RBIODB.CARD.ONE,
    RBIODB.AVERAGE.MASS,        'double',     RBIODB.CARD.ONE,
    RBIODB.MONOISOTOPIC.MASS,   'double',     RBIODB.CARD.ONE,
    RBIODB.SEQUENCE,            'character',  RBIODB.CARD.ONE,
    RBIODB.LENGTH,              'integer',    RBIODB.CARD.ONE,
    RBIODB.LOCATION,            'character',  RBIODB.CARD.ONE,
    RBIODB.NB.PEAKS,            'integer',    RBIODB.CARD.ONE,
    RBIODB.PEAKS,               'data.frame', RBIODB.CARD.ONE
    ), byrow = TRUE, ncol = 3), stringsAsFactors = FALSE)
  colnames(RBIODB.FIELDS) <- c('name', 'class', 'cardinality')

  # How to compute a missing field ?
  # Maps a field name to the database class name(s) associated with it for
  # that purpose.
  RBIODB.FIELD.COMPUTING <- list()
  RBIODB.FIELD.COMPUTING[[RBIODB.INCHI]]    <- c(RBIODB.CHEBI)
  RBIODB.FIELD.COMPUTING[[RBIODB.INCHIKEY]] <- c(RBIODB.CHEBI)
  RBIODB.FIELD.COMPUTING[[RBIODB.SEQUENCE]] <- c(RBIODB.NCBICCDS)

  # Peaks data frame columns
  RBIODB.PEAK.MZ                 <- 'mz'
  RBIODB.PEAK.FORMULA            <- 'formula'
  RBIODB.PEAK.FORMULA.COUNT      <- 'formula.count'
  RBIODB.PEAK.MASS               <- 'mass'
  RBIODB.PEAK.ERROR.PPM          <- 'error.ppm'
  RBIODB.PEAK.INTENSITY          <- 'intensity'
  RBIODB.PEAK.RELATIVE.INTENSITY <- 'relative.intensity'

  # Empty (zero-row) peaks data frame with one typed column per peak attribute.
  # Every column is passed as a named argument (`=`): using `<-` inside the call
  # would create a stray `formula.count` variable in the calling environment.
  # The temporary argument names are replaced by the official column names on
  # the next statement.
  RBIODB.PEAK.DF.EXAMPLE <- data.frame(
    mz = double(),
    int = double(),
    rel.int = integer(),
    formula = character(),
    formula.count = integer(),
    mass = double(),
    error = double(),
    stringsAsFactors = FALSE)
  colnames(RBIODB.PEAK.DF.EXAMPLE) <- c(
    RBIODB.PEAK.MZ,
    RBIODB.PEAK.INTENSITY,
    RBIODB.PEAK.RELATIVE.INTENSITY,
    RBIODB.PEAK.FORMULA,
    RBIODB.PEAK.FORMULA.COUNT,
    RBIODB.PEAK.MASS,
    RBIODB.PEAK.ERROR.PPM)

  #################
  # GET ENTRY URL #
  #################

  # TODO Allow choosing between the jp and the eu server (only eu is used below).
  RBIODB.MASSBANK.JP.WS.URL <- "http://www.massbank.jp/api/services/MassBankAPI/getRecordInfo"
  RBIODB.MASSBANK.EU.WS.URL <- "http://massbank.eu/api/services/MassBankAPI/getRecordInfo"

  # Build the URL of a database entry.
  #
  # class         Database class name: one of the "Class names" constants
  #               ('chebi', 'kegg', 'pubchem', ...).
  # accession     Accession identifier of the entry. URLs are built with
  #               paste0(), so a vector of accessions yields a vector of URLs,
  #               except for 'massbank' where all accessions are joined with
  #               commas into a single URL.
  # content.type  Wanted content type ('html', 'txt', 'xml', 'csv'), or 'any'
  #               (the default) to get whichever type the database offers.
  #
  # Returns the URL(s) as character, or NULL when the class is unknown or when
  # the database has no URL for the requested content type.
  get.entry.url <- function(class, accession, content.type = RBIODB.ANY) {

    # TRUE when the caller accepts the only content type a database serves.
    serves <- function(native.type) content.type %in% c(RBIODB.ANY, native.type)

    url <- switch(class,
      chebi = if (serves(RBIODB.HTML)) paste0('https://www.ebi.ac.uk/chebi/searchId.do?chebiId=', accession) else NULL,
      chemspider = if (serves(RBIODB.HTML)) paste0('http://www.chemspider.com/Chemical-Structure.', accession, '.html') else NULL,
      enzyme = if (serves(RBIODB.TXT)) paste0('http://enzyme.expasy.org/EC/', accession, '.txt') else NULL,
      hmdb = switch(content.type,
        xml = paste0('http://www.hmdb.ca/metabolites/', accession, '.xml'),
        html = paste0('http://www.hmdb.ca/metabolites/', accession),
        any = paste0('http://www.hmdb.ca/metabolites/', accession),
        NULL),
      kegg = switch(content.type,
        txt = paste0('http://rest.kegg.jp/get/', accession),
        html = paste0('http://www.genome.jp/dbget-bin/www_bget?cpd:', accession),
        any = paste0('http://www.genome.jp/dbget-bin/www_bget?cpd:', accession),
        NULL),
      lipidmaps = if (serves(RBIODB.CSV)) paste0('http://www.lipidmaps.org/data/LMSDRecord.php?Mode=File&LMID=', accession, '&OutputType=CSV&OutputQuote=No') else NULL,
      massbank = if (serves(RBIODB.TXT)) paste0(RBIODB.MASSBANK.EU.WS.URL, '?ids=', paste(accession, collapse = ',')) else NULL,
      mirbase = if (serves(RBIODB.HTML)) paste0('http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=', accession) else NULL,
      pubchem = {
        # Normalize the accession: drop spaces and a leading 'CID' prefix.
        accession <- gsub(' ', '', accession, perl = TRUE)
        accession <- gsub('^CID', '', accession, perl = TRUE)
        switch(content.type,
          xml = paste0('http://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/', accession, '/XML/?response_type=save&response_basename=CID_', accession),
          html = paste0('http://pubchem.ncbi.nlm.nih.gov/compound/', accession),
          # 'any' falls back on the HTML page, as for hmdb and kegg, so that
          # the default content type does not yield NULL.
          any = paste0('http://pubchem.ncbi.nlm.nih.gov/compound/', accession),
          NULL)
      },
      ncbigene = if (serves(RBIODB.XML)) paste0('http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=', accession, '&rettype=xml&retmode=text') else NULL,
      ncbiccds = if (serves(RBIODB.HTML)) paste0('https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&GO=MainBrowse&DATA=', accession) else NULL,
      uniprot = if (serves(RBIODB.XML)) paste0('http://www.uniprot.org/uniprot/', accession, '.xml') else NULL,
      NULL # Unknown database class.
    )

    return(url)
  }
}