Mercurial > repos > prog > lcmsmatching
comparison biodb-common.R @ 0:3afe41d3e9e7 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit bb4d3e23d99828bfee16d31d794c49a17313ec2f
| author | prog |
|---|---|
| date | Mon, 11 Jul 2016 09:12:03 -0400 |
| parents | |
| children | 45e985cd8e9e |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3afe41d3e9e7 |
|---|---|
| 1 if ( ! exists('RBIODB.COMPOUND')) { # Do not load again if already loaded | |
| 2 | |
#############
# CONSTANTS #
#############

# Entry types.
# RBIODB.COMPOUND is also the symbol tested by the load guard at the top of
# this file, and it is reused as a field name in RBIODB.FIELDS, so it is
# defined exactly once, here.
RBIODB.COMPOUND <- 'compound'
RBIODB.SPECTRUM <- 'spectrum'

# Entry content types (format in which an entry can be requested).
RBIODB.HTML <- 'html'
RBIODB.TXT  <- 'txt'
RBIODB.XML  <- 'xml'
RBIODB.CSV  <- 'csv'
RBIODB.ANY  <- 'any'

# Class names (one identifier per supported database).
RBIODB.CHEBI      <- 'chebi'
RBIODB.KEGG       <- 'kegg'
RBIODB.PUBCHEM    <- 'pubchem'
RBIODB.HMDB       <- 'hmdb'
RBIODB.CHEMSPIDER <- 'chemspider'
RBIODB.ENZYME     <- 'enzyme'
RBIODB.LIPIDMAPS  <- 'lipidmaps'
RBIODB.MIRBASE    <- 'mirbase'
RBIODB.NCBIGENE   <- 'ncbigene'
RBIODB.NCBICCDS   <- 'ncbiccds'
RBIODB.UNIPROT    <- 'uniprot'
RBIODB.MASSBANK   <- 'massbank'

# Fields (names of the values an entry can hold).
# The 'compound' field shares the RBIODB.COMPOUND constant defined above.
RBIODB.ACCESSION           <- 'accession'
RBIODB.DESCRIPTION         <- 'description'
RBIODB.PROTEIN.DESCRIPTION <- 'protdesc'
RBIODB.NAME                <- 'name'
RBIODB.FULLNAMES           <- 'fullnames'
RBIODB.SYNONYMS            <- 'synonyms'
RBIODB.SYMBOL              <- 'symbol'
RBIODB.GENE.SYMBOLS        <- 'genesymbols'
RBIODB.CHEBI.ID            <- 'chebiid'
RBIODB.LIPIDMAPS.ID        <- 'lipidmapsid'
RBIODB.KEGG.ID             <- 'keggid'
RBIODB.HMDB.ID             <- 'hmdbid'
RBIODB.ENZYME.ID           <- 'enzymeid'
RBIODB.NCBI.CCDS.ID        <- 'ncbiccdsid'
RBIODB.NCBI.GENE.ID        <- 'ncbigeneid'
RBIODB.PUBCHEM.ID          <- 'pubchemid'
RBIODB.UNIPROT.ID          <- 'uniprotid'
RBIODB.INCHI               <- 'inchi'
RBIODB.INCHIKEY            <- 'inchikey'
RBIODB.MSDEV               <- 'msdev'
RBIODB.MSDEVTYPE           <- 'msdevtype'
RBIODB.MSTYPE              <- 'mstype'
RBIODB.MSMODE              <- 'msmode'
RBIODB.MSPRECMZ            <- 'msprecmz' # numeric
RBIODB.MSPRECANNOT         <- 'msprecannot'
RBIODB.FORMULA             <- 'formula'
RBIODB.SUPER.CLASS         <- 'superclass'
RBIODB.MASS                <- 'mass'
RBIODB.AVERAGE.MASS        <- 'averagemass'
RBIODB.MONOISOTOPIC.MASS   <- 'monoisotopicmass'
RBIODB.SEQUENCE            <- 'sequence'
RBIODB.LOCATION            <- 'location'
RBIODB.LENGTH              <- 'length'
RBIODB.NB.PEAKS            <- 'nbpeaks'
RBIODB.PEAKS               <- 'peaks'

# Mode values (allowed values of the RBIODB.MSMODE field).
RBIODB.MSMODE.NEG <- 'neg'
RBIODB.MSMODE.POS <- 'pos'

# Cardinalities (used in the 'cardinality' column of RBIODB.FIELDS).
RBIODB.CARD.ONE  <- '1'
RBIODB.CARD.MANY <- '*'
| 78 | |
# Field attributes
# One row per field, with three character columns:
#   name         the field name constant
#   class        the R class of the field value
#   cardinality  RBIODB.CARD.ONE or RBIODB.CARD.MANY
# NOTE(review): the class given for RBIODB.COMPOUND is spelled 'BiodEntry';
# confirm this is the intended class name and not a typo.
RBIODB.FIELDS <- setNames(
  data.frame(
    matrix(
      c(
        # FIELD NAME                CLASS         CARDINALITY
        RBIODB.COMPOUND,            'BiodEntry',  RBIODB.CARD.ONE,
        RBIODB.ACCESSION,           'character',  RBIODB.CARD.ONE,
        RBIODB.DESCRIPTION,         'character',  RBIODB.CARD.ONE,
        RBIODB.NAME,                'character',  RBIODB.CARD.ONE,
        RBIODB.FULLNAMES,           'character',  RBIODB.CARD.MANY,
        RBIODB.SYNONYMS,            'character',  RBIODB.CARD.MANY,
        RBIODB.PROTEIN.DESCRIPTION, 'character',  RBIODB.CARD.ONE,
        RBIODB.SYMBOL,              'character',  RBIODB.CARD.ONE,
        RBIODB.GENE.SYMBOLS,        'character',  RBIODB.CARD.MANY,
        RBIODB.CHEBI.ID,            'character',  RBIODB.CARD.ONE,
        RBIODB.LIPIDMAPS.ID,        'character',  RBIODB.CARD.ONE,
        RBIODB.KEGG.ID,             'character',  RBIODB.CARD.ONE,
        RBIODB.HMDB.ID,             'character',  RBIODB.CARD.ONE,
        RBIODB.ENZYME.ID,           'character',  RBIODB.CARD.ONE,
        RBIODB.PUBCHEM.ID,          'character',  RBIODB.CARD.ONE,
        RBIODB.UNIPROT.ID,          'character',  RBIODB.CARD.ONE,
        RBIODB.NCBI.CCDS.ID,        'character',  RBIODB.CARD.ONE,
        RBIODB.NCBI.GENE.ID,        'character',  RBIODB.CARD.ONE,
        RBIODB.INCHI,               'character',  RBIODB.CARD.ONE,
        RBIODB.INCHIKEY,            'character',  RBIODB.CARD.ONE,
        RBIODB.MSDEV,               'character',  RBIODB.CARD.ONE,
        RBIODB.MSDEVTYPE,           'character',  RBIODB.CARD.ONE,
        RBIODB.MSTYPE,              'character',  RBIODB.CARD.ONE,
        RBIODB.MSMODE,              'character',  RBIODB.CARD.ONE,
        RBIODB.MSPRECMZ,            'double',     RBIODB.CARD.ONE,
        RBIODB.MSPRECANNOT,         'character',  RBIODB.CARD.ONE,
        RBIODB.FORMULA,             'character',  RBIODB.CARD.ONE,
        RBIODB.SUPER.CLASS,         'character',  RBIODB.CARD.ONE,
        RBIODB.MASS,                'double',     RBIODB.CARD.ONE,
        RBIODB.AVERAGE.MASS,        'double',     RBIODB.CARD.ONE,
        RBIODB.MONOISOTOPIC.MASS,   'double',     RBIODB.CARD.ONE,
        RBIODB.SEQUENCE,            'character',  RBIODB.CARD.ONE,
        RBIODB.LENGTH,              'integer',    RBIODB.CARD.ONE,
        RBIODB.LOCATION,            'character',  RBIODB.CARD.ONE,
        RBIODB.NB.PEAKS,            'integer',    RBIODB.CARD.ONE,
        RBIODB.PEAKS,               'data.frame', RBIODB.CARD.ONE
      ),
      byrow = TRUE,
      ncol = 3
    ),
    stringsAsFactors = FALSE
  ),
  c('name', 'class', 'cardinality')
)
| 120 | |
# How to compute a missing field?
# Named list keyed by field name; each value is a character vector of
# database class names.
# NOTE(review): presumably these are the databases to query in order to fill
# in the field when an entry lacks it; confirm against the callers.
RBIODB.FIELD.COMPUTING <- setNames(
  list(RBIODB.CHEBI, RBIODB.CHEBI, RBIODB.NCBICCDS),
  c(RBIODB.INCHI, RBIODB.INCHIKEY, RBIODB.SEQUENCE)
)
| 126 | |
# Peaks data frame columns
RBIODB.PEAK.MZ                 <- 'mz'
RBIODB.PEAK.FORMULA            <- 'formula'
RBIODB.PEAK.FORMULA.COUNT      <- 'formula.count'
RBIODB.PEAK.MASS               <- 'mass'
RBIODB.PEAK.ERROR.PPM          <- 'error.ppm'
RBIODB.PEAK.INTENSITY          <- 'intensity'
RBIODB.PEAK.RELATIVE.INTENSITY <- 'relative.intensity'

# Empty (zero-row) peaks data frame showing the expected columns and their
# types. Every column is passed as a named argument with `=`: writing
# `formula.count <- integer()` here would instead create a stray
# `formula.count` variable in the enclosing environment.
RBIODB.PEAK.DF.EXAMPLE <- data.frame(
  mz = double(),
  int = double(),
  rel.int = integer(),
  formula = character(),
  formula.count = integer(),
  mass = double(),
  error = double(),
  stringsAsFactors = FALSE
)

# Rename the columns from the constants so the two can never drift apart.
colnames(RBIODB.PEAK.DF.EXAMPLE) <- c(
  RBIODB.PEAK.MZ,
  RBIODB.PEAK.INTENSITY,
  RBIODB.PEAK.RELATIVE.INTENSITY,
  RBIODB.PEAK.FORMULA,
  RBIODB.PEAK.FORMULA.COUNT,
  RBIODB.PEAK.MASS,
  RBIODB.PEAK.ERROR.PPM
)
| 137 | |
#################
# GET ENTRY URL #
#################

# MassBank record-info web service endpoints.
# TODO Allow the caller to choose between the JP and EU servers; for now
# get.entry.url() always uses the EU endpoint.
RBIODB.MASSBANK.JP.WS.URL <- "http://www.massbank.jp/api/services/MassBankAPI/getRecordInfo"
RBIODB.MASSBANK.EU.WS.URL <- "http://massbank.eu/api/services/MassBankAPI/getRecordInfo"

# Build the URL at which a database entry can be fetched.
#
# class         Database class name (single string), e.g. 'chebi', 'kegg',
#               'pubchem'; see the RBIODB.* class name constants.
# accession     Accession identifier of the entry. For 'massbank' a vector of
#               accessions is joined with commas into one request URL. For
#               'pubchem' spaces and a leading 'CID' prefix are stripped.
# content.type  Wanted content type: RBIODB.HTML, RBIODB.TXT, RBIODB.XML,
#               RBIODB.CSV, or RBIODB.ANY (default) to accept whatever the
#               database offers.
#
# Returns the URL as a character value, or NULL when the class is unknown or
# the database does not offer the requested content type.
get.entry.url <- function(class, accession, content.type = RBIODB.ANY) {

  # TRUE when the caller asked for `type` explicitly or accepts any type.
  wants <- function(type) content.type %in% c(RBIODB.ANY, type)

  url <- switch(class,
    chebi = if (wants(RBIODB.HTML)) paste0('https://www.ebi.ac.uk/chebi/searchId.do?chebiId=', accession) else NULL,
    chemspider = if (wants(RBIODB.HTML)) paste0('http://www.chemspider.com/Chemical-Structure.', accession, '.html') else NULL,
    enzyme = if (wants(RBIODB.TXT)) paste0('http://enzyme.expasy.org/EC/', accession, '.txt') else NULL,
    hmdb = switch(content.type,
      xml = paste0('http://www.hmdb.ca/metabolites/', accession, '.xml'),
      html = ,
      any = paste0('http://www.hmdb.ca/metabolites/', accession),
      NULL),
    kegg = switch(content.type,
      txt = paste0('http://rest.kegg.jp/get/', accession),
      html = ,
      any = paste0('http://www.genome.jp/dbget-bin/www_bget?cpd:', accession),
      NULL),
    lipidmaps = if (wants(RBIODB.CSV)) paste0('http://www.lipidmaps.org/data/LMSDRecord.php?Mode=File&LMID=', accession, '&OutputType=CSV&OutputQuote=No') else NULL,
    massbank = if (wants(RBIODB.TXT)) paste0(RBIODB.MASSBANK.EU.WS.URL, '?ids=', paste(accession, collapse = ',')) else NULL,
    mirbase = if (wants(RBIODB.HTML)) paste0('http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=', accession) else NULL,
    pubchem = {
      # Normalise identifiers such as 'CID 2244' to the bare number.
      cid <- gsub(' ', '', accession, perl = TRUE)
      cid <- gsub('^CID', '', cid, perl = TRUE)
      switch(content.type,
        xml = paste0('http://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/', cid, '/XML/?response_type=save&response_basename=CID_', cid),
        html = ,
        # 'any' maps to the HTML page, as for hmdb and kegg; without this
        # case the default content type yielded NULL for PubChem.
        any = paste0('http://pubchem.ncbi.nlm.nih.gov/compound/', cid),
        NULL)
    },
    ncbigene = if (wants(RBIODB.XML)) paste0('http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=', accession, '&rettype=xml&retmode=text') else NULL,
    ncbiccds = if (wants(RBIODB.HTML)) paste0('https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&GO=MainBrowse&DATA=', accession) else NULL,
    uniprot = if (wants(RBIODB.XML)) paste0('http://www.uniprot.org/uniprot/', accession, '.xml') else NULL,
    NULL
  )

  url
}
| 181 } |
