Mercurial > repos > prog > lcmsmatching
comparison biodb-common.R @ 1:45e985cd8e9e draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
| author | prog |
|---|---|
| date | Tue, 31 Jan 2017 05:27:24 -0500 |
| parents | 3afe41d3e9e7 |
| children | |
comparison
equal
deleted
inserted
replaced
| 0:3afe41d3e9e7 | 1:45e985cd8e9e |
|---|---|
| 1 if ( ! exists('RBIODB.COMPOUND')) { # Do not load again if already loaded | 1 if ( ! exists('BIODB.XML')) { |
| 2 | |
| 3 ############### | |
| 4 # CACHE MODES # | |
| 5 ############### | |
| 6 | |
| 7 BIODB.CACHE.READ.ONLY <- 'read-only' | |
| 8 BIODB.CACHE.READ.WRITE <- 'read-write' | |
| 9 BIODB.CACHE.WRITE.ONLY <- 'write-only' | |
| 10 | |
| 11 ####################### | |
| 12 # ENTRY CONTENT TYPES # | |
| 13 ####################### | |
| 14 | |
| 15 BIODB.HTML <- 'html' | |
| 16 BIODB.TXT <- 'txt' | |
| 17 BIODB.XML <- 'xml' | |
| 18 BIODB.CSV <- 'csv' | |
| 19 BIODB.DATAFRAME <- 'dataframe' | |
| 20 BIODB.JSON <- 'json' | |
| 2 | 21 |
| 3 ############# | 22 ############# |
| 4 # CONSTANTS # | 23 # DATABASES # |
| 5 ############# | 24 ############# |
| 6 | 25 |
| 7 # Entry types | 26 BIODB.CHEBI <- 'chebi' |
| 8 RBIODB.COMPOUND <- 'compound' | 27 BIODB.KEGG <- 'kegg' |
| 9 RBIODB.SPECTRUM <- 'spectrum' | 28 BIODB.PUBCHEMCOMP <- 'pubchemcomp' # Compound database |
| 10 | 29 BIODB.PUBCHEMSUB <- 'pubchemsub' # Substance database |
| 11 # Entry content types | 30 BIODB.HMDB <- 'hmdb' |
| 12 RBIODB.HTML <- 'html' | 31 BIODB.CHEMSPIDER <- 'chemspider' |
| 13 RBIODB.TXT <- 'txt' | 32 BIODB.ENZYME <- 'enzyme' |
| 14 RBIODB.XML <- 'xml' | 33 BIODB.LIPIDMAPS <- 'lipidmaps' |
| 15 RBIODB.CSV <- 'csv' | 34 BIODB.MIRBASE <- 'mirbase' |
| 16 RBIODB.ANY <- 'any' | 35 BIODB.NCBIGENE <- 'ncbigene' |
| 17 | 36 BIODB.NCBICCDS <- 'ncbiccds' |
| 18 # Class names | 37 BIODB.UNIPROT <- 'uniprot' |
| 19 RBIODB.CHEBI <- 'chebi' | 38 BIODB.MASSBANK <- 'massbank' |
| 20 RBIODB.KEGG <- 'kegg' | 39 BIODB.MASSFILEDB <- 'massfiledb' |
| 21 RBIODB.PUBCHEM <- 'pubchem' | 40 BIODB.PEAKFOREST <- 'peakforest' |
| 22 RBIODB.HMDB <- 'hmdb' | 41 |
| 23 RBIODB.CHEMSPIDER <- 'chemspider' | 42 BIODB.DATABASES <- c(BIODB.CHEBI, BIODB.KEGG, BIODB.PUBCHEMCOMP, BIODB.PUBCHEMSUB, BIODB.HMDB, BIODB.CHEMSPIDER, BIODB.ENZYME, BIODB.LIPIDMAPS, BIODB.MIRBASE, BIODB.NCBIGENE, BIODB.NCBICCDS, BIODB.UNIPROT, BIODB.MASSBANK, BIODB.MASSFILEDB, BIODB.PEAKFOREST) |
| 24 RBIODB.ENZYME <- 'enzyme' | 43 |
| 25 RBIODB.LIPIDMAPS <- 'lipidmaps' | 44 ########## |
| 26 RBIODB.MIRBASE <- 'mirbase' | 45 # FIELDS # |
| 27 RBIODB.NCBIGENE <- 'ncbigene' | 46 ########## |
| 28 RBIODB.NCBICCDS <- 'ncbiccds' | 47 |
| 29 RBIODB.UNIPROT <- 'uniprot' | 48 BIODB.ACCESSION <- 'accession' |
| 30 RBIODB.MASSBANK <- 'massbank' | 49 BIODB.DESCRIPTION <- 'description' |
| 31 | 50 BIODB.PROTEIN.DESCRIPTION <- 'protdesc' |
| 32 # Fields | 51 BIODB.NAME <- 'name' |
| 33 RBIODB.COMPOUND <- 'compound' | 52 BIODB.COMP.IUPAC.NAME.ALLOWED <- 'comp.iupac.name.allowed' |
| 34 RBIODB.ACCESSION <- 'accession' | 53 BIODB.COMP.IUPAC.NAME.TRAD <- 'comp.iupac.name.trad' |
| 35 RBIODB.DESCRIPTION <- 'description' | 54 BIODB.COMP.IUPAC.NAME.SYST <- 'comp.iupac.name.syst' |
| 36 RBIODB.PROTEIN.DESCRIPTION <- 'protdesc' | 55 BIODB.COMP.IUPAC.NAME.PREF <- 'comp.iupac.name.pref' |
| 37 RBIODB.NAME <- 'name' | 56 BIODB.COMP.IUPAC.NAME.CAS <- 'comp.iupac.name.cas' |
| 38 RBIODB.FULLNAMES <- 'fullnames' | 57 BIODB.FULLNAMES <- 'fullnames' |
| 39 RBIODB.SYNONYMS <- 'synonyms' | 58 BIODB.SYNONYMS <- 'synonyms' |
| 40 RBIODB.SYMBOL <- 'symbol' | 59 BIODB.SYMBOL <- 'symbol' |
| 41 RBIODB.GENE.SYMBOLS <- 'genesymbols' | 60 BIODB.GENE.SYMBOLS <- 'genesymbols' |
| 42 RBIODB.CHEBI.ID <- 'chebiid' | 61 BIODB.CHEBI.ID <- 'chebiid' |
| 43 RBIODB.LIPIDMAPS.ID <- 'lipidmapsid' | 62 BIODB.LIPIDMAPS.ID <- 'lipidmapsid' |
| 44 RBIODB.KEGG.ID <- 'keggid' | 63 BIODB.KEGG.ID <- 'keggid' |
| 45 RBIODB.HMDB.ID <- 'hmdbid' | 64 BIODB.HMDB.ID <- 'hmdbid' |
| 46 RBIODB.ENZYME.ID <- 'enzymeid' | 65 BIODB.ENZYME.ID <- 'enzymeid' |
| 47 RBIODB.NCBI.CCDS.ID <- 'ncbiccdsid' | 66 BIODB.NCBI.CCDS.ID <- 'ncbiccdsid' |
| 48 RBIODB.NCBI.GENE.ID <- 'ncbigeneid' | 67 BIODB.NCBI.GENE.ID <- 'ncbigeneid' |
| 49 RBIODB.PUBCHEM.ID <- 'pubchemid' | 68 BIODB.PUBCHEMCOMP.ID <- 'pubchemcompid' |
| 50 RBIODB.UNIPROT.ID <- 'uniprotid' | 69 BIODB.PUBCHEMSUB.ID <- 'pubchemsubid' |
| 51 RBIODB.INCHI <- 'inchi' | 70 BIODB.CHEMSPIDER.ID <- 'chemspiderid' |
| 52 RBIODB.INCHIKEY <- 'inchikey' | 71 BIODB.UNIPROT.ID <- 'uniprotid' |
| 53 RBIODB.MSDEV <- 'msdev' | 72 BIODB.CAS.ID <- 'casid' |
| 54 RBIODB.MSDEVTYPE <- 'msdevtype' | 73 BIODB.PEAKFOREST.ID <- 'peakforestid' |
| 55 RBIODB.MSTYPE <- 'mstype' | 74 BIODB.SMILES <- 'smiles' |
| 56 RBIODB.MSMODE <- 'msmode' | 75 BIODB.INCHI <- 'inchi' |
| 57 RBIODB.MSPRECMZ <- 'msprecmz' # numeric | 76 BIODB.INCHIKEY <- 'inchikey' |
| 58 RBIODB.MSPRECANNOT <- 'msprecannot' | 77 BIODB.MSDEV <- 'msdev' |
| 59 RBIODB.FORMULA <- 'formula' | 78 BIODB.MSDEVTYPE <- 'msdevtype' |
| 60 RBIODB.SUPER.CLASS <- 'superclass' | 79 BIODB.MSTYPE <- 'mstype' |
| 61 RBIODB.MASS <- 'mass' | 80 BIODB.MSMODE <- 'msmode' |
| 62 RBIODB.AVERAGE.MASS <- 'averagemass' | 81 BIODB.MSPRECMZ <- 'msprecmz' # numeric |
| 63 RBIODB.MONOISOTOPIC.MASS <- 'monoisotopicmass' | 82 BIODB.MSPRECANNOT <- 'msprecannot' |
| 64 RBIODB.SEQUENCE <- 'sequence' | 83 BIODB.FORMULA <- 'formula' |
| 65 RBIODB.LOCATION <- 'location' | 84 BIODB.SUPER.CLASS <- 'superclass' |
| 66 RBIODB.LENGTH <- 'length' | 85 BIODB.MASS <- 'mass' |
| 67 RBIODB.NB.PEAKS <- 'nbpeaks' | 86 BIODB.AVERAGE.MASS <- 'averagemass' |
| 68 RBIODB.NB.PEAKS <- 'nbpeaks' | 87 BIODB.MONOISOTOPIC.MASS <- 'monoisotopicmass' |
| 69 RBIODB.PEAKS <- 'peaks' | 88 BIODB.SEQUENCE <- 'sequence' |
| 89 BIODB.LOCATION <- 'location' | |
| 90 BIODB.LENGTH <- 'length' | |
| 91 BIODB.NB.PEAKS <- 'nbpeaks' | |
| 92 BIODB.PEAKS <- 'peaks' | |
| 93 BIODB.COMPOUNDS <- 'compounds' | |
| 94 BIODB.NB.COMPOUNDS <- 'nbcompounds' | |
| 95 BIODB.COMPOUND.ID <- 'compoundid' | |
| 96 BIODB.COMPOUND.MASS <- 'compoundmass' | |
| 97 BIODB.COMPOUND.COMP <- 'compoundcomp' | |
| 98 BIODB.CHROM.COL <- 'chromcol' # Chromatographic column | |
| 99 BIODB.CHROM.COL.RT <- 'chromcolrt' # Retention time measured on chromatographic column | |
| 100 BIODB.ID <- 'id' | |
| 101 BIODB.TITLE <- 'title' | |
| 102 BIODB.PEAK.MZ <- 'mz' | |
| 103 BIODB.PEAK.RT <- 'rt' | |
| 104 BIODB.PEAK.MZEXP <- 'mzexp' | |
| 105 BIODB.PEAK.MZTHEO <- 'mztheo' | |
| 106 BIODB.PEAK.FORMULA <- 'formula' | |
| 107 BIODB.PEAK.FORMULA.COUNT <- 'formula.count' | |
| 108 BIODB.PEAK.COMP <- 'peakcomp' # Peak composition | |
| 109 BIODB.PEAK.ATTR <- 'peakattr' # Peak attribution | |
| 110 BIODB.PEAK.MASS <- 'mass' | |
| 111 # BIODB.PEAK.ATTR <- 'attr' | |
| 112 BIODB.PEAK.ERROR.PPM <- 'error.ppm' | |
| 113 BIODB.PEAK.INTENSITY <- 'intensity' | |
| 114 BIODB.PEAK.RELATIVE.INTENSITY <- 'relative.intensity' | |
| 70 | 115 |
| 71 # Mode values | 116 # Mode values |
| 72 RBIODB.MSMODE.NEG <- 'neg' | 117 BIODB.MSMODE.NEG <- 'neg' |
| 73 RBIODB.MSMODE.POS <- 'pos' | 118 BIODB.MSMODE.POS <- 'pos' |
| 74 | 119 |
| 75 # Cardinalities | 120 # Tolerance values |
| 76 RBIODB.CARD.ONE <- '1' | 121 BIODB.TOL <- 'mztol' |
| 77 RBIODB.CARD.MANY <- '*' | 122 BIODB.MZTOLUNIT.PPM <- 'ppm' |
| 78 | 123 BIODB.MZTOLUNIT.PLAIN <- 'plain' # same as mz: mass-to-charge ratio |
| 79 # Field attributes | 124 BIODB.MZTOLUNIT.VALS <- c(BIODB.MZTOLUNIT.PPM, BIODB.MZTOLUNIT.PLAIN) |
| 80 RBIODB.FIELDS <- data.frame(matrix(c( | 125 |
| 81 # FIELD NAME CLASS CARDINALITY | 126 ######################## |
| 82 RBIODB.COMPOUND, 'BiodEntry', RBIODB.CARD.ONE, | 127 # MS-MS MEASURE VALUES # |
| 83 RBIODB.ACCESSION, 'character', RBIODB.CARD.ONE, | 128 ######################## |
| 84 RBIODB.DESCRIPTION, 'character', RBIODB.CARD.ONE, | 129 |
| 85 RBIODB.NAME, 'character', RBIODB.CARD.ONE, | 130 BIODB.MSMS.DIST.COS <- "cosine" |
| 86 RBIODB.FULLNAMES, 'character', RBIODB.CARD.MANY, | 131 BIODB.MSMS.DIST.WCOSINE <- "wcosine" |
| 87 RBIODB.SYNONYMS, 'character', RBIODB.CARD.MANY, | 132 BIODB.MSMS.DIST.PKERNEL <- "pkernel" |
| 88 RBIODB.PROTEIN.DESCRIPTION, 'character', RBIODB.CARD.ONE, | 133 BIODB.MSMS.DIST <- c(BIODB.MSMS.DIST.COS, BIODB.MSMS.DIST.WCOSINE, BIODB.MSMS.DIST.PKERNEL) |
| 89 RBIODB.SYMBOL, 'character', RBIODB.CARD.ONE, | 134 |
| 90 RBIODB.GENE.SYMBOLS, 'character', RBIODB.CARD.MANY, | 135 |
| 91 RBIODB.CHEBI.ID, 'character', RBIODB.CARD.ONE, | 136 ################# |
| 92 RBIODB.LIPIDMAPS.ID, 'character', RBIODB.CARD.ONE, | 137 # CARDINALITIES # |
| 93 RBIODB.KEGG.ID, 'character', RBIODB.CARD.ONE, | 138 ################# |
| 94 RBIODB.HMDB.ID, 'character', RBIODB.CARD.ONE, | 139 |
| 95 RBIODB.ENZYME.ID, 'character', RBIODB.CARD.ONE, | 140 BIODB.CARD.ONE <- '1' |
| 96 RBIODB.PUBCHEM.ID, 'character', RBIODB.CARD.ONE, | 141 BIODB.CARD.MANY <- '*' |
| 97 RBIODB.UNIPROT.ID, 'character', RBIODB.CARD.ONE, | 142 |
| 98 RBIODB.NCBI.CCDS.ID, 'character', RBIODB.CARD.ONE, | 143 ##################### |
| 99 RBIODB.NCBI.GENE.ID, 'character', RBIODB.CARD.ONE, | 144 #INTENSITy NOTATIONS# |
| 100 RBIODB.INCHI, 'character', RBIODB.CARD.ONE, | 145 ##################### |
| 101 RBIODB.INCHIKEY, 'character', RBIODB.CARD.ONE, | 146 |
| 102 RBIODB.MSDEV, 'character', RBIODB.CARD.ONE, | 147 BIODB.GROUP.INTENSITY<-c(BIODB.PEAK.INTENSITY,BIODB.PEAK.RELATIVE.INTENSITY) |
| 103 RBIODB.MSDEVTYPE, 'character', RBIODB.CARD.ONE, | 148 |
| 104 RBIODB.MSTYPE, 'character', RBIODB.CARD.ONE, | 149 ########################## |
| 105 RBIODB.MSMODE, 'character', RBIODB.CARD.ONE, | 150 # ENTRY FIELD ATTRIBUTES # |
| 106 RBIODB.MSPRECMZ, 'double', RBIODB.CARD.ONE, | 151 ########################## |
| 107 RBIODB.MSPRECANNOT, 'character', RBIODB.CARD.ONE, | 152 # FIELD NAME CLASS CARDINALITY TYPE |
| 108 RBIODB.FORMULA, 'character', RBIODB.CARD.ONE, | 153 BIODB.FIELDS <- data.frame(matrix(c( |
| 109 RBIODB.SUPER.CLASS, 'character', RBIODB.CARD.ONE, | 154 BIODB.ACCESSION, 'character', BIODB.CARD.ONE, 'none', |
| 110 RBIODB.MASS, 'double', RBIODB.CARD.ONE, | 155 BIODB.DESCRIPTION, 'character', BIODB.CARD.ONE, 'none', |
| 111 RBIODB.AVERAGE.MASS, 'double', RBIODB.CARD.ONE, | 156 BIODB.NAME, 'character', BIODB.CARD.ONE, 'name', |
| 112 RBIODB.MONOISOTOPIC.MASS, 'double', RBIODB.CARD.ONE, | 157 BIODB.COMP.IUPAC.NAME.ALLOWED, 'character', BIODB.CARD.ONE, 'name', |
| 113 RBIODB.SEQUENCE, 'character', RBIODB.CARD.ONE, | 158 BIODB.COMP.IUPAC.NAME.TRAD, 'character', BIODB.CARD.ONE, 'name', |
| 114 RBIODB.LENGTH, 'integer', RBIODB.CARD.ONE, | 159 BIODB.COMP.IUPAC.NAME.SYST, 'character', BIODB.CARD.ONE, 'name', |
| 115 RBIODB.LOCATION, 'character', RBIODB.CARD.ONE, | 160 BIODB.COMP.IUPAC.NAME.PREF, 'character', BIODB.CARD.ONE, 'name', |
| 116 RBIODB.NB.PEAKS, 'integer', RBIODB.CARD.ONE, | 161 BIODB.COMP.IUPAC.NAME.CAS, 'character', BIODB.CARD.ONE, 'name', |
| 117 RBIODB.PEAKS, 'data.frame', RBIODB.CARD.ONE | 162 BIODB.FULLNAMES, 'character', BIODB.CARD.MANY, 'name', |
| 118 ), byrow = TRUE, ncol = 3), stringsAsFactors = FALSE) | 163 BIODB.SYNONYMS, 'character', BIODB.CARD.MANY, 'name', |
| 119 colnames(RBIODB.FIELDS) <- c('name', 'class', 'cardinality') | 164 BIODB.PROTEIN.DESCRIPTION, 'character', BIODB.CARD.ONE, 'none', |
| 120 | 165 BIODB.SYMBOL, 'character', BIODB.CARD.ONE, 'none', |
| 121 # How to compute a missing field ? | 166 BIODB.GENE.SYMBOLS, 'character', BIODB.CARD.MANY, 'none', |
| 122 RBIODB.FIELD.COMPUTING <- list() | 167 BIODB.NB.COMPOUNDS, 'integer', BIODB.CARD.ONE, 'none', |
| 123 RBIODB.FIELD.COMPUTING[[RBIODB.INCHI]] <- c(RBIODB.CHEBI) | 168 BIODB.COMPOUNDS, 'object', BIODB.CARD.MANY, 'none', |
| 124 RBIODB.FIELD.COMPUTING[[RBIODB.INCHIKEY]] <- c(RBIODB.CHEBI) | 169 BIODB.CHEBI.ID, 'character', BIODB.CARD.ONE, 'none', |
| 125 RBIODB.FIELD.COMPUTING[[RBIODB.SEQUENCE]] <- c(RBIODB.NCBICCDS) | 170 BIODB.LIPIDMAPS.ID, 'character', BIODB.CARD.ONE, 'none', |
| 126 | 171 BIODB.KEGG.ID, 'character', BIODB.CARD.ONE, 'none', |
| 127 # Peaks data frame columns | 172 BIODB.HMDB.ID, 'character', BIODB.CARD.ONE, 'none', |
| 128 RBIODB.PEAK.MZ <- 'mz' | 173 BIODB.ENZYME.ID, 'character', BIODB.CARD.ONE, 'none', |
| 129 RBIODB.PEAK.FORMULA <- 'formula' | 174 BIODB.PUBCHEMCOMP.ID, 'character', BIODB.CARD.ONE, 'none', |
| 130 RBIODB.PEAK.FORMULA.COUNT <- 'formula.count' | 175 BIODB.PUBCHEMSUB.ID, 'character', BIODB.CARD.ONE, 'none', |
| 131 RBIODB.PEAK.MASS <- 'mass' | 176 BIODB.PEAKFOREST.ID, 'character', BIODB.CARD.ONE, 'none', |
| 132 RBIODB.PEAK.ERROR.PPM <- 'error.ppm' | 177 BIODB.UNIPROT.ID, 'character', BIODB.CARD.ONE, 'none', |
| 133 RBIODB.PEAK.INTENSITY <- 'intensity' | 178 BIODB.NCBI.CCDS.ID, 'character', BIODB.CARD.ONE, 'none', |
| 134 RBIODB.PEAK.RELATIVE.INTENSITY <- 'relative.intensity' | 179 BIODB.NCBI.GENE.ID, 'character', BIODB.CARD.ONE, 'none', |
| 135 RBIODB.PEAK.DF.EXAMPLE <- data.frame(mz = double(), int = double(), rel.int = integer(), formula = character(), formula.count <- integer(), mass = double(), error = double(), stringsAsFactors = FALSE) | 180 BIODB.INCHI, 'character', BIODB.CARD.ONE, 'none', |
| 136 colnames(RBIODB.PEAK.DF.EXAMPLE) <- c(RBIODB.PEAK.MZ, RBIODB.PEAK.INTENSITY, RBIODB.PEAK.RELATIVE.INTENSITY, RBIODB.PEAK.FORMULA, RBIODB.PEAK.FORMULA.COUNT, RBIODB.PEAK.MASS, RBIODB.PEAK.ERROR.PPM) | 181 BIODB.INCHIKEY, 'character', BIODB.CARD.ONE, 'none', |
| 182 BIODB.MSDEV, 'character', BIODB.CARD.ONE, 'none', | |
| 183 BIODB.MSDEVTYPE, 'character', BIODB.CARD.ONE, 'none', | |
| 184 BIODB.MSTYPE, 'character', BIODB.CARD.ONE, 'none', | |
| 185 BIODB.MSMODE, 'character', BIODB.CARD.ONE, 'none', | |
| 186 BIODB.MSPRECMZ, 'double', BIODB.CARD.ONE, 'none', | |
| 187 BIODB.PEAK.MZTHEO, 'double', BIODB.CARD.ONE, 'none', | |
| 188 BIODB.MSPRECANNOT, 'character', BIODB.CARD.ONE, 'none', | |
| 189 BIODB.FORMULA, 'character', BIODB.CARD.ONE, 'none', | |
| 190 BIODB.SUPER.CLASS, 'character', BIODB.CARD.ONE, 'none', | |
| 191 BIODB.MASS, 'double', BIODB.CARD.ONE, 'none', | |
| 192 BIODB.AVERAGE.MASS, 'double', BIODB.CARD.ONE, 'none', | |
| 193 BIODB.MONOISOTOPIC.MASS, 'double', BIODB.CARD.ONE, 'none', | |
| 194 BIODB.SEQUENCE, 'character', BIODB.CARD.ONE, 'none', | |
| 195 BIODB.LENGTH, 'integer', BIODB.CARD.ONE, 'none', | |
| 196 BIODB.LOCATION, 'character', BIODB.CARD.ONE, 'none', | |
| 197 BIODB.NB.PEAKS, 'integer', BIODB.CARD.ONE, 'none', | |
| 198 BIODB.PEAKS, 'data.frame', BIODB.CARD.ONE, 'none', | |
| 199 BIODB.SMILES, 'character', BIODB.CARD.ONE, 'none', | |
| 200 BIODB.CHEMSPIDER.ID, 'character', BIODB.CARD.ONE, 'none', | |
| 201 BIODB.CAS.ID, 'character', BIODB.CARD.ONE, 'none' | |
| 202 ), byrow = TRUE, ncol = 4), stringsAsFactors = FALSE) | |
| 203 colnames(BIODB.FIELDS) <- c('name', 'class', 'cardinality', 'type') | |
| 204 | |
| 205 ######################### | |
| 206 # GET DATABASE ID FIELD # | |
| 207 ######################### | |
| 208 | |
| 209 biodb.get.database.id.field <- function(database) { | |
| 210 | |
| 211 id.field <- NA_character_ | |
| 212 | |
| 213 if (database %in% BIODB.DATABASES) { | |
| 214 id.field <- paste0(database, 'id') | |
| 215 if ( ! id.field %in% BIODB.FIELDS[['name']]) | |
| 216 stop(paste0('No ID field defined for database ', database, '.')) | |
| 217 } | |
| 218 | |
| 219 return(id.field) | |
| 220 } | |
| 221 | |
| 222 ##################### | |
| 223 # COMPUTABLE FIELDS # | |
| 224 ##################### | |
| 225 | |
| 226 BIODB.FIELD.COMPUTING <- list() | |
| 227 BIODB.FIELD.COMPUTING[[BIODB.INCHI]] <- c(BIODB.CHEBI) | |
| 228 BIODB.FIELD.COMPUTING[[BIODB.INCHIKEY]] <- c(BIODB.CHEBI) | |
| 229 BIODB.FIELD.COMPUTING[[BIODB.SEQUENCE]] <- c(BIODB.NCBICCDS) | |
| 230 | |
| 231 #################### | |
| 232 # PEAKS DATA FRAME # | |
| 233 #################### | |
| 234 | |
| 235 # Example | |
| 236 BIODB.PEAK.DF.EXAMPLE <- data.frame(mz = double(), int = double(), rel.int = integer(), formula = character(), formula.count <- integer(), mass = double(), error = double(), stringsAsFactors = FALSE) | |
| 237 colnames(BIODB.PEAK.DF.EXAMPLE) <- c(BIODB.PEAK.MZ, BIODB.PEAK.INTENSITY, BIODB.PEAK.RELATIVE.INTENSITY, BIODB.PEAK.FORMULA, BIODB.PEAK.FORMULA.COUNT, BIODB.PEAK.MASS, BIODB.PEAK.ERROR.PPM) | |
| 137 | 238 |
| 138 ################# | 239 ################# |
| 139 # GET ENTRY URL # | 240 # GET ENTRY URL # |
| 140 ################# | 241 ################# |
| 141 | 242 |
| 142 # TODO Let the choice to use either jp or eu | 243 # TODO Let the choice to use either jp or eu |
| 143 RBIODB.MASSBANK.JP.WS.URL <- "http://www.massbank.jp/api/services/MassBankAPI/getRecordInfo" | 244 BIODB.MASSBANK.JP.WS.URL <- "http://www.massbank.jp/api/services/MassBankAPI/" |
| 144 RBIODB.MASSBANK.EU.WS.URL <- "http://massbank.eu/api/services/MassBankAPI/getRecordInfo" | 245 BIODB.MASSBANK.EU.WS.URL <- "http://massbank.eu/api/services/MassBankAPI/" |
| 145 | 246 |
| 146 get.entry.url <- function(class, accession, content.type = RBIODB.ANY) { | 247 .do.get.entry.url <- function(class, accession, content.type = BIODB.HTML, base.url = NA_character_, token = NA_character_) { |
| 147 | 248 |
| 249 # Only certain databases can handle multiple accession ids | |
| 250 if ( ! class %in% c(BIODB.MASSBANK, BIODB.CHEMSPIDER, BIODB.PUBCHEMCOMP, BIODB.PUBCHEMSUB, BIODB.PEAKFOREST) && length(accession) > 1) | |
| 251 stop(paste0("Cannot build a URL for getting multiple entries for class ", class, ".")) | |
| 252 | |
| 253 # Get URL | |
| 148 url <- switch(class, | 254 url <- switch(class, |
| 149 chebi = if (content.type %in% c(RBIODB.ANY, RBIODB.HTML)) paste0('https://www.ebi.ac.uk/chebi/searchId.do?chebiId=', accession) else NULL, | 255 chebi = if (content.type == BIODB.HTML) paste0('https://www.ebi.ac.uk/chebi/searchId.do?chebiId=', accession) else NULL, |
| 150 chemspider = if (content.type %in% c(RBIODB.ANY, RBIODB.HTML)) paste0('http://www.chemspider.com/Chemical-Structure.', accession, '.html') else NULL, | 256 chemspider = { |
| 151 enzyme = if (content.type %in% c(RBIODB.ANY, RBIODB.TXT)) paste0('http://enzyme.expasy.org/EC/', accession, '.txt') else NULL, | 257 token.param <- if (is.na(token)) '' else paste('&token', token, sep = '=') |
| 258 switch(content.type, | |
| 259 html = paste0('http://www.chemspider.com/Chemical-Structure.', accession, '.html'), | |
| 260 xml = paste0('http://www.chemspider.com/MassSpecAPI.asmx/GetExtendedCompoundInfoArray?', paste(paste0('CSIDs=', accession), collapse = '&'), token.param), | |
| 261 NULL) | |
| 262 }, | |
| 263 enzyme = if (content.type == BIODB.TXT) paste0('http://enzyme.expasy.org/EC/', accession, '.txt') else NULL, | |
| 152 hmdb = switch(content.type, | 264 hmdb = switch(content.type, |
| 153 xml = paste0('http://www.hmdb.ca/metabolites/', accession, '.xml'), | 265 xml = paste0('http://www.hmdb.ca/metabolites/', accession, '.xml'), |
| 154 html = paste0('http://www.hmdb.ca/metabolites/', accession), | 266 html = paste0('http://www.hmdb.ca/metabolites/', accession), |
| 155 any = paste0('http://www.hmdb.ca/metabolites/', accession), | |
| 156 NULL), | 267 NULL), |
| 157 kegg = switch(content.type, | 268 kegg = switch(content.type, |
| 158 txt = paste0('http://rest.kegg.jp/get/', accession), | 269 txt = paste0('http://rest.kegg.jp/get/', accession), |
| 159 html = paste0('http://www.genome.jp/dbget-bin/www_bget?cpd:', accession), | 270 html = paste0('http://www.genome.jp/dbget-bin/www_bget?cpd:', accession), |
| 160 any = paste0('http://www.genome.jp/dbget-bin/www_bget?cpd:', accession), | |
| 161 NULL), | 271 NULL), |
| 162 lipidmaps = if (content.type %in% c(RBIODB.ANY, RBIODB.CSV)) paste0('http://www.lipidmaps.org/data/LMSDRecord.php?Mode=File&LMID=', accession, '&OutputType=CSV&OutputQuote=No') else NULL, | 272 lipidmaps = if (content.type == BIODB.CSV) paste0('http://www.lipidmaps.org/data/LMSDRecord.php?Mode=File&LMID=', accession, '&OutputType=CSV&OutputQuote=No') else NULL, |
| 163 massbank = if (content.type %in% c(RBIODB.ANY, RBIODB.TXT)) paste0(RBIODB.MASSBANK.EU.WS.URL, '?ids=', paste(accession, collapse = ',')) else NULL, | 273 massbank = if (content.type == BIODB.TXT) paste0((if (is.na(base.url)) BIODB.MASSBANK.EU.WS.URL else base.url), 'getRecordInfo?ids=', paste(accession, collapse = ',')) else NULL, |
| 164 mirbase = if (content.type %in% c(RBIODB.ANY, RBIODB.HTML)) paste0('http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=', accession) else NULL, | 274 mirbase = if (content.type == BIODB.HTML) paste0('http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=', accession) else NULL, |
| 165 pubchem = { | 275 pubchemcomp = switch(content.type, |
| 166 accession <- gsub(' ', '', accession, perl = TRUE) | 276 xml = paste0('https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/', paste(accession, collapse = ','), '/XML'), |
| 167 accession <- gsub('^CID', '', accession, perl = TRUE) | |
| 168 switch(content.type, | |
| 169 xml = paste0('http://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/', accession, '/XML/?response_type=save&response_basename=CID_', accession), | |
| 170 html = paste0('http://pubchem.ncbi.nlm.nih.gov/compound/', accession), | 277 html = paste0('http://pubchem.ncbi.nlm.nih.gov/compound/', accession), |
| 171 NULL) | 278 NULL), |
| 172 }, | 279 pubchemsub = switch(content.type, |
| 173 ncbigene = if (content.type %in% c(RBIODB.ANY, RBIODB.XML)) paste0('http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=', accession, '&rettype=xml&retmode=text') else NULL, | 280 xml = paste0('https://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/sid/', paste(accession, collapse = ','), '/XML'), |
| 174 ncbiccds = if (content.type %in% c(RBIODB.ANY, RBIODB.HTML)) paste0('https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&GO=MainBrowse&DATA=', accession), | 281 html = paste0('http://pubchem.ncbi.nlm.nih.gov/substance/', accession), |
| 175 uniprot = if (content.type %in% c(RBIODB.ANY, RBIODB.XML)) paste0('http://www.uniprot.org/uniprot/', accession, '.xml'), | 282 NULL), |
| 283 ncbigene = if (content.type == BIODB.XML) paste0('https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=', accession, '&rettype=xml&retmode=text') else NULL, | |
| 284 ncbiccds = if (content.type == BIODB.HTML) paste0('https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&GO=MainBrowse&DATA=', accession), | |
| 285 uniprot = if (content.type == BIODB.XML) paste0('http://www.uniprot.org/uniprot/', accession, '.xml'), | |
| 286 peakforest = switch(content.type, | |
| 287 html= paste0('https://peakforest.org/home?PFs=',accession), | |
| 288 json= paste0('https://peakforest-alpha.inra.fr/rest/spectra/lcms/ids/',paste(accession,sep=','),'?token=',token), | |
| 289 | |
| 176 NULL | 290 NULL |
| 177 ) | 291 ) |
| 178 | 292 ) |
| 179 return(url) | 293 return(url) |
| 180 } | 294 } |
| 295 | |
| 296 get.entry.url <- function(class, accession, content.type = BIODB.HTML, max.length = 0, base.url = NA_character_, token = NA_character_) { | |
| 297 | |
| 298 if (length(accession) == 0) | |
| 299 return(NULL) | |
| 300 | |
| 301 full.url <- .do.get.entry.url(class, accession, content.type = content.type, base.url = base.url, token = token) | |
| 302 if (max.length == 0 || nchar(full.url) <= max.length) | |
| 303 return(if (max.length == 0) full.url else list(url = full.url, n = length(accession))) | |
| 304 | |
| 305 # Find max size URL | |
| 306 a <- 1 | |
| 307 b <- length(accession) | |
| 308 while (a < b) { | |
| 309 m <- as.integer((a + b) / 2) | |
| 310 url <- .do.get.entry.url(class, accession[1:m], content.type = content.type, base.url = base.url, token = token) | |
| 311 if (nchar(url) <= max.length && m != a) | |
| 312 a <- m | |
| 313 else | |
| 314 b <- m | |
| 315 } | |
| 316 url <- .do.get.entry.url(class, accession[1:a], content.type = content.type, base.url = base.url, token = token) | |
| 317 | |
| 318 return(list( url = url, n = a)) | |
| 319 } | |
| 320 | |
| 321 ################# | |
| 322 # PRINT MESSAGE # | |
| 323 ################# | |
| 324 | |
| 325 BIODB.DEBUG <- 1 | |
| 326 BIODB.LEVEL.NAMES <- c('DEBUG') | |
| 327 | |
| 328 .print.msg <- function(msg, level = BIODB.DEBUG, class = NA_character_) { | |
| 329 cat(paste0(BIODB.LEVEL.NAMES[[level]], if (is.na(class)) '' else paste0(", ", class), ": ", msg, "\n"), file = stderr()) | |
| 330 } | |
| 331 | |
| 332 ##################### | |
| 333 # BIODB GET ENV VAR # | |
| 334 ##################### | |
| 335 | |
| 336 .biodb.get.env.var <- function(v) { | |
| 337 | |
| 338 # Get all env vars | |
| 339 env <- Sys.getenv() | |
| 340 | |
| 341 # Make env var name | |
| 342 env.var <- paste(c('BIODB', toupper(v)), collapse = '_') | |
| 343 | |
| 344 # Look if this env var exists | |
| 345 if (env.var %in% names(env)) | |
| 346 return(env[[env.var]]) | |
| 347 | |
| 348 return(NA_character_) | |
| 349 } | |
| 181 } | 350 } |
