comparison MsPeakForestDb.R @ 4:1ba222315fd5 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 947b8707b06176a4801de64a71c8771617311ffb
author prog
date Thu, 16 Mar 2017 05:05:55 -0400
parents abfba8eb1c8d
children 18254e8d1b72
comparison
equal deleted inserted replaced
3:abfba8eb1c8d 4:1ba222315fd5
13 ############### 13 ###############
14 # CONSTRUCTOR # 14 # CONSTRUCTOR #
15 ############### 15 ###############
16 16
17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) { 17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) {
18
19 callSuper(...)
18 20
19 # Check URL 21 # Check URL
20 if (is.null(url) || is.na(url)) 22 if (is.null(url) || is.na(url))
21 stop("No URL defined for new MsPeakForestDb instance.") 23 stop("No URL defined for new MsPeakForestDb instance.")
22 24
24 url <- substring(url, nchar(url) - 1) 26 url <- substring(url, nchar(url) - 1)
25 .url <<- url 27 .url <<- url
26 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent) 28 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent)
27 .self$.url.scheduler$setVerbose(1L) 29 .self$.url.scheduler$setVerbose(1L)
28 .token <<- token 30 .token <<- token
29 31 .rt.unit <<- MSDB.RTUNIT.MIN
30 callSuper(...)
31 }) 32 })
32 33
33 ########### 34 ###########
34 # GET URL # 35 # GET URL #
35 ########### 36 ###########
51 # Get URL 52 # Get URL
52 content <- .self$.url.scheduler$getUrl(url = url, params = params) 53 content <- .self$.url.scheduler$getUrl(url = url, params = params)
53 54
54 if (ret.type == 'json') { 55 if (ret.type == 'json') {
55 56
56 library(RJSONIO) 57 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
57 58
58 res <- fromJSON(content, nullValue = NA) 59 if (is.null(res)) {
59
60 if (class(res) == 'list' && 'success' %in% names(res) && res$success == FALSE) {
61 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') 60 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '')
62 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\".")) 61 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\"."))
63 } 62 }
64 } else { 63 } else {
65 if (ret.type == 'integer') { 64 if (ret.type == 'integer') {
66 if (grepl('^[0-9]+$', content, perl = TRUE)) 65 if (grepl('^[0-9]+$', content, perl = TRUE))
67 res <- as.integer(content) 66 res <- as.integer(content)
68 else { 67 else {
69 library(RJSONIO) 68 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
70 res <- fromJSON(content, nullValue = NA)
71 } 69 }
72 } 70 }
73 } 71 }
74 72
75 return(res) 73 return(res)
139 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params) 137 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params)
140 if (class(spectra) == 'list' && length(spectra) > 0) { 138 if (class(spectra) == 'list' && length(spectra) > 0) {
141 for (s in spectra) 139 for (s in spectra)
142 if (is.na(col) || s$liquidChromatography$columnCode %in% col) { 140 if (is.na(col) || s$liquidChromatography$columnCode %in% col) {
143 ret.time <- (s$RTmin + s$RTmax) / 2 141 ret.time <- (s$RTmin + s$RTmax) / 2
142 ret.time <- ret.time * 60 # Retention time are in minutes in Peakforest, but we want them in seconds
144 c <- s$liquidChromatography$columnCode 143 c <- s$liquidChromatography$columnCode
145 if (c %in% names(rt)) { 144 if (c %in% names(rt)) {
146 if ( ! ret.time %in% rt[[c]]) 145 if ( ! ret.time %in% rt[[c]])
147 rt[[c]] <- c(rt[[c]], ret.time) 146 rt[[c]] <- c(rt[[c]], ret.time)
148 } else 147 } else
260 259
261 # Build result data frame 260 # Build result data frame
262 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character()) 261 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character())
263 for (x in spectra) { 262 for (x in spectra) {
264 if ('source' %in% names(x) && is.list(x$source)) 263 if ('source' %in% names(x) && is.list(x$source))
265 mztheo <- if ('theoricalMass' %in% names(x)) as.numeric(x$theoricalMass) else NA_real_ 264 mztheo <- if ('mz' %in% names(x) && ! is.null(x$mz)) as.numeric(x$mz) else NA_real_
266 comp <- if ('composition' %in% names(x)) x$composition else NA_character_ 265 comp <- if ('composition' %in% names(x) && ! is.null(x$composition)) x$composition else NA_character_
267 attr <- if ('attribution' %in% names(x)) x$attribution else NA_character_ 266 attr <- if ('attribution' %in% names(x) && ! is.null(x$attribution)) x$attribution else NA_character_
268 if ('listOfCompounds' %in% names(x$source)) { 267 if ('listOfCompounds' %in% names(x$source)) {
269 molids <- vapply(x$source$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = '') 268 molids <- vapply(x$source$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '')
270 molnames <- vapply(x$source$listOfCompounds, function(c) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP), FUN.VALUE = '') 269 molnames <- vapply(x$source$listOfCompounds, function(c) if ('names' %in% names(c) && ! is.null(c$names)) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP) else NA_character_, FUN.VALUE = '')
271 mass <- vapply(x$source$listOfCompounds, function(c) as.character(c$averageMass), FUN.VALUE = '') 270 mass <- vapply(x$source$listOfCompounds, function(c) if ( ! 'averageMass' %in% names(c) || is.null(c$averageMass)) NA_real_ else as.double(c$averageMass), FUN.VALUE = 0.0)
272 inchi <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChI), FUN.VALUE = '') 271 inchi <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChI' %in% names(c) || is.null(c$inChI)) NA_character_ else as.character(c$inChI), FUN.VALUE = '')
273 inchikey <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChIKey), FUN.VALUE = '') 272 inchikey <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChIKey' %in% names(c) || is.null(c$inChIKey)) NA_character_ else as.character(c$inChIKey), FUN.VALUE = '')
274 chebi <- vapply(x$source$listOfCompounds, function(c) as.character(c$ChEBI), FUN.VALUE = '') 273 chebi <- vapply(x$source$listOfCompounds, function(c) if ('ChEBI' %in% names(c) && ! is.null(c$ChEBI)) as.character(c$ChEBI) else NA_character_, FUN.VALUE = '')
275 chebi[chebi == 'CHEBI:null'] <- NA_character_ 274 chebi[chebi == 'CHEBI:null'] <- NA_character_
276 hmdb <- vapply(x$source$listOfCompounds, function(c) as.character(c$HMDB), FUN.VALUE = '') 275 hmdb <- vapply(x$source$listOfCompounds, function(c) if ('HMDB' %in% names(c) && ! is.null(c$HMDB)) as.character(c$HMDB) else NA_character_, FUN.VALUE = '')
277 hmdb[hmdb == 'HMDBnull'] <- NA_character_ 276 hmdb[hmdb == 'HMDBnull'] <- NA_character_
278 kegg <- vapply(x$source$listOfCompounds, function(c) as.character(c$KEGG), FUN.VALUE = '') 277 kegg <- vapply(x$source$listOfCompounds, function(c) if ( ! 'KEGG' %in% names(c) || is.null(c$KEGG)) NA_character_ else as.character(c$KEGG), FUN.VALUE = '')
279 pubchem <- vapply(x$source$listOfCompounds, function(c) as.character(c$PubChemCID), FUN.VALUE = '') 278 pubchem <- vapply(x$source$listOfCompounds, function(c) if ( ! 'PubChemCID' %in% names(c) || is.null(c$PubChemCID)) NA_character_ else as.character(c$PubChemCID), FUN.VALUE = '')
280 if (length(molids) > 0 && length(molids) == length(molnames)) 279 if (length(molids) > 0 && length(molids) == length(molnames))
281 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE)) 280 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE))
282 } 281 }
283 } 282 }
284 283
290 if (nrow(results) > 0) { 289 if (nrow(results) > 0) {
291 # Build URL for rt search 290 # Build URL for rt search
292 url <- paste0('spectra/lcms/range-rt-min/', rt.low, '/', rt.high) 291 url <- paste0('spectra/lcms/range-rt-min/', rt.low, '/', rt.high)
293 params <- NULL 292 params <- NULL
294 if ( ! is.null(col)) 293 if ( ! is.null(col))
295 params <- c(columns = paste(col, collapse = ',')) 294 params <- c(columns = paste(col[['id']], collapse = ','))
296 295
297 # Run query 296 # Run query
298 rtspectra <- .self$.get.url(url = url, params = params) 297 rtspectra <- .self$.get.url(url = url, params = params)
299 298
299
300 # Get compound/molecule IDs 300 # Get compound/molecule IDs
301 for (x in spectra) 301 for (x in rtspectra)
302 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = vapply(x$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = ''), 302 if (all(c('listOfCompounds', 'liquidChromatography') %in% names(x))) {
303 MSDB.TAG.COL = as.character(x$liquidChromatography$columnCode), 303 molids <- vapply(x$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '')
304 MSDB.TAG.COLRT = (as.numeric(x$RTmin) + as.numeric(x$RTmax)) / 2, 304 if (length(molids) > 0) {
305 stringsAsFactors = FALSE)) 305 col <- if ('columnCode' %in% names(x$liquidChromatography) && ! is.null(x$liquidChromatography$columnCode)) as.character(x$liquidChromatography$columnCode) else NA_character_
306 rtmin <- if ('RTmin' %in% names(x) && ! is.null(x$RTmin)) as.double(x$RTmin) else NA_real_
307 rtmax <- if ('RTmax' %in% names(x) && ! is.null(x$RTmax)) as.double(x$RTmax) else NA_real_
308 colrt <- (rtmin + rtmax) / 2
309 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = molids,
310 MSDB.TAG.COL = col,
311 MSDB.TAG.COLRT = colrt,
312 stringsAsFactors = FALSE))
313 }
314 }
306 } 315 }
307 316
308 # Add retention times and column info 317 # Add retention times and column info
309 results <- merge(results, rt.res) 318 results <- merge(results, rt.res)
310 } 319 }