Mercurial > repos > prog > lcmsmatching
comparison MsPeakForestDb.R @ 4:1ba222315fd5 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 947b8707b06176a4801de64a71c8771617311ffb
| author | prog |
|---|---|
| date | Thu, 16 Mar 2017 05:05:55 -0400 |
| parents | abfba8eb1c8d |
| children | 18254e8d1b72 |
comparison
equal
deleted
inserted
replaced
| 3:abfba8eb1c8d | 4:1ba222315fd5 |
|---|---|
| 13 ############### | 13 ############### |
| 14 # CONSTRUCTOR # | 14 # CONSTRUCTOR # |
| 15 ############### | 15 ############### |
| 16 | 16 |
| 17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) { | 17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) { |
| 18 | |
| 19 callSuper(...) | |
| 18 | 20 |
| 19 # Check URL | 21 # Check URL |
| 20 if (is.null(url) || is.na(url)) | 22 if (is.null(url) || is.na(url)) |
| 21 stop("No URL defined for new MsPeakForestDb instance.") | 23 stop("No URL defined for new MsPeakForestDb instance.") |
| 22 | 24 |
| 24 url <- substring(url, nchar(url) - 1) | 26 url <- substring(url, nchar(url) - 1) |
| 25 .url <<- url | 27 .url <<- url |
| 26 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent) | 28 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent) |
| 27 .self$.url.scheduler$setVerbose(1L) | 29 .self$.url.scheduler$setVerbose(1L) |
| 28 .token <<- token | 30 .token <<- token |
| 29 | 31 .rt.unit <<- MSDB.RTUNIT.MIN |
| 30 callSuper(...) | |
| 31 }) | 32 }) |
| 32 | 33 |
| 33 ########### | 34 ########### |
| 34 # GET URL # | 35 # GET URL # |
| 35 ########### | 36 ########### |
| 51 # Get URL | 52 # Get URL |
| 52 content <- .self$.url.scheduler$getUrl(url = url, params = params) | 53 content <- .self$.url.scheduler$getUrl(url = url, params = params) |
| 53 | 54 |
| 54 if (ret.type == 'json') { | 55 if (ret.type == 'json') { |
| 55 | 56 |
| 56 library(RJSONIO) | 57 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) |
| 57 | 58 |
| 58 res <- fromJSON(content, nullValue = NA) | 59 if (is.null(res)) { |
| 59 | |
| 60 if (class(res) == 'list' && 'success' %in% names(res) && res$success == FALSE) { | |
| 61 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') | 60 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') |
| 62 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\".")) | 61 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\".")) |
| 63 } | 62 } |
| 64 } else { | 63 } else { |
| 65 if (ret.type == 'integer') { | 64 if (ret.type == 'integer') { |
| 66 if (grepl('^[0-9]+$', content, perl = TRUE)) | 65 if (grepl('^[0-9]+$', content, perl = TRUE)) |
| 67 res <- as.integer(content) | 66 res <- as.integer(content) |
| 68 else { | 67 else { |
| 69 library(RJSONIO) | 68 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) |
| 70 res <- fromJSON(content, nullValue = NA) | |
| 71 } | 69 } |
| 72 } | 70 } |
| 73 } | 71 } |
| 74 | 72 |
| 75 return(res) | 73 return(res) |
| 139 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params) | 137 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params) |
| 140 if (class(spectra) == 'list' && length(spectra) > 0) { | 138 if (class(spectra) == 'list' && length(spectra) > 0) { |
| 141 for (s in spectra) | 139 for (s in spectra) |
| 142 if (is.na(col) || s$liquidChromatography$columnCode %in% col) { | 140 if (is.na(col) || s$liquidChromatography$columnCode %in% col) { |
| 143 ret.time <- (s$RTmin + s$RTmax) / 2 | 141 ret.time <- (s$RTmin + s$RTmax) / 2 |
| 142 ret.time <- ret.time * 60 # Retention time are in minutes in Peakforest, but we want them in seconds | |
| 144 c <- s$liquidChromatography$columnCode | 143 c <- s$liquidChromatography$columnCode |
| 145 if (c %in% names(rt)) { | 144 if (c %in% names(rt)) { |
| 146 if ( ! ret.time %in% rt[[c]]) | 145 if ( ! ret.time %in% rt[[c]]) |
| 147 rt[[c]] <- c(rt[[c]], ret.time) | 146 rt[[c]] <- c(rt[[c]], ret.time) |
| 148 } else | 147 } else |
| 260 | 259 |
| 261 # Build result data frame | 260 # Build result data frame |
| 262 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character()) | 261 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character()) |
| 263 for (x in spectra) { | 262 for (x in spectra) { |
| 264 if ('source' %in% names(x) && is.list(x$source)) | 263 if ('source' %in% names(x) && is.list(x$source)) |
| 265 mztheo <- if ('theoricalMass' %in% names(x)) as.numeric(x$theoricalMass) else NA_real_ | 264 mztheo <- if ('mz' %in% names(x) && ! is.null(x$mz)) as.numeric(x$mz) else NA_real_ |
| 266 comp <- if ('composition' %in% names(x)) x$composition else NA_character_ | 265 comp <- if ('composition' %in% names(x) && ! is.null(x$composition)) x$composition else NA_character_ |
| 267 attr <- if ('attribution' %in% names(x)) x$attribution else NA_character_ | 266 attr <- if ('attribution' %in% names(x) && ! is.null(x$attribution)) x$attribution else NA_character_ |
| 268 if ('listOfCompounds' %in% names(x$source)) { | 267 if ('listOfCompounds' %in% names(x$source)) { |
| 269 molids <- vapply(x$source$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = '') | 268 molids <- vapply(x$source$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') |
| 270 molnames <- vapply(x$source$listOfCompounds, function(c) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP), FUN.VALUE = '') | 269 molnames <- vapply(x$source$listOfCompounds, function(c) if ('names' %in% names(c) && ! is.null(c$names)) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP) else NA_character_, FUN.VALUE = '') |
| 271 mass <- vapply(x$source$listOfCompounds, function(c) as.character(c$averageMass), FUN.VALUE = '') | 270 mass <- vapply(x$source$listOfCompounds, function(c) if ( ! 'averageMass' %in% names(c) || is.null(c$averageMass)) NA_real_ else as.double(c$averageMass), FUN.VALUE = 0.0) |
| 272 inchi <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChI), FUN.VALUE = '') | 271 inchi <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChI' %in% names(c) || is.null(c$inChI)) NA_character_ else as.character(c$inChI), FUN.VALUE = '') |
| 273 inchikey <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChIKey), FUN.VALUE = '') | 272 inchikey <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChIKey' %in% names(c) || is.null(c$inChIKey)) NA_character_ else as.character(c$inChIKey), FUN.VALUE = '') |
| 274 chebi <- vapply(x$source$listOfCompounds, function(c) as.character(c$ChEBI), FUN.VALUE = '') | 273 chebi <- vapply(x$source$listOfCompounds, function(c) if ('ChEBI' %in% names(c) && ! is.null(c$ChEBI)) as.character(c$ChEBI) else NA_character_, FUN.VALUE = '') |
| 275 chebi[chebi == 'CHEBI:null'] <- NA_character_ | 274 chebi[chebi == 'CHEBI:null'] <- NA_character_ |
| 276 hmdb <- vapply(x$source$listOfCompounds, function(c) as.character(c$HMDB), FUN.VALUE = '') | 275 hmdb <- vapply(x$source$listOfCompounds, function(c) if ('HMDB' %in% names(c) && ! is.null(c$HMDB)) as.character(c$HMDB) else NA_character_, FUN.VALUE = '') |
| 277 hmdb[hmdb == 'HMDBnull'] <- NA_character_ | 276 hmdb[hmdb == 'HMDBnull'] <- NA_character_ |
| 278 kegg <- vapply(x$source$listOfCompounds, function(c) as.character(c$KEGG), FUN.VALUE = '') | 277 kegg <- vapply(x$source$listOfCompounds, function(c) if ( ! 'KEGG' %in% names(c) || is.null(c$KEGG)) NA_character_ else as.character(c$KEGG), FUN.VALUE = '') |
| 279 pubchem <- vapply(x$source$listOfCompounds, function(c) as.character(c$PubChemCID), FUN.VALUE = '') | 278 pubchem <- vapply(x$source$listOfCompounds, function(c) if ( ! 'PubChemCID' %in% names(c) || is.null(c$PubChemCID)) NA_character_ else as.character(c$PubChemCID), FUN.VALUE = '') |
| 280 if (length(molids) > 0 && length(molids) == length(molnames)) | 279 if (length(molids) > 0 && length(molids) == length(molnames)) |
| 281 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE)) | 280 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE)) |
| 282 } | 281 } |
| 283 } | 282 } |
| 284 | 283 |
| 290 if (nrow(results) > 0) { | 289 if (nrow(results) > 0) { |
| 291 # Build URL for rt search | 290 # Build URL for rt search |
| 292 url <- paste0('spectra/lcms/range-rt-min/', rt.low, '/', rt.high) | 291 url <- paste0('spectra/lcms/range-rt-min/', rt.low, '/', rt.high) |
| 293 params <- NULL | 292 params <- NULL |
| 294 if ( ! is.null(col)) | 293 if ( ! is.null(col)) |
| 295 params <- c(columns = paste(col, collapse = ',')) | 294 params <- c(columns = paste(col[['id']], collapse = ',')) |
| 296 | 295 |
| 297 # Run query | 296 # Run query |
| 298 rtspectra <- .self$.get.url(url = url, params = params) | 297 rtspectra <- .self$.get.url(url = url, params = params) |
| 299 | 298 |
| 299 | |
| 300 # Get compound/molecule IDs | 300 # Get compound/molecule IDs |
| 301 for (x in spectra) | 301 for (x in rtspectra) |
| 302 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = vapply(x$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = ''), | 302 if (all(c('listOfCompounds', 'liquidChromatography') %in% names(x))) { |
| 303 MSDB.TAG.COL = as.character(x$liquidChromatography$columnCode), | 303 molids <- vapply(x$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') |
| 304 MSDB.TAG.COLRT = (as.numeric(x$RTmin) + as.numeric(x$RTmax)) / 2, | 304 if (length(molids) > 0) { |
| 305 stringsAsFactors = FALSE)) | 305 col <- if ('columnCode' %in% names(x$liquidChromatography) && ! is.null(x$liquidChromatography$columnCode)) as.character(x$liquidChromatography$columnCode) else NA_character_ |
| 306 rtmin <- if ('RTmin' %in% names(x) && ! is.null(x$RTmin)) as.double(x$RTmin) else NA_real_ | |
| 307 rtmax <- if ('RTmax' %in% names(x) && ! is.null(x$RTmax)) as.double(x$RTmax) else NA_real_ | |
| 308 colrt <- (rtmin + rtmax) / 2 | |
| 309 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = molids, | |
| 310 MSDB.TAG.COL = col, | |
| 311 MSDB.TAG.COLRT = colrt, | |
| 312 stringsAsFactors = FALSE)) | |
| 313 } | |
| 314 } | |
| 306 } | 315 } |
| 307 | 316 |
| 308 # Add retention times and column info | 317 # Add retention times and column info |
| 309 results <- merge(results, rt.res) | 318 results <- merge(results, rt.res) |
| 310 } | 319 } |
