Mercurial > repos > prog > lcmsmatching
comparison PubchemConn.R @ 1:45e985cd8e9e draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
| author | prog |
|---|---|
| date | Tue, 31 Jan 2017 05:27:24 -0500 |
| parents | 3afe41d3e9e7 |
| children |
comparison
equal
deleted
inserted
replaced
| 0:3afe41d3e9e7 | 1:45e985cd8e9e |
|---|---|
| 1 if ( ! exists('get.pubchem.compound.url')) { # Do not load again if already loaded | 1 ##################### |
| 2 # CLASS DECLARATION # | |
| 3 ##################### | |
| 2 | 4 |
| 3 source('BiodbConn.R') | 5 PubchemConn <- methods::setRefClass("PubchemConn", contains = "RemotedbConn", fields = list( .db = "character" )) |
| 4 source('PubchemCompound.R') | |
| 5 | |
| 6 ##################### | |
| 7 # CLASS DECLARATION # | |
| 8 ##################### | |
| 9 | |
| 10 PubchemConn <- setRefClass("PubchemConn", contains = "BiodbConn") | |
| 11 | 6 |
| 12 ########################## | 7 ############### |
| 13 # GET ENTRY CONTENT TYPE # | 8 # CONSTRUCTOR # |
| 14 ########################## | 9 ############### |
| 15 | 10 |
| 16 PubchemConn$methods( getEntryContentType = function(type) { | 11 PubchemConn$methods( initialize = function(db = BIODB.PUBCHEMCOMP, ...) { |
| 17 return(RBIODB.XML) | 12 .db <<- db |
| 18 }) | 13 callSuper(...) |
| 14 }) | |
| 19 | 15 |
| 20 ##################### | 16 ########################## |
| 21 # GET ENTRY CONTENT # | 17 # GET ENTRY CONTENT TYPE # |
| 22 ##################### | 18 ########################## |
| 23 | |
| 24 PubchemConn$methods( getEntryContent = function(type, id) { | |
| 25 | 19 |
| 26 if (type == RBIODB.COMPOUND) { | 20 PubchemConn$methods( getEntryContentType = function() { |
| 21 return(BIODB.XML) | |
| 22 }) | |
| 27 | 23 |
| 28 # Initialize return values | 24 ##################### |
| 29 content <- rep(NA_character_, length(id)) | 25 # GET ENTRY CONTENT # |
| 26 ##################### | |
| 30 | 27 |
| 31 # Request | 28 PubchemConn$methods( getEntryContent = function(ids) { |
| 32 content <- vapply(id, function(x) .self$.scheduler$getUrl(get.entry.url(RBIODB.PUBCHEM, x, content.type = RBIODB.XML)), FUN.VALUE = '') | |
| 33 | 29 |
| 34 return(content) | 30 # Debug |
| 31 .self$.print.debug.msg(paste0("Get entry content(s) for ", length(ids)," id(s)...")) | |
| 32 | |
| 33 URL.MAX.LENGTH <- 2083 | |
| 34 | |
| 35 # Initialize return values | |
| 36 content <- rep(NA_character_, length(ids)) | |
| 37 | |
| 38 # Loop over all ids | |
| 39 n <- 0 | |
| 40 while (n < length(ids)) { | |
| 41 | |
| 42 # Get list of accession ids to retrieve | |
| 43 accessions <- ids[(n + 1):length(ids)] | |
| 44 | |
| 45 # Create URL request | |
| 46 x <- get.entry.url(class = .self$.db, accession = accessions, content.type = BIODB.XML, max.length = URL.MAX.LENGTH) | |
| 47 | |
| 48 # Debug | |
| 49 .self$.print.debug.msg(paste0("Send URL request for ", x$n," id(s)...")) | |
| 50 | |
| 51 # Send request | |
| 52 xmlstr <- .self$.get.url(x$url) | |
| 53 | |
| 54 # Increase number of entries retrieved | |
| 55 n <- n + x$n | |
| 56 | |
| 57 # TODO When one of the IDs is wrong, no content is returned. Only a single error is returned, naming the first faulty ID: | |
| 58 # <Fault xmlns="http://pubchem.ncbi.nlm.nih.gov/pug_rest" xmlns:xs="http://www.w3.org/2001/XMLSchema-instance" xs:schemaLocation="http://pubchem.ncbi.nlm.nih.gov/pug_rest https://pubchem.ncbi.nlm.nih.gov/pug_rest/pug_rest.xsd"> | |
| 59 # <Code>PUGREST.NotFound</Code> | |
| 60 # <Message>Record not found</Message> | |
| 61 # <Details>No record data for CID 1246452553</Details> | |
| 62 # </Fault> | |
| 63 | |
| 64 # Parse XML and get included XML | |
| 65 if ( ! is.na(xmlstr)) { | |
| 66 xml <- xmlInternalTreeParse(xmlstr, asText = TRUE) | |
| 67 ns <- c(pcns = "http://www.ncbi.nlm.nih.gov") | |
| 68 returned.ids <- xpathSApply(xml, paste0("//pcns:", if (.self$.db == BIODB.PUBCHEMCOMP) 'PC-CompoundType_id_cid' else 'PC-ID_id'), xmlValue, namespaces = ns) | |
| 69 content[match(returned.ids, ids)] <- vapply(getNodeSet(xml, paste0("//pcns:", if (.self$.db == BIODB.PUBCHEMCOMP) "PC-Compound" else 'PC-Substance'), namespaces = ns), saveXML, FUN.VALUE = '') | |
| 35 } | 70 } |
| 36 | 71 |
| 37 return(NULL) | 72 # Debug |
| 38 }) | 73 .self$.print.debug.msg(paste0("Now ", length(ids) - n," id(s) left to be retrieved...")) |
| 39 | 74 } |
| 40 ################ | |
| 41 # CREATE ENTRY # | |
| 42 ################ | |
| 43 | |
| 44 PubchemConn$methods( createEntry = function(type, content, drop = TRUE) { | |
| 45 return(if (type == RBIODB.COMPOUND) createPubchemCompoundFromXml(content, drop = drop) else NULL) | |
| 46 }) | |
| 47 | 75 |
| 48 ######################### | 76 return(content) |
| 49 # GET PUBCHEM IMAGE URL # | 77 }) |
| 50 ######################### | |
| 51 | |
| 52 get.pubchem.image.url <- function(id) { | |
| 53 | |
| 54 url <- paste0('http://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?cid=', id, '&t=l') | |
| 55 | 78 |
| 56 return(url) | 79 ################ |
| 57 } | 80 # CREATE ENTRY # |
| 58 | 81 ################ |
| 59 } # end of load safe guard | 82 |
| 83 PubchemConn$methods( createEntry = function(content, drop = TRUE) { | |
| 84 return(if (.self$.db == BIODB.PUBCHEMCOMP) createPubchemEntryFromXml(content, drop = drop) else createPubchemSubstanceFromXml(content, drop = drop)) | |
| 85 }) | |
| 86 | |
| 87 ######################### | |
| 88 # GET PUBCHEM IMAGE URL # | |
| 89 ######################### | |
| 90 | |
| 91 get.pubchem.image.url <- function(id, db = BIODB.PUBCHEMCOMP) { | |
| 92 | |
| 93 url <- paste0('http://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?', (if (db == BIODB.PUBCHEMCOMP) 'cid' else 'sid'), '=', id, '&t=l') | |
| 94 | |
| 95 return(url) | |
| 96 } |
