Mercurial > repos > prog > lcmsmatching
comparison PeakforestEntry.R @ 1:45e985cd8e9e draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
| author | prog |
|---|---|
| date | Tue, 31 Jan 2017 05:27:24 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:3afe41d3e9e7 | 1:45e985cd8e9e |
|---|---|
| 1 ##################### | |
| 2 # CLASS DECLARATION # | |
| 3 ##################### | |
| 4 | |
# TODO (kept from the original file): "Create class PeakforestCompoundEntry".
# NOTE(review): a bare PeakForestCompoundEntry is declared below -- confirm
# whether anything more is still planned for it.

# Entry class for a PeakForest spectrum; adds nothing to BiodbEntry here.
PeakForestSpectrumEntry <- methods::setRefClass(
  Class = "PeakForestSpectrumEntry",
  contains = "BiodbEntry"
)

# Entry class for a PeakForest compound; adds nothing to BiodbEntry here.
PeakForestCompoundEntry <- methods::setRefClass(
  Class = "PeakForestCompoundEntry",
  contains = "BiodbEntry"
)
| 9 | |
| 10 | |
| 11 ########### | |
| 12 # FACTORY # | |
| 13 ########### | |
| 14 | |
| 15 | |
# Factory for PeakForest compound entries.
#
# Args:
#   contents: either a JSON string (parsed here with jsonlite::fromJSON) or
#             JSON content that has already been parsed into R lists. It may
#             describe a collection of compound objects or one single
#             compound object.
#   drop:     when TRUE and exactly one compound was supplied, return the
#             entry itself instead of a one-element list.
#
# Returns:
#   A list with one PeakForestCompoundEntry per compound, or a single entry
#   when `drop` applies.
createPeakforestCompoundFromJSON <- function(contents, drop = FALSE) {

  if (is.character(contents))
    contents <- jsonlite::fromJSON(contents, simplifyDataFrame = FALSE)

  # A lone JSON object parses to a *named* list whose elements are its fields
  # (scalars such as the id), whereas a collection holds one list per
  # compound. Wrap the lone object so the loop below sees one compound
  # instead of walking over its fields.
  is_single_object <- is.list(contents) &&
    !is.null(names(contents)) &&
    !all(vapply(contents, is.list, logical(1)))
  if (is_single_object)
    contents <- list(contents)

  # Entry field name -> key read from the compound's JSON object.
  jsonfields <- list()
  jsonfields[[BIODB.ACCESSION]] <- "id"
  jsonfields[[BIODB.PUBCHEMCOMP.ID]] <- "PubChemCID"
  jsonfields[[BIODB.CHEBI.ID]] <- "ChEBI"
  jsonfields[[BIODB.HMDB.ID]] <- "HMDB"
  jsonfields[[BIODB.KEGG.ID]] <- "KEGG"
  jsonfields[[BIODB.FORMULA]] <- "formula"
  jsonfields[[BIODB.SMILES]] <- "canSmiles"
  jsonfields[[BIODB.AVERAGE.MASS]] <- "averageMass"
  jsonfields[[BIODB.MONOISOTOPIC.MASS]] <- "monoisotopicMass"
  jsonfields[[BIODB.INCHI]] <- "inChI"
  # NOTE(review): "inchiIKey" looks like a typo (perhaps "inChIKey"); left
  # unchanged -- confirm against the PeakForest JSON schema.
  jsonfields[[BIODB.INCHIKEY]] <- "inchiIKey"
  jsonfields[[BIODB.NAME]] <- "mainName"

  entries <- vector("list", length(contents))

  for (i in seq_along(contents)) {

    jsontree <- contents[[i]]
    entry <- PeakForestCompoundEntry$new()

    for (field in names(jsonfields)) {
      tosearch <- jsonfields[[field]]
      # `[[` resolves the key held in `tosearch`; `jsontree$tosearch` would
      # look for a key literally named "tosearch" and always come back NULL.
      entry$setField(field, jsontree[[tosearch]])
    }

    entries[[i]] <- entry
  }

  if (drop && length(contents) == 1)
    entries <- entries[[1]]

  entries
}
| 60 | |
# Factory for PeakForest spectrum entries.
#
# Args:
#   contents: one JSON string holding the whole answer (a collection of
#             spectra or a single spectrum object), or a list/vector whose
#             elements are JSON strings or already-parsed spectrum objects.
#   drop:     when TRUE and exactly one spectrum was supplied, return that
#             entry (or NULL if it was skipped) instead of a one-element list.
#   checkSub: not used by this function; kept so existing calls still work.
#
# Returns:
#   NULL when the single content is an HTML page. Otherwise a list aligned
#   with the spectra in `contents`: a PeakForestSpectrumEntry per spectrum,
#   and NULL at the position of any element that is an HTML page or the
#   string "null".
createPeakforestSpectraFromJSON <- function(contents,
                                            drop = FALSE,
                                            checkSub = TRUE) {

  # A single string is taken to be the complete answer. The is.character()
  # guard keeps startsWith() from failing on an already-parsed element.
  if (length(contents) == 1 && is.character(contents[[1]])) {
    if (startsWith(contents[[1]], "<html>"))
      return(NULL)

    contents <- jsonlite::fromJSON(contents[[1]], simplifyDataFrame = FALSE)

    # Nothing to iterate over (JSON null or empty collection): return the
    # single empty slot this function has always returned in that case.
    if (length(contents) == 0)
      return(vector("list", 1))

    # A lone spectrum object parses to a named list of its fields; wrap it so
    # the loop below handles one spectrum rather than walking over fields.
    is_single_object <- is.list(contents) &&
      !is.null(names(contents)) &&
      !all(vapply(contents, is.list, logical(1)))
    if (is_single_object)
      contents <- list(contents)
  }

  # Entry field name -> top-level key of the spectrum's JSON object.
  jsonfields <- character()
  jsonfields[[BIODB.ACCESSION]] <- "id"
  jsonfields[[BIODB.MSMODE]] <- "polarity"

  cnames <- c(BIODB.PEAK.MZ, BIODB.PEAK.RELATIVE.INTENSITY,
              BIODB.PEAK.FORMULA, BIODB.PEAK.MZTHEO, BIODB.PEAK.ERROR.PPM)

  # Reads one key from every peak. A peak lacking the key contributes NA so
  # that all columns keep one value per peak.
  peak_column <- function(peaks, key, convert, missing_value) {
    vapply(peaks, function(p) {
      v <- p[[key]]
      if (is.null(v)) missing_value else convert(v)
    }, missing_value, USE.NAMES = FALSE)
  }

  # Sized after parsing so that positions match the spectra actually looped
  # over; skipped spectra simply keep their NULL slot.
  entries <- vector("list", length(contents))

  for (i in seq_along(contents)) {

    content <- contents[[i]]
    if (is.character(content)) {
      # Assigning NULL with `[[<-` would delete the slot and shift every
      # later entry, so the slot is just left untouched.
      if (startsWith(content, "<html>") || content == "null")
        next
      jsontree <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
    } else {
      jsontree <- content
    }

    entry <- PeakForestSpectrumEntry$new()

    # Mass analyzer.
    device <- jsontree$analyzerMassSpectrometerDevice
    entry$setField(BIODB.MSDEV, device$instrumentName)
    entry$setField(BIODB.MSDEVTYPE, device$ionAnalyzerType)

    for (field in names(jsonfields)) {
      tosearch <- jsonfields[[field]]
      # `[[` resolves the key held in `tosearch`; `jsontree$tosearch` would
      # look for a key literally named "tosearch" and always come back NULL.
      entry$setField(field, jsontree[[tosearch]])
    }

    ######################
    # TREATING THE PEAKS #
    ######################

    json_peaks <- jsontree$peaks
    entry$setField(BIODB.NB.PEAKS, length(json_peaks))

    # Always a data.frame with one typed column per peak attribute, whether
    # or not the spectrum has peaks. Blanks are stripped from the formulae.
    formulae <- peak_column(json_peaks, "composition",
                            as.character, NA_character_)
    peaks <- data.frame(
      peak_column(json_peaks, "mz", as.double, NA_real_),
      peak_column(json_peaks, "ri", as.integer, NA_integer_),
      gsub(" ", "", trimws(formulae)),
      peak_column(json_peaks, "theoricalMass", as.double, NA_real_),
      peak_column(json_peaks, "deltaPPM", as.double, NA_real_),
      stringsAsFactors = FALSE
    )
    colnames(peaks) <- cnames

    entry$setField(BIODB.PEAKS, peaks)

    ##################################
    # TREATING THE LIST OF COMPOUNDS #
    ##################################

    json_compounds <- jsontree$listOfCompounds
    entry$setField(BIODB.NB.COMPOUNDS, length(json_compounds))

    # One compound entry per element. An already-parsed compound object is
    # wrapped in a list so the compound factory treats it as one compound
    # rather than iterating over its fields; strings are passed through for
    # the factory to parse.
    compounds <- lapply(json_compounds, function(cpd) {
      if (!is.character(cpd))
        cpd <- list(cpd)
      createPeakforestCompoundFromJSON(cpd, drop = TRUE)
    })

    entry$setField(BIODB.COMPOUNDS, compounds)

    entries[[i]] <- entry
  }

  if (drop && length(contents) == 1)
    entries <- entries[[1]]

  entries
}
| 162 | |
| 163 | |
# TODO: clean this up (largely duplicates createPeakforestSpectraFromJSON).

# Factory for "reduced" PeakForest spectrum entries: only the accession and
# the peak table are filled in.
#
# Args:
#   contents: one JSON string holding the whole answer (a collection of
#             spectra or a single spectrum object), or a list/vector whose
#             elements are JSON strings or already-parsed spectrum objects.
#   drop:     when TRUE and exactly one spectrum was supplied, return that
#             entry (or NULL if it was skipped) instead of a one-element list.
#   checkSub: not used by this function; kept so existing calls still work.
#
# Returns:
#   NULL when the single content is an HTML page. Otherwise a list aligned
#   with the spectra in `contents`: a PeakForestSpectrumEntry per spectrum,
#   and NULL at the position of any element that is an HTML page or the
#   string "null".
createReducedSpectraFromJSON <- function(contents,
                                         drop = FALSE,
                                         checkSub = TRUE) {

  # A single string is taken to be the complete answer. The is.character()
  # guard keeps startsWith() from failing on an already-parsed element.
  if (length(contents) == 1 && is.character(contents[[1]])) {
    if (startsWith(contents[[1]], "<html>"))
      return(NULL)

    contents <- jsonlite::fromJSON(contents[[1]], simplifyDataFrame = FALSE)

    # Nothing to iterate over (JSON null or empty collection): return the
    # single empty slot this function has always returned in that case.
    if (length(contents) == 0)
      return(vector("list", 1))

    # A lone spectrum object parses to a named list of its fields; wrap it so
    # the loop below handles one spectrum rather than walking over fields.
    is_single_object <- is.list(contents) &&
      !is.null(names(contents)) &&
      !all(vapply(contents, is.list, logical(1)))
    if (is_single_object)
      contents <- list(contents)
  }

  cnames <- c(
    BIODB.PEAK.MZ,
    BIODB.PEAK.RELATIVE.INTENSITY,
    BIODB.PEAK.FORMULA,
    BIODB.PEAK.MZTHEO,
    BIODB.PEAK.ERROR.PPM
  )

  # Reads one key from every peak. A peak lacking the key contributes NA so
  # that all columns keep one value per peak.
  peak_column <- function(peaks, key, convert, missing_value) {
    vapply(peaks, function(p) {
      v <- p[[key]]
      if (is.null(v)) missing_value else convert(v)
    }, missing_value, USE.NAMES = FALSE)
  }

  # Sized after parsing so that positions match the spectra actually looped
  # over; skipped spectra simply keep their NULL slot.
  entries <- vector("list", length(contents))

  for (i in seq_along(contents)) {
    content <- contents[[i]]
    if (is.character(content)) {
      # Assigning NULL with `[[<-` would delete the slot and shift every
      # later entry, so the slot is just left untouched.
      if (startsWith(content, "<html>") || content == "null")
        next
      jsontree <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
    } else {
      jsontree <- content
    }

    entry <- PeakForestSpectrumEntry$new()
    entry$setField(BIODB.ACCESSION, jsontree$id)

    ######################
    # TREATING THE PEAKS #
    ######################

    json_peaks <- jsontree$peaks
    entry$setField(BIODB.NB.PEAKS, length(json_peaks))

    # Always a data.frame with one typed column per peak attribute, whether
    # or not the spectrum has peaks. Blanks are stripped from the formulae.
    formulae <- peak_column(json_peaks, "composition",
                            as.character, NA_character_)
    peaks <- data.frame(
      peak_column(json_peaks, "mz", as.double, NA_real_),
      peak_column(json_peaks, "ri", as.integer, NA_integer_),
      gsub(" ", "", trimws(formulae)),
      peak_column(json_peaks, "theoricalMass", as.double, NA_real_),
      peak_column(json_peaks, "deltaPPM", as.double, NA_real_),
      stringsAsFactors = FALSE
    )
    colnames(peaks) <- cnames

    entry$setField(BIODB.PEAKS, peaks)

    entries[[i]] <- entry
  }

  if (drop && length(contents) == 1)
    entries <- entries[[1]]

  entries
}
