comparison PeakforestEntry.R @ 1:45e985cd8e9e draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
author prog
date Tue, 31 Jan 2017 05:27:24 -0500
parents
children
comparison
equal deleted inserted replaced
0:3afe41d3e9e7 1:45e985cd8e9e
1 #####################
2 # CLASS DECLARATION #
3 #####################
4
5 # TODO Create class PeakforestCompoundEntry -- NOTE(review): a bare PeakForestCompoundEntry class is declared just below; confirm whether this TODO is still open.
6 PeakForestSpectrumEntry <- methods::setRefClass("PeakForestSpectrumEntry", contains = "BiodbEntry") # Reference class for a spectrum entry; extends BiodbEntry and declares no fields or methods of its own here.
7
8 PeakForestCompoundEntry <- methods::setRefClass("PeakForestCompoundEntry", contains = "BiodbEntry") # Reference class for a compound entry; extends BiodbEntry and declares no fields or methods of its own here.
9
10
11 ###########
12 # FACTORY #
13 ###########
14
15
16 ### `contents` may be JSON text (it is parsed here) or content that has already been parsed from JSON.
# Build PeakForestCompoundEntry objects from PeakForest compound JSON.
#
# Arguments:
#   contents  Either JSON text (parsed here with jsonlite::fromJSON, with
#             data-frame simplification turned off) or an already parsed
#             tree: a list holding one element per compound. A single
#             compound object (a named list that directly holds atomic field
#             values) is also accepted and treated as a one-element
#             collection.
#   drop      If TRUE and exactly one compound was processed, the entry
#             itself is returned instead of a one-element list.
#
# Returns a list with one PeakForestCompoundEntry per compound, in input
# order (or a single entry, see `drop`).
createPeakforestCompoundFromJSON <- function(contents, drop = FALSE) {

  if (is.character(contents)) {
    contents <- jsonlite::fromJSON(contents, simplifyDataFrame = FALSE)
  }

  # The loop below indexes every element of `contents` as a JSON tree, which
  # fails on atomic values. A named list with atomic members can therefore
  # only be ONE compound object (this is what createPeakforestSpectraFromJSON
  # passes for each element of `listOfCompounds`), so wrap it. Inputs that
  # worked before are unaffected: they never contain atomic members.
  is_field_value <- function(x) is.atomic(x) && !is.null(x)
  if (is.list(contents) && !is.null(names(contents)) &&
        any(vapply(contents, is_field_value, logical(1)))) {
    contents <- list(contents)
  }

  # Entry field name -> key to read in the JSON tree.
  jsonfields <- list()
  jsonfields[[BIODB.ACCESSION]] <- "id"
  jsonfields[[BIODB.PUBCHEMCOMP.ID]] <- "PubChemCID"
  jsonfields[[BIODB.CHEBI.ID]] <- "ChEBI"
  jsonfields[[BIODB.HMDB.ID]] <- "HMDB"
  jsonfields[[BIODB.KEGG.ID]] <- "KEGG"
  jsonfields[[BIODB.FORMULA]] <- "formula"
  jsonfields[[BIODB.SMILES]] <- "canSmiles"
  jsonfields[[BIODB.AVERAGE.MASS]] <- "averageMass"
  jsonfields[[BIODB.MONOISOTOPIC.MASS]] <- "monoisotopicMass"
  jsonfields[[BIODB.INCHI]] <- "inChI"
  # NOTE(review): "inchiIKey" looks like it could be a misspelled key; it is
  # kept unchanged here -- confirm against real PeakForest output.
  jsonfields[[BIODB.INCHIKEY]] <- "inchiIKey"
  jsonfields[[BIODB.NAME]] <- "mainName"

  entries <- vector("list", length(contents))

  for (i in seq_along(contents)) {

    jsontree <- contents[[i]]
    entry <- PeakForestCompoundEntry$new()

    for (field in names(jsonfields)) {
      # `[[` looks the key up by the VALUE of the variable. The previous
      # `jsontree$tosearch` searched for an element literally named
      # "tosearch", so every field was set to NULL.
      tosearch <- jsonfields[[field]]
      entry$setField(field, jsontree[[tosearch]])
    }

    entries[[i]] <- entry
  }

  if (drop && length(contents) == 1) {
    entries <- entries[[1]]
  }

  entries
}
60
# Build PeakForestSpectrumEntry objects from PeakForest spectrum JSON.
#
# Arguments:
#   contents  One of:
#             - a single JSON string, which is parsed and then iterated
#               element by element;
#             - a vector/list with one element per spectrum, each element
#               being either JSON text or an already parsed tree.
#   drop      If TRUE and exactly one element was processed, return that
#             entry (or NULL) instead of a one-element list.
#   checkSub  Not used by this function; kept so existing calls still work.
#
# Returns NULL when the single input string starts with "<html>". Otherwise
# returns a list with one slot per processed element, in input order: a
# PeakForestSpectrumEntry, or NULL where the element was the text "null" or
# started with "<html>".
#
# Each entry gets: the instrument name and ion analyzer type, the accession
# and MS mode, the number of peaks, the peak table, the number of compounds
# and the compounds.
createPeakforestSpectraFromJSON <- function(contents, drop = FALSE, checkSub = TRUE) {

  # Entry field name -> key to read in the JSON tree.
  jsonfields <- character()
  jsonfields[[BIODB.ACCESSION]] <- "id"
  jsonfields[[BIODB.MSMODE]] <- "polarity"

  # Column names of the peak table, in the order the values are extracted.
  cnames <- c(BIODB.PEAK.MZ, BIODB.PEAK.RELATIVE.INTENSITY, BIODB.PEAK.FORMULA,
              BIODB.PEAK.MZTHEO, BIODB.PEAK.ERROR.PPM)

  n_input <- length(contents)

  # A lone string holds the whole response: parse it and iterate over the
  # result. The is.character() guard lets a one-element list that already
  # holds a parsed tree go straight to the loop instead of failing in
  # startsWith().
  if (n_input == 1 && is.character(contents[[1]])) {
    if (startsWith(contents[[1]], "<html>")) {
      return(NULL)
    }
    contents <- jsonlite::fromJSON(contents[[1]], simplifyDataFrame = FALSE)
  }

  # One slot per element, pre-filled with NULL. Never shorter than the input,
  # so an empty parse result still yields list(NULL) as it did before.
  entries <- vector("list", max(n_input, length(contents)))

  for (i in seq_along(contents)) {

    content <- contents[[i]]
    if (is.character(content)) {
      if (startsWith(content, "<html>") || content == "null") {
        # Leave the pre-filled NULL in place. Assigning NULL with `[[<-`
        # would DELETE the slot and shorten the result.
        next
      }
      jsontree <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
    } else {
      jsontree <- content
    }

    entry <- PeakForestSpectrumEntry$new()

    # Mass analyzer.
    entry$setField(BIODB.MSDEV, jsontree$analyzerMassSpectrometerDevice$instrumentName)
    entry$setField(BIODB.MSDEVTYPE, jsontree$analyzerMassSpectrometerDevice$ionAnalyzerType)

    for (field in names(jsonfields)) {
      # `[[` looks the key up by the VALUE of the variable. The previous
      # `jsontree$tosearch` searched for an element literally named
      # "tosearch", so these fields were always set to NULL.
      tosearch <- jsonfields[[field]]
      entry$setField(field, jsontree[[tosearch]])
    }

    ######################
    # TREATING THE PEAKS #
    ######################

    entry$setField(BIODB.NB.PEAKS, length(jsontree$peaks))

    # No peaks: an empty data frame carrying the expected column names.
    peaks <- data.frame(matrix(0, ncol = length(cnames), nrow = 0))
    colnames(peaks) <- cnames

    if (length(jsontree$peaks) != 0) {
      # One column per peak, one row per attribute, stored as a list-matrix.
      # The list template makes vapply() fail loudly if a peak does not
      # produce exactly length(cnames) values.
      peaks <- vapply(jsontree$peaks, function(x) {
        list(as.double(x$mz),
             as.integer(x$ri),
             as.character(x$composition),
             as.double(x$theoricalMass),
             as.double(x$deltaPPM))
      }, FUN.VALUE = vector("list", length(cnames)))

      # Remove all whitespace from the formula. A peak without a composition
      # gives a zero-length value, which is stored as NA instead of aborting
      # the whole parse.
      peaks[3, ] <- vapply(peaks[3, ], function(x) {
        if (length(x) == 0) NA_character_ else gsub(" ", "", trimws(x))
      }, FUN.VALUE = NA_character_)

      # One row per peak.
      peaks <- t(peaks)
      colnames(peaks) <- cnames
    }

    entry$setField(BIODB.PEAKS, peaks)

    ##################################
    # TREATING THE LIST OF COMPOUNDS #
    ##################################

    entry$setField(BIODB.NB.COMPOUNDS, length(jsontree$listOfCompounds))

    # NOTE(review): each element of `listOfCompounds` is handed to the
    # compound factory on its own, while that factory iterates over the
    # elements of its argument -- confirm it accepts a single compound object.
    compounds <- list()
    if (length(jsontree$listOfCompounds) != 0) {
      compounds <- lapply(jsontree$listOfCompounds, function(x) {
        createPeakforestCompoundFromJSON(x)
      })
    }

    entry$setField(BIODB.COMPOUNDS, compounds)

    entries[[i]] <- entry
  }

  if (drop && length(contents) == 1) {
    entries <- entries[[1]]
  }

  entries
}
162
163
164 #### TODO: clean this up
165
# Build reduced PeakForestSpectrumEntry objects (accession and peaks only)
# from PeakForest spectrum JSON.
#
# Arguments:
#   contents  One of:
#             - a single JSON string, which is parsed and then iterated
#               element by element;
#             - a vector/list with one element per spectrum, each element
#               being either JSON text or an already parsed tree.
#   drop      If TRUE and exactly one element was processed, return that
#             entry (or NULL) instead of a one-element list.
#   checkSub  Not used by this function; kept so existing calls still work.
#
# Returns NULL when the single input string starts with "<html>". Otherwise
# returns a list with one slot per processed element, in input order: a
# PeakForestSpectrumEntry, or NULL where the element was the text "null" or
# started with "<html>".
createReducedSpectraFromJSON <- function(contents,
                                         drop = FALSE,
                                         checkSub = TRUE) {

  # Column names of the peak table, in the order the values are extracted.
  cnames <- c(
    BIODB.PEAK.MZ,
    BIODB.PEAK.RELATIVE.INTENSITY,
    BIODB.PEAK.FORMULA,
    BIODB.PEAK.MZTHEO,
    BIODB.PEAK.ERROR.PPM
  )

  n_input <- length(contents)

  # A lone string holds the whole response: parse it and iterate over the
  # result. The is.character() guard lets a one-element list that already
  # holds a parsed tree go straight to the loop instead of failing in
  # startsWith().
  if (n_input == 1 && is.character(contents[[1]])) {
    if (startsWith(contents[[1]], "<html>")) {
      return(NULL)
    }
    contents <- jsonlite::fromJSON(contents[[1]], simplifyDataFrame = FALSE)
  }

  # One slot per element, pre-filled with NULL. Never shorter than the input,
  # so an empty parse result still yields list(NULL) as it did before.
  entries <- vector("list", max(n_input, length(contents)))

  for (i in seq_along(contents)) {
    content <- contents[[i]]
    if (is.character(content)) {
      if (startsWith(content, "<html>") || content == "null") {
        # Leave the pre-filled NULL in place. Assigning NULL with `[[<-`
        # would DELETE the slot and shorten the result.
        next
      }
      jsontree <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
    } else {
      jsontree <- content
    }

    entry <- PeakForestSpectrumEntry$new()
    entry$setField(BIODB.ACCESSION, jsontree$id)

    ######################
    # TREATING THE PEAKS #
    ######################

    entry$setField(BIODB.NB.PEAKS, length(jsontree$peaks))

    # No peaks: an empty data frame carrying the expected column names.
    peaks <- data.frame(matrix(0, ncol = length(cnames), nrow = 0))
    colnames(peaks) <- cnames

    if (length(jsontree$peaks) != 0) {
      # One column per peak, one row per attribute, stored as a list-matrix.
      # The list template makes vapply() fail loudly if a peak does not
      # produce exactly length(cnames) values.
      peaks <- vapply(jsontree$peaks, function(x) {
        list(
          as.double(x$mz),
          as.integer(x$ri),
          as.character(x$composition),
          as.double(x$theoricalMass),
          as.double(x$deltaPPM)
        )
      }, FUN.VALUE = vector("list", length(cnames)))

      # Remove all whitespace from the formula. A peak without a composition
      # gives a zero-length value, which is stored as NA instead of aborting
      # the whole parse.
      peaks[3, ] <- vapply(peaks[3, ], function(x) {
        if (length(x) == 0) NA_character_ else gsub(" ", "", trimws(x))
      }, FUN.VALUE = NA_character_)

      # One row per peak, converted to a data frame.
      peaks <- as.data.frame(t(peaks))
      colnames(peaks) <- cnames
    }

    entry$setField(BIODB.PEAKS, peaks)

    entries[[i]] <- entry
  }

  if (drop && length(contents) == 1) {
    entries <- entries[[1]]
  }

  entries
}