comparison spectralMatching.R @ 14:1389e565f95e draft default tip

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc
author tomnl
date Wed, 27 Nov 2019 12:37:27 +0000
parents a90832e6acb2
children
comparison
equal deleted inserted replaced
13:4164de21d178 14:1389e565f95e
87 87
88 # store options 88 # store options
89 opt<- parse_args(OptionParser(option_list=option_list)) 89 opt<- parse_args(OptionParser(option_list=option_list))
90 90
91 print(opt) 91 print(opt)
92
93 # check if the sqlite databases have any spectra
94 checkSPeakMeta <- function(dbPth, nme){
95 if(is.null(dbPth)){
96 return(TRUE)
97 }else if ((file.exists(dbPth)) & (file.info(dbPth)$size>0)){
98 con <- DBI::dbConnect(RSQLite::SQLite(), dbPth)
99 if (DBI::dbExistsTable(con, "s_peak_meta")){
100 spm <- DBI::dbGetQuery(con, 'SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1')
101 return(TRUE)
102 }else if(DBI::dbExistsTable(con, "library_spectra_meta")){
103 spm <- DBI::dbGetQuery(con, 'SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1')
104 return(TRUE)
105 }else{
106 print(paste("No spectra available for ",nme))
107 return(FALSE)
108 }
109 }else{
110 print(paste("file empty or does not exist for", nme))
111 return(FALSE)
112 }
113
114
115 }
116
117
118 addQueryNameColumn <- function(sm){
119 if (is.null(sm$matchedResults) || length(sm$matchedResults)==1 || nrow(sm$matchedResults)==0){
120 return(sm)
121 }
122
123 con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth)
124 if (DBI::dbExistsTable(con, "s_peak_meta")){
125 spm <- DBI::dbGetQuery(con, 'SELECT pid, name AS query_entry_name FROM s_peak_meta')
126 }else if(DBI::dbExistsTable(con, "library_spectra_meta")){
127 spm <- DBI::dbGetQuery(con, 'SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta')
128 }
129 print(sm$matchedResults)
130 if ('pid' %in% colnames(sm$matchedResults)){
131 sm$matchedResults <- merge(sm$matchedResults, spm, by.x='pid', by.y='pid')
132 }else{
133 sm$matchedResults <- merge(sm$matchedResults, spm, by.x='qpid', by.y='pid')
134 }
135
136 print(sm$xcmsMatchedResults)
137 if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults)==1 || nrow(sm$xcmsMatchedResults)==0){
138 return(sm)
139 }else{
140 if ('pid' %in% colnames(sm$xcmsMatchedResults)){
141 sm$xcmsMatchedResults<- merge(sm$xcmsMatchedResults, spm, by.x='pid', by.y='pid')
142 }else{
143 sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x='qpid', by.y='pid')
144 }
145 }
146
147 return(sm)
148
149 }
150
151
152 updateDbF <- function(q_con, l_con){
153 message('Adding extra details to database')
154 q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth)
155 if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){
156 l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT * FROM l_s_peak_meta')
157 colnames(l_s_peak_meta)[1] <- 'pid'
158 }
159
160 l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth)
161 if (DBI::dbExistsTable(l_con, "s_peaks")){
162 l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=',')))
163
164 }else if(DBI::dbExistsTable(l_con, "library_spectra")){
165 l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra
166 WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=',')))
167 }else{
168 l_s_peaks = NULL
169 }
170
171 if (DBI::dbExistsTable(l_con, "source")){
172 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM source')
173 }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) {
174 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM library_spectra_source')
175 }else{
176 l_source = NULL
177 }
178
179 if (!is.null(l_s_peaks)){
180 DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE)
181 }
182
183 if (!is.null(l_source)){
184 DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE)
185 }
186
187
188 }
189
92 190
93 extractMultiple <- function(optParam){ 191 extractMultiple <- function(optParam){
94 if (!is.na(optParam)){ 192 if (!is.na(optParam)){
95 param <- trimws(strsplit(optParam, ',')[[1]]) 193 param <- trimws(strsplit(optParam, ',')[[1]])
96 param <- param[param != ""] 194 param <- param[param != ""]
116 l_dbType <- opt$l_dbType 214 l_dbType <- opt$l_dbType
117 l_dbPth <- opt$l_dbPth 215 l_dbPth <- opt$l_dbPth
118 } 216 }
119 217
120 218
121
122
123 q_polarity <- extractMultiple(opt$q_polarity) 219 q_polarity <- extractMultiple(opt$q_polarity)
124 l_polarity <- extractMultiple(opt$l_polarity) 220 l_polarity <- extractMultiple(opt$l_polarity)
125 221
126 q_xcmsGroups <- extractMultiple(opt$q_xcmsGroups) 222 q_xcmsGroups <- extractMultiple(opt$q_xcmsGroups)
127 l_xcmsGroups <- extractMultiple(opt$l_xcmsGroups) 223 l_xcmsGroups <- extractMultiple(opt$l_xcmsGroups)
191 q_rtrangeMin <- opt$q_rtrangeMin 287 q_rtrangeMin <- opt$q_rtrangeMin
192 }else{ 288 }else{
193 q_rtrangeMin <- NA 289 q_rtrangeMin <- NA
194 } 290 }
195 291
196 292 q_check <- checkSPeakMeta(opt$q_dbPth, 'query')
197 293 l_check <- checkSPeakMeta(opt$l_dbPth, 'library')
198 sm <- msPurity::spectralMatching( 294
199 q_purity = opt$q_purity, 295 if (q_check && l_check){
200 l_purity = opt$l_purity, 296 sm <- msPurity::spectralMatching(
201 297 q_purity = opt$q_purity,
202 q_ppmProd = opt$q_ppmProd, 298 l_purity = opt$l_purity,
203 l_ppmProd = opt$l_ppmProd, 299
204 300 q_ppmProd = opt$q_ppmProd,
205 q_ppmPrec = opt$q_ppmPrec, 301 l_ppmProd = opt$l_ppmProd,
206 l_ppmPrec = opt$l_ppmPrec, 302
207 303 q_ppmPrec = opt$q_ppmPrec,
208 q_raThres = opt$q_raThres, 304 l_ppmPrec = opt$l_ppmPrec,
209 l_raThres = opt$l_raThres, 305
210 306 q_raThres = opt$q_raThres,
211 q_pol = q_polarity, 307 l_raThres = opt$l_raThres,
212 l_pol = l_polarity, 308
213 309 q_pol = q_polarity,
214 q_xcmsGroups = q_xcmsGroups, 310 l_pol = l_polarity,
215 l_xcmsGroups = l_xcmsGroups, 311
216 312 q_xcmsGroups = q_xcmsGroups,
217 q_pids = q_pids, 313 l_xcmsGroups = l_xcmsGroups,
218 l_pids = l_pids, 314
219 315 q_pids = q_pids,
220 q_sources = q_sources, 316 l_pids = l_pids,
221 l_sources = l_sources, 317
222 318 q_sources = q_sources,
223 q_instrumentTypes = q_instrumentTypes, 319 l_sources = l_sources,
224 l_instrumentTypes = l_instrumentTypes, 320
225 321 q_instrumentTypes = q_instrumentTypes,
226 q_spectraFilter= q_spectraFilter, 322 l_instrumentTypes = l_instrumentTypes,
227 l_spectraFilter= l_spectraFilter, 323
228 324 q_spectraFilter= q_spectraFilter,
229 l_rtrange=c(l_rtrangeMin, l_rtrangeMax), 325 l_spectraFilter= l_spectraFilter,
230 q_rtrange=c(q_rtrangeMin, q_rtrangeMax), 326
231 327 l_rtrange=c(l_rtrangeMin, l_rtrangeMax),
232 q_accessions = opt$q_accessions, 328 q_rtrange=c(q_rtrangeMin, q_rtrangeMax),
233 l_accessions= opt$l_accessions, 329
234 330 q_accessions = opt$q_accessions,
235 raW = opt$raW, 331 l_accessions= opt$l_accessions,
236 mzW = opt$mzW, 332
237 rttol=opt$rttol, 333 raW = opt$raW,
238 cores=opt$cores, 334 mzW = opt$mzW,
239 335 rttol=opt$rttol,
240 copyDb=copyDb, 336 cores=opt$cores,
241 updateDb=updateDb, 337
242 outPth = "db_with_spectral_matching.sqlite", 338 copyDb=copyDb,
243 339 updateDb=updateDb,
244 q_dbPth = q_dbPth, 340 outPth = "db_with_spectral_matching.sqlite",
245 q_dbType = q_dbType, 341
246 q_dbName = q_dbName, 342 q_dbPth = q_dbPth,
247 q_dbHost = q_dbHost, 343 q_dbType = q_dbType,
248 q_dbUser = q_dbUser, 344 q_dbName = q_dbName,
249 q_dbPass = q_dbPass, 345 q_dbHost = q_dbHost,
250 q_dbPort = q_dbPort, 346 q_dbUser = q_dbUser,
251 347 q_dbPass = q_dbPass,
252 l_dbPth = l_dbPth, 348 q_dbPort = q_dbPort,
253 l_dbType = l_dbType, 349
254 l_dbName = l_dbName, 350 l_dbPth = l_dbPth,
255 l_dbHost = l_dbHost, 351 l_dbType = l_dbType,
256 l_dbUser = l_dbUser, 352 l_dbName = l_dbName,
257 l_dbPass = l_dbPass, 353 l_dbHost = l_dbHost,
258 l_dbPort = l_dbPort 354 l_dbUser = l_dbUser,
259 355 l_dbPass = l_dbPass,
260 ) 356 l_dbPort = l_dbPort
261 357
262 358 )
263 359
264 write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) 360 sm <- addQueryNameColumn(sm)
265 write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) 361 # Get name of the query results (and merged with the data frames)
266 362 write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE)
267 363 write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE)
268 # Add extra details from library spectra in resulting database 364
269 # First get all the ids from the l_s_peak_meta from the query database 365 if(updateDb){
270 if(updateDb){ 366 updateDbF(q_con, l_con)
271 message('Adding extra details to database') 367 }
272 q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) 368 }
273 if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){
274 l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT * FROM l_s_peak_meta')
275 colnames(l_s_peak_meta)[1] <- 'pid'
276 }
277
278 l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth)
279 if (DBI::dbExistsTable(l_con, "s_peaks")){
280 l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=',')))
281
282 }else if(DBI::dbExistsTable(l_con, "library_spectra")){
283 l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra
284 WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=',')))
285 }else{
286 l_s_peaks = NULL
287 }
288
289 if (DBI::dbExistsTable(l_con, "source")){
290 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM source')
291 }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) {
292 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM library_spectra_source')
293 }else{
294 l_source = NULL
295 }
296
297 if (!is.null(l_s_peaks)){
298 DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE)
299 }
300
301 if (!is.null(l_source)){
302 DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE)
303 }
304
305 }
306
307
308
309
310