Mercurial > repos > computational-metabolomics > mspurity_spectralmatching
comparison spectralMatching.R @ 0:a8ab07c27338 draft default tip
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
| author | computational-metabolomics |
|---|---|
| date | Thu, 04 Mar 2021 12:20:23 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a8ab07c27338 |
|---|---|
| 1 library(msPurity) | |
| 2 library(msPurityData) | |
| 3 library(optparse) | |
| 4 print(sessionInfo()) | |
| 5 # load in library spectra config | |
| 6 source_local <- function(fname) { | |
| 7 argv <- commandArgs(trailingOnly = FALSE) | |
| 8 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) | |
| 9 source(paste(base_dir, fname, sep = "/")) | |
| 10 } | |
| 11 source_local("dbconfig.R") | |
| 12 | |
| 13 option_list <- list( | |
| 14 make_option(c("-o", "--outDir"), type = "character"), | |
| 15 make_option("--q_dbPth", type = "character"), | |
| 16 make_option("--l_dbPth", type = "character"), | |
| 17 | |
| 18 make_option("--q_dbType", type = "character", default = NA), | |
| 19 make_option("--l_dbType", type = "character", default = NA), | |
| 20 | |
| 21 make_option("--q_msp", type = "character", default = NA), | |
| 22 make_option("--l_msp", type = "character", default = NA), | |
| 23 | |
| 24 make_option("--q_defaultDb", action = "store_true"), | |
| 25 make_option("--l_defaultDb", action = "store_true"), | |
| 26 | |
| 27 make_option("--q_ppmPrec", type = "double"), | |
| 28 make_option("--l_ppmPrec", type = "double"), | |
| 29 | |
| 30 make_option("--q_ppmProd", type = "double"), | |
| 31 make_option("--l_ppmProd", type = "double"), | |
| 32 | |
| 33 make_option("--q_raThres", type = "double", default = NA), | |
| 34 make_option("--l_raThres", type = "double", default = NA), | |
| 35 | |
| 36 make_option("--q_polarity", type = "character", default = NA), | |
| 37 make_option("--l_polarity", type = "character", default = NA), | |
| 38 | |
| 39 make_option("--q_purity", type = "double", default = NA), | |
| 40 make_option("--l_purity", type = "double", default = NA), | |
| 41 | |
| 42 make_option("--q_xcmsGroups", type = "character", default = NA), | |
| 43 make_option("--l_xcmsGroups", type = "character", default = NA), | |
| 44 | |
| 45 make_option("--q_pids", type = "character", default = NA), | |
| 46 make_option("--l_pids", type = "character", default = NA), | |
| 47 | |
| 48 make_option("--q_rtrangeMin", type = "double", default = NA), | |
| 49 make_option("--l_rtrangeMin", type = "double", default = NA), | |
| 50 | |
| 51 make_option("--q_rtrangeMax", type = "double", default = NA), | |
| 52 make_option("--l_rtrangeMax", type = "double", default = NA), | |
| 53 | |
| 54 make_option("--q_accessions", type = "character", default = NA), | |
| 55 make_option("--l_accessions", type = "character", default = NA), | |
| 56 | |
| 57 make_option("--q_sources", type = "character", default = NA), | |
| 58 make_option("--l_sources", type = "character", default = NA), | |
| 59 | |
| 60 make_option("--q_sourcesUser", type = "character", default = NA), | |
| 61 make_option("--l_sourcesUser", type = "character", default = NA), | |
| 62 | |
| 63 make_option("--q_instrumentTypes", type = "character", default = NA), | |
| 64 make_option("--l_instrumentTypes", type = "character", default = NA), | |
| 65 | |
| 66 make_option("--q_instrumentTypesUser", type = "character", default = NA), | |
| 67 make_option("--l_instrumentTypesUser", type = "character", default = NA), | |
| 68 | |
| 69 make_option("--q_instruments", type = "character", default = NA), | |
| 70 make_option("--l_instruments", type = "character", default = NA), | |
| 71 | |
| 72 make_option("--q_spectraTypes", type = "character", default = NA), | |
| 73 make_option("--l_spectraTypes", type = "character", default = NA), | |
| 74 | |
| 75 make_option("--q_spectraFilter", action = "store_true"), | |
| 76 make_option("--l_spectraFilter", action = "store_true"), | |
| 77 | |
| 78 make_option("--usePrecursors", action = "store_true"), | |
| 79 | |
| 80 make_option("--mzW", type = "double"), | |
| 81 make_option("--raW", type = "double"), | |
| 82 | |
| 83 make_option("--rttol", type = "double", default = NA), | |
| 84 | |
| 85 make_option("--updateDb", action = "store_true"), | |
| 86 make_option("--copyDb", action = "store_true"), | |
| 87 make_option("--cores", default = 1) | |
| 88 | |
| 89 | |
| 90 ) | |
| 91 | |
| 92 # store options | |
| 93 opt <- parse_args(OptionParser(option_list = option_list)) | |
| 94 | |
| 95 print(opt) | |
| 96 | |
| 97 # check if the sqlite databases have any spectra | |
| 98 checkSPeakMeta <- function(dbPth, nme) { | |
| 99 if (is.null(dbPth)) { | |
| 100 return(TRUE) | |
| 101 }else if ((file.exists(dbPth)) & (file.info(dbPth)$size > 0)) { | |
| 102 con <- DBI::dbConnect(RSQLite::SQLite(), dbPth) | |
| 103 if (DBI::dbExistsTable(con, "s_peak_meta")) { | |
| 104 spm <- DBI::dbGetQuery(con, "SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1") | |
| 105 return(TRUE) | |
| 106 }else if (DBI::dbExistsTable(con, "library_spectra_meta")) { | |
| 107 spm <- DBI::dbGetQuery(con, "SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1") | |
| 108 return(TRUE) | |
| 109 }else{ | |
| 110 print(paste("No spectra available for ", nme)) | |
| 111 return(FALSE) | |
| 112 } | |
| 113 }else{ | |
| 114 print(paste("file empty or does not exist for", nme)) | |
| 115 return(FALSE) | |
| 116 } | |
| 117 | |
| 118 | |
| 119 } | |
| 120 | |
| 121 | |
| 122 addQueryNameColumn <- function(sm) { | |
| 123 if (is.null(sm$matchedResults) || length(sm$matchedResults) == 1 || nrow(sm$matchedResults) == 0) { | |
| 124 return(sm) | |
| 125 } | |
| 126 | |
| 127 con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth) | |
| 128 if (DBI::dbExistsTable(con, "s_peak_meta")) { | |
| 129 spm <- DBI::dbGetQuery(con, "SELECT pid, name AS query_entry_name FROM s_peak_meta") | |
| 130 }else if (DBI::dbExistsTable(con, "library_spectra_meta")) { | |
| 131 spm <- DBI::dbGetQuery(con, "SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta") | |
| 132 } | |
| 133 print(sm$matchedResults) | |
| 134 if ("pid" %in% colnames(sm$matchedResults)) { | |
| 135 sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "pid", by.y = "pid") | |
| 136 }else{ | |
| 137 sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "qpid", by.y = "pid") | |
| 138 } | |
| 139 | |
| 140 print(sm$xcmsMatchedResults) | |
| 141 if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults) == 1 || nrow(sm$xcmsMatchedResults) == 0) { | |
| 142 return(sm) | |
| 143 }else{ | |
| 144 if ("pid" %in% colnames(sm$xcmsMatchedResults)) { | |
| 145 sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "pid", by.y = "pid") | |
| 146 }else{ | |
| 147 sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "qpid", by.y = "pid") | |
| 148 } | |
| 149 } | |
| 150 | |
| 151 return(sm) | |
| 152 | |
| 153 } | |
| 154 | |
| 155 | |
| 156 updateDbF <- function(q_con, l_con) { | |
| 157 message("Adding extra details to database") | |
| 158 q_con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth) | |
| 159 if (DBI::dbExistsTable(q_con, "l_s_peak_meta")) { | |
| 160 l_s_peak_meta <- DBI::dbGetQuery(q_con, "SELECT * FROM l_s_peak_meta") | |
| 161 colnames(l_s_peak_meta)[1] <- "pid" | |
| 162 } | |
| 163 | |
| 164 l_con <- DBI::dbConnect(RSQLite::SQLite(), l_dbPth) | |
| 165 if (DBI::dbExistsTable(l_con, "s_peaks")) { | |
| 166 l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ","))) | |
| 167 | |
| 168 }else if (DBI::dbExistsTable(l_con, "library_spectra")) { | |
| 169 l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra | |
| 170 WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ","))) | |
| 171 }else{ | |
| 172 l_s_peaks <- NULL | |
| 173 } | |
| 174 | |
| 175 if (DBI::dbExistsTable(l_con, "source")) { | |
| 176 l_source <- DBI::dbGetQuery(l_con, "SELECT * FROM source") | |
| 177 }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) { | |
| 178 l_source <- DBI::dbGetQuery(l_con, "SELECT * FROM library_spectra_source") | |
| 179 }else{ | |
| 180 l_source <- NULL | |
| 181 } | |
| 182 | |
| 183 if (!is.null(l_s_peaks)) { | |
| 184 DBI::dbWriteTable(q_con, name = "l_s_peaks", value = l_s_peaks, row.names = FALSE, append = TRUE) | |
| 185 } | |
| 186 | |
| 187 if (!is.null(l_source)) { | |
| 188 DBI::dbWriteTable(q_con, name = "l_source", value = l_source, row.names = FALSE, append = TRUE) | |
| 189 } | |
| 190 | |
| 191 } | |
| 192 | |
| 193 | |
| 194 extractMultiple <- function(optParam) { | |
| 195 if (!is.na(optParam)) { | |
| 196 param <- trimws(strsplit(optParam, ",")[[1]]) | |
| 197 param <- param[param != ""] | |
| 198 }else { | |
| 199 param <- NA | |
| 200 } | |
| 201 return(param) | |
| 202 | |
| 203 } | |
| 204 | |
| 205 if (!is.null(opt$q_defaultDb)) { | |
| 206 q_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData") | |
| 207 q_dbType <- "sqlite" | |
| 208 }else if (!opt$q_dbType == "local_config") { | |
| 209 q_dbType <- opt$q_dbType | |
| 210 q_dbPth <- opt$q_dbPth | |
| 211 } | |
| 212 | |
| 213 if (!is.null(opt$l_defaultDb)) { | |
| 214 l_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData") | |
| 215 l_dbType <- "sqlite" | |
| 216 }else if (!opt$l_dbType == "local_config") { | |
| 217 l_dbType <- opt$l_dbType | |
| 218 l_dbPth <- opt$l_dbPth | |
| 219 } | |
| 220 | |
| 221 q_spectraTypes <- extractMultiple(opt$q_spectraTypes) | |
| 222 l_spectraTypes <- extractMultiple(opt$l_spectraTypes) | |
| 223 | |
| 224 q_polarity <- extractMultiple(opt$q_polarity) | |
| 225 l_polarity <- extractMultiple(opt$l_polarity) | |
| 226 | |
| 227 q_xcmsGroups <- extractMultiple(opt$q_xcmsGroups) | |
| 228 l_xcmsGroups <- extractMultiple(opt$l_xcmsGroups) | |
| 229 | |
| 230 q_pids <- extractMultiple(opt$q_pids) | |
| 231 l_pids <- extractMultiple(opt$l_pids) | |
| 232 | |
| 233 q_sources <- extractMultiple(opt$q_sources) | |
| 234 l_sources <- extractMultiple(opt$l_sources) | |
| 235 | |
| 236 q_sourcesUser <- extractMultiple(opt$q_sourcesUser) | |
| 237 l_sourcesUser <- extractMultiple(opt$l_sourcesUser) | |
| 238 | |
| 239 q_sources <- c(q_sources, q_sourcesUser) | |
| 240 l_sources <- c(l_sources, l_sourcesUser) | |
| 241 | |
| 242 q_instrumentTypes <- extractMultiple(opt$q_instrumentTypes) | |
| 243 l_instrumentTypes <- extractMultiple(opt$l_instrumentTypes) | |
| 244 | |
| 245 q_instrumentTypes <- c(q_instrumentTypes, q_instrumentTypes) | |
| 246 l_instrumentTypes <- c(l_instrumentTypes, l_instrumentTypes) | |
| 247 | |
| 248 | |
| 249 if (!is.null(opt$l_spectraFilter)) { | |
| 250 l_spectraFilter <- TRUE | |
| 251 }else{ | |
| 252 l_spectraFilter <- FALSE | |
| 253 } | |
| 254 | |
| 255 if (!is.null(opt$q_spectraFilter)) { | |
| 256 q_spectraFilter <- TRUE | |
| 257 }else{ | |
| 258 q_spectraFilter <- FALSE | |
| 259 } | |
| 260 | |
| 261 if (!is.null(opt$updateDb)) { | |
| 262 updateDb <- TRUE | |
| 263 }else{ | |
| 264 updateDb <- FALSE | |
| 265 } | |
| 266 | |
| 267 if (!is.null(opt$copyDb)) { | |
| 268 copyDb <- TRUE | |
| 269 }else{ | |
| 270 copyDb <- FALSE | |
| 271 } | |
| 272 | |
| 273 if (!is.null(opt$l_rtrangeMax)) { | |
| 274 l_rtrangeMax <- opt$l_rtrangeMax | |
| 275 }else{ | |
| 276 l_rtrangeMax <- NA | |
| 277 } | |
| 278 | |
| 279 if (!is.null(opt$q_rtrangeMax)) { | |
| 280 q_rtrangeMax <- opt$q_rtrangeMax | |
| 281 }else{ | |
| 282 q_rtrangeMax <- NA | |
| 283 } | |
| 284 | |
| 285 if (!is.null(opt$l_rtrangeMin)) { | |
| 286 l_rtrangeMin <- opt$l_rtrangeMin | |
| 287 }else{ | |
| 288 l_rtrangeMin <- NA | |
| 289 } | |
| 290 | |
| 291 if (!is.null(opt$q_rtrangeMin)) { | |
| 292 q_rtrangeMin <- opt$q_rtrangeMin | |
| 293 }else{ | |
| 294 q_rtrangeMin <- NA | |
| 295 } | |
| 296 | |
| 297 q_check <- checkSPeakMeta(opt$q_dbPth, "query") | |
| 298 l_check <- checkSPeakMeta(opt$l_dbPth, "library") | |
| 299 | |
| 300 | |
| 301 if (q_check && l_check) { | |
| 302 sm <- msPurity::spectralMatching( | |
| 303 q_purity = opt$q_purity, | |
| 304 l_purity = opt$l_purity, | |
| 305 | |
| 306 q_ppmProd = opt$q_ppmProd, | |
| 307 l_ppmProd = opt$l_ppmProd, | |
| 308 | |
| 309 q_ppmPrec = opt$q_ppmPrec, | |
| 310 l_ppmPrec = opt$l_ppmPrec, | |
| 311 | |
| 312 q_raThres = opt$q_raThres, | |
| 313 l_raThres = opt$l_raThres, | |
| 314 | |
| 315 q_pol = q_polarity, | |
| 316 l_pol = l_polarity, | |
| 317 | |
| 318 q_spectraFilter = q_spectraFilter, | |
| 319 l_spectraFilter = l_spectraFilter, | |
| 320 | |
| 321 q_xcmsGroups = q_xcmsGroups, | |
| 322 l_xcmsGroups = l_xcmsGroups, | |
| 323 | |
| 324 q_pids = q_pids, | |
| 325 l_pids = l_pids, | |
| 326 | |
| 327 q_sources = q_sources, | |
| 328 l_sources = l_sources, | |
| 329 | |
| 330 q_instrumentTypes = q_instrumentTypes, | |
| 331 l_instrumentTypes = l_instrumentTypes, | |
| 332 | |
| 333 q_spectraTypes = q_spectraTypes, | |
| 334 l_spectraTypes = l_spectraTypes, | |
| 335 | |
| 336 l_rtrange = c(l_rtrangeMin, l_rtrangeMax), | |
| 337 q_rtrange = c(q_rtrangeMin, q_rtrangeMax), | |
| 338 | |
| 339 q_accessions = opt$q_accessions, | |
| 340 l_accessions = opt$l_accessions, | |
| 341 | |
| 342 raW = opt$raW, | |
| 343 mzW = opt$mzW, | |
| 344 rttol = opt$rttol, | |
| 345 cores = opt$cores, | |
| 346 | |
| 347 copyDb = copyDb, | |
| 348 updateDb = updateDb, | |
| 349 outPth = "db_with_spectral_matching.sqlite", | |
| 350 | |
| 351 q_dbPth = q_dbPth, | |
| 352 q_dbType = q_dbType, | |
| 353 q_dbName = q_dbName, | |
| 354 q_dbHost = q_dbHost, | |
| 355 q_dbUser = q_dbUser, | |
| 356 q_dbPass = q_dbPass, | |
| 357 q_dbPort = q_dbPort, | |
| 358 | |
| 359 l_dbPth = l_dbPth, | |
| 360 l_dbType = l_dbType, | |
| 361 l_dbName = l_dbName, | |
| 362 l_dbHost = l_dbHost, | |
| 363 l_dbUser = l_dbUser, | |
| 364 l_dbPass = l_dbPass, | |
| 365 l_dbPort = l_dbPort | |
| 366 | |
| 367 ) | |
| 368 | |
| 369 sm <- addQueryNameColumn(sm) | |
| 370 # Get name of the query results (and merged with the data frames) | |
| 371 write.table(sm$matchedResults, "matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE) | |
| 372 write.table(sm$xcmsMatchedResults, "xcms_matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE) | |
| 373 | |
| 374 if (updateDb) { | |
| 375 updateDbF(q_con, l_con) | |
| 376 } | |
| 377 } |
