Mercurial > repos > tomnl > mspurity_dimspredictpuritysingle
comparison spectralMatching.R @ 14:1389e565f95e draft default tip
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc
| author | tomnl |
|---|---|
| date | Wed, 27 Nov 2019 12:37:27 +0000 |
| parents | a90832e6acb2 |
| children |
comparison
equal
deleted
inserted
replaced
| 13:4164de21d178 | 14:1389e565f95e |
|---|---|
| 87 | 87 |
| 88 # store options | 88 # store options |
| 89 opt<- parse_args(OptionParser(option_list=option_list)) | 89 opt<- parse_args(OptionParser(option_list=option_list)) |
| 90 | 90 |
| 91 print(opt) | 91 print(opt) |
| 92 | |
| 93 # check if the sqlite databases have any spectra | |
| 94 checkSPeakMeta <- function(dbPth, nme){ | |
| 95 if(is.null(dbPth)){ | |
| 96 return(TRUE) | |
| 97 }else if ((file.exists(dbPth)) & (file.info(dbPth)$size>0)){ | |
| 98 con <- DBI::dbConnect(RSQLite::SQLite(), dbPth) | |
| 99 if (DBI::dbExistsTable(con, "s_peak_meta")){ | |
| 100 spm <- DBI::dbGetQuery(con, 'SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1') | |
| 101 return(TRUE) | |
| 102 }else if(DBI::dbExistsTable(con, "library_spectra_meta")){ | |
| 103 spm <- DBI::dbGetQuery(con, 'SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1') | |
| 104 return(TRUE) | |
| 105 }else{ | |
| 106 print(paste("No spectra available for ",nme)) | |
| 107 return(FALSE) | |
| 108 } | |
| 109 }else{ | |
| 110 print(paste("file empty or does not exist for", nme)) | |
| 111 return(FALSE) | |
| 112 } | |
| 113 | |
| 114 | |
| 115 } | |
| 116 | |
| 117 | |
| 118 addQueryNameColumn <- function(sm){ | |
| 119 if (is.null(sm$matchedResults) || length(sm$matchedResults)==1 || nrow(sm$matchedResults)==0){ | |
| 120 return(sm) | |
| 121 } | |
| 122 | |
| 123 con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) | |
| 124 if (DBI::dbExistsTable(con, "s_peak_meta")){ | |
| 125 spm <- DBI::dbGetQuery(con, 'SELECT pid, name AS query_entry_name FROM s_peak_meta') | |
| 126 }else if(DBI::dbExistsTable(con, "library_spectra_meta")){ | |
| 127 spm <- DBI::dbGetQuery(con, 'SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta') | |
| 128 } | |
| 129 print(sm$matchedResults) | |
| 130 if ('pid' %in% colnames(sm$matchedResults)){ | |
| 131 sm$matchedResults <- merge(sm$matchedResults, spm, by.x='pid', by.y='pid') | |
| 132 }else{ | |
| 133 sm$matchedResults <- merge(sm$matchedResults, spm, by.x='qpid', by.y='pid') | |
| 134 } | |
| 135 | |
| 136 print(sm$xcmsMatchedResults) | |
| 137 if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults)==1 || nrow(sm$xcmsMatchedResults)==0){ | |
| 138 return(sm) | |
| 139 }else{ | |
| 140 if ('pid' %in% colnames(sm$xcmsMatchedResults)){ | |
| 141 sm$xcmsMatchedResults<- merge(sm$xcmsMatchedResults, spm, by.x='pid', by.y='pid') | |
| 142 }else{ | |
| 143 sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x='qpid', by.y='pid') | |
| 144 } | |
| 145 } | |
| 146 | |
| 147 return(sm) | |
| 148 | |
| 149 } | |
| 150 | |
| 151 | |
| 152 updateDbF <- function(q_con, l_con){ | |
| 153 message('Adding extra details to database') | |
| 154 q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) | |
| 155 if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){ | |
| 156 l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT * FROM l_s_peak_meta') | |
| 157 colnames(l_s_peak_meta)[1] <- 'pid' | |
| 158 } | |
| 159 | |
| 160 l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth) | |
| 161 if (DBI::dbExistsTable(l_con, "s_peaks")){ | |
| 162 l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) | |
| 163 | |
| 164 }else if(DBI::dbExistsTable(l_con, "library_spectra")){ | |
| 165 l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra | |
| 166 WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) | |
| 167 }else{ | |
| 168 l_s_peaks = NULL | |
| 169 } | |
| 170 | |
| 171 if (DBI::dbExistsTable(l_con, "source")){ | |
| 172 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM source') | |
| 173 }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) { | |
| 174 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM library_spectra_source') | |
| 175 }else{ | |
| 176 l_source = NULL | |
| 177 } | |
| 178 | |
| 179 if (!is.null(l_s_peaks)){ | |
| 180 DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE) | |
| 181 } | |
| 182 | |
| 183 if (!is.null(l_source)){ | |
| 184 DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE) | |
| 185 } | |
| 186 | |
| 187 | |
| 188 } | |
| 189 | |
| 92 | 190 |
| 93 extractMultiple <- function(optParam){ | 191 extractMultiple <- function(optParam){ |
| 94 if (!is.na(optParam)){ | 192 if (!is.na(optParam)){ |
| 95 param <- trimws(strsplit(optParam, ',')[[1]]) | 193 param <- trimws(strsplit(optParam, ',')[[1]]) |
| 96 param <- param[param != ""] | 194 param <- param[param != ""] |
| 116 l_dbType <- opt$l_dbType | 214 l_dbType <- opt$l_dbType |
| 117 l_dbPth <- opt$l_dbPth | 215 l_dbPth <- opt$l_dbPth |
| 118 } | 216 } |
| 119 | 217 |
| 120 | 218 |
| 121 | |
| 122 | |
| 123 q_polarity <- extractMultiple(opt$q_polarity) | 219 q_polarity <- extractMultiple(opt$q_polarity) |
| 124 l_polarity <- extractMultiple(opt$l_polarity) | 220 l_polarity <- extractMultiple(opt$l_polarity) |
| 125 | 221 |
| 126 q_xcmsGroups <- extractMultiple(opt$q_xcmsGroups) | 222 q_xcmsGroups <- extractMultiple(opt$q_xcmsGroups) |
| 127 l_xcmsGroups <- extractMultiple(opt$l_xcmsGroups) | 223 l_xcmsGroups <- extractMultiple(opt$l_xcmsGroups) |
| 191 q_rtrangeMin <- opt$q_rtrangeMin | 287 q_rtrangeMin <- opt$q_rtrangeMin |
| 192 }else{ | 288 }else{ |
| 193 q_rtrangeMin <- NA | 289 q_rtrangeMin <- NA |
| 194 } | 290 } |
| 195 | 291 |
| 196 | 292 q_check <- checkSPeakMeta(opt$q_dbPth, 'query') |
| 197 | 293 l_check <- checkSPeakMeta(opt$l_dbPth, 'library') |
| 198 sm <- msPurity::spectralMatching( | 294 |
| 199 q_purity = opt$q_purity, | 295 if (q_check && l_check){ |
| 200 l_purity = opt$l_purity, | 296 sm <- msPurity::spectralMatching( |
| 201 | 297 q_purity = opt$q_purity, |
| 202 q_ppmProd = opt$q_ppmProd, | 298 l_purity = opt$l_purity, |
| 203 l_ppmProd = opt$l_ppmProd, | 299 |
| 204 | 300 q_ppmProd = opt$q_ppmProd, |
| 205 q_ppmPrec = opt$q_ppmPrec, | 301 l_ppmProd = opt$l_ppmProd, |
| 206 l_ppmPrec = opt$l_ppmPrec, | 302 |
| 207 | 303 q_ppmPrec = opt$q_ppmPrec, |
| 208 q_raThres = opt$q_raThres, | 304 l_ppmPrec = opt$l_ppmPrec, |
| 209 l_raThres = opt$l_raThres, | 305 |
| 210 | 306 q_raThres = opt$q_raThres, |
| 211 q_pol = q_polarity, | 307 l_raThres = opt$l_raThres, |
| 212 l_pol = l_polarity, | 308 |
| 213 | 309 q_pol = q_polarity, |
| 214 q_xcmsGroups = q_xcmsGroups, | 310 l_pol = l_polarity, |
| 215 l_xcmsGroups = l_xcmsGroups, | 311 |
| 216 | 312 q_xcmsGroups = q_xcmsGroups, |
| 217 q_pids = q_pids, | 313 l_xcmsGroups = l_xcmsGroups, |
| 218 l_pids = l_pids, | 314 |
| 219 | 315 q_pids = q_pids, |
| 220 q_sources = q_sources, | 316 l_pids = l_pids, |
| 221 l_sources = l_sources, | 317 |
| 222 | 318 q_sources = q_sources, |
| 223 q_instrumentTypes = q_instrumentTypes, | 319 l_sources = l_sources, |
| 224 l_instrumentTypes = l_instrumentTypes, | 320 |
| 225 | 321 q_instrumentTypes = q_instrumentTypes, |
| 226 q_spectraFilter= q_spectraFilter, | 322 l_instrumentTypes = l_instrumentTypes, |
| 227 l_spectraFilter= l_spectraFilter, | 323 |
| 228 | 324 q_spectraFilter= q_spectraFilter, |
| 229 l_rtrange=c(l_rtrangeMin, l_rtrangeMax), | 325 l_spectraFilter= l_spectraFilter, |
| 230 q_rtrange=c(q_rtrangeMin, q_rtrangeMax), | 326 |
| 231 | 327 l_rtrange=c(l_rtrangeMin, l_rtrangeMax), |
| 232 q_accessions = opt$q_accessions, | 328 q_rtrange=c(q_rtrangeMin, q_rtrangeMax), |
| 233 l_accessions= opt$l_accessions, | 329 |
| 234 | 330 q_accessions = opt$q_accessions, |
| 235 raW = opt$raW, | 331 l_accessions= opt$l_accessions, |
| 236 mzW = opt$mzW, | 332 |
| 237 rttol=opt$rttol, | 333 raW = opt$raW, |
| 238 cores=opt$cores, | 334 mzW = opt$mzW, |
| 239 | 335 rttol=opt$rttol, |
| 240 copyDb=copyDb, | 336 cores=opt$cores, |
| 241 updateDb=updateDb, | 337 |
| 242 outPth = "db_with_spectral_matching.sqlite", | 338 copyDb=copyDb, |
| 243 | 339 updateDb=updateDb, |
| 244 q_dbPth = q_dbPth, | 340 outPth = "db_with_spectral_matching.sqlite", |
| 245 q_dbType = q_dbType, | 341 |
| 246 q_dbName = q_dbName, | 342 q_dbPth = q_dbPth, |
| 247 q_dbHost = q_dbHost, | 343 q_dbType = q_dbType, |
| 248 q_dbUser = q_dbUser, | 344 q_dbName = q_dbName, |
| 249 q_dbPass = q_dbPass, | 345 q_dbHost = q_dbHost, |
| 250 q_dbPort = q_dbPort, | 346 q_dbUser = q_dbUser, |
| 251 | 347 q_dbPass = q_dbPass, |
| 252 l_dbPth = l_dbPth, | 348 q_dbPort = q_dbPort, |
| 253 l_dbType = l_dbType, | 349 |
| 254 l_dbName = l_dbName, | 350 l_dbPth = l_dbPth, |
| 255 l_dbHost = l_dbHost, | 351 l_dbType = l_dbType, |
| 256 l_dbUser = l_dbUser, | 352 l_dbName = l_dbName, |
| 257 l_dbPass = l_dbPass, | 353 l_dbHost = l_dbHost, |
| 258 l_dbPort = l_dbPort | 354 l_dbUser = l_dbUser, |
| 259 | 355 l_dbPass = l_dbPass, |
| 260 ) | 356 l_dbPort = l_dbPort |
| 261 | 357 |
| 262 | 358 ) |
| 263 | 359 |
| 264 write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) | 360 sm <- addQueryNameColumn(sm) |
| 265 write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) | 361 # Get name of the query results (and merged with the data frames) |
| 266 | 362 write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) |
| 267 | 363 write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) |
| 268 # Add extra details from library spectra in resulting database | 364 |
| 269 # First get all the ids from the l_s_peak_meta from the query database | 365 if(updateDb){ |
| 270 if(updateDb){ | 366 updateDbF(q_con, l_con) |
| 271 message('Adding extra details to database') | 367 } |
| 272 q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) | 368 } |
| 273 if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){ | |
| 274 l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT * FROM l_s_peak_meta') | |
| 275 colnames(l_s_peak_meta)[1] <- 'pid' | |
| 276 } | |
| 277 | |
| 278 l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth) | |
| 279 if (DBI::dbExistsTable(l_con, "s_peaks")){ | |
| 280 l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) | |
| 281 | |
| 282 }else if(DBI::dbExistsTable(l_con, "library_spectra")){ | |
| 283 l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra | |
| 284 WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) | |
| 285 }else{ | |
| 286 l_s_peaks = NULL | |
| 287 } | |
| 288 | |
| 289 if (DBI::dbExistsTable(l_con, "source")){ | |
| 290 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM source') | |
| 291 }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) { | |
| 292 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM library_spectra_source') | |
| 293 }else{ | |
| 294 l_source = NULL | |
| 295 } | |
| 296 | |
| 297 if (!is.null(l_s_peaks)){ | |
| 298 DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE) | |
| 299 } | |
| 300 | |
| 301 if (!is.null(l_source)){ | |
| 302 DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE) | |
| 303 } | |
| 304 | |
| 305 } | |
| 306 | |
| 307 | |
| 308 | |
| 309 | |
| 310 |
