Mercurial > repos > tomnl > mspurity_flagremove
diff spectralMatching.R @ 15:b71677d4f958 draft default tip
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc
| author | tomnl |
|---|---|
| date | Wed, 27 Nov 2019 12:31:10 +0000 |
| parents | 2c8b5a640a04 |
| children |
line wrap: on
line diff
--- a/spectralMatching.R Fri Sep 27 09:12:41 2019 -0400 +++ b/spectralMatching.R Wed Nov 27 12:31:10 2019 +0000 @@ -90,6 +90,104 @@ print(opt) +# check if the sqlite databases have any spectra +checkSPeakMeta <- function(dbPth, nme){ + if(is.null(dbPth)){ + return(TRUE) + }else if ((file.exists(dbPth)) & (file.info(dbPth)$size>0)){ + con <- DBI::dbConnect(RSQLite::SQLite(), dbPth) + if (DBI::dbExistsTable(con, "s_peak_meta")){ + spm <- DBI::dbGetQuery(con, 'SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1') + return(TRUE) + }else if(DBI::dbExistsTable(con, "library_spectra_meta")){ + spm <- DBI::dbGetQuery(con, 'SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1') + return(TRUE) + }else{ + print(paste("No spectra available for ",nme)) + return(FALSE) + } + }else{ + print(paste("file empty or does not exist for", nme)) + return(FALSE) + } + + +} + + +addQueryNameColumn <- function(sm){ + if (is.null(sm$matchedResults) || length(sm$matchedResults)==1 || nrow(sm$matchedResults)==0){ + return(sm) + } + + con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) + if (DBI::dbExistsTable(con, "s_peak_meta")){ + spm <- DBI::dbGetQuery(con, 'SELECT pid, name AS query_entry_name FROM s_peak_meta') + }else if(DBI::dbExistsTable(con, "library_spectra_meta")){ + spm <- DBI::dbGetQuery(con, 'SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta') + } + print(sm$matchedResults) + if ('pid' %in% colnames(sm$matchedResults)){ + sm$matchedResults <- merge(sm$matchedResults, spm, by.x='pid', by.y='pid') + }else{ + sm$matchedResults <- merge(sm$matchedResults, spm, by.x='qpid', by.y='pid') + } + + print(sm$xcmsMatchedResults) + if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults)==1 || nrow(sm$xcmsMatchedResults)==0){ + return(sm) + }else{ + if ('pid' %in% colnames(sm$xcmsMatchedResults)){ + sm$xcmsMatchedResults<- merge(sm$xcmsMatchedResults, spm, by.x='pid', by.y='pid') + }else{ + sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x='qpid', by.y='pid') + } + } + + return(sm) + +} + + +updateDbF <- function(q_con, l_con){ + message('Adding extra details to database') + q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) + if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){ + l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT * FROM l_s_peak_meta') + colnames(l_s_peak_meta)[1] <- 'pid' + } + + l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth) + if (DBI::dbExistsTable(l_con, "s_peaks")){ + l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) + + }else if(DBI::dbExistsTable(l_con, "library_spectra")){ + l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra + WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) + }else{ + l_s_peaks = NULL + } + + if (DBI::dbExistsTable(l_con, "source")){ + l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM source') + }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) { + l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM library_spectra_source') + }else{ + l_source = NULL + } + + if (!is.null(l_s_peaks)){ + DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE) + } + + if (!is.null(l_source)){ + DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE) + } + + +} + + extractMultiple <- function(optParam){ if (!is.na(optParam)){ param <- trimws(strsplit(optParam, ',')[[1]]) @@ -118,8 +216,6 @@ } - - q_polarity <- extractMultiple(opt$q_polarity) l_polarity <- extractMultiple(opt$l_polarity) @@ -193,118 +289,80 @@ q_rtrangeMin <- NA } - - -sm <- msPurity::spectralMatching( - q_purity = opt$q_purity, - l_purity = opt$l_purity, - - q_ppmProd = opt$q_ppmProd, - l_ppmProd = opt$l_ppmProd, - - q_ppmPrec = opt$q_ppmPrec, - l_ppmPrec = opt$l_ppmPrec, - - q_raThres = opt$q_raThres, - l_raThres = opt$l_raThres, - - q_pol = q_polarity, - l_pol = l_polarity, - - q_xcmsGroups = q_xcmsGroups, - l_xcmsGroups = l_xcmsGroups, - - q_pids = q_pids, - l_pids = l_pids, - - q_sources = q_sources, - l_sources = l_sources, - - q_instrumentTypes = q_instrumentTypes, - l_instrumentTypes = l_instrumentTypes, - - q_spectraFilter= q_spectraFilter, - l_spectraFilter= l_spectraFilter, - - l_rtrange=c(l_rtrangeMin, l_rtrangeMax), - q_rtrange=c(q_rtrangeMin, q_rtrangeMax), - - q_accessions = opt$q_accessions, - l_accessions= opt$l_accessions, - - raW = opt$raW, - mzW = opt$mzW, - rttol=opt$rttol, - cores=opt$cores, - - copyDb=copyDb, - updateDb=updateDb, - outPth = "db_with_spectral_matching.sqlite", - - q_dbPth = q_dbPth, - q_dbType = q_dbType, - q_dbName = q_dbName, - q_dbHost = q_dbHost, - q_dbUser = q_dbUser, - q_dbPass = q_dbPass, - q_dbPort = q_dbPort, +q_check <- checkSPeakMeta(opt$q_dbPth, 'query') +l_check <- checkSPeakMeta(opt$l_dbPth, 'library') - l_dbPth = l_dbPth, - l_dbType = l_dbType, - l_dbName = l_dbName, - l_dbHost = l_dbHost, - l_dbUser = l_dbUser, - l_dbPass = l_dbPass, - l_dbPort = l_dbPort - - ) - - - -write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) -write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) - - -# Add extra details from library spectra in resulting database -# First get all the ids from the l_s_peak_meta from the query database -if(updateDb){ - message('Adding extra details to database') - q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) - if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){ - l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT * FROM l_s_peak_meta') - colnames(l_s_peak_meta)[1] <- 'pid' - } - - l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth) - if (DBI::dbExistsTable(l_con, "s_peaks")){ - l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) - - }else if(DBI::dbExistsTable(l_con, "library_spectra")){ - l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra - WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) - }else{ - l_s_peaks = NULL - } - - if (DBI::dbExistsTable(l_con, "source")){ - l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM source') - }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) { - l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM library_spectra_source') - }else{ - l_source = NULL - } - - if (!is.null(l_s_peaks)){ - DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE) - } - - if (!is.null(l_source)){ - DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE) - } - +if (q_check && l_check){ + sm <- msPurity::spectralMatching( + q_purity = opt$q_purity, + l_purity = opt$l_purity, + + q_ppmProd = opt$q_ppmProd, + l_ppmProd = opt$l_ppmProd, + + q_ppmPrec = opt$q_ppmPrec, + l_ppmPrec = opt$l_ppmPrec, + + q_raThres = opt$q_raThres, + l_raThres = opt$l_raThres, + + q_pol = q_polarity, + l_pol = l_polarity, + + q_xcmsGroups = q_xcmsGroups, + l_xcmsGroups = l_xcmsGroups, + + q_pids = q_pids, + l_pids = l_pids, + + q_sources = q_sources, + l_sources = l_sources, + + q_instrumentTypes = q_instrumentTypes, + l_instrumentTypes = l_instrumentTypes, + + q_spectraFilter= q_spectraFilter, + l_spectraFilter= l_spectraFilter, + + l_rtrange=c(l_rtrangeMin, l_rtrangeMax), + q_rtrange=c(q_rtrangeMin, q_rtrangeMax), + + q_accessions = opt$q_accessions, + l_accessions= opt$l_accessions, + + raW = opt$raW, + mzW = opt$mzW, + rttol=opt$rttol, + cores=opt$cores, + + copyDb=copyDb, + updateDb=updateDb, + outPth = "db_with_spectral_matching.sqlite", + + q_dbPth = q_dbPth, + q_dbType = q_dbType, + q_dbName = q_dbName, + q_dbHost = q_dbHost, + q_dbUser = q_dbUser, + q_dbPass = q_dbPass, + q_dbPort = q_dbPort, + + l_dbPth = l_dbPth, + l_dbType = l_dbType, + l_dbName = l_dbName, + l_dbHost = l_dbHost, + l_dbUser = l_dbUser, + l_dbPass = l_dbPass, + l_dbPort = l_dbPort + + ) + + sm <- addQueryNameColumn(sm) + # Get name of the query results (and merged with the data frames) + write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) + write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) + + if(updateDb){ + updateDbF(q_con, l_con) + } } - - - - -
