diff spectralMatching.R @ 15:b71677d4f958 draft default tip

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc
author tomnl
date Wed, 27 Nov 2019 12:31:10 +0000
parents 2c8b5a640a04
children
line wrap: on
line diff
--- a/spectralMatching.R	Fri Sep 27 09:12:41 2019 -0400
+++ b/spectralMatching.R	Wed Nov 27 12:31:10 2019 +0000
@@ -90,6 +90,104 @@
 
 print(opt)
 
+# Check whether a SQLite database contains at least one spectrum.
+#
+# dbPth: path to the SQLite file, or NULL (no path given; the check is
+#        skipped and TRUE is returned).
+# nme:   label for the database (e.g. 'query', 'library'), only used in the
+#        printed diagnostics.
+#
+# Returns TRUE when the check passes or is skipped, FALSE when the file is
+# missing/empty, has no spectra meta table, or that table has no rows.
+checkSPeakMeta <- function(dbPth, nme){
+    if (is.null(dbPth)){
+        return(TRUE)
+    }
+
+    # `&&` short-circuits, so the size is only read for an existing file
+    if (!(file.exists(dbPth) && file.info(dbPth)$size > 0)){
+        print(paste("file empty or does not exist for", nme))
+        return(FALSE)
+    }
+
+    con <- DBI::dbConnect(RSQLite::SQLite(), dbPth)
+    # release the connection whichever branch returns
+    on.exit(DBI::dbDisconnect(con), add = TRUE)
+
+    if (DBI::dbExistsTable(con, "s_peak_meta")){
+        metaTable <- "s_peak_meta"
+    }else if (DBI::dbExistsTable(con, "library_spectra_meta")){
+        metaTable <- "library_spectra_meta"
+    }else{
+        print(paste("No spectra available for ",nme))
+        return(FALSE)
+    }
+
+    # a single row is enough to prove the meta table is populated
+    spm <- DBI::dbGetQuery(con, sprintf('SELECT  * FROM %s ORDER BY ROWID ASC LIMIT 1', metaTable))
+    if (nrow(spm) == 0){
+        print(paste("No spectra available for ",nme))
+        return(FALSE)
+    }
+
+    TRUE
+}
+
+
+# Add a `query_entry_name` column to the spectral matching results.
+#
+# sm: list returned by msPurity::spectralMatching; the fields read here are
+#     q_dbPth, matchedResults and xcmsMatchedResults.
+#
+# Returns `sm` with the query spectrum names merged into matchedResults and,
+# when it holds rows, xcmsMatchedResults. `sm` is returned unchanged when
+# there are no matched results or when the query database has neither an
+# `s_peak_meta` nor a `library_spectra_meta` table.
+addQueryNameColumn <- function(sm){
+    # TRUE when a result slot holds nothing to merge: NULL, a length-one
+    # value, or a table with zero rows
+    noResults <- function(res){
+        is.null(res) || length(res)==1 || nrow(res)==0
+    }
+
+    if (noResults(sm$matchedResults)){
+        return(sm)
+    }
+
+    con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth)
+    # release the connection on every exit path
+    on.exit(DBI::dbDisconnect(con), add = TRUE)
+
+    if (DBI::dbExistsTable(con, "s_peak_meta")){
+        spm <- DBI::dbGetQuery(con, 'SELECT  pid, name AS query_entry_name FROM s_peak_meta')
+    }else if(DBI::dbExistsTable(con, "library_spectra_meta")){
+        spm <- DBI::dbGetQuery(con, 'SELECT  id AS pid, name  AS query_entry_name FROM library_spectra_meta')
+    }else{
+        # without a meta table there are no names to add; previously this
+        # path failed later with "object 'spm' not found"
+        message('No spectra meta table found in query database, query names not added')
+        return(sm)
+    }
+
+    # join on `pid` when the results carry that column, otherwise on `qpid`
+    mergeNames <- function(res){
+        key <- if ('pid' %in% colnames(res)) 'pid' else 'qpid'
+        merge(res, spm, by.x=key, by.y='pid')
+    }
+
+    print(sm$matchedResults)
+    sm$matchedResults <- mergeNames(sm$matchedResults)
+
+    print(sm$xcmsMatchedResults)
+    if (!noResults(sm$xcmsMatchedResults)){
+        sm$xcmsMatchedResults <- mergeNames(sm$xcmsMatchedResults)
+    }
+
+    return(sm)
+}
+
+
+# Copy library peak and source details into the query (results) database.
+#
+# q_con, l_con: kept for backward compatibility only; they are never read.
+#               The function opens (and closes) its own connections.
+#
+# Reads the script-level variables `sm` (for `sm$q_dbPth`) and `l_dbPth`.
+# Peaks for every library spectrum listed in the query database's
+# `l_s_peak_meta` table are appended to `l_s_peaks`, and the library's source
+# table is appended to `l_source`, both in the query database.
+# Returns NULL invisibly.
+updateDbF <- function(q_con, l_con){
+    message('Adding extra details to database')
+
+    q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth)
+    on.exit(DBI::dbDisconnect(q_con), add = TRUE)
+
+    if (!DBI::dbExistsTable(q_con, "l_s_peak_meta")){
+        # nothing identifies which library spectra to copy; previously this
+        # path failed with "object 'l_s_peak_meta' not found"
+        message('No l_s_peak_meta table in query database, no extra details added')
+        return(invisible(NULL))
+    }
+    l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT  * FROM l_s_peak_meta')
+    # the first column holds the library spectrum id under either schema
+    colnames(l_s_peak_meta)[1] <- 'pid'
+    pidList <- paste(unique(l_s_peak_meta$pid), collapse=',')
+
+    l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth)
+    on.exit(DBI::dbDisconnect(l_con), add = TRUE)
+
+    if (DBI::dbExistsTable(l_con, "s_peaks")){
+        # read from the library connection: that is the database just checked
+        # for the table (the earlier code queried the query database here)
+        l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT  * FROM s_peaks WHERE pid in (%s)", pidList))
+    }else if(DBI::dbExistsTable(l_con, "library_spectra")){
+        l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT  * FROM library_spectra
+                                                WHERE library_spectra_meta_id in (%s)", pidList))
+    }else{
+        l_s_peaks <- NULL
+    }
+
+    if (DBI::dbExistsTable(l_con, "source")){
+        l_source <- DBI::dbGetQuery(l_con, 'SELECT  * FROM source')
+    }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) {
+        l_source <- DBI::dbGetQuery(l_con, 'SELECT  * FROM library_spectra_source')
+    }else{
+        l_source <- NULL
+    }
+
+    if (!is.null(l_s_peaks)){
+        DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE)
+    }
+
+    if (!is.null(l_source)){
+        DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE)
+    }
+
+    invisible(NULL)
+}
+
+
 extractMultiple <- function(optParam){
   if (!is.na(optParam)){
      param <- trimws(strsplit(optParam, ',')[[1]])
@@ -118,8 +216,6 @@
 }
 
 
-
-
 q_polarity <- extractMultiple(opt$q_polarity)
 l_polarity <- extractMultiple(opt$l_polarity)
 
@@ -193,118 +289,80 @@
   q_rtrangeMin <- NA
 }
 
-
-
-sm <- msPurity::spectralMatching(
-                           q_purity =  opt$q_purity,
-                           l_purity =  opt$l_purity,
-
-                           q_ppmProd =  opt$q_ppmProd,
-                           l_ppmProd =  opt$l_ppmProd,
-
-                           q_ppmPrec =  opt$q_ppmPrec,
-                           l_ppmPrec =  opt$l_ppmPrec,
-
-                           q_raThres =  opt$q_raThres,
-                           l_raThres =  opt$l_raThres,
-
-                           q_pol =  q_polarity,
-                           l_pol =  l_polarity,
-
-                           q_xcmsGroups = q_xcmsGroups,
-                           l_xcmsGroups = l_xcmsGroups,
-
-                           q_pids = q_pids,
-                           l_pids = l_pids,
-
-                           q_sources = q_sources,
-                           l_sources = l_sources,
-
-                           q_instrumentTypes = q_instrumentTypes,
-                           l_instrumentTypes = l_instrumentTypes,
-
-                           q_spectraFilter= q_spectraFilter,
-                           l_spectraFilter= l_spectraFilter,
-
-                           l_rtrange=c(l_rtrangeMin, l_rtrangeMax),
-                           q_rtrange=c(q_rtrangeMin, q_rtrangeMax),
-
-                           q_accessions = opt$q_accessions,
-                           l_accessions= opt$l_accessions,
-
-                           raW = opt$raW,
-                           mzW = opt$mzW,
-                           rttol=opt$rttol,
-                           cores=opt$cores,
-
-                           copyDb=copyDb,
-                           updateDb=updateDb,
-                           outPth = "db_with_spectral_matching.sqlite",
-
-                           q_dbPth = q_dbPth,
-                           q_dbType = q_dbType,
-                           q_dbName = q_dbName,
-                           q_dbHost = q_dbHost,
-                           q_dbUser = q_dbUser,
-                           q_dbPass = q_dbPass,
-                           q_dbPort = q_dbPort,
+# Pre-flight checks on the query and library SQLite files. Each flag is TRUE
+# when no path was supplied (NULL) or when the file passes checkSPeakMeta;
+# both flags gate the spectral matching run further down.
+q_check <- checkSPeakMeta(opt$q_dbPth, 'query')
+l_check <- checkSPeakMeta(opt$l_dbPth, 'library')
 
-                           l_dbPth = l_dbPth,
-                           l_dbType = l_dbType,
-                           l_dbName = l_dbName,
-                           l_dbHost = l_dbHost,
-                           l_dbUser = l_dbUser,
-                           l_dbPass = l_dbPass,
-                           l_dbPort = l_dbPort
-
-                           )
-
-
-
-write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE)
-write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE)
-
-
-# Add extra details from library spectra in resulting database
-# First get all the ids from the l_s_peak_meta from the query database
-if(updateDb){
-  message('Adding extra details to database')
-  q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth)
-  if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){
-    l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT  * FROM l_s_peak_meta')
-    colnames(l_s_peak_meta)[1] <- 'pid'
-  }
-
-  l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth)
-  if (DBI::dbExistsTable(l_con, "s_peaks")){
-    l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT  * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=',')))
-
-  }else if(DBI::dbExistsTable(l_con, "library_spectra")){
-    l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT  * FROM library_spectra
-                                                WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=',')))
-  }else{
-    l_s_peaks = NULL
-  }
-
-  if (DBI::dbExistsTable(l_con, "source")){
-    l_source <- DBI::dbGetQuery(l_con, 'SELECT  * FROM source')
-  }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) {
-    l_source <- DBI::dbGetQuery(l_con, 'SELECT  * FROM library_spectra_source')
-  }else{
-    l_source = NULL
-  }
-
-  if (!is.null(l_s_peaks)){
-    DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE)
-  }
-
-  if (!is.null(l_source)){
-    DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE)
-  }
-
+# Run the spectral matching only when both database checks passed. When either
+# check failed nothing below runs, so no result files are written.
+if (q_check && l_check){
+    # Arguments come either straight from the parsed options (`opt$...`) or
+    # from script variables prepared earlier in the file.
+    sm <- msPurity::spectralMatching(
+        q_purity =  opt$q_purity,
+        l_purity =  opt$l_purity,
+        
+        q_ppmProd =  opt$q_ppmProd,
+        l_ppmProd =  opt$l_ppmProd,
+        
+        q_ppmPrec =  opt$q_ppmPrec,
+        l_ppmPrec =  opt$l_ppmPrec,
+        
+        q_raThres =  opt$q_raThres,
+        l_raThres =  opt$l_raThres,
+        
+        q_pol =  q_polarity,
+        l_pol =  l_polarity,
+        
+        q_xcmsGroups = q_xcmsGroups,
+        l_xcmsGroups = l_xcmsGroups,
+        
+        q_pids = q_pids,
+        l_pids = l_pids,
+        
+        q_sources = q_sources,
+        l_sources = l_sources,
+        
+        q_instrumentTypes = q_instrumentTypes,
+        l_instrumentTypes = l_instrumentTypes,
+        
+        q_spectraFilter= q_spectraFilter,
+        l_spectraFilter= l_spectraFilter,
+        
+        l_rtrange=c(l_rtrangeMin, l_rtrangeMax),
+        q_rtrange=c(q_rtrangeMin, q_rtrangeMax),
+        
+        q_accessions = opt$q_accessions,
+        l_accessions= opt$l_accessions,
+        
+        raW = opt$raW,
+        mzW = opt$mzW,
+        rttol=opt$rttol,
+        cores=opt$cores,
+        
+        copyDb=copyDb,
+        updateDb=updateDb,
+        outPth = "db_with_spectral_matching.sqlite",
+        
+        q_dbPth = q_dbPth,
+        q_dbType = q_dbType,
+        q_dbName = q_dbName,
+        q_dbHost = q_dbHost,
+        q_dbUser = q_dbUser,
+        q_dbPass = q_dbPass,
+        q_dbPort = q_dbPort,
+        
+        l_dbPth = l_dbPth,
+        l_dbType = l_dbType,
+        l_dbName = l_dbName,
+        l_dbHost = l_dbHost,
+        l_dbUser = l_dbUser,
+        l_dbPass = l_dbPass,
+        l_dbPort = l_dbPort
+        
+    )
+    
+    # Merge the query spectrum names into the result tables before writing them
+    sm <- addQueryNameColumn(sm)
+    write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE)
+    write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE)
+    
+    # Add extra details from the library spectra to the resulting database.
+    # NOTE(review): q_con and l_con are not assigned anywhere in the visible
+    # script; updateDbF assigns both names itself before reading them, so
+    # these lazily evaluated arguments are never forced - confirm and tidy up.
+    if(updateDb){
+        updateDbF(q_con, l_con)
+    }
 }
-
-
-
-
-