diff create_sqlite_db.R @ 19:906e8e2be944 draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 04023134d8f28e85927ca293373c506484149ead-dirty
author tomnl
date Thu, 31 May 2018 09:15:28 -0400
parents f13a7d89afdd
children 74917d0e89cc
line wrap: on
line diff
--- a/create_sqlite_db.R	Tue May 15 08:16:18 2018 -0400
+++ b/create_sqlite_db.R	Thu May 31 09:15:28 2018 -0400
@@ -138,30 +138,65 @@
 
 con <- DBI::dbConnect(RSQLite::SQLite(), db_pth)
 
-add_extra_table_elucidation <- function(name, pth){
-
-  if (!is.null(pth)){
- 
-     print(pth)
-     df <- read.table(pth,  header = TRUE, sep='\t', stringsAsFactors = FALSE,  comment.char = "")
-     # bug for repeating headers
-     df <- df[!df$UID=='UID',]
+# Stream the tab-separated file `pth` into table `name` of `db_con` in chunks
+# of `chunkSize` rows (filtered by write_to_table); returns 0 if `pth` is NULL.
+add_extra_table_elucidation <- function(name, pth, db_con, filter_Score=NA, filter_Rank=NA){
+    if (is.null(pth)){
+        return(0)
+    }
+    chunkSize <- 5000
+    print(pth)
+    infile <- file(description=pth, open="r")
+    # release the file handle even if a read or a database write fails
+    on.exit(close(infile), add=TRUE)
+    df <- read.table(infile, nrows=chunkSize, header=TRUE, sep='\t', stringsAsFactors=FALSE, comment.char="")
+    headers <- colnames(df)
+    print(head(df))
+    write_to_table(df, db_con, name, FALSE, filter_Score, filter_Rank)
 
-     # get peakid, an scan id
-     df_ids <- stringr::str_split_fixed(df$UID, '-', 3)
-     colnames(df_ids) <- c('grp_id', 'file_id', 'pid')
-     df <- cbind(df_ids, df)
-     # export to database
-     
+    index <- 0
+    # a chunk shorter than chunkSize means the whole file has been read
+    while (nrow(df) == chunkSize){
+        index <- index + 1
+        print(paste('Processing rows:', index * chunkSize))
+        # peek one line ahead: read.table() stops with an error at end of file
+        peek <- readLines(infile, n=1)
+        if (length(peek) == 0) break
+        pushBack(peek, infile)
 
-     DBI::dbWriteTable(con, name=name, value=df, row.names=FALSE)
+        df <- read.table(infile, nrows=chunkSize, header=FALSE, sep='\t', stringsAsFactors=FALSE, comment.char="")
+        colnames(df) <- headers
 
-  }
+        write_to_table(df, db_con, name, TRUE, filter_Score, filter_Rank)
+    }
+    print('Processed all files!')
 
 }
 
+# Filter one chunk of results and write it to table `name` of `db_con`.
+# Keeps rows with Score >= filter_Score and Rank <= filter_Rank (a filter that
+# is NA is not applied); `append` is handed on to DBI::dbWriteTable().
+write_to_table <- function(df, db_con, name, append, filter_Score, filter_Rank){
 
-add_probmetab <- function(pth){
+    # drop header lines that are repeated inside the data
+    df <- df[!df$UID=='UID',]
+    print(paste('filter Score:', filter_Score, 'filter Rank:', filter_Rank))
+    # a chunk that held a header line is read as text, so compare as numbers;
+    # which() leaves out rows with a missing value instead of adding NA rows
+    if (!is.na(filter_Score)){
+        df <- df[which(as.numeric(df$Score) >= filter_Score), , drop=FALSE]
+    }
+    if (!is.na(filter_Rank)){
+        df <- df[which(as.numeric(df$Rank) <= filter_Rank), , drop=FALSE]
+    }
+    # split the UID on '-' into the grp_id, file_id and pid columns
+    df_ids <- stringr::str_split_fixed(df$UID, '-', 3)
+    colnames(df_ids) <- c('grp_id', 'file_id', 'pid')
+    df <- cbind(df_ids, df)
+    DBI::dbWriteTable(db_con, name=name, value=df, row.names=FALSE, append=append)
+}
+
+add_probmetab <- function(pth, con){
   if (!is.null(pth)){
 
       df <- read.table(pth,  header = TRUE, sep='\t', stringsAsFactors = FALSE,  comment.char = "")
@@ -204,9 +239,9 @@
 
 }
 
-add_extra_table_elucidation('metfrag_results', opt$metfrag_result)
-add_extra_table_elucidation('sirius_csifingerid_results', opt$sirius_csifingerid_result)
-add_probmetab(opt$probmetab_result)
+add_extra_table_elucidation(name='metfrag_results', pth=opt$metfrag_result, db_con=con, filter_Score=0.6, filter_Rank=NA)  # Score >= 0.6 only
+add_extra_table_elucidation(name='sirius_csifingerid_results', pth=opt$sirius_csifingerid_result, db_con=con, filter_Score=NA, filter_Rank=5)  # Rank <= 5 only
+add_probmetab(pth=opt$probmetab_result, con=con)