Mercurial > repos > tomnl > create_sqlite_db
diff create_sqlite_db.R @ 19:906e8e2be944 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 04023134d8f28e85927ca293373c506484149ead-dirty
author | tomnl |
---|---|
date | Thu, 31 May 2018 09:15:28 -0400 |
parents | f13a7d89afdd |
children | 74917d0e89cc |
line wrap: on
line diff
# create_sqlite_db.R -- post-change code from the two hunks of this diff
# (new-file lines 138-202 and 239-247), reflowed to one statement per line.

con <- DBI::dbConnect(RSQLite::SQLite(), db_pth)


# Report whether an open text connection still holds a non-empty line.
#
# Empty lines are consumed and skipped; the first non-empty line found is
# pushed back onto the connection so that the next read sees it again.
# Returns TRUE if such a line exists, FALSE once the connection is exhausted.
has_unread_rows <- function(file_con) {
  repeat {
    peek <- readLines(file_con, n = 1L, warn = FALSE)
    if (length(peek) == 0L) {
      return(FALSE)
    }
    if (nzchar(peek)) {
      pushBack(peek, file_con)
      return(TRUE)
    }
  }
}


# Load a tab-separated result file into the database table `name`,
# reading it in chunks of 5000 rows so the whole file is never held in memory.
#
# Args:
#   name:         name of the database table to write.
#   pth:          path of the tab-separated file; NULL means "nothing to do".
#   db_con:       DBI connection the table is written to.
#   filter_Score: if not NA, keep only rows with Score >= filter_Score.
#   filter_Rank:  if not NA, keep only rows with Rank <= filter_Rank.
#
# Returns 0 when `pth` is NULL, otherwise NULL (invisibly).
add_extra_table_elucidation <- function(name, pth, db_con, filter_Score=NA, filter_Rank=NA){
  if (is.null(pth)){
    return(0)
  }

  chunkSize <- 5000
  print(pth)

  file_con <- file(description=pth, open="r")
  # Close the file even if a read or a database write fails part-way.
  on.exit(close(file_con), add = TRUE)

  # First chunk: carries the header line and creates the table (append = FALSE).
  df <- read.table(file_con, nrows=chunkSize, header = TRUE, sep='\t', stringsAsFactors = FALSE, comment.char = "")
  headers <- colnames(df)
  print(head(df))
  write_to_table(df, db_con, name, FALSE, filter_Score, filter_Rank)

  index <- 1
  print(paste('Processing rows:', index * chunkSize))

  # Remaining chunks: keep reading until the connection is exhausted.
  # Checking for unread input first avoids calling read.table() on an empty
  # connection, which would raise an error when the row count is an exact
  # multiple of chunkSize.
  while (has_unread_rows(file_con)) {
    df <- read.table(file_con, nrows=chunkSize, skip=0, header = FALSE, sep='\t', stringsAsFactors = FALSE, comment.char = "")
    colnames(df) <- headers

    write_to_table(df, db_con, name, TRUE, filter_Score, filter_Rank)

    index <- index + 1
    print(paste('Processing rows:', index * chunkSize))
  }

  print('Processed all files!')
  invisible(NULL)
}


# Filter one chunk and write it to the database table `name`.
#
# Args:
#   df:           data frame holding one chunk; must contain a UID column.
#   db_con:       DBI connection to write to.
#   name:         table name.
#   append:       passed to DBI::dbWriteTable(); FALSE for the first chunk,
#                 TRUE for subsequent chunks.
#   filter_Score: if not NA, keep only rows with Score >= filter_Score.
#   filter_Rank:  if not NA, keep only rows with Rank <= filter_Rank.
write_to_table <- function(df, db_con, name, append, filter_Score, filter_Rank){

  # Drop repeated header rows that appear inside the data.
  df <- df[!df$UID=='UID',]
  print(filter_Score)
  print(filter_Rank)
  print('filter rank and score')

  # A repeated header row makes read.table() type the whole column as
  # character, so compare on as.numeric() values rather than on strings;
  # which() drops rows whose value is NA instead of emitting all-NA rows.
  if (!is.na(filter_Score)){
    df <- df[which(as.numeric(df$Score) >= filter_Score), , drop = FALSE]
  }

  if (!is.na(filter_Rank)){
    df <- df[which(as.numeric(df$Rank) <= filter_Rank), , drop = FALSE]
  }

  # Split UID on '-' into three id columns and prepend them to the chunk.
  df_ids <- stringr::str_split_fixed(df$UID, '-', 3)
  colnames(df_ids) <- c('grp_id', 'file_id', 'pid')
  df <- cbind(df_ids, df)
  DBI::dbWriteTable(db_con, name=name, value=df, row.names=FALSE, append=append)
}


# add_probmetab: this changeset changes its signature to
#   add_probmetab <- function(pth, con){
# The diff shows only the first two lines of its body,
#   if (!is.null(pth)){
#     df <- read.table(pth, header = TRUE, sep='\t', stringsAsFactors = FALSE, comment.char = "")
# and elides the remainder, so the definition is not reproduced here.


add_extra_table_elucidation('metfrag_results', opt$metfrag_result, con, filter_Score=0.6, filter_Rank=NA)
add_extra_table_elucidation('sirius_csifingerid_results', opt$sirius_csifingerid_result, con, filter_Score=NA, filter_Rank=5)
add_probmetab(opt$probmetab_result, con)