anticipated_purity_lcms: create_sqlite

comparison create_sqlite_db.R @ 20:ce268299ecd2 draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 04023134d8f28e85927ca293373c506484149ead-dirty

author	tomnl
date	Thu, 31 May 2018 09:12:15 -0400
parents	9d1c2dcba63d
children	e8291b9d129b

comparison

equal deleted inserted replaced

-:9d1c2dcba63d
+:ce268299ecd2
 rtrawColumns = rtrawColumns)
 }
 con <- DBI::dbConnect(RSQLite::SQLite(), db_pth)
-add_extra_table_elucidation <- function(name, pth){
+add_extra_table_elucidation <- function(name, pth, db_con, filter_Score=NA, filter_Rank=NA){
+if (is.null(pth)){
-if (!is.null(pth)){
+return(0)
+}
+index <- 0
+chunkSize <- 5000
+print(pth)
+con <- file(description=pth,open="r")
+df <- read.table(con, nrows=chunkSize,  header = TRUE, sep='\t', stringsAsFactors = FALSE,  comment.char = "")
+headers = colnames(df)
+print(head(df))
+write_to_table(df, db_con, name, FALSE, filter_Score, filter_Rank)
+repeat {
+index <- index + 1
+print(paste('Processing rows:', index * chunkSize))
-print(pth)
+if (nrow(df) != chunkSize){
-df <- read.table(pth,  header = TRUE, sep='\t', stringsAsFactors = FALSE,  comment.char = "")
+print('Processed all files!')
-# bug for repeating headers
+break
-df <- df[!df$UID=='UID',]
+}
-# get peakid, an scan id
+df <- read.table(con, nrows=chunkSize, skip=0, header = FALSE, sep='\t', stringsAsFactors = FALSE,  comment.char = "")
-df_ids <- stringr::str_split_fixed(df$UID, '-', 3)
+colnames(df) <- headers
-colnames(df_ids) <- c('grp_id', 'file_id', 'pid')
-df <- cbind(df_ids, df)
-# export to database
+	write_to_table(df, db_con, name, TRUE, filter_Score, filter_Rank)
-DBI::dbWriteTable(con, name=name, value=df, row.names=FALSE)
+break
+}
-}
+close(con)
 }
+write_to_table <- function(df, db_con, name, append, filter_Score, filter_Rank){
-add_probmetab <- function(pth){
+df <- df[!df$UID=='UID',]
+print(filter_Score)
+print(filter_Rank)
+print('filter rank and score')
+if (!is.na(filter_Score)){
+df <- df[df$Score>=filter_Score,]
+}
+if (!is.na(filter_Rank)){
+df <- df[df$Rank<=filter_Rank,]
+}
+# get peakid, an scan id
+df_ids <- stringr::str_split_fixed(df$UID, '-', 3)
+colnames(df_ids) <- c('grp_id', 'file_id', 'pid')
+df <- cbind(df_ids, df)
+DBI::dbWriteTable(db_con, name=name, value=df, row.names=FALSE, append=append)
+}
+add_probmetab <- function(pth, con){
 if (!is.null(pth)){
 df <- read.table(pth,  header = TRUE, sep='\t', stringsAsFactors = FALSE,  comment.char = "")
 df$grp_id <- 1:nrow(df)
 start <- T
 }
 }
-add_extra_table_elucidation('metfrag_results', opt$metfrag_result)
+add_extra_table_elucidation('metfrag_results', opt$metfrag_result, con, filter_Score=0.6, filter_Rank=NA)
-add_extra_table_elucidation('sirius_csifingerid_results', opt$sirius_csifingerid_result)
+add_extra_table_elucidation('sirius_csifingerid_results', opt$sirius_csifingerid_result, con, filter_Score=NA, filter_Rank=5)
-add_probmetab(opt$probmetab_result)
+add_probmetab(opt$probmetab_result, con)
 cmd <- paste('SELECT cpg.grpid, cpg.mz, cpg.mzmin, cpg.mzmax, cpg.rt, cpg.rtmin, cpg.rtmax, c_peaks.cid, ',

Mercurial > repos > tomnl > anticipated_purity_lcms

comparison create_sqlite_db.R @ 20:ce268299ecd2 draft