comparison create_sqlite_db.R @ 20:ce268299ecd2 draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 04023134d8f28e85927ca293373c506484149ead-dirty
author tomnl
date Thu, 31 May 2018 09:12:15 -0400
parents 9d1c2dcba63d
children e8291b9d129b
comparison
equal deleted inserted replaced
19:9d1c2dcba63d 20:ce268299ecd2
136 rtrawColumns = rtrawColumns) 136 rtrawColumns = rtrawColumns)
137 } 137 }
138 138
139 con <- DBI::dbConnect(RSQLite::SQLite(), db_pth) 139 con <- DBI::dbConnect(RSQLite::SQLite(), db_pth)
140 140
141 add_extra_table_elucidation <- function(name, pth){ 141 add_extra_table_elucidation <- function(name, pth, db_con, filter_Score=NA, filter_Rank=NA){
142 142 if (is.null(pth)){
143 if (!is.null(pth)){ 143 return(0)
144 }
145 index <- 0
146 chunkSize <- 5000
147 print(pth)
148 con <- file(description=pth,open="r")
149 df <- read.table(con, nrows=chunkSize, header = TRUE, sep='\t', stringsAsFactors = FALSE, comment.char = "")
150 headers = colnames(df)
151 print(head(df))
152 write_to_table(df, db_con, name, FALSE, filter_Score, filter_Rank)
153
154 repeat {
155 index <- index + 1
156 print(paste('Processing rows:', index * chunkSize))
144 157
145 print(pth) 158 if (nrow(df) != chunkSize){
146 df <- read.table(pth, header = TRUE, sep='\t', stringsAsFactors = FALSE, comment.char = "") 159 print('Processed all files!')
147 # bug for repeating headers 160 break
148 df <- df[!df$UID=='UID',] 161 }
149 162
150 # get peakid, an scan id 163 df <- read.table(con, nrows=chunkSize, skip=0, header = FALSE, sep='\t', stringsAsFactors = FALSE, comment.char = "")
151 df_ids <- stringr::str_split_fixed(df$UID, '-', 3) 164 colnames(df) <- headers
152 colnames(df_ids) <- c('grp_id', 'file_id', 'pid') 165
153 df <- cbind(df_ids, df) 166
154 # export to database 167 write_to_table(df, db_con, name, TRUE, filter_Score, filter_Rank)
155 168
156 169
157 DBI::dbWriteTable(con, name=name, value=df, row.names=FALSE) 170 break
158 171 }
159 } 172 close(con)
160 173
161 } 174 }
162 175
163 176 write_to_table <- function(df, db_con, name, append, filter_Score, filter_Rank){
164 add_probmetab <- function(pth){ 177
178 df <- df[!df$UID=='UID',]
179 print(filter_Score)
180 print(filter_Rank)
181 print('filter rank and score')
182
183 if (!is.na(filter_Score)){
184 df <- df[df$Score>=filter_Score,]
185 }
186
187 if (!is.na(filter_Rank)){
188
189 df <- df[df$Rank<=filter_Rank,]
190 }
191
192 # get peakid, an scan id
193 df_ids <- stringr::str_split_fixed(df$UID, '-', 3)
194 colnames(df_ids) <- c('grp_id', 'file_id', 'pid')
195 df <- cbind(df_ids, df)
196 DBI::dbWriteTable(db_con, name=name, value=df, row.names=FALSE, append=append)
197 }
198
199 add_probmetab <- function(pth, con){
165 if (!is.null(pth)){ 200 if (!is.null(pth)){
166 201
167 df <- read.table(pth, header = TRUE, sep='\t', stringsAsFactors = FALSE, comment.char = "") 202 df <- read.table(pth, header = TRUE, sep='\t', stringsAsFactors = FALSE, comment.char = "")
168 df$grp_id <- 1:nrow(df) 203 df$grp_id <- 1:nrow(df)
169 start <- T 204 start <- T
202 } 237 }
203 238
204 239
205 } 240 }
206 241
207 add_extra_table_elucidation('metfrag_results', opt$metfrag_result) 242 add_extra_table_elucidation('metfrag_results', opt$metfrag_result, con, filter_Score=0.6, filter_Rank=NA)
208 add_extra_table_elucidation('sirius_csifingerid_results', opt$sirius_csifingerid_result) 243 add_extra_table_elucidation('sirius_csifingerid_results', opt$sirius_csifingerid_result, con, filter_Score=NA, filter_Rank=5)
209 add_probmetab(opt$probmetab_result) 244 add_probmetab(opt$probmetab_result, con)
210 245
211 246
212 247
213 248
214 cmd <- paste('SELECT cpg.grpid, cpg.mz, cpg.mzmin, cpg.mzmax, cpg.rt, cpg.rtmin, cpg.rtmax, c_peaks.cid, ', 249 cmd <- paste('SELECT cpg.grpid, cpg.mz, cpg.mzmin, cpg.mzmax, cpg.rt, cpg.rtmin, cpg.rtmax, c_peaks.cid, ',