Mercurial > repos > tomnl > anticipated_purity_lcms
comparison create_sqlite_db.R @ 20:ce268299ecd2 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 04023134d8f28e85927ca293373c506484149ead-dirty
author | tomnl |
---|---|
date | Thu, 31 May 2018 09:12:15 -0400 |
parents | 9d1c2dcba63d |
children | e8291b9d129b |
comparison
equal
deleted
inserted
replaced
19:9d1c2dcba63d | 20:ce268299ecd2 |
---|---|
136 rtrawColumns = rtrawColumns) | 136 rtrawColumns = rtrawColumns) |
137 } | 137 } |
138 | 138 |
139 con <- DBI::dbConnect(RSQLite::SQLite(), db_pth) | 139 con <- DBI::dbConnect(RSQLite::SQLite(), db_pth) |
140 | 140 |
141 add_extra_table_elucidation <- function(name, pth){ | 141 add_extra_table_elucidation <- function(name, pth, db_con, filter_Score=NA, filter_Rank=NA){ |
142 | 142 if (is.null(pth)){ |
143 if (!is.null(pth)){ | 143 return(0) |
144 } | |
145 index <- 0 | |
146 chunkSize <- 5000 | |
147 print(pth) | |
148 con <- file(description=pth,open="r") | |
149 df <- read.table(con, nrows=chunkSize, header = TRUE, sep='\t', stringsAsFactors = FALSE, comment.char = "") | |
150 headers = colnames(df) | |
151 print(head(df)) | |
152 write_to_table(df, db_con, name, FALSE, filter_Score, filter_Rank) | |
153 | |
154 repeat { | |
155 index <- index + 1 | |
156 print(paste('Processing rows:', index * chunkSize)) | |
144 | 157 |
145 print(pth) | 158 if (nrow(df) != chunkSize){ |
146 df <- read.table(pth, header = TRUE, sep='\t', stringsAsFactors = FALSE, comment.char = "") | 159 print('Processed all files!') |
147 # bug for repeating headers | 160 break |
148 df <- df[!df$UID=='UID',] | 161 } |
149 | 162 |
150 # get peakid, an scan id | 163 df <- read.table(con, nrows=chunkSize, skip=0, header = FALSE, sep='\t', stringsAsFactors = FALSE, comment.char = "") |
151 df_ids <- stringr::str_split_fixed(df$UID, '-', 3) | 164 colnames(df) <- headers |
152 colnames(df_ids) <- c('grp_id', 'file_id', 'pid') | 165 |
153 df <- cbind(df_ids, df) | 166 |
154 # export to database | 167 write_to_table(df, db_con, name, TRUE, filter_Score, filter_Rank) |
155 | 168 |
156 | 169 |
157 DBI::dbWriteTable(con, name=name, value=df, row.names=FALSE) | 170 break |
158 | 171 } |
159 } | 172 close(con) |
160 | 173 |
161 } | 174 } |
162 | 175 |
163 | 176 write_to_table <- function(df, db_con, name, append, filter_Score, filter_Rank){ |
164 add_probmetab <- function(pth){ | 177 |
178 df <- df[!df$UID=='UID',] | |
179 print(filter_Score) | |
180 print(filter_Rank) | |
181 print('filter rank and score') | |
182 | |
183 if (!is.na(filter_Score)){ | |
184 df <- df[df$Score>=filter_Score,] | |
185 } | |
186 | |
187 if (!is.na(filter_Rank)){ | |
188 | |
189 df <- df[df$Rank<=filter_Rank,] | |
190 } | |
191 | |
192 # get peakid, an scan id | |
193 df_ids <- stringr::str_split_fixed(df$UID, '-', 3) | |
194 colnames(df_ids) <- c('grp_id', 'file_id', 'pid') | |
195 df <- cbind(df_ids, df) | |
196 DBI::dbWriteTable(db_con, name=name, value=df, row.names=FALSE, append=append) | |
197 } | |
198 | |
199 add_probmetab <- function(pth, con){ | |
165 if (!is.null(pth)){ | 200 if (!is.null(pth)){ |
166 | 201 |
167 df <- read.table(pth, header = TRUE, sep='\t', stringsAsFactors = FALSE, comment.char = "") | 202 df <- read.table(pth, header = TRUE, sep='\t', stringsAsFactors = FALSE, comment.char = "") |
168 df$grp_id <- 1:nrow(df) | 203 df$grp_id <- 1:nrow(df) |
169 start <- T | 204 start <- T |
202 } | 237 } |
203 | 238 |
204 | 239 |
205 } | 240 } |
206 | 241 |
207 add_extra_table_elucidation('metfrag_results', opt$metfrag_result) | 242 add_extra_table_elucidation('metfrag_results', opt$metfrag_result, con, filter_Score=0.6, filter_Rank=NA) |
208 add_extra_table_elucidation('sirius_csifingerid_results', opt$sirius_csifingerid_result) | 243 add_extra_table_elucidation('sirius_csifingerid_results', opt$sirius_csifingerid_result, con, filter_Score=NA, filter_Rank=5) |
209 add_probmetab(opt$probmetab_result) | 244 add_probmetab(opt$probmetab_result, con) |
210 | 245 |
211 | 246 |
212 | 247 |
213 | 248 |
214 cmd <- paste('SELECT cpg.grpid, cpg.mz, cpg.mzmin, cpg.mzmax, cpg.rt, cpg.rtmin, cpg.rtmax, c_peaks.cid, ', | 249 cmd <- paste('SELECT cpg.grpid, cpg.mz, cpg.mzmin, cpg.mzmax, cpg.rt, cpg.rtmin, cpg.rtmax, c_peaks.cid, ', |