comparison create_sqlite_db.R @ 20:74917d0e89cc draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 04023134d8f28e85927ca293373c506484149ead-dirty
author tomnl
date Thu, 31 May 2018 11:29:47 -0400
parents 906e8e2be944
children 7f57d501badb
comparison
equal deleted inserted replaced
19:906e8e2be944 20:74917d0e89cc
140 140
141 add_extra_table_elucidation <- function(name, pth, db_con, filter_Score=NA, filter_Rank=NA){ 141 add_extra_table_elucidation <- function(name, pth, db_con, filter_Score=NA, filter_Rank=NA){
142 if (is.null(pth)){ 142 if (is.null(pth)){
143 return(0) 143 return(0)
144 } 144 }
145 index <- 0 145 DBI::dbWriteTable(conn=db_con, name=name, value=pth, sep='\t', header=T)
146 chunkSize <- 5000 146
147 print(pth) 147
148 con <- file(description=pth,open="r") 148 }
149 df <- read.table(con, nrows=chunkSize, header = TRUE, sep='\t', stringsAsFactors = FALSE, comment.char = "") 149
150 headers = colnames(df) 150 write_to_table <- function(df, db_con, name, append){
151 print(head(df))
152 write_to_table(df, db_con, name, FALSE, filter_Score, filter_Rank)
153
154 repeat {
155 index <- index + 1
156 print(paste('Processing rows:', index * chunkSize))
157
158 if (nrow(df) != chunkSize){
159 print('Processed all files!')
160 break
161 }
162
163 df <- read.table(con, nrows=chunkSize, skip=0, header = FALSE, sep='\t', stringsAsFactors = FALSE, comment.char = "")
164 colnames(df) <- headers
165
166
167 write_to_table(df, db_con, name, TRUE, filter_Score, filter_Rank)
168
169
170 break
171 }
172 close(con)
173
174 }
175
176 write_to_table <- function(df, db_con, name, append, filter_Score, filter_Rank){
177 151
178 df <- df[!df$UID=='UID',] 152 df <- df[!df$UID=='UID',]
179 print(filter_Score) 153 print(filter_Score)
180 print(filter_Rank) 154 print(filter_Rank)
181 print('filter rank and score') 155 print('filter rank and score')
182 156
183 if (!is.na(filter_Score)){
184 df <- df[df$Score>=filter_Score,]
185 }
186
187 if (!is.na(filter_Rank)){
188
189 df <- df[df$Rank<=filter_Rank,]
190 }
191 157
192 # get peakid, an scan id 158 # get peakid, an scan id
193 df_ids <- stringr::str_split_fixed(df$UID, '-', 3) 159 df_ids <- stringr::str_split_fixed(df$UID, '-', 3)
194 colnames(df_ids) <- c('grp_id', 'file_id', 'pid') 160 colnames(df_ids) <- c('grp_id', 'file_id', 'pid')
195 df <- cbind(df_ids, df) 161 df <- cbind(df_ids, df)
241 207
242 add_extra_table_elucidation('metfrag_results', opt$metfrag_result, con, filter_Score=0.6, filter_Rank=NA) 208 add_extra_table_elucidation('metfrag_results', opt$metfrag_result, con, filter_Score=0.6, filter_Rank=NA)
243 add_extra_table_elucidation('sirius_csifingerid_results', opt$sirius_csifingerid_result, con, filter_Score=NA, filter_Rank=5) 209 add_extra_table_elucidation('sirius_csifingerid_results', opt$sirius_csifingerid_result, con, filter_Score=NA, filter_Rank=5)
244 add_probmetab(opt$probmetab_result, con) 210 add_probmetab(opt$probmetab_result, con)
245 211
246 212 if (is.null(xset)){
247 213 DBI::dbWriteTable(con, name='xset_classes', value=xa@xcmsSet@phenoData, row.names=TRUE)
214 }else{
215
216 DBI::dbWriteTable(con, name='xset_classes', value=xset@phenoData, row.names=TRUE)
217
218 }
248 219
249 cmd <- paste('SELECT cpg.grpid, cpg.mz, cpg.mzmin, cpg.mzmax, cpg.rt, cpg.rtmin, cpg.rtmax, c_peaks.cid, ', 220 cmd <- paste('SELECT cpg.grpid, cpg.mz, cpg.mzmin, cpg.mzmax, cpg.rt, cpg.rtmin, cpg.rtmax, c_peaks.cid, ',
250 'c_peaks.mzmin AS c_peak_mzmin, c_peaks.mzmax AS c_peak_mzmax, ', 221 'c_peaks.mzmin AS c_peak_mzmin, c_peaks.mzmax AS c_peak_mzmax, ',
251 'c_peaks.rtmin AS c_peak_rtmin, c_peaks.rtmax AS c_peak_rtmax, s_peak_meta.*, fileinfo.filename, fileinfo.nm_save ', 222 'c_peaks.rtmin AS c_peak_rtmin, c_peaks.rtmax AS c_peak_rtmax, s_peak_meta.*, fileinfo.filename, fileinfo.nm_save ',
252 'FROM c_peak_groups AS cpg ', 223 'FROM c_peak_groups AS cpg ',