comparison search-mz @ 1:45e985cd8e9e draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8-dirty
author prog
date Tue, 31 Jan 2017 05:27:24 -0500
parents 3afe41d3e9e7
children 1ba222315fd5
comparison
equal deleted inserted replaced
0:3afe41d3e9e7 1:45e985cd8e9e
15 source(file.path(dirname(script.path), 'strhlp.R'), chdir = TRUE) 15 source(file.path(dirname(script.path), 'strhlp.R'), chdir = TRUE)
16 source(file.path(dirname(script.path), 'fshlp.R'), chdir = TRUE) 16 source(file.path(dirname(script.path), 'fshlp.R'), chdir = TRUE)
17 source(file.path(dirname(script.path), 'biodb-common.R'), chdir = TRUE) 17 source(file.path(dirname(script.path), 'biodb-common.R'), chdir = TRUE)
18 source(file.path(dirname(script.path), 'nethlp.R'), chdir = TRUE) 18 source(file.path(dirname(script.path), 'nethlp.R'), chdir = TRUE)
19 19
20 # Missing paste0() function in R 2.14.1
21 if (as.integer(R.Version()$major) == 2 && as.numeric(R.Version()$minor) < 15)
22 paste0 <- function(...) paste(..., sep = '')
23
20 ############# 24 #############
21 # CONSTANTS # 25 # CONSTANTS #
22 ############# 26 #############
23 27
24 PROG <- sub('^.*/([^/]+)$', '\\1', commandArgs()[4], perl = TRUE) 28 PROG <- sub('^.*/([^/]+)$', '\\1', commandArgs()[4], perl = TRUE)
29 USERAGENT <- 'search-mz ; pierrick.roger@gmail.com'
25 30
26 # Authorized database types 31 # Authorized database types
27 MSDB.XLS <- 'xls' 32 MSDB.XLS <- 'xls'
28 MSDB.4TABSQL <- '4tabsql' 33 MSDB.4TABSQL <- '4tabsql'
29 MSDB.FILE <- 'file' 34 MSDB.FILE <- 'file'
42 MSDB.DFT[['mztolunit']] <- MSDB.DFT.MZTOLUNIT 47 MSDB.DFT[['mztolunit']] <- MSDB.DFT.MZTOLUNIT
43 MSDB.DFT[['precursor-rt-tol']] <- 5 48 MSDB.DFT[['precursor-rt-tol']] <- 5
44 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP 49 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP
45 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields()) 50 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields())
46 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES) 51 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES)
47 MSDB.DFT[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields())
48 MSDB.DFT[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields())
49 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',') 52 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',')
50 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',') 53 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',')
54 DEFAULT.ARG.VALUES <- MSDB.DFT
55 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields())
56 DEFAULT.ARG.VALUES[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields())
51 57
52 ############## 58 ##############
53 # PRINT HELP # 59 # PRINT HELP #
54 ############## 60 ##############
55 61
101 # Parse retention time columns 107 # Parse retention time columns
102 if ( ! is.null(opt$rtcol)) 108 if ( ! is.null(opt$rtcol))
103 opt$rtcol <- strsplit(opt$rtcol, ',')[[1]] 109 opt$rtcol <- strsplit(opt$rtcol, ',')[[1]]
104 110
105 # Parse input column names 111 # Parse input column names
106 if ( ! is.null(opt[['input-col-names']])) { 112 if (is.null(opt[['input-col-names']])) {
113 opt[['input-col-names']] <- msdb.get.dft.input.fields()
114 }
115 else {
107 custcols <- split.kv.list(opt[['input-col-names']]) 116 custcols <- split.kv.list(opt[['input-col-names']])
108 dftcols <- split.kv.list(MSDB.DFT[['input-col-names']]) 117 dftcols <- msdb.get.dft.input.fields()
109 opt[['input-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) 118 opt[['input-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)])
110 } 119 }
111 120
112 # Parse output column names 121 # Parse output column names
113 if ( ! is.null(opt[['output-col-names']])) { 122 if (is.null(opt[['output-col-names']])) {
123 # By default keep input col names for output
124 opt[['output-col-names']] <- msdb.get.dft.output.fields()
125 input.cols <- names(opt[['input-col-names']])
126 output.cols <- names(opt[['output-col-names']])
127 opt[['output-col-names']] <- c(opt[['input-col-names']][input.cols %in% output.cols], opt[['output-col-names']][ ! output.cols %in% input.cols])
128 }
129 else {
114 custcols <- split.kv.list(opt[['output-col-names']]) 130 custcols <- split.kv.list(opt[['output-col-names']])
115 dftcols <- split.kv.list(MSDB.DFT[['output-col-names']]) 131 dftcols <- msdb.get.dft.output.fields()
116 opt[['output-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) 132 opt[['output-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)])
117 } 133 }
118 134
119 # Parse lists of precursors 135 # Parse lists of precursors
120 if ( ! is.null(opt[['pos-prec']])) 136 if ( ! is.null(opt[['pos-prec']]))
129 # PRINT DEFAULT ARGUMENT VALUES # 145 # PRINT DEFAULT ARGUMENT VALUES #
130 ################################# 146 #################################
131 147
132 print.dft.arg.val <- function(opt) { 148 print.dft.arg.val <- function(opt) {
133 149
134 print.flags <- MSDB.DFT 150 print.flags <- DEFAULT.ARG.VALUES
135 names(print.flags) <- vapply(names(print.flags), function(x) paste0('print-', x), FUN.VALUE = '') 151 names(print.flags) <- vapply(names(print.flags), function(x) paste0('print-', x), FUN.VALUE = '')
136 for (f in names(print.flags)) 152 for (f in names(print.flags))
137 if ( ! is.null(opt[[f]])) { 153 if ( ! is.null(opt[[f]])) {
138 cat(print.flags[[f]]) 154 cat(print.flags[[f]])
139 q(status = 0) 155 q(status = 0)
142 158
143 make.getopt.spec.print.dft <- function() { 159 make.getopt.spec.print.dft <- function() {
144 160
145 spec <- character() 161 spec <- character()
146 162
147 for (f in names(MSDB.DFT)) 163 for (f in names(DEFAULT.ARG.VALUES))
148 spec <- c(spec, paste0('print-', f), NA_character_, 0, 'logical', paste0('Print default value of --', f)) 164 spec <- c(spec, paste0('print-', f), NA_character_, 0, 'logical', paste0('Print default value of --', f))
149 165
150 return(spec) 166 return(spec)
151 } 167 }
152 168
177 'no-main-table-in-html-output', NA_character_, 0, 'logical', 'Do not display main table in HTML output.', 193 'no-main-table-in-html-output', NA_character_, 0, 'logical', 'Do not display main table in HTML output.',
178 'precursor-match', NA_character_, 0, 'logical', 'Remove peaks whose molecule precursor peak has not been matched. Unset by default.', 194 'precursor-match', NA_character_, 0, 'logical', 'Remove peaks whose molecule precursor peak has not been matched. Unset by default.',
179 'precursor-rt-tol', NA_character_, 1, 'numeric', paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'), 195 'precursor-rt-tol', NA_character_, 1, 'numeric', paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'),
180 'pos-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'), 196 'pos-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'),
181 'neg-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".'), 197 'neg-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".'),
182 'input-col-names', NA_character_, 1, 'character', paste0('Set the input column names. Default is "', MSDB.DFT[['input-col-names']], '".'), 198 'input-col-names', NA_character_, 1, 'character', paste0('Set the input column names. Default is "', DEFAULT.ARG.VALUES[['input-col-names']], '".'),
183 'output-col-names', NA_character_, 1, 'character', paste0('Set the output column names. Default is "', MSDB.DFT[['output-col-names']], '".'), 199 'output-col-names', NA_character_, 1, 'character', paste0('Set the output column names. Default is "', DEFAULT.ARG.VALUES[['output-col-names']], '".'),
184 'molids-sep', NA_character_, 1, 'character', paste0('Set character separator used to when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'), 200 'molids-sep', NA_character_, 1, 'character', paste0('Set character separator used to when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'),
185 'first-val', NA_character_, 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.', 201 'first-val', NA_character_, 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.',
186 'excel2011comp', NA_character_, 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.', 202 'excel2011comp', NA_character_, 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.',
187 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'), 203 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'),
188 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.', 204 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.',
189 'cache-dir', NA_character_, 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".', 205 'cache-dir', NA_character_, 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".',
190 'useragent', NA_character_, 1, 'character', 'User agent. Used by the "Peakforest" database.',
191 'db-name', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.', 206 'db-name', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.',
192 'db-user', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.', 207 'db-user', NA_character_, 1, 'character', 'User of the database. Used by the "4tabsql" database.',
193 'db-password', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.', 208 'db-password', NA_character_, 1, 'character', 'Password of the database user. Used by the "4tabsql" database.',
194 'db-fields', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-fields']], '".'), 209 'db-fields', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-fields']], '".'),
195 'db-ms-modes', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-ms-modes']], '".'), 210 'db-ms-modes', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-ms-modes']], '".'),
211 'db-token', NA_character_, 1, 'character', 'Database token. Used by Peakforest database.',
196 'debug', NA_character_, 0, 'logical', 'Set debug mode.' 212 'debug', NA_character_, 0, 'logical', 'Set debug mode.'
197 ) 213 )
198 214
199 spec <- c(spec, make.getopt.spec.print.dft()) 215 spec <- c(spec, make.getopt.spec.print.dft())
200 216
222 opt <- set.dft.arg.val(opt) # Set default values 238 opt <- set.dft.arg.val(opt) # Set default values
223 opt <- parse.arg.val(opt) # Parse list values 239 opt <- parse.arg.val(opt) # Parse list values
224 240
225 # Check values 241 # Check values
226 error <- .check.db.conn.opts(opt) 242 error <- .check.db.conn.opts(opt)
227 if (is.null(opt[['output-file']])) { 243 if (is.null(opt[['output-file']]) && is.null(opt[['list-cols']])) {
228 warning("You must set a path for the output file.") 244 warning("You must set a path for the output file.")
229 error <- TRUE 245 error <- TRUE
230 } 246 }
231 if (is.null(opt[['list-cols']])) { 247 if (is.null(opt[['list-cols']])) {
232 if (is.null(opt[['input-file']])) { 248 if (is.null(opt[['input-file']])) {
323 } 339 }
324 } 340 }
325 if (opt$database == MSDB.PEAKFOREST) { 341 if (opt$database == MSDB.PEAKFOREST) {
326 if (is.null(opt$url)) { 342 if (is.null(opt$url)) {
327 warning("When using PeakForest database, you must specify the URL of the PeakForest server with option --url.") 343 warning("When using PeakForest database, you must specify the URL of the PeakForest server with option --url.")
328 error <- TRUE
329 }
330 if (is.null(opt$useragent)) {
331 warning("When using PeakForest database, you must specify a user agent with option --useragent.")
332 error <- TRUE 344 error <- TRUE
333 } 345 }
334 } 346 }
335 347
336 return(error) 348 return(error)
361 precursors[[MSDB.TAG.POS]] <- opt[['pos-prec']] 373 precursors[[MSDB.TAG.POS]] <- opt[['pos-prec']]
362 precursors[[MSDB.TAG.NEG]] <- opt[['neg-prec']] 374 precursors[[MSDB.TAG.NEG]] <- opt[['neg-prec']]
363 } 375 }
364 376
365 db <- switch(opt$database, 377 db <- switch(opt$database,
366 peakforest = MsPeakForestDb$new(url = opt$url, useragent = opt$useragent), 378 peakforest = MsPeakForestDb$new(url = opt$url, useragent = USERAGENT, token = opt[['db-token']]),
367 xls = MsXlsDb(db_dir = opt$url, cache_dir = opt[['cache-dir']]), 379 xls = MsXlsDb$new(db_dir = opt$url, cache_dir = opt[['cache-dir']]),
368 '4tabsql' = Ms4TabSqlDb(host = extract.address(opt$url), port = extract.port(opt$url), dbname = opt[['db-name']], user = opt[['db-user']], password = opt[['db-password']]), 380 '4tabsql' = Ms4TabSqlDb$new(host = extract.address(opt$url), port = extract.port(opt$url), dbname = opt[['db-name']], user = opt[['db-user']], password = opt[['db-password']]),
369 file = MsFileDb(file = opt$url), 381 file = MsFileDb$new(file = opt$url),
370 NULL) 382 NULL)
371 db$setPrecursors(precursors) 383 db$setPrecursors(precursors)
372 if (db$areDbFieldsSettable()) 384 if (db$areDbFieldsSettable())
373 db$setDbFields(opt[['db-fields']]) 385 db$setDbFields(opt[['db-fields']])
374 if (db$areDbMsModesSettable()) 386 if (db$areDbMsModesSettable())
383 ############### 395 ###############
384 396
385 output.html <- function(db, main, peaks, file, opt, output.fields) { 397 output.html <- function(db, main, peaks, file, opt, output.fields) {
386 398
387 # Replace public database IDs by URLs 399 # Replace public database IDs by URLs
388 if ( ! is.null(peaks)) 400 if ( ! is.null(peaks) || ! is.null(main)) {
401 # Conversion from extdb id field to extdb name
402 extdb2classdb = list()
403 extdb2classdb[MSDB.TAG.KEGG] = BIODB.KEGG
404 extdb2classdb[MSDB.TAG.HMDB] = BIODB.HMDB
405 extdb2classdb[MSDB.TAG.CHEBI] = BIODB.CHEBI
406 extdb2classdb[MSDB.TAG.PUBCHEM] = BIODB.PUBCHEMCOMP
407
408 # Loop on all dbs
389 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) { 409 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) {
390 field <- output.fields[[extdb]] 410 field <- output.fields[[extdb]]
391 if (field %in% colnames(peaks)) 411 if ( ! is.null(peaks) && field %in% colnames(peaks))
392 peaks[[field]] <- vapply(peaks[[field]], function(id) paste0('<a href="', get.entry.url(class = extdb, accession = id, content.type = RBIODB.HTML), '">', id, '</a>'), FUN.VALUE = '') 412 peaks[[field]] <- vapply(peaks[[field]], function(id) if (is.na(id)) '' else paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = '')
393 } 413 if ( ! is.null(main) && field %in% colnames(main))
414 main[[field]] <- vapply(main[[field]], function(ids) if (is.na(ids) || nchar(ids) == 0) '' else paste(vapply(strsplit(ids, opt[['molids-sep']])[[1]], function(id) paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = ''), collapse = opt[['molids-sep']]), FUN.VALUE = '')
415 }
416 }
394 417
395 # Write HTML 418 # Write HTML
396 html <- HtmlWriter(file = file) 419 html <- HtmlWriter(file = file)
397 html$writeBegTag('html') 420 html$writeBegTag('html')
398 html$writeBegTag('header') 421 html$writeBegTag('header')
422 html$writeTag('meta', attr = c(charset = "UTF-8"))
399 html$writeTag('title', text = "LC/MS matching results") 423 html$writeTag('title', text = "LC/MS matching results")
400 html$writeBegTag('style') 424 html$writeBegTag('style')
401 html$write('table, th, td { border-collapse: collapse; }') 425 html$write('table, th, td { border-collapse: collapse; }')
402 html$write('table, th { border: 1px solid black; }') 426 html$write('table, th { border: 1px solid black; }')
403 html$write('td { border-left: 1px solid black; border-right: 1px solid black; }') 427 html$write('td { border-left: 1px solid black; border-right: 1px solid black; }')
411 html$writeTag('h1', text = "LC/MS matching") 435 html$writeTag('h1', text = "LC/MS matching")
412 436
413 # Write parameters 437 # Write parameters
414 html$writeTag('h2', text = "Parameters") 438 html$writeTag('h2', text = "Parameters")
415 html$writeBegTag('ul') 439 html$writeBegTag('ul')
416 html$writeTag('li', paste0("Mode = ", opt$mode, ".")) 440 html$writeTag('li', text = paste0("Mode = ", opt$mode, "."))
417 html$writeTag('li', paste0("M/Z precision = ", opt$mzprec, ".")) 441 html$writeTag('li', text = paste0("M/Z precision = ", opt$mzprec, "."))
418 html$writeTag('li', paste0("M/Z shift = ", opt$mzshift, ".")) 442 html$writeTag('li', text = paste0("M/Z shift = ", opt$mzshift, "."))
419 html$writeTag('li', paste0("Precursor match = ", (if (is.null(opt[['precursor-match']])) "no" else "yes"), ".")) 443 html$writeTag('li', text = paste0("Precursor match = ", (if (is.null(opt[['precursor-match']])) "no" else "yes"), "."))
420 if ( ! is.null(opt[['precursor-match']])) { 444 if ( ! is.null(opt[['precursor-match']])) {
421 html$writeTag('li', paste0("Positive precursors = ", paste0(opt[['pos-prec']], collapse = ', '), ".")) 445 html$writeTag('li', text = paste0("Positive precursors = ", paste0(opt[['pos-prec']], collapse = ', '), "."))
422 html$writeTag('li', paste0("Negative precursors = ", paste0(opt[['neg-prec']], collapse = ', '), ".")) 446 html$writeTag('li', text = paste0("Negative precursors = ", paste0(opt[['neg-prec']], collapse = ', '), "."))
423 } 447 }
424 if ( ! is.null(opt$rtcol)) { 448 if ( ! is.null(opt$rtcol)) {
425 html$writeTag('li', paste0("Columns = ", paste(opt$rtcol, collapse = ", "), ".")) 449 html$writeTag('li', text = paste0("Columns = ", paste(opt$rtcol, collapse = ", "), "."))
426 html$writeTag('li', paste0("RTX = ", opt$rttolx, ".")) 450 html$writeTag('li', text = paste0("RTX = ", opt$rttolx, "."))
427 html$writeTag('li', paste0("RTY = ", opt$rttoly, ".")) 451 html$writeTag('li', text = paste0("RTY = ", opt$rttoly, "."))
428 if ( ! is.null(opt[['precursor-match']])) 452 if ( ! is.null(opt[['precursor-match']]))
429 html$writeTag('li', paste0("RTZ = ", opt[['precursor-rt-tol']], ".")) 453 html$writeTag('li', text = paste0("RTZ = ", opt[['precursor-rt-tol']], "."))
430 } 454 }
431 html$writeEndTag('ul') 455 html$writeEndTag('ul')
432 456
433 # Write results 457 # Write results
434 html$writeTag('h2', text = "Results") 458 html$writeTag('h2', text = "Results")
467 db <- .load.db(opt) 491 db <- .load.db(opt)
468 492
469 # Print columns 493 # Print columns
470 if ( ! is.null(opt[['list-cols']])) { 494 if ( ! is.null(opt[['list-cols']])) {
471 cols <- db$getChromCol() 495 cols <- db$getChromCol()
472 df.write.tsv(cols, file = opt[['output-file']]) 496 df.write.tsv(cols, file = if (is.null(opt[['output-file']])) stdout() else opt[['output-file']])
473 q(status = 0) 497 q(status = 0)
474 } 498 }
475 499
476 # Read input 500 # Read input
477 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']])) 501 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']]))
478 stop(paste0("Input file \"", opt[['input-file']], "\" does not exist.")) 502 stop(paste0("Input file \"", opt[['input-file']], "\" does not exist."))
479 if (file.info(opt[['input-file']])$size > 0) { 503 if (file.info(opt[['input-file']])$size > 0) {
480 504
481 # Load file into data frame 505 # Load file into data frame
482 input <- read.table(file = opt[['input-file']], header = TRUE, sep = "\t") 506 input <- read.table(file = opt[['input-file']], header = TRUE, sep = "\t", stringsAsFactor = FALSE)
483 507
484 # Convert each column that is identified by a number into a name 508 # Convert each column that is identified by a number into a name
485 for (field in names(opt[['input-col-names']])) { 509 for (field in names(opt[['input-col-names']])) {
486 if ( ! opt[['input-col-names']][[field]] %in% colnames(input) && length(grep('^[0-9]+$', opt[['input-col-names']][[field]])) > 0) { 510 if ( ! opt[['input-col-names']][[field]] %in% colnames(input) && length(grep('^[0-9]+$', opt[['input-col-names']][[field]])) > 0) {
487 col.index <- as.integer(opt[['input-col-names']][[field]]) 511 col.index <- as.integer(opt[['input-col-names']][[field]])
504 if ( ! is.null(opt[['all-cols']])) 528 if ( ! is.null(opt[['all-cols']]))
505 opt$rtcol <- db$getChromCol() 529 opt$rtcol <- db$getChromCol()
506 530
507 # Check chrom columns 531 # Check chrom columns
508 if ( ! is.null(opt[['check-cols']]) && ! is.null(opt$rtcol)) { 532 if ( ! is.null(opt[['check-cols']]) && ! is.null(opt$rtcol)) {
509 dbcols <- db$getChromCol() 533 dbcols <- db$getChromCol()[['id']]
510 unknown.cols <- opt$rtcol[ ! opt$rtcol %in% dbcols] 534 unknown.cols <- opt$rtcol[ ! opt$rtcol %in% dbcols]
511 if (length(unknown.cols) > 0) { 535 if (length(unknown.cols) > 0) {
512 stop(paste0("Unknown chromatographic column", (if (length(unknown.cols) > 1) 's' else ''), ': ', paste(unknown.cols, collapse = ', '), ".\nAllowed chromatographic column names are:\n", paste(dbcols, collapse = "\n"))) 536 stop(paste0("Unknown chromatographic column", (if (length(unknown.cols) > 1) 's' else ''), ': ', paste(unknown.cols, collapse = ', '), ".\nAllowed chromatographic column names are:\n", paste(dbcols, collapse = "\n")))
513 } 537 }
514 } 538 }
530 # Search database 554 # Search database
531 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG 555 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG
532 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']]) 556 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']])
533 557
534 # Write output 558 # Write output
559 main.output$moveColumnsToBeginning(colnames(input))
560 peaks.output$moveColumnsToBeginning(colnames(input))
535 # TODO Create a class MsDbOutputCsvFileStream 561 # TODO Create a class MsDbOutputCsvFileStream
536 df.write.tsv(main.output$getDataFrame(), file = opt[['output-file']], row.names = FALSE) 562 df.write.tsv(main.output$getDataFrame(), file = opt[['output-file']], row.names = FALSE)
537 if ( ! is.null(opt[['peak-output-file']])) 563 if ( ! is.null(opt[['peak-output-file']]))
538 # TODO Create a class MsDbOutputCsvFileStream 564 # TODO Create a class MsDbOutputCsvFileStream
539 df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE) 565 df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE)