comparison search-mz @ 4:1ba222315fd5 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 947b8707b06176a4801de64a71c8771617311ffb
author prog
date Thu, 16 Mar 2017 05:05:55 -0400
parents 45e985cd8e9e
children 18254e8d1b72
comparison
equal deleted inserted replaced
3:abfba8eb1c8d 4:1ba222315fd5
49 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP 49 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP
50 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields()) 50 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields())
51 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES) 51 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES)
52 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',') 52 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',')
53 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',') 53 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',')
54 MSDB.DFT[['db-rt-unit']] <- MSDB.RTUNIT.SEC
55 MSDB.DFT[['rtunit']] <- MSDB.RTUNIT.SEC
54 DEFAULT.ARG.VALUES <- MSDB.DFT 56 DEFAULT.ARG.VALUES <- MSDB.DFT
55 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields()) 57 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields())
56 DEFAULT.ARG.VALUES[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields()) 58 DEFAULT.ARG.VALUES[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields())
57 59
58 ############## 60 ##############
179 'mztolunit', NA_character_, 1, 'character', paste0('Tolerance on m/z, in ppm. Default is ', MSDB.DFT$mztolunit,'.'), 181 'mztolunit', NA_character_, 1, 'character', paste0('Tolerance on m/z, in ppm. Default is ', MSDB.DFT$mztolunit,'.'),
180 'rttol', 'r', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), 182 'rttol', 'r', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
181 'rttolx', 'x', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), 183 'rttolx', 'x', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
182 'rttoly', 'y', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), 184 'rttoly', 'y', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
183 'rtcol', 'c', 1, 'character', paste0('Chromatographic column to use. Unset by default. If set, use the corresponding column to filter on retention times, if retention times are provided.'), 185 'rtcol', 'c', 1, 'character', paste0('Chromatographic column to use. Unset by default. If set, use the corresponding column to filter on retention times, if retention times are provided.'),
186 'rtunit', NA_character_, 1, 'character', paste0('Retention time unit for the input file. Default is ', MSDB.DFT$rtunit, '. Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.'),
184 'all-cols', NA_character_, 0, 'logical', 'Use all available chromatographic columns to match retention times.', 187 'all-cols', NA_character_, 0, 'logical', 'Use all available chromatographic columns to match retention times.',
185 'check-cols', NA_character_, 0, 'logical', 'Check that the chromatographic column names specified with option -c really exist.', 188 'check-cols', NA_character_, 0, 'logical', 'Check that the chromatographic column names specified with option -c really exist.',
186 'list-cols', NA_character_, 0, 'logical', 'List all chromatographic columns present in the database. Write list inside the file specified by -o option.', 189 'list-cols', NA_character_, 0, 'logical', 'List all chromatographic columns present in the database. Write list inside the file specified by -o option.',
187 'same-rows', 'a', 0, 'logical', 'If set, output exactly the same number of rows as the input. This means that in case of multiple matches for one mz, then only one line is output (i.e.: the mz value is not duplicated on several lines). In the main output file, an "ms.matching" column is output with inside, for each mz, a comma separated list of matched component/molecule IDs. If unset, then only the main output file is used, and one single is written to it with one line per peak match, and eventual mz line duplicated if there are multiple matches for this mz.', 190 'same-rows', 'a', 0, 'logical', 'If set, output exactly the same number of rows as the input. This means that in case of multiple matches for one mz, then only one line is output (i.e.: the mz value is not duplicated on several lines). In the main output file, an "ms.matching" column is output with inside, for each mz, a comma separated list of matched component/molecule IDs. If unset, then only the main output file is used, and one single is written to it with one line per peak match, and eventual mz line duplicated if there are multiple matches for this mz.',
188 'same-cols', 'b', 0, 'logical', 'If set, output the same columns as inside the input. All input columns are copied to the output.', 191 'same-cols', 'b', 0, 'logical', 'If set, output the same columns as inside the input. All input columns are copied to the output.',
204 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.', 207 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.',
205 'cache-dir', NA_character_, 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".', 208 'cache-dir', NA_character_, 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".',
206 'db-name', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.', 209 'db-name', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.',
207 'db-user', NA_character_, 1, 'character', 'User of the database. Used by the "4tabsql" database.', 210 'db-user', NA_character_, 1, 'character', 'User of the database. Used by the "4tabsql" database.',
208 'db-password', NA_character_, 1, 'character', 'Password of the database user. Used by the "4tabsql" database.', 211 'db-password', NA_character_, 1, 'character', 'Password of the database user. Used by the "4tabsql" database.',
209 'db-fields', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-fields']], '".'), 212 'db-fields', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database. Default is "', MSDB.DFT[['db-fields']], '".'),
210 'db-ms-modes', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-ms-modes']], '".'), 213 'db-ms-modes', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database. Default is "', MSDB.DFT[['db-ms-modes']], '".'),
214 'db-rt-unit', NA_character_, 1, 'character', paste0('Retention time unit for the database, used in the single file database. Default is "', MSDB.DFT[['db-rt-unit']], '". Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.'),
211 'db-token', NA_character_, 1, 'character', 'Database token. Used by Peakforest database.', 215 'db-token', NA_character_, 1, 'character', 'Database token. Used by Peakforest database.',
212 'debug', NA_character_, 0, 'logical', 'Set debug mode.' 216 'debug', NA_character_, 0, 'logical', 'Set debug mode.'
213 ) 217 )
214 218
215 spec <- c(spec, make.getopt.spec.print.dft()) 219 spec <- c(spec, make.getopt.spec.print.dft())
540 # Check that an RT column exists when using MZ/RT matching 544 # Check that an RT column exists when using MZ/RT matching
541 if ( ! is.null(opt$rtcol) && ! opt[['input-col-names']][['rt']] %in% colnames(input)) 545 if ( ! is.null(opt$rtcol) && ! opt[['input-col-names']][['rt']] %in% colnames(input))
542 stop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named '", opt[['input-col-names']][['rt']],"' can be found inside your input file.")) 546 stop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named '", opt[['input-col-names']][['rt']],"' can be found inside your input file."))
543 547
544 # Set streams 548 # Set streams
545 input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']]) 549 input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']], rtunit = opt[['rtunit']])
546 main.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], one.line = ! is.null(opt[['same-rows']]), match.sep = opt[['molids-sep']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']])) 550 main.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], one.line = ! is.null(opt[['same-rows']]), match.sep = opt[['molids-sep']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']]))
547 peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']])) 551 peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']]))
548 invisible(db$setInputStream(input.stream)) 552 invisible(db$setInputStream(input.stream))
549 db$addOutputStreams(c(main.output, peaks.output)) 553 db$addOutputStreams(c(main.output, peaks.output))
550 554
551 # Set M/Z tolerance unit 555 # Set database units
552 db$setMzTolUnit(opt$mztolunit) 556 db$setMzTolUnit(opt$mztolunit)
557 if ( ! is.null(opt[['db-rt-unit']]) && opt$database == 'file')
558 db$setRtUnit(opt[['db-rt-unit']])
553 559
554 # Search database 560 # Search database
555 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG 561 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG
556 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']]) 562 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']])
557 563