Mercurial > repos > prog > lcmsmatching
comparison search-mz @ 4:1ba222315fd5 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 947b8707b06176a4801de64a71c8771617311ffb
| author | prog |
|---|---|
| date | Thu, 16 Mar 2017 05:05:55 -0400 |
| parents | 45e985cd8e9e |
| children | 18254e8d1b72 |
comparison
equal
deleted
inserted
replaced
| 3:abfba8eb1c8d | 4:1ba222315fd5 |
|---|---|
| 49 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP | 49 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP |
| 50 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields()) | 50 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields()) |
| 51 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES) | 51 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES) |
| 52 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',') | 52 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',') |
| 53 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',') | 53 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',') |
| 54 MSDB.DFT[['db-rt-unit']] <- MSDB.RTUNIT.SEC | |
| 55 MSDB.DFT[['rtunit']] <- MSDB.RTUNIT.SEC | |
| 54 DEFAULT.ARG.VALUES <- MSDB.DFT | 56 DEFAULT.ARG.VALUES <- MSDB.DFT |
| 55 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields()) | 57 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields()) |
| 56 DEFAULT.ARG.VALUES[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields()) | 58 DEFAULT.ARG.VALUES[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields()) |
| 57 | 59 |
| 58 ############## | 60 ############## |
| 179 'mztolunit', NA_character_, 1, 'character', paste0('Tolerance on m/z, in ppm. Default is ', MSDB.DFT$mztolunit,'.'), | 181 'mztolunit', NA_character_, 1, 'character', paste0('Tolerance on m/z, in ppm. Default is ', MSDB.DFT$mztolunit,'.'), |
| 180 'rttol', 'r', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), | 182 'rttol', 'r', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), |
| 181 'rttolx', 'x', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), | 183 'rttolx', 'x', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), |
| 182 'rttoly', 'y', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), | 184 'rttoly', 'y', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), |
| 183 'rtcol', 'c', 1, 'character', paste0('Chromatographic column to use. Unset by default. If set, use the corresponding column to filter on retention times, if retention times are provided.'), | 185 'rtcol', 'c', 1, 'character', paste0('Chromatographic column to use. Unset by default. If set, use the corresponding column to filter on retention times, if retention times are provided.'), |
| 186 'rtunit', NA_character_, 1, 'character', paste0('Retention time unit for the input file. Default is ', MSDB.DFT$rtunit, '. Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.'), | |
| 184 'all-cols', NA_character_, 0, 'logical', 'Use all available chromatographic columns to match retention times.', | 187 'all-cols', NA_character_, 0, 'logical', 'Use all available chromatographic columns to match retention times.', |
| 185 'check-cols', NA_character_, 0, 'logical', 'Check that the chromatographic column names specified with option -c really exist.', | 188 'check-cols', NA_character_, 0, 'logical', 'Check that the chromatographic column names specified with option -c really exist.', |
| 186 'list-cols', NA_character_, 0, 'logical', 'List all chromatographic columns present in the database. Write list inside the file specified by -o option.', | 189 'list-cols', NA_character_, 0, 'logical', 'List all chromatographic columns present in the database. Write list inside the file specified by -o option.', |
| 187 'same-rows', 'a', 0, 'logical', 'If set, output exactly the same number of rows as the input. This means that in case of multiple matches for one mz, then only one line is output (i.e.: the mz value is not duplicated on several lines). In the main output file, an "ms.matching" column is output with inside, for each mz, a comma separated list of matched component/molecule IDs. If unset, then only the main output file is used, and one single is written to it with one line per peak match, and eventual mz line duplicated if there are multiple matches for this mz.', | 190 'same-rows', 'a', 0, 'logical', 'If set, output exactly the same number of rows as the input. This means that in case of multiple matches for one mz, then only one line is output (i.e.: the mz value is not duplicated on several lines). In the main output file, an "ms.matching" column is output with inside, for each mz, a comma separated list of matched component/molecule IDs. If unset, then only the main output file is used, and one single is written to it with one line per peak match, and eventual mz line duplicated if there are multiple matches for this mz.', |
| 188 'same-cols', 'b', 0, 'logical', 'If set, output the same columns as inside the input. All input columns are copied to the output.', | 191 'same-cols', 'b', 0, 'logical', 'If set, output the same columns as inside the input. All input columns are copied to the output.', |
| 204 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.', | 207 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.', |
| 205 'cache-dir', NA_character_, 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".', | 208 'cache-dir', NA_character_, 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".', |
| 206 'db-name', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.', | 209 'db-name', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.', |
| 207 'db-user', NA_character_, 1, 'character', 'User of the database. Used by the "4tabsql" database.', | 210 'db-user', NA_character_, 1, 'character', 'User of the database. Used by the "4tabsql" database.', |
| 208 'db-password', NA_character_, 1, 'character', 'Password of the database user. Used by the "4tabsql" database.', | 211 'db-password', NA_character_, 1, 'character', 'Password of the database user. Used by the "4tabsql" database.', |
| 209 'db-fields', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-fields']], '".'), | 212 'db-fields', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database. Default is "', MSDB.DFT[['db-fields']], '".'), |
| 210 'db-ms-modes', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-ms-modes']], '".'), | 213 'db-ms-modes', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database. Default is "', MSDB.DFT[['db-ms-modes']], '".'), |
| 214 'db-rt-unit', NA_character_, 1, 'character', paste0('Retention time unit for the database, used in the single file database. Default is "', MSDB.DFT[['db-rt-unit']], '". Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.'), | |
| 211 'db-token', NA_character_, 1, 'character', 'Database token. Used by Peakforest database.', | 215 'db-token', NA_character_, 1, 'character', 'Database token. Used by Peakforest database.', |
| 212 'debug', NA_character_, 0, 'logical', 'Set debug mode.' | 216 'debug', NA_character_, 0, 'logical', 'Set debug mode.' |
| 213 ) | 217 ) |
| 214 | 218 |
| 215 spec <- c(spec, make.getopt.spec.print.dft()) | 219 spec <- c(spec, make.getopt.spec.print.dft()) |
| 540 # Check that an RT column exists when using MZ/RT matching | 544 # Check that an RT column exists when using MZ/RT matching |
| 541 if ( ! is.null(opt$rtcol) && ! opt[['input-col-names']][['rt']] %in% colnames(input)) | 545 if ( ! is.null(opt$rtcol) && ! opt[['input-col-names']][['rt']] %in% colnames(input)) |
| 542 stop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named '", opt[['input-col-names']][['rt']],"' can be found inside your input file.")) | 546 stop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named '", opt[['input-col-names']][['rt']],"' can be found inside your input file.")) |
| 543 | 547 |
| 544 # Set streams | 548 # Set streams |
| 545 input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']]) | 549 input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']], rtunit = opt[['rtunit']]) |
| 546 main.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], one.line = ! is.null(opt[['same-rows']]), match.sep = opt[['molids-sep']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']])) | 550 main.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], one.line = ! is.null(opt[['same-rows']]), match.sep = opt[['molids-sep']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']])) |
| 547 peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']])) | 551 peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']])) |
| 548 invisible(db$setInputStream(input.stream)) | 552 invisible(db$setInputStream(input.stream)) |
| 549 db$addOutputStreams(c(main.output, peaks.output)) | 553 db$addOutputStreams(c(main.output, peaks.output)) |
| 550 | 554 |
| 551 # Set M/Z tolerance unit | 555 # Set database units |
| 552 db$setMzTolUnit(opt$mztolunit) | 556 db$setMzTolUnit(opt$mztolunit) |
| 557 if ( ! is.null(opt[['db-rt-unit']]) && opt$database == 'file') | |
| 558 db$setRtUnit(opt[['db-rt-unit']]) | |
| 553 | 559 |
| 554 # Search database | 560 # Search database |
| 555 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG | 561 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG |
| 556 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']]) | 562 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']]) |
| 557 | 563 |
