Mercurial > repos > prog > lcmsmatching
comparison search-mz @ 6:b8f70d8216b3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
| author | prog |
|---|---|
| date | Mon, 27 Mar 2017 06:27:29 -0400 |
| parents | 18254e8d1b72 |
| children |
comparison
equal
deleted
inserted
replaced
| 5:18254e8d1b72 | 6:b8f70d8216b3 |
|---|---|
| 1 #!/usr/bin/env Rscript | 1 #!/usr/bin/env Rscript |
| 2 # vi: ft=R | 2 # vi: ft=R fdm=marker |
| 3 args <- commandArgs(trailingOnly = F) | 3 args <- commandArgs(trailingOnly = F) |
| 4 script.path <- sub("--file=","",args[grep("--file=",args)]) | 4 script.path <- sub("--file=","",args[grep("--file=",args)]) |
| 5 library(getopt) | 5 library(getopt) |
| 6 source(file.path(dirname(script.path), 'msdb-common.R'), chdir = TRUE) | 6 source(file.path(dirname(script.path), 'msdb-common.R'), chdir = TRUE) |
| 7 source(file.path(dirname(script.path), 'MsFileDb.R'), chdir = TRUE) | |
| 8 source(file.path(dirname(script.path), 'MsPeakForestDb.R'), chdir = TRUE) | |
| 9 source(file.path(dirname(script.path), 'MsXlsDb.R'), chdir = TRUE) | |
| 10 source(file.path(dirname(script.path), 'Ms4TabSqlDb.R'), chdir = TRUE) | |
| 11 source(file.path(dirname(script.path), 'MsDbLogger.R'), chdir = TRUE) | 7 source(file.path(dirname(script.path), 'MsDbLogger.R'), chdir = TRUE) |
| 12 source(file.path(dirname(script.path), 'MsDbInputDataFrameStream.R'), chdir = TRUE) | 8 source(file.path(dirname(script.path), 'MsDbInputDataFrameStream.R'), chdir = TRUE) |
| 13 source(file.path(dirname(script.path), 'MsDbOutputDataFrameStream.R'), chdir = TRUE) | 9 source(file.path(dirname(script.path), 'MsDbOutputDataFrameStream.R'), chdir = TRUE) |
| 14 source(file.path(dirname(script.path), 'htmlhlp.R'), chdir = TRUE) | 10 source(file.path(dirname(script.path), 'htmlhlp.R'), chdir = TRUE) |
| 15 source(file.path(dirname(script.path), 'strhlp.R'), chdir = TRUE) | 11 source(file.path(dirname(script.path), 'strhlp.R'), chdir = TRUE) |
| 19 | 15 |
| 20 # Missing paste0() function in R 2.14.1 | 16 # Missing paste0() function in R 2.14.1 |
| 21 if (as.integer(R.Version()$major) == 2 && as.numeric(R.Version()$minor) < 15) | 17 if (as.integer(R.Version()$major) == 2 && as.numeric(R.Version()$minor) < 15) |
| 22 paste0 <- function(...) paste(..., sep = '') | 18 paste0 <- function(...) paste(..., sep = '') |
| 23 | 19 |
| 24 ############# | 20 # Constants {{{1 |
| 25 # CONSTANTS # | 21 ################################################################ |
| 26 ############# | |
| 27 | 22 |
| 28 PROG <- sub('^.*/([^/]+)$', '\\1', commandArgs()[4], perl = TRUE) | 23 PROG <- sub('^.*/([^/]+)$', '\\1', commandArgs()[4], perl = TRUE) |
| 29 USERAGENT <- 'search-mz ; pierrick.roger@gmail.com' | 24 USERAGENT <- 'search-mz ; pierrick.roger@gmail.com' |
| 30 | 25 |
| 31 # Authorized database types | 26 # Authorized database types |
| 32 MSDB.XLS <- 'xls' | 27 MSDB.XLS <- 'xls' |
| 33 MSDB.4TABSQL <- '4tabsql' | 28 MSDB.4TABSQL <- '4tabsql' |
| 34 MSDB.FILE <- 'file' | 29 MSDB.FILE <- 'file' |
| 35 MSDB.PEAKFOREST <- 'peakforest' | 30 MSDB.PEAKFOREST <- 'peakforest' |
| 36 MSDB.VALS <- c(MSDB.XLS, MSDB.4TABSQL, MSDB.FILE, MSDB.PEAKFOREST) | 31 MSDB.VALS <- c(MSDB.XLS, MSDB.4TABSQL, MSDB.FILE, MSDB.PEAKFOREST) |
| 32 DB.SRC.FILE <- list () | |
| 33 DB.SRC.FILE[[MSDB.FILE]] <- 'MsFileDb.R' | |
| 34 DB.SRC.FILE[[MSDB.PEAKFOREST]] <- 'MsPeakForestDb.R' | |
| 35 DB.SRC.FILE[[MSDB.XLS]] <- 'MsXlsDb.R' | |
| 36 DB.SRC.FILE[[MSDB.4TABSQL]] <- 'Ms4TabSqlDb.R' | |
| 37 | 37 |
| 38 # Authorized mode values | 38 # Authorized mode values |
| 39 POS_MODE <- 'pos' | 39 POS_MODE <- 'pos' |
| 40 NEG_MODE <- 'neg' | 40 NEG_MODE <- 'neg' |
| 41 MSDB.MODE.VALS <- c(POS_MODE, NEG_MODE) | 41 MSDB.MODE.VALS <- c(POS_MODE, NEG_MODE) |
| 53 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',') | 53 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',') |
| 54 MSDB.DFT[['db-rt-unit']] <- MSDB.RTUNIT.SEC | 54 MSDB.DFT[['db-rt-unit']] <- MSDB.RTUNIT.SEC |
| 55 MSDB.DFT[['rtunit']] <- MSDB.RTUNIT.SEC | 55 MSDB.DFT[['rtunit']] <- MSDB.RTUNIT.SEC |
| 56 DEFAULT.ARG.VALUES <- MSDB.DFT | 56 DEFAULT.ARG.VALUES <- MSDB.DFT |
| 57 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields()) | 57 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields()) |
| 58 DEFAULT.ARG.VALUES[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields()) | 58 |
| 59 | 59 # Print help {{{1 |
| 60 ############## | 60 ################################################################ |
| 61 # PRINT HELP # | 61 |
| 62 ############## | 62 print.help <- function() { |
| 63 | 63 |
| 64 print.help <- function(spec, status = 0) { | 64 cat("USAGE:\n") |
| 65 cat(getopt(spec, usage = TRUE, command = PROG)) | 65 prog.mz.match <- paste(PROG, ' -d (', paste(MSDB.VALS, collapse = '|'), ') --url (file|dir|database URL) -i <file> -m (', paste(MSDB.MODE.VALS, collapse = '|'), ") -p <mz precision> -s <mz shift> -u (", paste(MSDB.MZTOLUNIT.VALS, collapse = '|'), ") -o <file>", sep = '') |
| 66 q(status = status) | 66 cat("\t(1) ", prog.mz.match, " ...\n", sep = '') |
| 67 } | 67 cat("\n") |
| 68 | 68 cat("\t(2) ", prog.mz.match, "(--all-cols|-c <cols>) -x <X RT tolerance> -y <Y RT tolerance>", " ...\n", sep = '') |
| 69 ############################### | 69 cat("\n") |
| 70 # SET DEFAULT ARGUMENT VALUES # | 70 cat("\t(3) ", PROG, ' -d (', paste(MSDB.VALS, collapse = '|'), ") --url (file|dir|database URL) --list-cols\n", sep = '') |
| 71 ############################### | 71 |
| 72 cat("\nDETAILS:\n") | |
| 73 cat("Form (1) is for running an MZ match on a database.\n") | |
| 74 cat("Form (2) is for running an MZ/RT match on a database.\n") | |
| 75 cat("Form (3) is for getting a list of available chromatographic columns in a database.\n") | |
| 76 | |
| 77 cat("\nOPTIONS:\n") | |
| 78 spec <- matrix(make.getopt.spec(), byrow = TRUE, ncol = 5) | |
| 79 max.length.opt.cols <- max(nchar(spec[,1])) + 1 | |
| 80 sections <- list(database = "Database setting", input = "Input file", output = "Output files", mz = "M/Z matching", rt = "RT matching", precursor = "Precursor matching", misc = "Miscellaneous") | |
| 81 for (section in names(sections)) { | |
| 82 cat("\n\t", sections[[section]], ":\n", sep = '') | |
| 83 spec <- matrix(make.getopt.spec(section), byrow = TRUE, ncol = 5) | |
| 84 for (i in seq(nrow(spec))) { | |
| 85 opt <- '' | |
| 86 if ( ! is.na(spec[i,2])) | |
| 87 opt <- paste('-', spec[i,2], '|', sep = '') | |
| 88 opt <- paste(opt, '--', spec[i, 1], sep = '') | |
| 89 nb.space.padding <- max.length.opt.cols - nchar(opt) + 6 | |
| 90 padding <- paste(rep(' ', nb.space.padding), sep = '') | |
| 91 cat("\t\t", opt, padding, "\t", spec[i, 5], "\n", sep = '') | |
| 92 } | |
| 93 } | |
| 94 | |
| 95 cat("\nEXAMPLES:\n") | |
| 96 | |
| 97 cat("\nSimple M/Z matching with a file database:\n") | |
| 98 cat("\t./", PROG, " -d file --url mydbfile.tsv -i input.tsv -m pos -o output.tsv\n", sep = '') | |
| 99 | |
| 100 cat("\nFile database with M/Z tolerance:\n") | |
| 101 cat("\t./", PROG, " -d file --url mydbfile.tsv -i input.tsv -m pos -o output.tsv -p 0.5 -s 0\n", sep = '') | |
| 102 | |
| 103 cat("\nFile database with M/Z tolerance unit:\n") | |
| 104 cat("\t./", PROG, " -d file --url mydbfile.tsv -i input.tsv -m pos -o output.tsv -p 1 -s 0.5 -u plain\n", sep = '') | |
| 105 | |
| 106 cat("\nPeakforest database:\n") | |
| 107 cat("\t./", PROG, " -d peakforest --url https://rest.peakforest.org/ --db-token <your Peakforest token> -i input.tsv -m pos -o output.tsv\n", sep = '') | |
| 108 } | |
| 109 | |
| 110 # Set default argument values {{{1 | |
| 111 ################################################################ | |
| 72 | 112 |
| 73 set.dft.arg.val <-function(opt) { | 113 set.dft.arg.val <-function(opt) { |
| 74 | 114 |
| 75 for (f in names(MSDB.DFT)) | 115 for (f in names(MSDB.DFT)) |
| 76 if (is.null(opt[[f]])) | 116 if (is.null(opt[[f]])) |
| 84 opt$rtcol <- NULL | 124 opt$rtcol <- NULL |
| 85 | 125 |
| 86 return(opt) | 126 return(opt) |
| 87 } | 127 } |
| 88 | 128 |
| 89 ######################### | 129 # Parse argument values {{{1 |
| 90 # PARSE ARGUMENT VALUES # | 130 ################################################################ |
| 91 ######################### | |
| 92 | 131 |
| 93 parse.arg.val <- function(opt) { | 132 parse.arg.val <- function(opt) { |
| 94 | 133 |
| 95 # Parse input column names | 134 # Parse input column names |
| 96 if ( ! is.null(opt[['db-fields']])) { | 135 if ( ! is.null(opt[['db-fields']])) { |
| 97 cust <- split.kv.list(opt[['db-fields']]) | 136 cust <- split.kv.list(opt[['db-fields']]) |
| 137 cust <- cust[cust != 'NA'] | |
| 98 opt[['db-fields']] <- split.kv.list(MSDB.DFT[['db-fields']]) | 138 opt[['db-fields']] <- split.kv.list(MSDB.DFT[['db-fields']]) |
| 139 cust <- cust[names(cust) %in% names(opt[['db-fields']])] | |
| 99 opt[['db-fields']][names(cust)] <- cust | 140 opt[['db-fields']][names(cust)] <- cust |
| 100 } | 141 } |
| 101 | 142 |
| 102 # Parse MS modes | 143 # Parse MS modes |
| 103 if ( ! is.null(opt[['db-ms-modes']])) { | 144 if ( ! is.null(opt[['db-ms-modes']])) { |
| 114 if (is.null(opt[['input-col-names']])) { | 155 if (is.null(opt[['input-col-names']])) { |
| 115 opt[['input-col-names']] <- msdb.get.dft.input.fields() | 156 opt[['input-col-names']] <- msdb.get.dft.input.fields() |
| 116 } | 157 } |
| 117 else { | 158 else { |
| 118 custcols <- split.kv.list(opt[['input-col-names']]) | 159 custcols <- split.kv.list(opt[['input-col-names']]) |
| 160 custcols <- custcols[custcols != 'NA'] | |
| 119 dftcols <- msdb.get.dft.input.fields() | 161 dftcols <- msdb.get.dft.input.fields() |
| 120 opt[['input-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) | 162 opt[['input-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) |
| 121 } | |
| 122 | |
| 123 # Parse output column names | |
| 124 if (is.null(opt[['output-col-names']])) { | |
| 125 # By default keep input col names for output | |
| 126 opt[['output-col-names']] <- msdb.get.dft.output.fields() | |
| 127 input.cols <- names(opt[['input-col-names']]) | |
| 128 output.cols <- names(opt[['output-col-names']]) | |
| 129 opt[['output-col-names']] <- c(opt[['input-col-names']][input.cols %in% output.cols], opt[['output-col-names']][ ! output.cols %in% input.cols]) | |
| 130 } | |
| 131 else { | |
| 132 custcols <- split.kv.list(opt[['output-col-names']]) | |
| 133 dftcols <- msdb.get.dft.output.fields() | |
| 134 opt[['output-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) | |
| 135 } | 163 } |
| 136 | 164 |
| 137 # Parse lists of precursors | 165 # Parse lists of precursors |
| 138 if ( ! is.null(opt[['pos-prec']])) | 166 if ( ! is.null(opt[['pos-prec']])) |
| 139 opt[['pos-prec']] <- split.str(opt[['pos-prec']], unlist = TRUE) | 167 opt[['pos-prec']] <- split.str(opt[['pos-prec']], unlist = TRUE) |
| 141 opt[['neg-prec']] <- split.str(opt[['neg-prec']], unlist = TRUE) | 169 opt[['neg-prec']] <- split.str(opt[['neg-prec']], unlist = TRUE) |
| 142 | 170 |
| 143 return(opt) | 171 return(opt) |
| 144 } | 172 } |
| 145 | 173 |
| 146 ################################# | 174 # Make getopt specifications {{{1 |
| 147 # PRINT DEFAULT ARGUMENT VALUES # | 175 ################################################################ |
| 148 ################################# | 176 |
| 149 | 177 make.getopt.spec <- function(sections = NULL) { |
| 150 print.dft.arg.val <- function(opt) { | 178 |
| 151 | 179 spec <- character(0) |
| 152 print.flags <- DEFAULT.ARG.VALUES | 180 |
| 153 names(print.flags) <- vapply(names(print.flags), function(x) paste0('print-', x), FUN.VALUE = '') | 181 if (is.null(sections) || 'input' %in% sections) |
| 154 for (f in names(print.flags)) | 182 spec <- c(spec, |
| 155 if ( ! is.null(opt[[f]])) { | 183 'input-file', 'i', 1, 'character', 'Set input file.', |
| 156 cat(print.flags[[f]]) | 184 'input-col-names', 'j', 1, 'character', paste0('Set the input column names. Default is "', DEFAULT.ARG.VALUES[['input-col-names']], '".') |
| 157 q(status = 0) | 185 ) |
| 158 } | 186 |
| 159 } | 187 if (is.null(sections) || 'mz' %in% sections) |
| 160 | 188 spec <- c(spec, |
| 161 make.getopt.spec.print.dft <- function() { | 189 'mode', 'm', 1, 'character', paste0('MS mode. Possible values are:', paste(MSDB.MODE.VALS, collapse = ", "), '.'), |
| 162 | 190 'mzshift', 's', 1, 'numeric', paste0('Shift on m/z. Default is ', MSDB.DFT$mzshift,'.'), |
| 163 spec <- character() | 191 'mzprec', 'p', 1, 'numeric', paste0('Tolerance on m/z. Default is ', MSDB.DFT$mzprec,'.'), |
| 164 | 192 'mztolunit', 'u', 1, 'character', paste0('Unit used for tolerance values (options -s and -p) on M/Z. Default is ', MSDB.DFT$mztolunit,'.') |
| 165 for (f in names(DEFAULT.ARG.VALUES)) | 193 ) |
| 166 spec <- c(spec, paste0('print-', f), NA_character_, 0, 'logical', paste0('Print default value of --', f)) | 194 |
| 195 if (is.null(sections) || 'rt' %in% sections) | |
| 196 spec <- c(spec, | |
| 197 'all-cols', 'A', 0, 'logical', 'Use all available chromatographic columns to match retention times.', | |
| 198 'rtcol', 'c', 1, 'character', paste0('Chromatographic column to use. Unset by default. If set, use the corresponding column to filter on retention times, if retention times are provided.'), | |
| 199 'check-cols', 'k', 0, 'logical', 'Check that the chromatographic column names specified with option -c really exist.', | |
| 200 'list-cols', 'l', 0, 'logical', 'List all chromatographic columns present in the database. Write list inside the file specified by -o option.', | |
| 201 'rttol', 'r', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), | |
| 202 'rttolx', 'x', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), | |
| 203 'rttoly', 'y', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), | |
| 204 'rtunit', 'v', 1, 'character', paste0('Retention time unit for the input file. Default is ', MSDB.DFT$rtunit, '. Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.') | |
| 205 ) | |
| 206 | |
| 207 if (is.null(sections) || 'precursor' %in% sections) | |
| 208 spec <- c(spec, | |
| 209 'precursor-match', 'Q', 0, 'logical', 'Remove peaks whose molecule precursor peak has not been matched. Unset by default.', | |
| 210 'precursor-rt-tol', 'R', 1, 'numeric', paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'), | |
| 211 'pos-prec', 'Y', 1, 'character', paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'), | |
| 212 'neg-prec', 'Z', 1, 'character', paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".') | |
| 213 ) | |
| 214 | |
| 215 if (is.null(sections) || 'output' %in% sections) | |
| 216 spec <- c(spec, | |
| 217 'output-file', 'o', 1, 'character', 'Set file to use for the main output.', | |
| 218 'peak-output-file', 'O', 1, 'character', 'If set and if --same-rows is set, then output all matches inside the specified file, with one mz match per line. The output columns are: mz, rt, id, col, colrt, composition, attribution. This means that if an mz value is matched several times, then it will repeated on several lines, with one match description per line.', | |
| 219 'html-output-file', 'H', 1, 'character', 'Set file to use for the HTML output.', | |
| 220 'no-main-table-in-html-output', 't', 0, 'logical', 'Do not display main table in HTML output.', | |
| 221 'same-rows', 'a', 0, 'logical', 'If set, output exactly the same number of rows as the input. This means that in case of multiple matches for one mz, then only one line is output (i.e.: the mz value is not duplicated on several lines). In the main output file, an "ms.matching" column is output with inside, for each mz, a comma separated list of matched component/molecule IDs. If unset, then only the main output file is used, and one single is written to it with one line per peak match, and eventual mz line duplicated if there are multiple matches for this mz.', | |
| 222 'same-cols', 'b', 0, 'logical', 'If set, output the same columns as inside the input. All input columns are copied to the output.', | |
| 223 'molids-sep', 'S', 1, 'character', paste0('Set character separator used to when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'), | |
| 224 'first-val', '1', 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.', | |
| 225 'excel2011comp', 'X', 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.' | |
| 226 ) | |
| 227 | |
| 228 if (is.null(sections) || 'database' %in% sections) | |
| 229 spec <- c(spec, | |
| 230 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'), | |
| 231 'url', 'W', 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.', | |
| 232 'cache-dir', 'C', 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".', | |
| 233 'db-name', 'N', 1, 'character', 'Name of the database. Used by the "4tabsql" database.', | |
| 234 'db-user', 'U', 1, 'character', 'User of the database. Used by the "4tabsql" database.', | |
| 235 'db-password', 'P', 1, 'character', 'Password of the database user. Used by the "4tabsql" database.', | |
| 236 'db-ms-modes', 'M', 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database. Default is "', MSDB.DFT[['db-ms-modes']], '".'), | |
| 237 'db-rt-unit', 'V', 1, 'character', paste0('Retention time unit for the database, used in the single file database. Default is "', MSDB.DFT[['db-rt-unit']], '". Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.'), | |
| 238 'db-token', 'T', 1, 'character', 'Database token. Used by Peakforest database.', | |
| 239 'db-fields', 'F', 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database. Default is "', MSDB.DFT[['db-fields']], '".') | |
| 240 ) | |
| 241 | |
| 242 if (is.null(sections) || 'misc' %in% sections) | |
| 243 spec <- c(spec, | |
| 244 'help', 'h', 0, 'logical', 'Print this help.', | |
| 245 'debug', 'g', 0, 'logical', 'Set debug mode.' | |
| 246 ) | |
| 167 | 247 |
| 168 return(spec) | 248 return(spec) |
| 169 } | 249 } |
| 170 | 250 |
| 171 ############################## | 251 # Read args {{{1 |
| 172 # MAKE GETOPT SPECIFICATIONS # | 252 ################################################################ |
| 173 ############################## | |
| 174 | |
| 175 make.getopt.spec <- function() { | |
| 176 spec = c( | |
| 177 'help', 'h', 0, 'logical', 'Print this help.', | |
| 178 'mode', 'm', 1, 'character', paste0('MS mode. Possible values are:', paste(MSDB.MODE.VALS, collapse = ", "), '.'), | |
| 179 'mzshift', 's', 1, 'numeric', paste0('Shift on m/z, in ppm. Default is ', MSDB.DFT$mzshift,'.'), | |
| 180 'mzprec', 'p', 1, 'numeric', paste0('Tolerance on m/z, in ppm. Default is ', MSDB.DFT$mzprec,'.'), | |
| 181 'mztolunit', NA_character_, 1, 'character', paste0('Tolerance on m/z, in ppm. Default is ', MSDB.DFT$mztolunit,'.'), | |
| 182 'rttol', 'r', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), | |
| 183 'rttolx', 'x', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), | |
| 184 'rttoly', 'y', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'), | |
| 185 'rtcol', 'c', 1, 'character', paste0('Chromatographic column to use. Unset by default. If set, use the corresponding column to filter on retention times, if retention times are provided.'), | |
| 186 'rtunit', NA_character_, 1, 'character', paste0('Retention time unit for the input file. Default is ', MSDB.DFT$rtunit, '. Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.'), | |
| 187 'all-cols', NA_character_, 0, 'logical', 'Use all available chromatographic columns to match retention times.', | |
| 188 'check-cols', NA_character_, 0, 'logical', 'Check that the chromatographic column names specified with option -c really exist.', | |
| 189 'list-cols', NA_character_, 0, 'logical', 'List all chromatographic columns present in the database. Write list inside the file specified by -o option.', | |
| 190 'same-rows', 'a', 0, 'logical', 'If set, output exactly the same number of rows as the input. This means that in case of multiple matches for one mz, then only one line is output (i.e.: the mz value is not duplicated on several lines). In the main output file, an "ms.matching" column is output with inside, for each mz, a comma separated list of matched component/molecule IDs. If unset, then only the main output file is used, and one single is written to it with one line per peak match, and eventual mz line duplicated if there are multiple matches for this mz.', | |
| 191 'same-cols', 'b', 0, 'logical', 'If set, output the same columns as inside the input. All input columns are copied to the output.', | |
| 192 'input-file', 'i', 1, 'character', 'Set input file.', | |
| 193 'output-file', 'o', 1, 'character', 'Set file to use for the main output.', | |
| 194 'peak-output-file', NA_character_, 1, 'character', 'If set and if --same-rows is set, then output all matches inside the specified file, with one mz match per line. The output columns are: mz, rt, id, col, colrt, composition, attribution. This means that if an mz value is matched several times, then it will repeated on several lines, with one match description per line.', | |
| 195 'html-output-file', NA_character_, 1, 'character', 'Set file to use for the HTML output.', | |
| 196 'no-main-table-in-html-output', NA_character_, 0, 'logical', 'Do not display main table in HTML output.', | |
| 197 'precursor-match', NA_character_, 0, 'logical', 'Remove peaks whose molecule precursor peak has not been matched. Unset by default.', | |
| 198 'precursor-rt-tol', NA_character_, 1, 'numeric', paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'), | |
| 199 'pos-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'), | |
| 200 'neg-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".'), | |
| 201 'input-col-names', NA_character_, 1, 'character', paste0('Set the input column names. Default is "', DEFAULT.ARG.VALUES[['input-col-names']], '".'), | |
| 202 'output-col-names', NA_character_, 1, 'character', paste0('Set the output column names. Default is "', DEFAULT.ARG.VALUES[['output-col-names']], '".'), | |
| 203 'molids-sep', NA_character_, 1, 'character', paste0('Set character separator used to when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'), | |
| 204 'first-val', NA_character_, 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.', | |
| 205 'excel2011comp', NA_character_, 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.', | |
| 206 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'), | |
| 207 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.', | |
| 208 'cache-dir', NA_character_, 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".', | |
| 209 'db-name', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.', | |
| 210 'db-user', NA_character_, 1, 'character', 'User of the database. Used by the "4tabsql" database.', | |
| 211 'db-password', NA_character_, 1, 'character', 'Password of the database user. Used by the "4tabsql" database.', | |
| 212 'db-fields', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database. Default is "', MSDB.DFT[['db-fields']], '".'), | |
| 213 'db-ms-modes', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database. Default is "', MSDB.DFT[['db-ms-modes']], '".'), | |
| 214 'db-rt-unit', NA_character_, 1, 'character', paste0('Retention time unit for the database, used in the single file database. Default is "', MSDB.DFT[['db-rt-unit']], '". Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.'), | |
| 215 'db-token', NA_character_, 1, 'character', 'Database token. Used by Peakforest database.', | |
| 216 'debug', NA_character_, 0, 'logical', 'Set debug mode.' | |
| 217 ) | |
| 218 | |
| 219 spec <- c(spec, make.getopt.spec.print.dft()) | |
| 220 | |
| 221 if ( ! is.null(spec)) | |
| 222 spec <- matrix(spec, byrow = TRUE, ncol = 5) | |
| 223 | |
| 224 return(spec) | |
| 225 } | |
| 226 | |
| 227 ############# | |
| 228 # READ ARGS # | |
| 229 ############# | |
| 230 | 253 |
| 231 read_args <- function() { | 254 read_args <- function() { |
| 232 | 255 |
| 233 # options | 256 # Get options |
| 234 spec <- make.getopt.spec() | 257 opt <- getopt(matrix(make.getopt.spec(), byrow = TRUE, ncol = 5)) |
| 235 opt <- getopt(spec) | |
| 236 | 258 |
| 237 # help | 259 # help |
| 238 if ( ! is.null(opt$help)) | 260 if ( ! is.null(opt$help)) { |
| 239 print.help(spec) | 261 print.help() |
| 240 | 262 quit() |
| 241 print.dft.arg.val(opt) # Print default values | 263 } |
| 264 | |
| 242 opt <- set.dft.arg.val(opt) # Set default values | 265 opt <- set.dft.arg.val(opt) # Set default values |
| 243 opt <- parse.arg.val(opt) # Parse list values | 266 opt <- parse.arg.val(opt) # Parse list values |
| 244 | 267 |
| 245 # Check values | 268 # Check values |
| 246 error <- .check.db.conn.opts(opt) | 269 error <- check.args(opt) |
| 247 if (is.null(opt[['output-file']]) && is.null(opt[['list-cols']])) { | 270 |
| 248 warning("You must set a path for the output file.") | 271 return(opt) |
| 249 error <- TRUE | 272 } |
| 250 } | 273 |
| 274 # Check args {{{1 | |
| 275 ################################################################ | |
| 276 | |
| 277 check.args <- function(opt) { | |
| 278 | |
| 279 # Check database type | |
| 280 if (is.null(opt$database)) | |
| 281 stop("You must provide a database type through --database option.") | |
| 282 if ( ! opt$database %in% MSDB.VALS) | |
| 283 stop(paste0("Invalid value \"", opt$database, "\" for --database option.")) | |
| 284 | |
| 285 # Check filedb database | |
| 286 if (opt$database == MSDB.FILE) { | |
| 287 if (is.null(opt$url)) | |
| 288 stop("When using single file database, you must specify the location of the database file with option --url.") | |
| 289 if ( ! file.exists(opt$url)) | |
| 290 stop(paste0("The file path \"", opt$url,"\" specified with --db-file option is not valid.")) | |
| 291 } | |
| 292 | |
| 293 # Check Excel database | |
| 294 if (opt$database == MSDB.XLS) { | |
| 295 if (is.null(opt$url)) | |
| 296 stop("When using Excel database, you must specify the location of the Excel files directory with option --url.") | |
| 297 if ( ! file.exists(opt$url)) | |
| 298 stop(paste0("The directory path \"", opt$url,"\" specified with --xls-dir option is not valid.")) | |
| 299 } | |
| 300 | |
| 301 # Check 4 tab database | |
| 302 if (opt$database == MSDB.4TABSQL) { | |
| 303 if (is.null(opt$url)) | |
| 304 stop("When using 4Tab SQL database, you must specify the URL of the SQL server with option --url.") | |
| 305 if (is.null(opt[['db-name']])) | |
| 306 stop("When using 4Tab SQL database, you must specify the database name through the --db-name option.") | |
| 307 if (is.null(opt[['db-user']])) | |
| 308 stop("When using 4Tab SQL database, you must specify the database user through the --db-user option.") | |
| 309 if (is.null(opt[['db-password']])) | |
| 310 stop("When using 4Tab SQL database, you must specify the database user password through the --db-password option.") | |
| 311 } | |
| 312 | |
| 313 # Check Peakforest database | |
| 314 if (opt$database == MSDB.PEAKFOREST) { | |
| 315 if (is.null(opt$url)) | |
| 316 stop("When using PeakForest database, you must specify the URL of the PeakForest server with option --url.") | |
| 317 } | |
| 318 | |
| 251 if (is.null(opt[['list-cols']])) { | 319 if (is.null(opt[['list-cols']])) { |
| 252 if (is.null(opt[['input-file']])) { | 320 |
| 253 warning("You must provide an input file.") | 321 if (is.null(opt[['output-file']])) |
| 254 error <- TRUE | 322 stop("You must set a path for the output file.") |
| 255 } | 323 |
| 256 if (is.null(opt$mode) || ( ! opt$mode %in% MSDB.MODE.VALS)) { | 324 if (is.null(opt[['input-file']])) |
| 257 warning("You must specify a mode through the --mode option.") | 325 stop("You must provide an input file.") |
| 258 error <- TRUE | 326 |
| 259 } | 327 if (is.null(opt$mode) || ( ! opt$mode %in% MSDB.MODE.VALS)) |
| 260 if (is.null(opt$mzprec)) { | 328 stop("You must specify a mode through the --mode option.") |
| 261 warning("You must set a precision in MZ with the --mzprec option.") | 329 |
| 262 error <- TRUE | 330 if (is.null(opt$mzprec)) |
| 263 } | 331 stop("You must set a precision in MZ with the --mzprec option.") |
| 264 if ( ( ! is.null(opt$rtcol) || ! is.null(opt[['all-cols']])) && (is.null(opt$rttolx) || is.null(opt$rttoly))) { | 332 |
| 265 warning("When chromatographic columns are set, you must provide values for --rttolx and -rttoly.") | 333 if ( ( ! is.null(opt$rtcol) || ! is.null(opt[['all-cols']])) && (is.null(opt$rttolx) || is.null(opt$rttoly))) |
| 266 error <- TRUE | 334 stop("When chromatographic columns are set, you must provide values for --rttolx and -rttoly.") |
| 267 } | 335 |
| 268 if (is.null(opt$mztolunit) || ( ! opt$mztolunit %in% MSDB.MZTOLUNIT.VALS)) { | 336 if (is.null(opt$mztolunit) || ( ! opt$mztolunit %in% MSDB.MZTOLUNIT.VALS)) |
| 269 warning("You must specify an M/Z tolerance unit through the --mztolunit option.") | 337 stop("You must specify an M/Z tolerance unit through the --mztolunit option.") |
| 270 error <- TRUE | 338 } |
| 271 } | 339 } |
| 272 } | 340 |
| 273 | 341 # Load database {{{1 |
| 274 # help | 342 ################################################################ |
| 275 if (error) | 343 |
| 276 print.help(spec, status = 1) | 344 .load.db <- function(opt) { |
| 277 | 345 |
| 278 return(opt) | 346 if (is.null(opt[['pos-prec']]) && is.null(opt[['neg-prec']])) { |
| 279 } | 347 precursors <- NULL |
| 280 | 348 } else { |
| 281 ##################################### | 349 precursors <- list() |
| 282 # CHECK DATABASE CONNECTION OPTIONS # | 350 precursors[[MSDB.TAG.POS]] <- opt[['pos-prec']] |
| 283 ##################################### | 351 precursors[[MSDB.TAG.NEG]] <- opt[['neg-prec']] |
| 284 | 352 } |
| 285 .check.db.conn.opts <- function(opt) { | 353 |
| 286 | 354 db <- switch(opt$database, |
| 287 # Print default values | 355 peakforest = MsPeakForestDb$new(url = opt$url, useragent = USERAGENT, token = opt[['db-token']]), |
| 288 if ( ! is.null(opt[['print-db-fields']])) { | 356 xls = MsXlsDb$new(db_dir = opt$url, cache_dir = opt[['cache-dir']]), |
| 289 cat(MSDB.DFT[['db-fields']]) | 357 '4tabsql' = Ms4TabSqlDb$new(host = extract.address(opt$url), port = extract.port(opt$url), dbname = opt[['db-name']], user = opt[['db-user']], password = opt[['db-password']]), |
| 290 q(status = 0) | 358 file = MsFileDb$new(file = opt$url), |
| 291 } | 359 NULL) |
| 292 if ( ! is.null(opt[['print-db-ms-modes']])) { | 360 db$setPrecursors(precursors) |
| 293 cat(MSDB.DFT[['db-ms-modes']]) | 361 if (db$areDbFieldsSettable()) |
| 294 q(status = 0) | 362 db$setDbFields(opt[['db-fields']]) |
| 295 } | 363 if (db$areDbMsModesSettable()) |
| 296 | 364 db$setDbMsModes(opt[['db-ms-modes']]) |
| 297 # Check values | 365 db$addObservers(MsDbLogger$new()) |
| 298 error <- FALSE | 366 |
| 299 if (is.null(opt$database)) { | 367 return(db) |
| 300 warning("You must provide a database type through --database option.") | 368 } |
| 301 error <- TRUE | 369 |
| 302 } | 370 # Output HTML {{{1 |
| 303 if ( ! opt$database %in% MSDB.VALS) { | 371 ################################################################ |
| 304 warning(paste0("Invalid value \"", opt$database, "\" for --database option.")) | 372 |
| 305 error <- TRUE | 373 output.html <- function(db, peaks, file) { |
| 306 } | |
| 307 if (opt$database == MSDB.FILE) { | |
| 308 if (is.null(opt$url)) { | |
| 309 warning("When using single file database, you must specify the location of the database file with option --url.") | |
| 310 error <- TRUE | |
| 311 } | |
| 312 if ( ! file.exists(opt$url)) { | |
| 313 warning(paste0("The file path \"", opt$url,"\" specified with --db-file option is not valid.")) | |
| 314 error <- TRUE | |
| 315 } | |
| 316 } | |
| 317 if (opt$database == MSDB.XLS) { | |
| 318 if (is.null(opt$url)) { | |
| 319 warning("When using Excel database, you must specify the location of the Excel files directory with option --url.") | |
| 320 error <- TRUE | |
| 321 } | |
| 322 if ( ! file.exists(opt$url)) { | |
| 323 warning(paste0("The directory path \"", opt$url,"\" specified with --xls-dir option is not valid.")) | |
| 324 error <- TRUE | |
| 325 } | |
| 326 } | |
| 327 if (opt$database == MSDB.4TABSQL) { | |
| 328 if (is.null(opt$url)) { | |
| 329 warning("When using 4Tab SQL database, you must specify the URL of the SQL server with option --url.") | |
| 330 error <- TRUE | |
| 331 } | |
| 332 if (is.null(opt[['db-name']])) { | |
| 333 warning("When using 4Tab SQL database, you must specify the database name through the --db-name option.") | |
| 334 error <- TRUE | |
| 335 } | |
| 336 if (is.null(opt[['db-user']])) { | |
| 337 warning("When using 4Tab SQL database, you must specify the database user through the --db-user option.") | |
| 338 error <- TRUE | |
| 339 } | |
| 340 if (is.null(opt[['db-password']])) { | |
| 341 warning("When using 4Tab SQL database, you must specify the database user password through the --db-password option.") | |
| 342 error <- TRUE | |
| 343 } | |
| 344 } | |
| 345 if (opt$database == MSDB.PEAKFOREST) { | |
| 346 if (is.null(opt$url)) { | |
| 347 warning("When using PeakForest database, you must specify the URL of the PeakForest server with option --url.") | |
| 348 error <- TRUE | |
| 349 } | |
| 350 } | |
| 351 | |
| 352 return(error) | |
| 353 } | |
| 354 | |
| 355 ############################# | |
| 356 # DISPLAY COMMAND LINE HELP # | |
| 357 ############################# | |
| 358 | |
| 359 .disp.cmd.line.help <- function(optspec, opt, prog, error = FALSE) { | |
| 360 | |
| 361 if ( ! is.null(opt$help) || error ) { | |
| 362 cat(getopt(optspec, usage = TRUE, command = prog)) | |
| 363 q(status = 1) | |
| 364 } | |
| 365 } | |
| 366 | |
| 367 ################# | |
| 368 # LOAD DATABASE # | |
| 369 ################# | |
| 370 | |
| 371 .load.db <- function(opt) { | |
| 372 | |
| 373 if (is.null(opt[['pos-prec']]) && is.null(opt[['neg-prec']])) { | |
| 374 precursors <- NULL | |
| 375 } else { | |
| 376 precursors <- list() | |
| 377 precursors[[MSDB.TAG.POS]] <- opt[['pos-prec']] | |
| 378 precursors[[MSDB.TAG.NEG]] <- opt[['neg-prec']] | |
| 379 } | |
| 380 | |
| 381 db <- switch(opt$database, | |
| 382 peakforest = MsPeakForestDb$new(url = opt$url, useragent = USERAGENT, token = opt[['db-token']]), | |
| 383 xls = MsXlsDb$new(db_dir = opt$url, cache_dir = opt[['cache-dir']]), | |
| 384 '4tabsql' = Ms4TabSqlDb$new(host = extract.address(opt$url), port = extract.port(opt$url), dbname = opt[['db-name']], user = opt[['db-user']], password = opt[['db-password']]), | |
| 385 file = MsFileDb$new(file = opt$url), | |
| 386 NULL) | |
| 387 db$setPrecursors(precursors) | |
| 388 if (db$areDbFieldsSettable()) | |
| 389 db$setDbFields(opt[['db-fields']]) | |
| 390 if (db$areDbMsModesSettable()) | |
| 391 db$setDbMsModes(opt[['db-ms-modes']]) | |
| 392 db$addObservers(MsDbLogger$new()) | |
| 393 | |
| 394 return(db) | |
| 395 } | |
| 396 | |
| 397 ############### | |
| 398 # OUTPUT HTML # | |
| 399 ############### | |
| 400 | |
| 401 output.html <- function(db, main, peaks, file, opt, output.fields) { | |
| 402 | 374 |
| 403 # Replace public database IDs by URLs | 375 # Replace public database IDs by URLs |
| 404 if ( ! is.null(peaks) || ! is.null(main)) { | 376 if ( ! is.null(peaks)) { |
| 405 # Conversion from extdb id field to extdb name | 377 # Conversion from extdb id field to extdb name |
| 406 extdb2classdb = list() | 378 extdb2classdb = list() |
| 407 extdb2classdb[MSDB.TAG.KEGG] = BIODB.KEGG | 379 extdb2classdb[MSDB.TAG.KEGG] = BIODB.KEGG |
| 408 extdb2classdb[MSDB.TAG.HMDB] = BIODB.HMDB | 380 extdb2classdb[MSDB.TAG.HMDB] = BIODB.HMDB |
| 409 extdb2classdb[MSDB.TAG.CHEBI] = BIODB.CHEBI | 381 extdb2classdb[MSDB.TAG.CHEBI] = BIODB.CHEBI |
| 410 extdb2classdb[MSDB.TAG.PUBCHEM] = BIODB.PUBCHEMCOMP | 382 extdb2classdb[MSDB.TAG.PUBCHEM] = BIODB.PUBCHEMCOMP |
| 411 | 383 |
| 412 # Loop on all dbs | 384 # Loop on all dbs |
| 413 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) { | 385 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) { |
| 414 field <- output.fields[[extdb]] | 386 if ( ! is.null(peaks) && extdb %in% colnames(peaks)) |
| 415 if ( ! is.null(peaks) && field %in% colnames(peaks)) | 387 peaks[[extdb]] <- vapply(peaks[[extdb]], function(id) if (is.na(id)) '' else paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = '') |
| 416 peaks[[field]] <- vapply(peaks[[field]], function(id) if (is.na(id)) '' else paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = '') | |
| 417 if ( ! is.null(main) && field %in% colnames(main)) | |
| 418 main[[field]] <- vapply(main[[field]], function(ids) if (is.na(ids) || nchar(ids) == 0) '' else paste(vapply(strsplit(ids, opt[['molids-sep']])[[1]], function(id) paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = ''), collapse = opt[['molids-sep']]), FUN.VALUE = '') | |
| 419 } | 388 } |
| 420 } | 389 } |
| 421 | 390 |
| 422 # Write HTML | 391 # Write HTML |
| 423 html <- HtmlWriter(file = file) | 392 html <- HtmlWriter(file = file) |
| 434 html$write('tr:nth-child(even) { background-color: LemonChiffon; }') | 403 html$write('tr:nth-child(even) { background-color: LemonChiffon; }') |
| 435 html$write('tr:nth-child(odd) { background-color: LightGreen; }') | 404 html$write('tr:nth-child(odd) { background-color: LightGreen; }') |
| 436 html$writeEndTag('style') | 405 html$writeEndTag('style') |
| 437 html$writeEndTag('header') | 406 html$writeEndTag('header') |
| 438 html$writeBegTag('body') | 407 html$writeBegTag('body') |
| 439 html$writeTag('h1', text = "LC/MS matching") | |
| 440 | |
| 441 # Write parameters | |
| 442 html$writeTag('h2', text = "Parameters") | |
| 443 html$writeBegTag('ul') | |
| 444 html$writeTag('li', text = paste0("Mode = ", opt$mode, ".")) | |
| 445 html$writeTag('li', text = paste0("M/Z precision = ", opt$mzprec, ".")) | |
| 446 html$writeTag('li', text = paste0("M/Z shift = ", opt$mzshift, ".")) | |
| 447 html$writeTag('li', text = paste0("Precursor match = ", (if (is.null(opt[['precursor-match']])) "no" else "yes"), ".")) | |
| 448 if ( ! is.null(opt[['precursor-match']])) { | |
| 449 html$writeTag('li', text = paste0("Positive precursors = ", paste0(opt[['pos-prec']], collapse = ', '), ".")) | |
| 450 html$writeTag('li', text = paste0("Negative precursors = ", paste0(opt[['neg-prec']], collapse = ', '), ".")) | |
| 451 } | |
| 452 if ( ! is.null(opt$rtcol)) { | |
| 453 html$writeTag('li', text = paste0("Columns = ", paste(opt$rtcol, collapse = ", "), ".")) | |
| 454 html$writeTag('li', text = paste0("RTX = ", opt$rttolx, ".")) | |
| 455 html$writeTag('li', text = paste0("RTY = ", opt$rttoly, ".")) | |
| 456 if ( ! is.null(opt[['precursor-match']])) | |
| 457 html$writeTag('li', text = paste0("RTZ = ", opt[['precursor-rt-tol']], ".")) | |
| 458 } | |
| 459 html$writeEndTag('ul') | |
| 460 | 408 |
| 461 # Write results | 409 # Write results |
| 462 html$writeTag('h2', text = "Results") | |
| 463 results <- FALSE | 410 results <- FALSE |
| 464 if ( ! is.null(main) && nrow(main) > 0 && is.null(opt[['no-main-table-in-html-output']])) { | |
| 465 html$writeTag('h3', text = "Main output") | |
| 466 html$writeTable(main) | |
| 467 results <- TRUE | |
| 468 } | |
| 469 if ( ! is.null(peaks) && nrow(peaks) > 0) { | 411 if ( ! is.null(peaks) && nrow(peaks) > 0) { |
| 470 html$writeTag('h3', text = "Matched peaks") | 412 html$writeTag('h3', text = "Matched peaks") |
| 471 html$writeTable(peaks) | 413 html$writeTable(peaks) |
| 472 results <- TRUE | 414 results <- TRUE |
| 473 } | 415 } |
| 476 | 418 |
| 477 html$writeEndTag('body') | 419 html$writeEndTag('body') |
| 478 html$writeEndTag('html') | 420 html$writeEndTag('html') |
| 479 } | 421 } |
| 480 | 422 |
| 481 ######## | 423 # MAIN {{{1 |
| 482 # MAIN # | 424 ################################################################ |
| 483 ######## | |
| 484 | |
| 485 options(error = function() { traceback(2) ; quit(status = 1) }, warn = 2 ) | |
| 486 | 425 |
| 487 # Read command line arguments | 426 # Read command line arguments |
| 488 opt <- read_args() | 427 opt <- read_args() |
| 489 | 428 |
| 490 if (is.null(opt$debug)) { | 429 if (is.null(opt$debug)) { |
| 491 options(error = function() { quit(status = 1) }, warn = 0 ) | 430 options(error = function() { quit(status = 1) }, warn = 0 ) |
| 492 } | 431 } |
| 493 | 432 |
| 494 # Load database | 433 # Load database |
| 434 source(file.path(dirname(script.path), DB.SRC.FILE[[opt$database]]), chdir = TRUE) | |
| 495 db <- .load.db(opt) | 435 db <- .load.db(opt) |
| 496 | 436 |
| 497 # Print columns | 437 # Print columns |
| 498 if ( ! is.null(opt[['list-cols']])) { | 438 if ( ! is.null(opt[['list-cols']])) { |
| 499 cols <- db$getChromCol() | 439 cols <- db$getChromCol() |
| 505 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']])) | 445 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']])) |
| 506 stop(paste0("Input file \"", opt[['input-file']], "\" does not exist.")) | 446 stop(paste0("Input file \"", opt[['input-file']], "\" does not exist.")) |
| 507 if (file.info(opt[['input-file']])$size > 0) { | 447 if (file.info(opt[['input-file']])$size > 0) { |
| 508 | 448 |
| 509 # Load file into data frame | 449 # Load file into data frame |
| 510 input <- read.table(file = opt[['input-file']], header = TRUE, sep = "\t", stringsAsFactor = FALSE) | 450 input <- read.table(file = opt[['input-file']], header = TRUE, sep = "\t", stringsAsFactor = FALSE, check.names = FALSE, comment.char = '') |
| 511 | 451 |
| 512 # Convert each column that is identified by a number into a name | 452 # Convert each column that is identified by a number into a name |
| 513 for (field in names(opt[['input-col-names']])) { | 453 for (field in names(opt[['input-col-names']])) { |
| 514 if ( ! opt[['input-col-names']][[field]] %in% colnames(input) && length(grep('^[0-9]+$', opt[['input-col-names']][[field]])) > 0) { | 454 if ( ! opt[['input-col-names']][[field]] %in% colnames(input) && length(grep('^[0-9]+$', opt[['input-col-names']][[field]])) > 0) { |
| 515 col.index <- as.integer(opt[['input-col-names']][[field]]) | 455 col.index <- as.integer(opt[['input-col-names']][[field]]) |
| 543 | 483 |
| 544 # Check that an RT column exists when using MZ/RT matching | 484 # Check that an RT column exists when using MZ/RT matching |
| 545 if ( ! is.null(opt$rtcol) && ! opt[['input-col-names']][['rt']] %in% colnames(input)) | 485 if ( ! is.null(opt$rtcol) && ! opt[['input-col-names']][['rt']] %in% colnames(input)) |
| 546 stop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named '", opt[['input-col-names']][['rt']],"' can be found inside your input file.")) | 486 stop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named '", opt[['input-col-names']][['rt']],"' can be found inside your input file.")) |
| 547 | 487 |
| 488 # Set output col names | |
| 489 output.col.names <- opt[['input-col-names']] | |
| 490 | |
| 548 # Set streams | 491 # Set streams |
| 549 input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']], rtunit = opt[['rtunit']]) | 492 input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']], rtunit = opt[['rtunit']]) |
| 550 main.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], one.line = ! is.null(opt[['same-rows']]), match.sep = opt[['molids-sep']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']])) | 493 main.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = output.col.names, one.line = ! is.null(opt[['same-rows']]), match.sep = opt[['molids-sep']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']]), rtunit = opt[['rtunit']]) |
| 551 peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']])) | 494 peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = output.col.names, first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']]), rtunit = opt[['rtunit']]) |
| 552 invisible(db$setInputStream(input.stream)) | 495 invisible(db$setInputStream(input.stream)) |
| 553 db$addOutputStreams(c(main.output, peaks.output)) | 496 db$addOutputStreams(c(main.output, peaks.output)) |
| 554 | 497 |
| 555 # Set database units | 498 # Set database units |
| 556 db$setMzTolUnit(opt$mztolunit) | 499 db$setMzTolUnit(opt$mztolunit) |
| 569 if ( ! is.null(opt[['peak-output-file']])) | 512 if ( ! is.null(opt[['peak-output-file']])) |
| 570 # TODO Create a class MsDbOutputCsvFileStream | 513 # TODO Create a class MsDbOutputCsvFileStream |
| 571 df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE) | 514 df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE) |
| 572 if ( ! is.null(opt[['html-output-file']])) | 515 if ( ! is.null(opt[['html-output-file']])) |
| 573 # TODO Create a class MsDbOutputHtmlFileStream | 516 # TODO Create a class MsDbOutputHtmlFileStream |
| 574 output.html(db = db, main = main.output$getDataFrame(), peaks = peaks.output$getDataFrame(), file = opt[['html-output-file']], opt = opt, output.fields = opt[['output-col-names']]) | 517 output.html(db = db, peaks = peaks.output$getDataFrame(), file = opt[['html-output-file']]) |
