# HG changeset patch
# User prog
# Date 1489655155 14400
# Node ID 1ba222315fd5985ff3db4fb86470e41e54f26d74
# Parent  abfba8eb1c8dfe8262302108869db81860c577ae
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 947b8707b06176a4801de64a71c8771617311ffb

diff -r abfba8eb1c8d -r 1ba222315fd5 MsDb.R
--- a/MsDb.R	Tue Mar 14 12:35:23 2017 -0400
+++ b/MsDb.R	Thu Mar 16 05:05:55 2017 -0400
@@ -9,21 +9,22 @@
 	# CLASS DECLARATION #
 	#####################
 	
-	MsDb <- setRefClass("MsDb", fields = list(.observers = "ANY", .prec = "list", .output.streams = "ANY", .input.stream = "ANY", .mz.tol.unit = "character"))
+	MsDb <- setRefClass("MsDb", fields = list(.observers = "ANY", .prec = "list", .output.streams = "ANY", .input.stream = "ANY", .mz.tol.unit = "character", .rt.unit = "character"))
 	
 	###############
 	# CONSTRUCTOR #
 	###############
 	
 	MsDb$methods( initialize = function(...) {
+		
+		callSuper(...)
 
 		.observers <<- NULL
 		.output.streams <<- NULL
 		.input.stream <<- NULL
 		.prec <<- MSDB.DFT.PREC
 		.mz.tol.unit <<- MSDB.DFT.MZTOLUNIT
-		
-		callSuper(...)
+		.rt.unit <<- MSDB.RTUNIT.SEC
 	})
 
 	####################
@@ -126,6 +127,10 @@
 		stop("Method setDbMsModes() not implemented in concrete class.")
 	})
 	
+	###################
+	# SET MZ TOL UNIT #
+	###################
+
 	MsDb$methods( setMzTolUnit = function(mztolunit) {
 
 		if ( ! mztolunit %in% MSDB.MZTOLUNIT.VALS)
@@ -134,6 +139,18 @@
 		.mz.tol.unit <<- mztolunit
 	})
 
+	###############
+	# SET RT UNIT #
+	###############
+
+	MsDb$methods( setRtUnit = function(unit) {
+
+		if ( ! unit %in% MSDB.RTUNIT.VALS)
+			stop(paste0("RT unit must be one of: ", paste(MSDB.RTUNIT.VALS, collapse = ', '), "."))
+
+		.rt.unit <<- unit
+	})
+
 	####################
 	# HANDLE COMPOUNDS #
 	####################
diff -r abfba8eb1c8d -r 1ba222315fd5 MsDbInputDataFrameStream.R
--- a/MsDbInputDataFrameStream.R	Tue Mar 14 12:35:23 2017 -0400
+++ b/MsDbInputDataFrameStream.R	Thu Mar 16 05:05:55 2017 -0400
@@ -7,18 +7,19 @@
 	# CLASS DECLARATION #
 	#####################
 	
-	MsDbInputDataFrameStream <- setRefClass("MsDbInputDataFrameStream", contains = 'MsDbInputStream', fields = list( .df = "ANY", .i = "integer"))
+	MsDbInputDataFrameStream <- setRefClass("MsDbInputDataFrameStream", contains = 'MsDbInputStream', fields = list( .df = "ANY", .i = "integer", .rtunit = 'character'))
 	
 	###############
 	# CONSTRUCTOR #
 	###############
 	
-	MsDbInputDataFrameStream$methods( initialize = function(df = data.frame(), input.fields = msdb.get.dft.input.fields(), ...) {
+	MsDbInputDataFrameStream$methods( initialize = function(df = data.frame(), input.fields = msdb.get.dft.input.fields(), rtunit = MSDB.RTUNIT.SEC, ...) {
+		
+		callSuper(input.fields = input.fields, ...)
 
 		.df <<- df
 		.i <<- 0L
-		
-		callSuper(input.fields = input.fields, ...)
+		.rtunit <<- rtunit
 	})
 
 	##########
@@ -39,10 +40,15 @@
 
 	MsDbInputDataFrameStream$methods( getRt = function() {
 
-		if (.self$.i > 0 && .self$.i <= nrow(.self$.df) && ! is.null(.self$.input.fields[[MSDB.TAG.RT]]))
-			return(.self$.df[.self$.i, .self$.input.fields[[MSDB.TAG.RT]]])
+		rt <- NULL
 
-		return(NULL)
+		if (.self$.i > 0 && .self$.i <= nrow(.self$.df) && ! is.null(.self$.input.fields[[MSDB.TAG.RT]])) {
+			rt <- .self$.df[.self$.i, .self$.input.fields[[MSDB.TAG.RT]]]
+			if (.self$.rtunit == MSDB.RTUNIT.MIN)
+				rt <- rt * 60
+		}
+
+		return(rt)
 	})
 
 	###########
diff -r abfba8eb1c8d -r 1ba222315fd5 MsFileDb.R
--- a/MsFileDb.R	Tue Mar 14 12:35:23 2017 -0400
+++ b/MsFileDb.R	Thu Mar 16 05:05:55 2017 -0400
@@ -395,8 +395,10 @@
 			db <- db[db[[MSDB.TAG.COL]] %in% col,]
 
 		# Filter on retention time
-		if ( ! is.null(rt.low) && ! is.na(rt.low) && ! is.null(rt.high) && ! is.na(rt.high))
-			db <- db[db[[MSDB.TAG.COLRT]] >= rt.low & db[[MSDB.TAG.COLRT]] <= rt.high, ]
+		if ( ! is.null(rt.low) && ! is.na(rt.low) && ! is.null(rt.high) && ! is.na(rt.high)) {
+			scale <- if (.self$.rt.unit == MSDB.RTUNIT.MIN) 60 else 1
+			db <- db[db[[MSDB.TAG.COLRT]] * scale >= rt.low & db[[MSDB.TAG.COLRT]] <= rt.high, ]
+		}
 
 		# Remove retention times and column information
 		if (is.null(col) || is.na(col) || is.null(rt.low) || is.na(rt.low) || is.null(rt.high) || is.na(rt.high)) {
@@ -487,6 +489,9 @@
 			rt[col] <- list(colrts)
 		}
 
+		if (.self$.rt.unit == MSDB.RTUNIT.MIN)
+			rt <- 60 * rt
+
 		return(rt)
 	})
 
diff -r abfba8eb1c8d -r 1ba222315fd5 MsPeakForestDb.R
--- a/MsPeakForestDb.R	Tue Mar 14 12:35:23 2017 -0400
+++ b/MsPeakForestDb.R	Thu Mar 16 05:05:55 2017 -0400
@@ -16,6 +16,8 @@
 	
 	MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) {
 
+		callSuper(...)
+
 		# Check URL
 		if (is.null(url) || is.na(url))
 		    stop("No URL defined for new MsPeakForestDb instance.")
@@ -26,8 +28,7 @@
 		.url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent)
 		.self$.url.scheduler$setVerbose(1L)
 		.token <<- token
-
-		callSuper(...)
+		.rt.unit <<- MSDB.RTUNIT.MIN
 	})
 
 	###########
@@ -53,11 +54,9 @@
 
 		if (ret.type == 'json') {
 
-			library(RJSONIO)
+			res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
 
-			res <- fromJSON(content, nullValue = NA)
-
-			if (class(res) == 'list' && 'success' %in% names(res) && res$success == FALSE) {
+			if (is.null(res)) {
 				param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '')
 				stop(paste0("Failed to run web service. URL was \"", url, param.str, "\"."))
 			}
@@ -66,8 +65,7 @@
 				if (grepl('^[0-9]+$', content, perl = TRUE))
 					res <- as.integer(content)
 				else {
-					library(RJSONIO)
-					res <- fromJSON(content, nullValue = NA)
+					res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
 				}
 			}
 		}
@@ -141,6 +139,7 @@
 			for (s in spectra)
 				if (is.na(col) || s$liquidChromatography$columnCode %in% col) {
 					ret.time <- (s$RTmin + s$RTmax) / 2
+					ret.time <- ret.time * 60 # Retention time are in minutes in Peakforest, but we want them in seconds
 					c <- s$liquidChromatography$columnCode
 					if (c %in% names(rt)) {
 						if ( ! ret.time %in% rt[[c]])
@@ -262,21 +261,21 @@
 		results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character())
 		for (x in spectra) {
 			if ('source' %in% names(x) && is.list(x$source))
-				mztheo <- if ('theoricalMass' %in% names(x)) as.numeric(x$theoricalMass) else NA_real_
-				comp <- if ('composition' %in% names(x)) x$composition else NA_character_
-				attr <- if ('attribution' %in% names(x)) x$attribution else NA_character_
+				mztheo <- if ('mz' %in% names(x) && ! is.null(x$mz)) as.numeric(x$mz) else NA_real_
+				comp <- if ('composition' %in% names(x) && ! is.null(x$composition)) x$composition else NA_character_
+				attr <- if ('attribution' %in% names(x) && ! is.null(x$attribution)) x$attribution else NA_character_
 				if ('listOfCompounds' %in% names(x$source)) {
-					molids <- vapply(x$source$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = '')
-					molnames <- vapply(x$source$listOfCompounds, function(c) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP), FUN.VALUE = '')
-					mass <- vapply(x$source$listOfCompounds, function(c) as.character(c$averageMass), FUN.VALUE = '')
-					inchi <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChI), FUN.VALUE = '')
-					inchikey <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChIKey), FUN.VALUE = '')
-					chebi <- vapply(x$source$listOfCompounds, function(c) as.character(c$ChEBI), FUN.VALUE = '')
+					molids <- vapply(x$source$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '')
+					molnames <- vapply(x$source$listOfCompounds, function(c) if ('names' %in% names(c) && ! is.null(c$names)) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP) else NA_character_, FUN.VALUE = '')
+					mass <- vapply(x$source$listOfCompounds, function(c) if ( ! 'averageMass' %in% names(c) || is.null(c$averageMass)) NA_real_ else as.double(c$averageMass), FUN.VALUE = 0.0)
+					inchi <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChI' %in% names(c) || is.null(c$inChI)) NA_character_ else as.character(c$inChI), FUN.VALUE = '')
+					inchikey <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChIKey' %in% names(c) || is.null(c$inChIKey)) NA_character_ else as.character(c$inChIKey), FUN.VALUE = '')
+					chebi <- vapply(x$source$listOfCompounds, function(c) if ('ChEBI'  %in% names(c) && ! is.null(c$ChEBI)) as.character(c$ChEBI) else NA_character_, FUN.VALUE = '')
 					chebi[chebi == 'CHEBI:null'] <- NA_character_
-					hmdb <- vapply(x$source$listOfCompounds, function(c) as.character(c$HMDB), FUN.VALUE = '')
+					hmdb <- vapply(x$source$listOfCompounds, function(c) if ('HMDB' %in% names(c) && ! is.null(c$HMDB)) as.character(c$HMDB) else NA_character_, FUN.VALUE = '')
 					hmdb[hmdb == 'HMDBnull'] <- NA_character_
-					kegg <- vapply(x$source$listOfCompounds, function(c) as.character(c$KEGG), FUN.VALUE = '')
-					pubchem <- vapply(x$source$listOfCompounds, function(c) as.character(c$PubChemCID), FUN.VALUE = '')
+					kegg <- vapply(x$source$listOfCompounds, function(c) if ( ! 'KEGG' %in% names(c) || is.null(c$KEGG)) NA_character_ else as.character(c$KEGG), FUN.VALUE = '')
+					pubchem <- vapply(x$source$listOfCompounds, function(c) if ( ! 'PubChemCID' %in% names(c) || is.null(c$PubChemCID)) NA_character_ else as.character(c$PubChemCID), FUN.VALUE = '')
 					if (length(molids) > 0 && length(molids) == length(molnames))
 						results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE))
 				}
@@ -292,17 +291,27 @@
 				url <- paste0('spectra/lcms/range-rt-min/', rt.low, '/', rt.high)
 				params <- NULL
 				if ( ! is.null(col))
-					params <- c(columns = paste(col, collapse = ','))
+					params <- c(columns = paste(col[['id']], collapse = ','))
 
 				# Run query
 				rtspectra <- .self$.get.url(url = url, params = params)
 
+
 				# Get compound/molecule IDs
-				for (x in spectra)
-					rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = vapply(x$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = ''),
-				                                   	   MSDB.TAG.COL = as.character(x$liquidChromatography$columnCode),
-				                                   	   MSDB.TAG.COLRT = (as.numeric(x$RTmin) + as.numeric(x$RTmax)) / 2,
-					                                   	   stringsAsFactors = FALSE))
+				for (x in rtspectra)
+					if (all(c('listOfCompounds', 'liquidChromatography') %in% names(x))) {
+						molids <- vapply(x$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '')
+						if (length(molids) > 0) {
+							col <- if ('columnCode' %in% names(x$liquidChromatography) && ! is.null(x$liquidChromatography$columnCode)) as.character(x$liquidChromatography$columnCode) else NA_character_
+							rtmin <- if ('RTmin' %in% names(x) && ! is.null(x$RTmin)) as.double(x$RTmin) else NA_real_
+							rtmax <- if ('RTmax' %in% names(x) && ! is.null(x$RTmax)) as.double(x$RTmax) else NA_real_
+							colrt <- (rtmin + rtmax) / 2
+							rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = molids,
+				                                   	   	   	   MSDB.TAG.COL = col,
+				                                   	   	   	   MSDB.TAG.COLRT = colrt,
+					                                   	   	   stringsAsFactors = FALSE))
+						}
+					}
 			}	
 
 			# Add retention times and column info
diff -r abfba8eb1c8d -r 1ba222315fd5 lcmsmatching.xml
--- a/lcmsmatching.xml	Tue Mar 14 12:35:23 2017 -0400
+++ b/lcmsmatching.xml	Thu Mar 16 05:05:55 2017 -0400
@@ -1,4 +1,4 @@
-<tool id="lcmsmatching" name="LC/MS matching" version="3.2.0" profile="16.01">
+<tool id="lcmsmatching" name="LC/MS matching" version="3.2.1" profile="16.01">
 
 	<description>Annotation of MS peaks using matching on a spectra database.</description>
 
@@ -9,7 +9,7 @@
 		<requirement type="package" version="3.98">r-xml</requirement>
 		<requirement type="package" version="1.0_6">r-bitops</requirement>
 		<requirement type="package" version="1.95">r-rcurl</requirement>
-		<requirement type="package" version="1.3">r-rjsonio</requirement>
+		<requirement type="package" version="1.3">r-jsonlite</requirement>
 	</requirements>
 
 	<code file="list-chrom-cols.py"/>
diff -r abfba8eb1c8d -r 1ba222315fd5 msdb-common.R
--- a/msdb-common.R	Tue Mar 14 12:35:23 2017 -0400
+++ b/msdb-common.R	Thu Mar 16 05:05:55 2017 -0400
@@ -46,6 +46,11 @@
 	MSDB.MZTOLUNIT.PLAIN <- 'plain' # same as mz: mass-to-charge ratio
 	MSDB.MZTOLUNIT.VALS <- c(MSDB.MZTOLUNIT.PPM, MSDB.MZTOLUNIT.PLAIN)
 
+	# Authorized rt units
+	MSDB.RTUNIT.SEC <- 's'
+	MSDB.RTUNIT.MIN <- 'min'
+	MSDB.RTUNIT.VALS <- c(MSDB.RTUNIT.SEC ,MSDB.RTUNIT.MIN)
+
 	# Default values
 	MSDB.DFT.PREC <- list()
 	MSDB.DFT.PREC[[MSDB.TAG.POS]] <- c("[(M+H)]+", "[M+H]+", "[(M+Na)]+", "[M+Na]+", "[(M+K)]+", "[M+K]+")
diff -r abfba8eb1c8d -r 1ba222315fd5 search-mz
--- a/search-mz	Tue Mar 14 12:35:23 2017 -0400
+++ b/search-mz	Thu Mar 16 05:05:55 2017 -0400
@@ -51,6 +51,8 @@
 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES)
 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',')
 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',')
+MSDB.DFT[['db-rt-unit']] <- MSDB.RTUNIT.SEC
+MSDB.DFT[['rtunit']] <- MSDB.RTUNIT.SEC
 DEFAULT.ARG.VALUES <- MSDB.DFT
 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields())
 DEFAULT.ARG.VALUES[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields())
@@ -181,6 +183,7 @@
 		'rttolx',           'x',            1,  'numeric',      paste0('Tolerance on retention times. Unset by default.'),
 		'rttoly',           'y',            1,  'numeric',      paste0('Tolerance on retention times. Unset by default.'),
 		'rtcol',            'c',            1,  'character',    paste0('Chromatographic column to use. Unset by default. If set, use the corresponding column to filter on retention times, if retention times are provided.'),
+		'rtunit',           NA_character_,  1,  'character',    paste0('Retention time unit for the input file. Default is ', MSDB.DFT$rtunit, '. Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.'),
 		'all-cols',         NA_character_,  0,  'logical',      'Use all available chromatographic columns to match retention times.',
 		'check-cols',       NA_character_,  0,  'logical',      'Check that the chromatographic column names specified with option -c really exist.',
 		'list-cols',        NA_character_,  0,  'logical',      'List all chromatographic columns present in the database. Write list inside the file specified by -o option.',
@@ -206,8 +209,9 @@
 		'db-name',          NA_character_,  1,  'character',    'Name of the database. Used by the "4tabsql" database.',
 		'db-user',          NA_character_,  1,  'character',    'User of the database. Used by the "4tabsql" database.',
 		'db-password',      NA_character_,  1,  'character',    'Password of the database user. Used by the "4tabsql" database.',
-		'db-fields',        NA_character_,  1,  'character',    paste0('Comma separated key/value list giving the field names to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-fields']], '".'),
-		'db-ms-modes',      NA_character_,  1,  'character',    paste0('Comma separated key/value list giving the MS modes to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-ms-modes']], '".'),
+		'db-fields',        NA_character_,  1,  'character',    paste0('Comma separated key/value list giving the field names to be used in the single file database. Default is "', MSDB.DFT[['db-fields']], '".'),
+		'db-ms-modes',      NA_character_,  1,  'character',    paste0('Comma separated key/value list giving the MS modes to be used in the single file database. Default is "', MSDB.DFT[['db-ms-modes']], '".'),
+		'db-rt-unit',       NA_character_,  1,  'character',    paste0('Retention time unit for the database, used in the single file database. Default is "', MSDB.DFT[['db-rt-unit']], '". Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.'),
 		'db-token',         NA_character_,  1,  'character',    'Database token. Used by Peakforest database.',
 		'debug',            NA_character_,  0,  'logical',      'Set debug mode.'
 		)
@@ -542,14 +546,16 @@
 	stop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named '", opt[['input-col-names']][['rt']],"' can be found inside your input file."))
 
 # Set streams
-input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']])
+input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']], rtunit = opt[['rtunit']])
 main.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], one.line = ! is.null(opt[['same-rows']]), match.sep = opt[['molids-sep']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']]))
 peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']]))
 invisible(db$setInputStream(input.stream))
 db$addOutputStreams(c(main.output, peaks.output))
 
-# Set M/Z tolerance unit
+# Set database units
 db$setMzTolUnit(opt$mztolunit)
+if ( ! is.null(opt[['db-rt-unit']]) && opt$database == 'file')
+	db$setRtUnit(opt[['db-rt-unit']])
 
 # Search database
 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG