# HG changeset patch
# User lecorguille
# Date 1522770032 14400
# Node ID d8bac12914730194e5cb664b22023285687383ec
# Parent ea611367e1dac3899c7eda7b8e4b0778b21ebbd8
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a
diff -r ea611367e1da -r d8bac1291473 abims_xcms_fillPeaks.xml
--- a/abims_xcms_fillPeaks.xml Thu Mar 08 05:55:04 2018 -0500
+++ b/abims_xcms_fillPeaks.xml Tue Apr 03 11:40:32 2018 -0400
@@ -4,13 +4,14 @@
macros.xml
+ macros_xcms.xml
-
+
@@ -163,33 +164,20 @@
**Downstream tools**
-+---------------------------+------------------+-----------------------+
-| Name | Output file | Format |
-+===========================+==================+=======================+
-|CAMERA.annotate | xset.retcor.RData| rdata.xcms.fillpeaks |
-+---------------------------+------------------+-----------------------+
-|xcms.summary | xset.retcor.RData| rdata.xcms.fillpeaks |
-+---------------------------+------------------+-----------------------+
-
-The output file **xset.fillpeaks** is a RData file. You can continue your analysis using it in **CAMERA.annotate** or **xcms.summary** tool as a following step of your workflow.
+=========================== ================== =======================
+Name Output file Format
+=========================== ================== =======================
+CAMERA.annotate xset.retcor.RData rdata.xcms.fillpeaks
+--------------------------- ------------------ -----------------------
+xcms.summary xset.retcor.RData rdata.xcms.fillpeaks
+=========================== ================== =======================
**General schema of the metabolomic workflow**
.. image:: xcms_fillpeaks_workflow.png
-
-
------------
-Input files
------------
-
-+---------------------------+-----------------------+
-| Parameter : num + label | Format |
-+===========================+=======================+
-| 1 : RData file | rdata.xcms.group |
-+---------------------------+-----------------------+
-
+---------------------------------------------------
----------
Parameters
@@ -211,43 +199,7 @@
| Rdata file that will be used in the **CAMERA.annotate** or **xcms.summary** step of the workflow.
-xset.variableMetadata.tsv : tabular format
-
- | Table containing information about ions; can be used as one input of **Quality_Metrics** or **Generic_filter** modules.
-
-xset.dataMatrix.tsv : tabular format
-
- | Table containing ions' intensities; can be used as one input of **Quality_Metrics** or **Generic_filter** modules.
-
-------
-
-.. class:: infomark
-
-The output file is a xset.fillPeaks.RData file. You can continue your analysis using it in **CAMERA.annotate** or **xcms.summary** tool.
-
-
----------------------------------------------------
-
----------------
-Working example
----------------
-
-Input files
------------
-
- | RData file -> **xset.retcor.RData**
-
-Parameters
-----------
-
- | method -> **chrom**
- | Get a Peak List -> **false**
-
-
-Output files
-------------
-
- | **xset.fillPeaks.RData: RData file**
+@HELP_PEAKLIST_OUTPUT@
---------------------------------------------------
diff -r ea611367e1da -r d8bac1291473 lib.r
--- a/lib.r Thu Mar 08 05:55:04 2018 -0500
+++ b/lib.r Tue Apr 03 11:40:32 2018 -0400
@@ -28,6 +28,58 @@
}
#@author G. Le Corguille
+# Merge the xcms objects stored in several RData images into a single one.
+#
+# args: named list of the tool arguments. Fields read here:
+#   - images: paths of the RData files to merge; each image must hold either
+#       'raw_data' or 'xdata', plus 'md5sumList' and 'sampleNamesList', and
+#       may hold 'singlefile' and/or 'zipfile'
+#   - sampleMetadata: optional path of a table without header, whose first
+#       column is matched against the sample names and whose second column
+#       gives the sample group
+#   'args' is also handed over, as is, to getRawfilePathFromArguments()
+#
+# Returns a list with the merged "xdata", "singlefile", "md5sumList" and
+# "sampleNamesList".
+#
+# Stops if no image is given, if an image holds neither 'raw_data' nor
+# 'xdata', if an object to append is neither a XCMSnExp nor a OnDiskMSnExp,
+# or if a sample gets no group from the sampleMetadata table.
+mergeXData <- function(args) {
+    if (length(args$images) == 0) stop("\n\nERROR: No RData image was provided to merge")
+
+    xdata_merged <- NULL
+    for (image in args$images) {
+        # Each image is loaded in its own environment: nothing left over from
+        # a previous image (or from the global environment) can be mistaken
+        # for the content of this one, and a loaded object can not overwrite
+        # 'args' or the loop variable
+        image_env <- new.env(parent=emptyenv())
+        load(image, envir=image_env)
+
+        # Handle infiles ('$' gives NULL when the image does not hold the object)
+        rawFilePath <- getRawfilePathFromArguments(image_env$singlefile, image_env$zipfile, args)
+        zipfile <- rawFilePath$zipfile
+        singlefile <- rawFilePath$singlefile
+        retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)
+
+        # 'raw_data' takes precedence over 'xdata' when an image holds both
+        if (exists("raw_data", envir=image_env, inherits=FALSE)) {
+            xdata <- image_env$raw_data
+        } else {
+            xdata <- image_env$xdata
+        }
+        if (is.null(xdata)) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
+
+        # These two objects are mandatory: get() fails if the image lacks them
+        md5sumList <- get("md5sumList", envir=image_env, inherits=FALSE)
+        sampleNamesList <- get("sampleNamesList", envir=image_env, inherits=FALSE)
+        cat(sampleNamesList$sampleNamesOrigin,"\n")
+
+        if (is.null(xdata_merged)) {
+            # First image: it initialises the merged objects
+            xdata_merged <- xdata
+            singlefile_merged <- singlefile
+            md5sumList_merged <- md5sumList
+            sampleNamesList_merged <- sampleNamesList
+        } else {
+            if (is(xdata, "XCMSnExp")) {
+                xdata_merged <- c(xdata_merged,xdata)
+            } else if (is(xdata, "OnDiskMSnExp")) {
+                xdata_merged <- .concatenate_OnDiskMSnExp(xdata_merged,xdata)
+            } else {
+                stop("\n\nERROR: The RData should contain either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata")
+            }
+            singlefile_merged <- c(singlefile_merged,singlefile)
+            md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin)
+            sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin)
+            sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames)
+        }
+    }
+    xdata <- xdata_merged
+    singlefile <- singlefile_merged
+    md5sumList <- md5sumList_merged
+    sampleNamesList <- sampleNamesList_merged
+
+    if (!is.null(args$sampleMetadata)) {
+        cat("\tXSET PHENODATA SETTING...\n")
+        sampleMetadataFile <- args$sampleMetadata
+        sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=FALSE)
+        # Group of each sample, looked up by sample name; NA when the name is
+        # absent from the first column of the table
+        xdata@phenoData@data$sample_group <- sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)]
+
+        if (any(is.na(pData(xdata)$sample_group))) {
+            sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)]
+            error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" "))
+            print(error_message)
+            stop(error_message)
+        }
+    }
+    return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList))
+}
+
+#@author G. Le Corguille
# This function convert if it is required the Retention Time in minutes
RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
if (convertRTMinute){
@@ -79,9 +131,11 @@
# Color by group
group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
- names(group_colors) <- unique(xdata$sample_group)
- plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
- legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
+ if (length(group_colors) > 1) {
+ names(group_colors) <- unique(xdata$sample_group)
+ plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
+ legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
+ }
# Color by sample
plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
@@ -109,6 +163,19 @@
}
#@author G. Le Corguille
+# Read a delimited text file whose field separator is not known in advance.
+# The separators ";", tabulation and "," are tried in that order and the first
+# one giving at least two columns is kept.
+#   filename: path of the file to read
+#   header: TRUE if the first line of the file holds the column names
+# Returns the data.frame built by read.table (strings are not turned into factors).
+# Stops if the file does not exist or if no separator gives at least two columns.
+getDataFrameFromFile <- function(filename, header=TRUE) {
+    if (is.character(filename) && !file.exists(filename)) stop(paste("Cannot find the file:", filename))
+    for (separator in c(";", "\t", ",")) {
+        # With a wrong separator read.table may fail on ragged lines instead
+        # of returning a single column: this only means "try the next one"
+        myDataFrame <- tryCatch(
+            read.table(filename, header=header, sep=separator, stringsAsFactors=FALSE),
+            error=function(e) NULL
+        )
+        if (!is.null(myDataFrame) && ncol(myDataFrame) >= 2) return(myDataFrame)
+    }
+    error_message <- "Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation"
+    print(error_message)
+    stop(error_message)
+}
+
getPlotChromatogram <- function(xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") {
chrom <- chromatogram(xdata, aggregationFun = aggregationFun)
@@ -127,9 +194,11 @@
# Color by group
group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
- names(group_colors) <- unique(xdata$sample_group)
- plot(chrom, col = group_colors[chrom$sample_group], main=main)
- legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
+ if (length(group_colors) > 1) {
+ names(group_colors) <- unique(xdata$sample_group)
+ plot(chrom, col = group_colors[chrom$sample_group], main=main)
+ legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
+ }
# Color by sample
plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main)
@@ -345,10 +414,7 @@
singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
}
}
- for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) {
- args[[argument]] <- NULL
- }
- return(list(zipfile=zipfile, singlefile=singlefile, args=args))
+ return(list(zipfile=zipfile, singlefile=singlefile))
}
@@ -559,3 +625,9 @@
c.XCMSnExp <- function(...) {
.concatenate_XCMSnExp(...)
}
+
+#@TODO: drop this wrapper as soon as xcms 3.x.x from Bioconductor 3.7 can be used
+# https://github.com/sneumann/xcms/issues/247
+# Forwards all of its arguments, unchanged, to .concatenate_OnDiskMSnExp() and
+# returns what that function returns.
+# NOTE(review): as an S3 method of c(), this name would only be selected for
+# objects whose class is "MSnbase" - confirm that this is the intended class.
+c.MSnbase <- function(...) .concatenate_OnDiskMSnExp(...)
diff -r ea611367e1da -r d8bac1291473 macros.xml
--- a/macros.xml Thu Mar 08 05:55:04 2018 -0500
+++ b/macros.xml Tue Apr 03 11:40:32 2018 -0400
@@ -1,22 +1,13 @@
- 3.0.0
-
-
- bioconductor-xcms
- r-batch
- r-rcolorbrewer
- unzip
-
-
-
- LC_ALL=C Rscript $__tool_directory__/
+
+ LC_ALL=C Rscript $__tool_directory__/
;
@@ -25,6 +16,7 @@
sh -c "exit \$return"
+
[0-9]+ *, *[0-9]+
@@ -37,155 +29,24 @@
[0-9, ]+
+
RData file
It contain a xcms3::XCMSnExp object (named xdata)
-
-
- #if $file_load_section.file_load_conditional.file_load_select == "yes":
- #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"):
- #set singlefile_galaxyPath = '|'.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] )
- #set singlefile_sampleName = '|'.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] )
-
- singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName'
- #else
- zipfile '$file_load_section.file_load_conditional.input'
- #end if
- #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- #if $peaklist.peaklistBool
- convertRTMinute $peaklist.convertRTMinute
- numDigitsMZ $peaklist.numDigitsMZ
- numDigitsRT $peaklist.numDigitsRT
- intval $peaklist.intval
- #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- (peaklist['peaklistBool'])
-
-
- (peaklist['peaklistBool'])
-
-
-
-
-.. class:: infomark
-
-**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu
+
+
.. class:: infomark
-**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M]
+**Galaxy integration** ABiMS TEAM - SU/CNRS - Station biologique de Roscoff and Yann Guitton - LABERCA
+Part of Workflow4Metabolomics.org [W4M]
| Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool.
----------------------------------------------------
-
-
-
-
-
-For details and explanations for all the parameters and the workflow of xcms_ package, see its manual_ and this example_
-
-.. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html
-.. _manual: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf
-.. _example: https://bioconductor.org/packages/release/bioc/vignettes/xcms/inst/doc/xcms.html
-
-
-
-Get a Peak List
----------------
-
-If 'true', the module generates two additional files corresponding to the peak list:
-- the variable metadata file (corresponding to information about extracted ions such as mass or retention time)
-- the data matrix (corresponding to related intensities)
-
-**decimal places for [mass or retention time] values in identifiers**
-
- | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time.
- | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively.
- | Theses parameters do not affect decimal places in columns other than the identifier one.
-
-**Reported intensity values**
-
- | This parameter determines which values should be reported as intensities in the dataMatrix table; it correspond to xcms 'intval' parameter:
- | - into: integrated area of original (raw) peak
- | - maxo: maximum intensity of original (raw) peak
- | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’)
-
-
-
-
-
- 10.1021/ac051437y
+
10.1093/bioinformatics/btu813
-
diff -r ea611367e1da -r d8bac1291473 macros_xcms.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros_xcms.xml Tue Apr 03 11:40:32 2018 -0400
@@ -0,0 +1,242 @@
+
+
+
+ 3.0.0
+
+
+ bioconductor-xcms
+ r-batch
+ r-rcolorbrewer
+ unzip
+
+
+
+
+
+
+ #if $file_load_section.file_load_conditional.file_load_select == "yes":
+ #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"):
+ #set singlefile_galaxyPath = '|'.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] )
+ #set singlefile_sampleName = '|'.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] )
+
+ singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName'
+ #else
+ zipfile '$file_load_section.file_load_conditional.input'
+ #end if
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #if $peaklist.peaklistBool
+ convertRTMinute $peaklist.convertRTMinute
+ numDigitsMZ $peaklist.numDigitsMZ
+ numDigitsRT $peaklist.numDigitsRT
+ intval $peaklist.intval
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (peaklist['peaklistBool'])
+
+
+ (peaklist['peaklistBool'])
+
+
+
+
+
+Get a Peak List
+---------------
+
+If 'true', the module generates two additional files corresponding to the peak list:
+- the variable metadata file (corresponding to information about extracted ions such as mass or retention time)
+- the data matrix (corresponding to related intensities)
+
+**decimal places for [mass or retention time] values in identifiers**
+
+ | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time.
+ | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively.
+ | These parameters do not affect decimal places in columns other than the identifier one.
+
+**Reported intensity values**
+
+ | This parameter determines which values should be reported as intensities in the dataMatrix table; it corresponds to the xcms 'intval' parameter:
+ | - into: integrated area of original (raw) peak
+ | - maxo: maximum intensity of original (raw) peak
+ | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’)
+
+
+
+
+xset.variableMetadata.tsv : tabular format
+
+ | Table containing information about ions; can be used as one input of **Quality_Metrics** or **Generic_filter** modules.
+
+xset.dataMatrix.tsv : tabular format
+
+ | Table containing ions' intensities; can be used as one input of **Quality_Metrics** or **Generic_filter** modules.
+
+
+
+
+ ppm $methods.ppm
+ peakwidth "c($methods.peakwidth)"
+
+ ## Advanced
+ snthresh $methods.CentWaveAdv.snthresh
+ prefilter "c($methods.CentWaveAdv.prefilter)"
+ mzCenterFun $methods.CentWaveAdv.mzCenterFun
+ integrate $methods.CentWaveAdv.integrate
+ mzdiff $methods.CentWaveAdv.mzdiff
+ fitgauss $methods.CentWaveAdv.fitgauss
+ noise $methods.CentWaveAdv.noise
+ verboseColumns $methods.CentWaveAdv.verboseColumns
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #if $sectionROI.roiList:
+ roiList '$sectionROI.roiList'
+ firstBaselineCheck $sectionROI.firstBaselineCheck
+ #if $sectionROI.roiScales != "":
+ roiScales "c($sectionROI.roiScales)"
+ #end if
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu
+
+@HELP_AUTHORS_WRAPPERS@
+
+---------------------------------------------------
+
+
+
+
+
+For details and explanations for all the parameters and the workflow of xcms_ package, see its manual_ and this example_
+
+.. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html
+.. _manual: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf
+.. _example: https://bioconductor.org/packages/release/bioc/vignettes/xcms/inst/doc/xcms.html
+
+
+
+
+
+ 10.1021/ac051437y
+
+
+
+
diff -r ea611367e1da -r d8bac1291473 repository_dependencies.xml
--- a/repository_dependencies.xml Thu Mar 08 05:55:04 2018 -0500
+++ b/repository_dependencies.xml Tue Apr 03 11:40:32 2018 -0400
@@ -1,5 +1,5 @@
-
+
diff -r ea611367e1da -r d8bac1291473 static/images/xcms_fillpeaks_workflow.png
Binary file static/images/xcms_fillpeaks_workflow.png has changed
diff -r ea611367e1da -r d8bac1291473 xcms_fillpeaks.r
--- a/xcms_fillpeaks.r Thu Mar 08 05:55:04 2018 -0500
+++ b/xcms_fillpeaks.r Tue Apr 03 11:40:32 2018 -0400
@@ -31,18 +31,10 @@
#saving the specific parameters
method <- "FillChromPeaks"
-if (!is.null(args$convertRTMinute)){
- convertRTMinute <- args$convertRTMinute; args$convertRTMinute <- NULL
-}
-if (!is.null(args$numDigitsMZ)){
- numDigitsMZ <- args$numDigitsMZ; args$numDigitsMZ <- NULL
-}
-if (!is.null(args$numDigitsRT)){
- numDigitsRT <- args$numDigitsRT; args$numDigitsRT <- NULL
-}
-if (!is.null(args$intval)){
- intval <- args$intval; args$intval <- NULL
-}
+if (!is.null(args$convertRTMinute)) convertRTMinute <- args$convertRTMinute
+if (!is.null(args$numDigitsMZ)) numDigitsMZ <- args$numDigitsMZ
+if (!is.null(args$numDigitsRT)) numDigitsRT <- args$numDigitsRT
+if (!is.null(args$intval)) intval <- args$intval
cat("\n\n")
@@ -51,7 +43,7 @@
cat("\tINFILE PROCESSING INFO\n")
#image is an .RData file necessary to use xset variable given by previous tools
-load(args$image); args$image=NULL
+load(args$image)
if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
#Verification of a group step before doing the fillpeaks job.
@@ -63,7 +55,6 @@
rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
zipfile <- rawFilePath$zipfile
singlefile <- rawFilePath$singlefile
-args <- rawFilePath$args
directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)
# Check some character issues
@@ -82,6 +73,9 @@
cat("\t\tCOMPUTE\n")
cat("\t\t\tFilling missing peaks using default settings\n")
+# Keep only the arguments that are slots of the <method>Param class, to drop unexpected key/value pairs such as singlefile_galaxyPath or method ...
+args <- args[names(args) %in% slotNames(do.call(paste0(method,"Param"), list()))]
+
fillChromPeaksParam <- do.call(paste0(method,"Param"), args)
print(fillChromPeaksParam)
xdata <- fillChromPeaks(xdata, param=fillChromPeaksParam)