# HG changeset patch # User lecorguille # Date 1522770032 14400 # Node ID d8bac12914730194e5cb664b22023285687383ec # Parent ea611367e1dac3899c7eda7b8e4b0778b21ebbd8 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a diff -r ea611367e1da -r d8bac1291473 abims_xcms_fillPeaks.xml --- a/abims_xcms_fillPeaks.xml Thu Mar 08 05:55:04 2018 -0500 +++ b/abims_xcms_fillPeaks.xml Tue Apr 03 11:40:32 2018 -0400 @@ -4,13 +4,14 @@ macros.xml + macros_xcms.xml - + @@ -163,33 +164,20 @@ **Downstream tools** -+---------------------------+------------------+-----------------------+ -| Name | Output file | Format | -+===========================+==================+=======================+ -|CAMERA.annotate | xset.retcor.RData| rdata.xcms.fillpeaks | -+---------------------------+------------------+-----------------------+ -|xcms.summary | xset.retcor.RData| rdata.xcms.fillpeaks | -+---------------------------+------------------+-----------------------+ - -The output file **xset.fillpeaks** is a RData file. You can continue your analysis using it in **CAMERA.annotate** or **xcms.summary** tool as a following step of your workflow. +=========================== ================== ======================= +Name Output file Format +=========================== ================== ======================= +CAMERA.annotate xset.retcor.RData rdata.xcms.fillpeaks +--------------------------- ------------------ ----------------------- +xcms.summary xset.retcor.RData rdata.xcms.fillpeaks +=========================== ================== ======================= **General schema of the metabolomic workflow** .. image:: xcms_fillpeaks_workflow.png - - ------------ -Input files ------------ - -+---------------------------+-----------------------+ -| Parameter : num + label | Format | -+===========================+=======================+ -| 1 : RData file | rdata.xcms.group | -+---------------------------+-----------------------+ - +--------------------------------------------------- ---------- Parameters @@ -211,43 +199,7 @@ | Rdata file that will be used in the **CAMERA.annotate** or **xcms.summary** step of the workflow. -xset.variableMetadata.tsv : tabular format - - | Table containing information about ions; can be used as one input of **Quality_Metrics** or **Generic_filter** modules. - -xset.dataMatrix.tsv : tabular format - - | Table containing ions' intensities; can be used as one input of **Quality_Metrics** or **Generic_filter** modules. - ------- - -.. class:: infomark - -The output file is a xset.fillPeaks.RData file. You can continue your analysis using it in **CAMERA.annotate** or **xcms.summary** tool. - - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files ------------ - - | RData file -> **xset.retcor.RData** - -Parameters ----------- - - | method -> **chrom** - | Get a Peak List -> **false** - - -Output files ------------- - - | **xset.fillPeaks.RData: RData file** +@HELP_PEAKLIST_OUTPUT@ --------------------------------------------------- diff -r ea611367e1da -r d8bac1291473 lib.r --- a/lib.r Thu Mar 08 05:55:04 2018 -0500 +++ b/lib.r Tue Apr 03 11:40:32 2018 -0400 @@ -28,6 +28,58 @@ } #@author G. Le Corguille +# This function merge several xdata into one. +mergeXData <- function(args) { + for(image in args$images) { + load(image) + # Handle infiles + if (!exists("singlefile")) singlefile <- NULL + if (!exists("zipfile")) zipfile <- NULL + rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) + zipfile <- rawFilePath$zipfile + singlefile <- rawFilePath$singlefile + retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) + if (exists("raw_data")) xdata <- raw_data + if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") + cat(sampleNamesList$sampleNamesOrigin,"\n") + if (!exists("xdata_merged")) { + xdata_merged <- xdata + singlefile_merged <- singlefile + md5sumList_merged <- md5sumList + sampleNamesList_merged <- sampleNamesList + } else { + if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) + else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- .concatenate_OnDiskMSnExp(xdata_merged,xdata) + else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") + singlefile_merged <- c(singlefile_merged,singlefile) + md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) + sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) + sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) + } + } + rm(image) + xdata <- xdata_merged; rm(xdata_merged) + singlefile <- singlefile_merged; rm(singlefile_merged) + md5sumList <- md5sumList_merged; rm(md5sumList_merged) + sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) + + if (!is.null(args$sampleMetadata)) { + cat("\tXSET PHENODATA SETTING...\n") + sampleMetadataFile <- args$sampleMetadata + sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) + xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] + + if (any(is.na(pData(xdata)$sample_group))) { + sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] + error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) + print(error_message) + stop(error_message) + } + } + return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList)) +} + +#@author G. Le Corguille # This function convert if it is required the Retention Time in minutes RTSecondToMinute <- function(variableMetadata, convertRTMinute) { if (convertRTMinute){ @@ -79,9 +131,11 @@ # Color by group group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] - names(group_colors) <- unique(xdata$sample_group) - plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) - legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + if (length(group_colors) > 1) { + names(group_colors) <- unique(xdata$sample_group) + plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) + legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + } # Color by sample plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) @@ -109,6 +163,19 @@ } #@author G. Le Corguille +# It allow different of field separators +getDataFrameFromFile <- function(filename, header=T) { + myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) + if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) + if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) + if (ncol(myDataFrame) < 2) { + error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" + print(error_message) + stop(error_message) + } + return(myDataFrame) +} + getPlotChromatogram <- function(xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { chrom <- chromatogram(xdata, aggregationFun = aggregationFun) @@ -127,9 +194,11 @@ # Color by group group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] - names(group_colors) <- unique(xdata$sample_group) - plot(chrom, col = group_colors[chrom$sample_group], main=main) - legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + if (length(group_colors) > 1) { + names(group_colors) <- unique(xdata$sample_group) + plot(chrom, col = group_colors[chrom$sample_group], main=main) + legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + } # Color by sample plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) @@ -345,10 +414,7 @@ singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath } } - for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { - args[[argument]] <- NULL - } - return(list(zipfile=zipfile, singlefile=singlefile, args=args)) + return(list(zipfile=zipfile, singlefile=singlefile)) } @@ -559,3 +625,9 @@ c.XCMSnExp <- function(...) { .concatenate_XCMSnExp(...) } + +#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 +# https://github.com/sneumann/xcms/issues/247 +c.MSnbase <- function(...) { + .concatenate_OnDiskMSnExp(...) +} diff -r ea611367e1da -r d8bac1291473 macros.xml --- a/macros.xml Thu Mar 08 05:55:04 2018 -0500 +++ b/macros.xml Tue Apr 03 11:40:32 2018 -0400 @@ -1,22 +1,13 @@ - 3.0.0 - - - bioconductor-xcms - r-batch - r-rcolorbrewer - unzip - - - - LC_ALL=C Rscript $__tool_directory__/ + + LC_ALL=C Rscript $__tool_directory__/ ; @@ -25,6 +16,7 @@ sh -c "exit \$return" + [0-9]+ *, *[0-9]+ @@ -37,155 +29,24 @@ [0-9, ]+ + RData file It contain a xcms3::XCMSnExp object (named xdata) - - - #if $file_load_section.file_load_conditional.file_load_select == "yes": - #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"): - #set singlefile_galaxyPath = '|'.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] ) - #set singlefile_sampleName = '|'.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] ) - - singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName' - #else - zipfile '$file_load_section.file_load_conditional.input' - #end if - #end if - - - -

- - - - - - - - - - - -

- - - -

- - - - -

- - - -

- - - - -

- - - -

- - - - -

- - - - #if $peaklist.peaklistBool - convertRTMinute $peaklist.convertRTMinute - numDigitsMZ $peaklist.numDigitsMZ - numDigitsRT $peaklist.numDigitsRT - intval $peaklist.intval - #end if - - - - - - - - - - - - - - - - - - - - - - (peaklist['peaklistBool']) - - - (peaklist['peaklistBool']) - - - - -.. class:: infomark - -**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu + + .. class:: infomark -**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M] +**Galaxy integration** ABiMS TEAM - SU/CNRS - Station biologique de Roscoff and Yann Guitton - LABERCA +Part of Workflow4Metabolomics.org [W4M] | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool. ---------------------------------------------------- - - - - - -For details and explanations for all the parameters and the workflow of xcms_ package, see its manual_ and this example_ - -.. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html -.. _manual: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf -.. _example: https://bioconductor.org/packages/release/bioc/vignettes/xcms/inst/doc/xcms.html - - - -Get a Peak List ---------------- - -If 'true', the module generates two additional files corresponding to the peak list: -- the variable metadata file (corresponding to information about extracted ions such as mass or retention time) -- the data matrix (corresponding to related intensities) - -**decimal places for [mass or retention time] values in identifiers** - - | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time. - | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively. - | Theses parameters do not affect decimal places in columns other than the identifier one. - -**Reported intensity values** - - | This parameter determines which values should be reported as intensities in the dataMatrix table; it correspond to xcms 'intval' parameter: - | - into: integrated area of original (raw) peak - | - maxo: maximum intensity of original (raw) peak - | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’) - - - - - - 10.1021/ac051437y + 10.1093/bioinformatics/btu813 - diff -r ea611367e1da -r d8bac1291473 macros_xcms.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros_xcms.xml Tue Apr 03 11:40:32 2018 -0400 @@ -0,0 +1,242 @@ + + + + 3.0.0 + + + bioconductor-xcms + r-batch + r-rcolorbrewer + unzip + + + + + + + #if $file_load_section.file_load_conditional.file_load_select == "yes": + #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"): + #set singlefile_galaxyPath = '|'.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] ) + #set singlefile_sampleName = '|'.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] ) + + singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName' + #else + zipfile '$file_load_section.file_load_conditional.input' + #end if + #end if + + + +

+ + + + + + + + + + + +

+ + + +

+ + + + +

+ + + +

+ + + + +

+ + + +

+ + + + +

+ + + +

+ + + + +

+ + + + + #if $peaklist.peaklistBool + convertRTMinute $peaklist.convertRTMinute + numDigitsMZ $peaklist.numDigitsMZ + numDigitsRT $peaklist.numDigitsRT + intval $peaklist.intval + #end if + + + + + + + + + + + + + + + + + + + + + + (peaklist['peaklistBool']) + + + (peaklist['peaklistBool']) + + + + + +Get a Peak List +--------------- + +If 'true', the module generates two additional files corresponding to the peak list: +- the variable metadata file (corresponding to information about extracted ions such as mass or retention time) +- the data matrix (corresponding to related intensities) + +**decimal places for [mass or retention time] values in identifiers** + + | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time. + | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively. + | Theses parameters do not affect decimal places in columns other than the identifier one. + +**Reported intensity values** + + | This parameter determines which values should be reported as intensities in the dataMatrix table; it correspond to xcms 'intval' parameter: + | - into: integrated area of original (raw) peak + | - maxo: maximum intensity of original (raw) peak + | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’) + + + + +xset.variableMetadata.tsv : tabular format + + | Table containing information about ions; can be used as one input of **Quality_Metrics** or **Generic_filter** modules. + +xset.dataMatrix.tsv : tabular format + + | Table containing ions' intensities; can be used as one input of **Quality_Metrics** or **Generic_filter** modules. + + + + + ppm $methods.ppm + peakwidth "c($methods.peakwidth)" + + ## Advanced + snthresh $methods.CentWaveAdv.snthresh + prefilter "c($methods.CentWaveAdv.prefilter)" + mzCenterFun $methods.CentWaveAdv.mzCenterFun + integrate $methods.CentWaveAdv.integrate + mzdiff $methods.CentWaveAdv.mzdiff + fitgauss $methods.CentWaveAdv.fitgauss + noise $methods.CentWaveAdv.noise + verboseColumns $methods.CentWaveAdv.verboseColumns + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #if $sectionROI.roiList: + roiList '$sectionROI.roiList' + firstBaselineCheck $sectionROI.firstBaselineCheck + #if $sectionROI.roiScales != "": + roiScales "c($sectionROI.roiScales)" + #end if + #end if + + + + + + + + + + + + +.. class:: infomark + +**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu + +@HELP_AUTHORS_WRAPPERS@ + +--------------------------------------------------- + + + + + +For details and explanations for all the parameters and the workflow of xcms_ package, see its manual_ and this example_ + +.. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html +.. _manual: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf +.. _example: https://bioconductor.org/packages/release/bioc/vignettes/xcms/inst/doc/xcms.html + + + + + + 10.1021/ac051437y + + + + diff -r ea611367e1da -r d8bac1291473 repository_dependencies.xml --- a/repository_dependencies.xml Thu Mar 08 05:55:04 2018 -0500 +++ b/repository_dependencies.xml Tue Apr 03 11:40:32 2018 -0400 @@ -1,5 +1,5 @@ - + diff -r ea611367e1da -r d8bac1291473 static/images/xcms_fillpeaks_workflow.png Binary file static/images/xcms_fillpeaks_workflow.png has changed diff -r ea611367e1da -r d8bac1291473 xcms_fillpeaks.r --- a/xcms_fillpeaks.r Thu Mar 08 05:55:04 2018 -0500 +++ b/xcms_fillpeaks.r Tue Apr 03 11:40:32 2018 -0400 @@ -31,18 +31,10 @@ #saving the specific parameters method <- "FillChromPeaks" -if (!is.null(args$convertRTMinute)){ - convertRTMinute <- args$convertRTMinute; args$convertRTMinute <- NULL -} -if (!is.null(args$numDigitsMZ)){ - numDigitsMZ <- args$numDigitsMZ; args$numDigitsMZ <- NULL -} -if (!is.null(args$numDigitsRT)){ - numDigitsRT <- args$numDigitsRT; args$numDigitsRT <- NULL -} -if (!is.null(args$intval)){ - intval <- args$intval; args$intval <- NULL -} +if (!is.null(args$convertRTMinute)) convertRTMinute <- args$convertRTMinute +if (!is.null(args$numDigitsMZ)) numDigitsMZ <- args$numDigitsMZ +if (!is.null(args$numDigitsRT)) numDigitsRT <- args$numDigitsRT +if (!is.null(args$intval)) intval <- args$intval cat("\n\n") @@ -51,7 +43,7 @@ cat("\tINFILE PROCESSING INFO\n") #image is an .RData file necessary to use xset variable given by previous tools -load(args$image); args$image=NULL +load(args$image) if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") #Verification of a group step before doing the fillpeaks job. @@ -63,7 +55,6 @@ rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) zipfile <- rawFilePath$zipfile singlefile <- rawFilePath$singlefile -args <- rawFilePath$args directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) # Check some character issues @@ -82,6 +73,9 @@ cat("\t\tCOMPUTE\n") cat("\t\t\tFilling missing peaks using default settings\n") +# clear the arguement list to remove unexpected key/value as singlefile_galaxyPath or method ... +args <- args[names(args) %in% slotNames(do.call(paste0(method,"Param"), list()))] + fillChromPeaksParam <- do.call(paste0(method,"Param"), args) print(fillChromPeaksParam) xdata <- fillChromPeaks(xdata, param=fillChromPeaksParam)