Mercurial > repos > lecorguille > xcms_merge
changeset 11:67ab853b89f3 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a
| author | lecorguille | 
|---|---|
| date | Tue, 03 Apr 2018 11:38:21 -0400 | 
| parents | 47e953d9da82 | 
| children | 9efcd7620cde | 
| files | lib.r macros.xml macros_xcms.xml repository_dependencies.xml static/images/xcms_merge_workflow.png xcms_merge.r xcms_merge.xml | 
| diffstat | 7 files changed, 385 insertions(+), 256 deletions(-) [+] | 
line wrap: on
 line diff
--- a/lib.r Thu Mar 08 05:52:52 2018 -0500 +++ b/lib.r Tue Apr 03 11:38:21 2018 -0400 @@ -28,6 +28,58 @@ } #@author G. Le Corguille +# This function merge several xdata into one. +mergeXData <- function(args) { + for(image in args$images) { + load(image) + # Handle infiles + if (!exists("singlefile")) singlefile <- NULL + if (!exists("zipfile")) zipfile <- NULL + rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) + zipfile <- rawFilePath$zipfile + singlefile <- rawFilePath$singlefile + retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) + if (exists("raw_data")) xdata <- raw_data + if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") + cat(sampleNamesList$sampleNamesOrigin,"\n") + if (!exists("xdata_merged")) { + xdata_merged <- xdata + singlefile_merged <- singlefile + md5sumList_merged <- md5sumList + sampleNamesList_merged <- sampleNamesList + } else { + if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) + else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- .concatenate_OnDiskMSnExp(xdata_merged,xdata) + else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") + singlefile_merged <- c(singlefile_merged,singlefile) + md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) + sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) + sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) + } + } + rm(image) + xdata <- xdata_merged; rm(xdata_merged) + singlefile <- singlefile_merged; rm(singlefile_merged) + md5sumList <- md5sumList_merged; rm(md5sumList_merged) + sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) + + if (!is.null(args$sampleMetadata)) { + cat("\tXSET PHENODATA 
SETTING...\n") + sampleMetadataFile <- args$sampleMetadata + sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) + xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] + + if (any(is.na(pData(xdata)$sample_group))) { + sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] + error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) + print(error_message) + stop(error_message) + } + } + return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList)) +} + +#@author G. Le Corguille # This function convert if it is required the Retention Time in minutes RTSecondToMinute <- function(variableMetadata, convertRTMinute) { if (convertRTMinute){ @@ -79,9 +131,11 @@ # Color by group group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] - names(group_colors) <- unique(xdata$sample_group) - plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) - legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + if (length(group_colors) > 1) { + names(group_colors) <- unique(xdata$sample_group) + plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) + legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + } # Color by sample plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) @@ -109,6 +163,19 @@ } #@author G. 
Le Corguille +# It allow different of field separators +getDataFrameFromFile <- function(filename, header=T) { + myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) + if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) + if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) + if (ncol(myDataFrame) < 2) { + error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" + print(error_message) + stop(error_message) + } + return(myDataFrame) +} + getPlotChromatogram <- function(xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { chrom <- chromatogram(xdata, aggregationFun = aggregationFun) @@ -127,9 +194,11 @@ # Color by group group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] - names(group_colors) <- unique(xdata$sample_group) - plot(chrom, col = group_colors[chrom$sample_group], main=main) - legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + if (length(group_colors) > 1) { + names(group_colors) <- unique(xdata$sample_group) + plot(chrom, col = group_colors[chrom$sample_group], main=main) + legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + } # Color by sample plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) @@ -345,10 +414,7 @@ singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath } } - for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { - args[[argument]] <- NULL - } - return(list(zipfile=zipfile, singlefile=singlefile, args=args)) + return(list(zipfile=zipfile, singlefile=singlefile)) } @@ -559,3 +625,9 @@ c.XCMSnExp <- function(...) 
{ .concatenate_XCMSnExp(...) } + +#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 +# https://github.com/sneumann/xcms/issues/247 +c.MSnbase <- function(...) { + .concatenate_OnDiskMSnExp(...) +}
--- a/macros.xml Thu Mar 08 05:52:52 2018 -0500 +++ b/macros.xml Tue Apr 03 11:38:21 2018 -0400 @@ -1,22 +1,13 @@ <?xml version="1.0"?> <macros> - <token name="@WRAPPER_VERSION@">3.0.0</token> - <xml name="requirements"> - <requirements> - <requirement type="package" version="@WRAPPER_VERSION@">bioconductor-xcms</requirement> - <requirement type="package" version="1.1_4">r-batch</requirement> - <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement> - <requirement type="package" version="6.0">unzip</requirement> - <yield /> - </requirements> - </xml> <xml name="stdio"> <stdio> <exit_code range="1" level="fatal" /> </stdio> </xml> - <token name="@COMMAND_XCMS_SCRIPT@">LC_ALL=C Rscript $__tool_directory__/</token> + <!-- COMMAND --> + <token name="@COMMAND_RSCRIPT@">LC_ALL=C Rscript $__tool_directory__/</token> <token name="@COMMAND_LOG_EXIT@"> ; @@ -25,6 +16,7 @@ sh -c "exit \$return" </token> + <!-- INPUT_VALIDATORS --> <xml name="input_validator_range_integer"> <validator type="regex" message="The format is 'min,max'" >[0-9]+ *, *[0-9]+</validator> </xml> @@ -37,155 +29,24 @@ <validator type="regex" message="The format is '1,2,4,6'" >[0-9, ]+</validator> </xml> + <token name="@INPUT_IMAGE_LABEL@">RData file</token> <token name="@INPUT_IMAGE_HELP@">It contain a xcms3::XCMSnExp object (named xdata)</token> - <!-- zipfile load for planemo test --> - <token name="@COMMAND_FILE_LOAD@"> - #if $file_load_section.file_load_conditional.file_load_select == "yes": - #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"): - #set singlefile_galaxyPath = '|'.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] ) - #set singlefile_sampleName = '|'.join( [ str( $single_file.name ) for 
$single_file in $file_load_section.file_load_conditional.input ] ) - - singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName' - #else - zipfile '$file_load_section.file_load_conditional.input' - #end if - #end if - </token> - - <xml name="input_file_load"> - <section name="file_load_section" title="Resubmit your raw dataset or your zip file"> - <conditional name="file_load_conditional"> - <param name="file_load_select" type="select" label="Resubmit your dataset or your zip file" help="Use only if you get a message which say that your original dataset or zip file have been deleted on the server." > - <option value="no" >no need</option> - <option value="yes" >yes</option> - </param> - <when value="no"> - </when> - <when value="yes"> - <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" multiple="true" label="File(s) from your history containing your chromatograms" help="Single file mode for the format: mzxml, mzml, mzdata and netcdf. Zip file mode for the format: no_unzip.zip, zip. See the help section below." 
/> - </when> - </conditional> - </section> - </xml> - - <xml name="test_file_load_zip"> - <section name="file_load_section"> - <conditional name="file_load_conditional"> - <param name="file_load_select" value="yes" /> - <param name="input" value="faahKO_reduce.zip" ftype="zip" /> - </conditional> - </section> - </xml> - - <xml name="test_file_load_zip_sacuri"> - <section name="file_load_section"> - <conditional name="file_load_conditional"> - <param name="file_load_select" value="yes" /> - <param name="input" value="sacuri_dir_root.zip" ftype="zip" /> - </conditional> - </section> - </xml> - - <xml name="test_file_load_single"> - <section name="file_load_section"> - <conditional name="file_load_conditional"> - <param name="file_load_select" value="yes" /> - <param name="input" value="wt15.CDF,ko16.CDF,ko15.CDF,wt16.CDF" ftype="netcdf" /> - </conditional> - </section> - </xml> - - <token name="@COMMAND_PEAKLIST@"> - #if $peaklist.peaklistBool - convertRTMinute $peaklist.convertRTMinute - numDigitsMZ $peaklist.numDigitsMZ - numDigitsRT $peaklist.numDigitsRT - intval $peaklist.intval - #end if - </token> - - <xml name="input_peaklist"> - <conditional name="peaklist"> - <param name="peaklistBool" type="boolean" label="Get a Peak List" /> - <when value="true"> - <param name="convertRTMinute" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Convert retention time (seconds) into minutes" help="Convert the columns rtmed, rtmin and rtmax into minutes"/> - <param name="numDigitsMZ" type="integer" value="4" label="Number of decimal places for mass values reported in ions' identifiers." help="A minimum of 4 decimal places is recommended. Useful to avoid duplicates within identifiers" /> - <param name="numDigitsRT" type="integer" value="0" label="Number of decimal places for retention time values reported in ions' identifiers." 
help="Useful to avoid duplicates within identifiers" /> - <param name="intval" type="select" label="Reported intensity values" help="[intval] See the help section below"> - <option value="into" selected="true">into</option> - <option value="maxo">maxo</option> - <option value="intb">intb</option> - </param> - </when> - <when value="false" /> - </conditional> - </xml> - - <xml name="output_peaklist" token_function=""> - <data name="variableMetadata" format="tabular" label="${image.name[:-6]}.@FUNCTION@.variableMetadata.tsv" from_work_dir="variableMetadata.tsv" > - <filter>(peaklist['peaklistBool'])</filter> - </data> - <data name="dataMatrix" format="tabular" label="${image.name[:-6]}.@FUNCTION@.dataMatrix.tsv" from_work_dir="dataMatrix.tsv" > - <filter>(peaklist['peaklistBool'])</filter> - </data> - </xml> - - <token name="@HELP_AUTHORS@"> -.. class:: infomark - -**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu + <!-- MISC --> + <token name="@HELP_AUTHORS_WRAPPERS@"> .. class:: infomark -**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M] +**Galaxy integration** ABiMS TEAM - SU/CNRS - Station biologique de Roscoff and Yann Guitton - LABERCA +Part of Workflow4Metabolomics.org [W4M] | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool. ---------------------------------------------------- - - </token> - - <token name="@HELP_XCMS_MANUAL@"> - -For details and explanations for all the parameters and the workflow of xcms_ package, see its manual_ and this example_ - -.. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html -.. _manual: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf -.. 
_example: https://bioconductor.org/packages/release/bioc/vignettes/xcms/inst/doc/xcms.html - </token> - <token name="@HELP_PEAKLIST@"> - -Get a Peak List ---------------- - -If 'true', the module generates two additional files corresponding to the peak list: -- the variable metadata file (corresponding to information about extracted ions such as mass or retention time) -- the data matrix (corresponding to related intensities) - -**decimal places for [mass or retention time] values in identifiers** - - | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time. - | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively. - | Theses parameters do not affect decimal places in columns other than the identifier one. - -**Reported intensity values** - - | This parameter determines which values should be reported as intensities in the dataMatrix table; it correspond to xcms 'intval' parameter: - | - into: integrated area of original (raw) peak - | - maxo: maximum intensity of original (raw) peak - | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’) - - </token> - - <xml name="citation"> - <citations> - <citation type="doi">10.1021/ac051437y</citation> + <xml name="citation_w4m"> <citation type="doi">10.1093/bioinformatics/btu813</citation> - </citations> </xml> </macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros_xcms.xml Tue Apr 03 11:38:21 2018 -0400 @@ -0,0 +1,242 @@ +<?xml version="1.0"?> +<macros> + + <token name="@WRAPPER_VERSION@">3.0.0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@WRAPPER_VERSION@">bioconductor-xcms</requirement> + <requirement type="package" version="1.1_4">r-batch</requirement> + <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement> + <requirement type="package" version="6.0">unzip</requirement> + <yield /> + </requirements> + </xml> + + <!-- FILE_LOAD for planemo test --> + <token name="@COMMAND_FILE_LOAD@"> + #if $file_load_section.file_load_conditional.file_load_select == "yes": + #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"): + #set singlefile_galaxyPath = '|'.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] ) + #set singlefile_sampleName = '|'.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] ) + + singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName' + #else + zipfile '$file_load_section.file_load_conditional.input' + #end if + #end if + </token> + + <xml name="input_file_load"> + <section name="file_load_section" title="Resubmit your raw dataset or your zip file"> + <conditional name="file_load_conditional"> + <param name="file_load_select" type="select" label="Resubmit your dataset or your zip file" help="Use only if you get a message which say that your original dataset or zip file have been deleted on the server." 
> + <option value="no" >no need</option> + <option value="yes" >yes</option> + </param> + <when value="no"> + </when> + <when value="yes"> + <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" multiple="true" label="File(s) from your history containing your chromatograms" help="Single file mode for the format: mzxml, mzml, mzdata and netcdf. Zip file mode for the format: no_unzip.zip, zip. See the help section below." /> + </when> + </conditional> + </section> + </xml> + + <xml name="test_file_load_zip"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="faahKO_reduce.zip" ftype="zip" /> + </conditional> + </section> + </xml> + + <xml name="test_file_load_zip_sacuri"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="sacuri_dir_root.zip" ftype="zip" /> + </conditional> + </section> + </xml> + + <xml name="test_file_load_single"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="wt15.CDF,ko16.CDF,ko15.CDF,wt16.CDF" ftype="netcdf" /> + </conditional> + </section> + </xml> + + <xml name="test_file_load_single_ko15"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="ko15.CDF" ftype="netcdf" /> + </conditional> + </section> + </xml> + + <!-- PEAKLIST --> + <token name="@COMMAND_PEAKLIST@"> + #if $peaklist.peaklistBool + convertRTMinute $peaklist.convertRTMinute + numDigitsMZ $peaklist.numDigitsMZ + numDigitsRT $peaklist.numDigitsRT + intval $peaklist.intval + #end if + </token> + + <xml name="input_peaklist"> + <conditional name="peaklist"> + <param name="peaklistBool" type="boolean" label="Get a Peak List" /> + 
<when value="true"> + <param name="convertRTMinute" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Convert retention time (seconds) into minutes" help="Convert the columns rtmed, rtmin and rtmax into minutes"/> + <param name="numDigitsMZ" type="integer" value="4" label="Number of decimal places for mass values reported in ions' identifiers." help="A minimum of 4 decimal places is recommended. Useful to avoid duplicates within identifiers" /> + <param name="numDigitsRT" type="integer" value="0" label="Number of decimal places for retention time values reported in ions' identifiers." help="Useful to avoid duplicates within identifiers" /> + <param name="intval" type="select" label="Reported intensity values" help="[intval] See the help section below"> + <option value="into" selected="true">into</option> + <option value="maxo">maxo</option> + <option value="intb">intb</option> + </param> + </when> + <when value="false" /> + </conditional> + </xml> + + <xml name="output_peaklist" token_function=""> + <data name="variableMetadata" format="tabular" label="${image.name[:-6]}.@FUNCTION@.variableMetadata.tsv" from_work_dir="variableMetadata.tsv" > + <filter>(peaklist['peaklistBool'])</filter> + </data> + <data name="dataMatrix" format="tabular" label="${image.name[:-6]}.@FUNCTION@.dataMatrix.tsv" from_work_dir="dataMatrix.tsv" > + <filter>(peaklist['peaklistBool'])</filter> + </data> + </xml> + + <token name="@HELP_PEAKLIST@"> + +Get a Peak List +--------------- + +If 'true', the module generates two additional files corresponding to the peak list: +- the variable metadata file (corresponding to information about extracted ions such as mass or retention time) +- the data matrix (corresponding to related intensities) + +**decimal places for [mass or retention time] values in identifiers** + + | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time. 
+ | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively. + | Theses parameters do not affect decimal places in columns other than the identifier one. + +**Reported intensity values** + + | This parameter determines which values should be reported as intensities in the dataMatrix table; it correspond to xcms 'intval' parameter: + | - into: integrated area of original (raw) peak + | - maxo: maximum intensity of original (raw) peak + | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’) + + </token> + + <token name="@HELP_PEAKLIST_OUTPUT@"> +xset.variableMetadata.tsv : tabular format + + | Table containing information about ions; can be used as one input of **Quality_Metrics** or **Generic_filter** modules. + +xset.dataMatrix.tsv : tabular format + + | Table containing ions' intensities; can be used as one input of **Quality_Metrics** or **Generic_filter** modules. + </token> + + <!-- CENTWAVE --> + <token name="@COMMAND_CENTWAVE@"> + ppm $methods.ppm + peakwidth "c($methods.peakwidth)" + + ## Advanced + snthresh $methods.CentWaveAdv.snthresh + prefilter "c($methods.CentWaveAdv.prefilter)" + mzCenterFun $methods.CentWaveAdv.mzCenterFun + integrate $methods.CentWaveAdv.integrate + mzdiff $methods.CentWaveAdv.mzdiff + fitgauss $methods.CentWaveAdv.fitgauss + noise $methods.CentWaveAdv.noise + verboseColumns $methods.CentWaveAdv.verboseColumns + </token> + + <xml name="input_centwave"> + <param argument="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="for the initial ROI definition." 
/> + <param argument="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="with the expected approximate peak width in chromatographic space."> + <expand macro="input_validator_range_float"/> + </param> + </xml> + + <xml name="input_centwaveAdv"> + <param argument="snthresh" type="integer" value="10" label="Signal to Noise ratio cutoff" /> + <param argument="prefilter" type="text" value="3,100" label="Prefilter step for for the first analysis step (ROI detection)" help="Separate by coma k, I. Mass traces are only retained if they contain at least ‘k‘ peaks with intensity ‘>= I‘."> + <expand macro="input_validator_range_integer"/> + </param> + <param argument="mzCenterFun" type="select" label="Name of the function to calculate the m/z center of the chromatographic peak" > + <option value="wMean">intensity weighted mean of the peak's m/z values</option> + <option value="mean">mean of the peak's m/z values</option> + <option value="apex">use the m/z value at the peak apex</option> + <option value="wMeanApex3">ntensity weighted mean of the m/z value at the peak apex and the m/z values left and right of it</option> + <option value="meanApex3">mean of the m/z value of the peak apex and the m/z values left and right of it</option> + </param> + <param argument="integrate" type="select" label="Integration method" > + <option value="1">peak limits are found through descent on the mexican hat filtered data (more robust, but less exact)</option> + <option value="2">peak limits based on real data (more accurate but prone to noise)</option> + </param> + <param argument="mzdiff" type="float" value="-0.001" label="Minimum difference in m/z for peaks with overlapping retention times" help="can be negative to allow overlap" /> + <param argument="fitgauss" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="fitgauss" help="whether or not a Gaussian should be fitted to each peak" /> + <param argument="noise" type="integer" value="0" 
label="Noise filter" help="allowing to set a minimum intensity required for centroids to be considered in the first analysis step (centroids with intensity lower than ‘noise’ are omitted from ROI detection)." /> + <param argument="verboseColumns" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="verbose Columns" help="whether additional peak meta data columns should be returned" /> + </xml> + + <token name="@COMMAND_CENTWAVEADVROI@"> + #if $sectionROI.roiList: + roiList '$sectionROI.roiList' + firstBaselineCheck $sectionROI.firstBaselineCheck + #if $sectionROI.roiScales != "": + roiScales "c($sectionROI.roiScales)" + #end if + #end if + </token> + + <xml name="input_centwaveAdvROI" token_optional="true"> + <param argument="roiList" type="data" format="tabular" optional="@OPTIONAL@" label="List of regions-of-interest (ROI) representing detected mass traces" help="If ROIs are submitted the first analysis step is omitted and chromatographic peak detection is performed on the submitted ROIs. Each ROI is expected to have the following elements specified: ‘scmin’ (start scan index), ‘scmax’ (end scan index), ‘mzmin’ (minimum m/z), ‘mzmax’ (maximum m/z), ‘length’ (number of scans), ‘intensity’ (summed intensity)." /> + <param argument="firstBaselineCheck" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Is continuous data within regions of interest is checked to be above the first baseline." /> + <param argument="roiScales" type="text" value="" optional="true" label="Numeric vector defining the scale for each region of interest in ‘roiList’" help="Length equal to ‘roiList’ - Should be used for the centWave-wavelets (format 0.9,1,0.2)"> + <expand macro="input_validator_range_float"/> + </param> + </xml> + + <!-- MISC --> + <token name="@HELP_AUTHORS@"> +.. class:: infomark + +**Authors** Colin A. 
Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu + +@HELP_AUTHORS_WRAPPERS@ + +--------------------------------------------------- + + </token> + + <token name="@HELP_XCMS_MANUAL@"> + +For details and explanations for all the parameters and the workflow of xcms_ package, see its manual_ and this example_ + +.. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html +.. _manual: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf +.. _example: https://bioconductor.org/packages/release/bioc/vignettes/xcms/inst/doc/xcms.html + + </token> + + <xml name="citation"> + <citations> + <citation type="doi">10.1021/ac051437y</citation> + <expand macro="citation_w4m"/> + </citations> + </xml> +</macros>
--- a/repository_dependencies.xml Thu Mar 08 05:52:52 2018 -0500 +++ b/repository_dependencies.xml Tue Apr 03 11:38:21 2018 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0"?> <repositories> <repository changeset_revision="91815b6d07fe" name="no_unzip_datatype" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - <repository changeset_revision="bff835d58914" name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="8ce71291b600" name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </repositories>
--- a/xcms_merge.r Thu Mar 08 05:52:52 2018 -0500 +++ b/xcms_merge.r Tue Apr 03 11:38:21 2018 -0400 @@ -13,58 +13,11 @@ cat("\tXSET MERGING...\n") -for(image in args$images) { - load(image) - print(args) - # Handle infiles - if (!exists("singlefile")) singlefile <- NULL - if (!exists("zipfile")) zipfile <- NULL - rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) - zipfile <- rawFilePath$zipfile - singlefile <- rawFilePath$singlefile - directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) - - if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") - cat(sampleNamesList$sampleNamesOrigin,"\n") - if (!exists("xdata_merged")) { - xdata_merged <- xdata - singlefile_merged <- singlefile - md5sumList_merged <- md5sumList - sampleNamesList_merged <- sampleNamesList - } else { - xdata_merged <- c(xdata_merged,xdata) - singlefile_merged <- c(singlefile_merged,singlefile) - md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) - sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) - sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) - } -} -rm(image) -xdata <- xdata_merged; rm(xdata_merged) -singlefile <- singlefile_merged; rm(singlefile_merged) -md5sumList <- md5sumList_merged; rm(md5sumList_merged) -sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) - -if (!is.null(args$sampleMetadata)) { - cat("\tXSET PHENODATA SETTING...\n") - sampleMetadataFile <- args$sampleMetadata - sampleMetadata <- read.table(sampleMetadataFile, h=F, sep=";", stringsAsFactors=F) - if (ncol(sampleMetadata) < 2) sampleMetadata <- read.table(sampleMetadataFile, h=F, sep="\t", stringsAsFactors=F) - if (ncol(sampleMetadata) < 2) sampleMetadata <- read.table(sampleMetadataFile, 
h=F, sep=",", stringsAsFactors=F) - if (ncol(sampleMetadata) < 2) { - error_message="Your sampleMetadata file seems not well formatted. The column separators accepted are ; , and tabulation" - print(error_message) - stop(error_message) - } - xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] - - if (any(is.na(pData(xdata)$sample_group))) { - sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] - error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) - print(error_message) - stop(error_message) - } -} +mergeXDataReturn <- mergeXData(args) +xdata <- mergeXDataReturn$xdata +singlefile <- mergeXDataReturn$singlefile +md5sumList <- mergeXDataReturn$md5sumList +sampleNamesList <- mergeXDataReturn$sampleNamesList # Create a sampleMetadata file sampleNamesList <- getSampleMetadata(xdata=xdata, sampleMetadataOutput="sampleMetadata.tsv")
--- a/xcms_merge.xml Thu Mar 08 05:52:52 2018 -0500 +++ b/xcms_merge.xml Tue Apr 03 11:38:21 2018 -0400 @@ -3,13 +3,14 @@ <macros> <import>macros.xml</import> + <import>macros_xcms.xml</import> </macros> <expand macro="requirements"/> <expand macro="stdio"/> <command><![CDATA[ - @COMMAND_XCMS_SCRIPT@/xcms_merge.r + @COMMAND_RSCRIPT@/xcms_merge.r images 'c("${"\",\"".join(map(str, $images))}")' @@ -21,14 +22,14 @@ ]]></command> <inputs> - <param name="images" type="data" format="rdata.xcms.raw,rdata" label="@INPUT_IMAGE_LABEL@" help="@INPUT_IMAGE_HELP@ from multiple findChromPeaks" multiple="true" /> + <param name="images" type="data" format="rdata.xcms.findchrompeaks,rdata" label="@INPUT_IMAGE_LABEL@" help="@INPUT_IMAGE_HELP@ from multiple findChromPeaks" multiple="true" /> <param name="sampleMetadata" label="Sample metadata file " format="tabular" type="data" optional="true" help="must contain at least one column with the sample id and one column with the sample class"/> <expand macro="input_file_load"/> </inputs> <outputs> - <data name="xsetRData" format="rdata.xcms.raw" label="xset.merged.RData" from_work_dir="merged.RData" /> + <data name="xsetRData" format="rdata.xcms.findchrompeaks" label="xset.merged.RData" from_work_dir="merged.RData" /> <data name="sampleMetadataOutput" format="tabular" label="xset.merged.sampleMetadata.tsv" from_work_dir="sampleMetadata.tsv"> <filter>not sampleMetadata</filter> </data> @@ -37,7 +38,7 @@ <tests> <!-- DISABLE FOR TRAVIS <test> - <param name="images" value="ko15-xset.RData,ko16-xset.RData,wt15-xset.RData,wt16-xset.RData" /> + <param name="images" value="ko15-xset.RData,ko16-xset.RData,wt15-xset.RData,wt16-xset.RData" ftype="rdata"/> <expand macro="test_file_load_single"/> <assert_stdout> <has_text text="object with 4 samples" /> @@ -51,7 +52,7 @@ </test> --> <test> - <param name="images" value="ko15-xset.RData,ko16-xset.RData,wt15-xset.RData,wt16-xset.RData" /> + <param name="images" 
value="ko15-xset.RData,ko16-xset.RData,wt15-xset.RData,wt16-xset.RData" ftype="rdata"/> <expand macro="test_file_load_single"/> <param name="sampleMetadata" value="sampleMetadata.tab" /> <assert_stdout> @@ -68,7 +69,7 @@ </assert_stdout> </test> <!--<test expect_failure="True"> - <param name="images" value="ko15-xset.RData,ko16-xset.RData,wt15-xset.RData,wt16-xset.RData" /> + <param name="images" value="ko15-xset.RData,ko16-xset.RData,wt15-xset.RData,wt16-xset.RData" ftype="rdata"/> <param name="sampleMetadata" value="sampleMetadata_missing.tab" /> <expand macro="test_file_load_single"/> <assert_stderr> @@ -101,26 +102,26 @@ **Upstream tools** -========================= ================= =================== ========== -Name output file format parameter -========================= ================= =================== ========== -xcms.xcmsSet xset.RData rdata.xcms.raw RData file -------------------------- ----------------- ------------------- ---------- -xcms.xcmsSet xset.RData rdata.xcms.raw RData file -------------------------- ----------------- ------------------- ---------- -xcms.xcmsSet xset.RData rdata.xcms.raw RData file -------------------------- ----------------- ------------------- ---------- -... ... ... ... -========================= ================= =================== ========== +========================= ================= ============================== ========== +Name output file format parameter +========================= ================= ============================== ========== +xcms.xcmsSet xset.RData rdata.xcms.findchrompeaks RData file +------------------------- ----------------- ------------------------------ ---------- +xcms.xcmsSet xset.RData rdata.xcms.findchrompeaks RData file +------------------------- ----------------- ------------------------------ ---------- +xcms.xcmsSet xset.RData rdata.xcms.findchrompeaks RData file +------------------------- ----------------- ------------------------------ ---------- +... ... ... ... 
+========================= ================= ============================== ========== **Downstream tools** -+---------------------------+--------------------+-----------------+ -| Name | Output file | Format | -+===========================+====================+=================+ -|xcms.group | xset.RData | rdata.xcms.raw | -+---------------------------+--------------------+-----------------+ +=========================== ==================== ============================ +Name Output file Format +=========================== ==================== ============================ +xcms.group xset.RData rdata.xcms.findchrompeaks +=========================== ==================== ============================ .. image:: xcms_merge_workflow.png @@ -128,37 +129,37 @@ Input files ----------- -+---------------------------+-----------------------+ -| Parameter : num + label | Format | -+===========================+=======================+ -| 1 : RData file | rdata.xcms.raw | -+---------------------------+-----------------------+ -| 2 : RData file | rdata.xcms.raw | -+---------------------------+-----------------------+ -| N : RData file | rdata.xcms.raw | -+---------------------------+-----------------------+ -| Optional : sampleMetadata | tsv or csv | -+---------------------------+-----------------------+ +=========================== ================================== +Parameter : num + label Format +=========================== ================================== +1 : RData file rdata.xcms.findchrompeaks +--------------------------- ---------------------------------- +2 : RData file rdata.xcms.findchrompeaks +--------------------------- ---------------------------------- +N : RData file rdata.xcms.findchrompeaks +--------------------------- ---------------------------------- +Optional : sampleMetadata tsv or csv +=========================== ================================== Example of a sampleMetadata: -+---------------------------+------------+ -|HU_neg_028 | bio | 
-+---------------------------+------------+ -|HU_neg_034 | bio | -+---------------------------+------------+ -|Blanc04 | blank | -+---------------------------+------------+ -|Blanc06 | blank | -+---------------------------+------------+ -|Blanc09 | blank | -+---------------------------+------------+ +--------------------------- ------------ +HU_neg_028 bio +--------------------------- ------------ +HU_neg_034 bio +--------------------------- ------------ +Blanc04 blank +--------------------------- ------------ +Blanc06 blank +--------------------------- ------------ +Blanc09 blank +--------------------------- ------------ ------------ Output files ------------ -xset.merged.RData: rdata.xcms.raw format +xset.merged.RData: rdata.xcms.findchrompeaks format | Rdata file that is necessary in the next step of the workflow "xcms.group".
