Mercurial > repos > lecorguille > xcms_fillpeaks
changeset 34:d8bac1291473 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a
| author | lecorguille |
|---|---|
| date | Tue, 03 Apr 2018 11:40:32 -0400 |
| parents | ea611367e1da |
| children | e67cbb96d9e9 |
| files | abims_xcms_fillPeaks.xml lib.r macros.xml macros_xcms.xml repository_dependencies.xml static/images/xcms_fillpeaks_workflow.png xcms_fillpeaks.r |
| diffstat | 7 files changed, 357 insertions(+), 236 deletions(-) [+] |
line wrap: on
line diff
--- a/abims_xcms_fillPeaks.xml Thu Mar 08 05:55:04 2018 -0500 +++ b/abims_xcms_fillPeaks.xml Tue Apr 03 11:40:32 2018 -0400 @@ -4,13 +4,14 @@ <macros> <import>macros.xml</import> + <import>macros_xcms.xml</import> </macros> <expand macro="requirements"/> <expand macro="stdio"/> <command><![CDATA[ - @COMMAND_XCMS_SCRIPT@/xcms_fillpeaks.r + @COMMAND_RSCRIPT@/xcms_fillpeaks.r image '$image' @@ -49,7 +50,7 @@ <tests> <!--<test> - <param name="image" value="xset.group.retcor.group.RData"/> + <param name="image" value="xset.group.retcor.group.RData" ftype="rdata"/> <param name="method" value="chrom"/> <param name="zip_file" value="sacuri_dir_root.zip" ftype="zip" /> <assert_stdout> @@ -63,7 +64,7 @@ </test>--> <!-- Issue with fillpeaks because it seems that there are too many NA <test> - <param name="image" value="faahKO.xset.group.retcor.group.RData"/> + <param name="image" value="faahKO.xset.group.retcor.group.RData" ftype="rdata"/> <param name="method" value="chrom"/> <conditional name="peaklist"> <param name="peaklistBool" value="true" /> @@ -84,7 +85,7 @@ <output name="dataMatrix" file="faahKO.xset.group.retcor.group.fillPeaks.dataMatrix.tsv" /> </test> <test> - <param name="image" value="faahKO-single.xset.merged.group.retcor.group.RData"/> + <param name="image" value="faahKO-single.xset.merged.group.retcor.group.RData" ftype="rdata"/> <param name="method" value="chrom"/> <conditional name="peaklist"> <param name="peaklistBool" value="true" /> @@ -105,7 +106,7 @@ <output name="dataMatrix" file="faahKO.xset.group.retcor.group.fillPeaks.dataMatrix.tsv" /> </test>--> <test> - <param name="image" value="faahKO-single.xset.merged.group2.retcor2.group2.RData"/> + <param name="image" value="faahKO-single.xset.merged.group2.retcor2.group2.RData" ftype="rdata"/> <conditional name="peaklist"> <param name="peaklistBool" value="true" /> <param name="convertRTMinute" value="false" /> @@ -163,33 +164,20 @@ **Downstream tools** -+---------------------------+------------------+-----------------------+ -| Name | Output file | Format | -+===========================+==================+=======================+ -|CAMERA.annotate | xset.retcor.RData| rdata.xcms.fillpeaks | -+---------------------------+------------------+-----------------------+ -|xcms.summary | xset.retcor.RData| rdata.xcms.fillpeaks | -+---------------------------+------------------+-----------------------+ - -The output file **xset.fillpeaks** is a RData file. You can continue your analysis using it in **CAMERA.annotate** or **xcms.summary** tool as a following step of your workflow. +=========================== ================== ======================= +Name Output file Format +=========================== ================== ======================= +CAMERA.annotate xset.retcor.RData rdata.xcms.fillpeaks +--------------------------- ------------------ ----------------------- +xcms.summary xset.retcor.RData rdata.xcms.fillpeaks +=========================== ================== ======================= **General schema of the metabolomic workflow** .. image:: xcms_fillpeaks_workflow.png - - ------------ -Input files ------------ - -+---------------------------+-----------------------+ -| Parameter : num + label | Format | -+===========================+=======================+ -| 1 : RData file | rdata.xcms.group | -+---------------------------+-----------------------+ - +--------------------------------------------------- ---------- Parameters @@ -211,43 +199,7 @@ | Rdata file that will be used in the **CAMERA.annotate** or **xcms.summary** step of the workflow. -xset.variableMetadata.tsv : tabular format - - | Table containing information about ions; can be used as one input of **Quality_Metrics** or **Generic_filter** modules. - -xset.dataMatrix.tsv : tabular format - - | Table containing ions' intensities; can be used as one input of **Quality_Metrics** or **Generic_filter** modules. - ------- - -.. class:: infomark - -The output file is a xset.fillPeaks.RData file. You can continue your analysis using it in **CAMERA.annotate** or **xcms.summary** tool. - - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files ------------ - - | RData file -> **xset.retcor.RData** - -Parameters ----------- - - | method -> **chrom** - | Get a Peak List -> **false** - - -Output files ------------- - - | **xset.fillPeaks.RData: RData file** +@HELP_PEAKLIST_OUTPUT@ ---------------------------------------------------
--- a/lib.r Thu Mar 08 05:55:04 2018 -0500 +++ b/lib.r Tue Apr 03 11:40:32 2018 -0400 @@ -28,6 +28,58 @@ } #@author G. Le Corguille +# This function merge several xdata into one. +mergeXData <- function(args) { + for(image in args$images) { + load(image) + # Handle infiles + if (!exists("singlefile")) singlefile <- NULL + if (!exists("zipfile")) zipfile <- NULL + rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) + zipfile <- rawFilePath$zipfile + singlefile <- rawFilePath$singlefile + retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) + if (exists("raw_data")) xdata <- raw_data + if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") + cat(sampleNamesList$sampleNamesOrigin,"\n") + if (!exists("xdata_merged")) { + xdata_merged <- xdata + singlefile_merged <- singlefile + md5sumList_merged <- md5sumList + sampleNamesList_merged <- sampleNamesList + } else { + if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) + else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- .concatenate_OnDiskMSnExp(xdata_merged,xdata) + else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") + singlefile_merged <- c(singlefile_merged,singlefile) + md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) + sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) + sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) + } + } + rm(image) + xdata <- xdata_merged; rm(xdata_merged) + singlefile <- singlefile_merged; rm(singlefile_merged) + md5sumList <- md5sumList_merged; rm(md5sumList_merged) + sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) + + if (!is.null(args$sampleMetadata)) { + cat("\tXSET PHENODATA SETTING...\n") + sampleMetadataFile <- args$sampleMetadata + sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) + xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] + + if (any(is.na(pData(xdata)$sample_group))) { + sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] + error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) + print(error_message) + stop(error_message) + } + } + return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList)) +} + +#@author G. Le Corguille # This function convert if it is required the Retention Time in minutes RTSecondToMinute <- function(variableMetadata, convertRTMinute) { if (convertRTMinute){ @@ -79,9 +131,11 @@ # Color by group group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] - names(group_colors) <- unique(xdata$sample_group) - plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) - legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + if (length(group_colors) > 1) { + names(group_colors) <- unique(xdata$sample_group) + plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) + legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + } # Color by sample plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) @@ -109,6 +163,19 @@ } #@author G. Le Corguille +# It allow different of field separators +getDataFrameFromFile <- function(filename, header=T) { + myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) + if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) + if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) + if (ncol(myDataFrame) < 2) { + error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" + print(error_message) + stop(error_message) + } + return(myDataFrame) +} + getPlotChromatogram <- function(xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { chrom <- chromatogram(xdata, aggregationFun = aggregationFun) @@ -127,9 +194,11 @@ # Color by group group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] - names(group_colors) <- unique(xdata$sample_group) - plot(chrom, col = group_colors[chrom$sample_group], main=main) - legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + if (length(group_colors) > 1) { + names(group_colors) <- unique(xdata$sample_group) + plot(chrom, col = group_colors[chrom$sample_group], main=main) + legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) + } # Color by sample plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) @@ -345,10 +414,7 @@ singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath } } - for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { - args[[argument]] <- NULL - } - return(list(zipfile=zipfile, singlefile=singlefile, args=args)) + return(list(zipfile=zipfile, singlefile=singlefile)) } @@ -559,3 +625,9 @@ c.XCMSnExp <- function(...) { .concatenate_XCMSnExp(...) } + +#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 +# https://github.com/sneumann/xcms/issues/247 +c.MSnbase <- function(...) { + .concatenate_OnDiskMSnExp(...) +}
--- a/macros.xml Thu Mar 08 05:55:04 2018 -0500 +++ b/macros.xml Tue Apr 03 11:40:32 2018 -0400 @@ -1,22 +1,13 @@ <?xml version="1.0"?> <macros> - <token name="@WRAPPER_VERSION@">3.0.0</token> - <xml name="requirements"> - <requirements> - <requirement type="package" version="@WRAPPER_VERSION@">bioconductor-xcms</requirement> - <requirement type="package" version="1.1_4">r-batch</requirement> - <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement> - <requirement type="package" version="6.0">unzip</requirement> - <yield /> - </requirements> - </xml> <xml name="stdio"> <stdio> <exit_code range="1" level="fatal" /> </stdio> </xml> - <token name="@COMMAND_XCMS_SCRIPT@">LC_ALL=C Rscript $__tool_directory__/</token> + <!-- COMMAND --> + <token name="@COMMAND_RSCRIPT@">LC_ALL=C Rscript $__tool_directory__/</token> <token name="@COMMAND_LOG_EXIT@"> ; @@ -25,6 +16,7 @@ sh -c "exit \$return" </token> + <!-- INPUT_VALIDATORS --> <xml name="input_validator_range_integer"> <validator type="regex" message="The format is 'min,max'" >[0-9]+ *, *[0-9]+</validator> </xml> @@ -37,155 +29,24 @@ <validator type="regex" message="The format is '1,2,4,6'" >[0-9, ]+</validator> </xml> + <token name="@INPUT_IMAGE_LABEL@">RData file</token> <token name="@INPUT_IMAGE_HELP@">It contain a xcms3::XCMSnExp object (named xdata)</token> - <!-- zipfile load for planemo test --> - <token name="@COMMAND_FILE_LOAD@"> - #if $file_load_section.file_load_conditional.file_load_select == "yes": - #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"): - #set singlefile_galaxyPath = '|'.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] ) - #set singlefile_sampleName = '|'.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] ) - - singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName' - #else - zipfile '$file_load_section.file_load_conditional.input' - #end if - #end if - </token> - - <xml name="input_file_load"> - <section name="file_load_section" title="Resubmit your raw dataset or your zip file"> - <conditional name="file_load_conditional"> - <param name="file_load_select" type="select" label="Resubmit your dataset or your zip file" help="Use only if you get a message which say that your original dataset or zip file have been deleted on the server." > - <option value="no" >no need</option> - <option value="yes" >yes</option> - </param> - <when value="no"> - </when> - <when value="yes"> - <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" multiple="true" label="File(s) from your history containing your chromatograms" help="Single file mode for the format: mzxml, mzml, mzdata and netcdf. Zip file mode for the format: no_unzip.zip, zip. See the help section below." /> - </when> - </conditional> - </section> - </xml> - - <xml name="test_file_load_zip"> - <section name="file_load_section"> - <conditional name="file_load_conditional"> - <param name="file_load_select" value="yes" /> - <param name="input" value="faahKO_reduce.zip" ftype="zip" /> - </conditional> - </section> - </xml> - - <xml name="test_file_load_zip_sacuri"> - <section name="file_load_section"> - <conditional name="file_load_conditional"> - <param name="file_load_select" value="yes" /> - <param name="input" value="sacuri_dir_root.zip" ftype="zip" /> - </conditional> - </section> - </xml> - - <xml name="test_file_load_single"> - <section name="file_load_section"> - <conditional name="file_load_conditional"> - <param name="file_load_select" value="yes" /> - <param name="input" value="wt15.CDF,ko16.CDF,ko15.CDF,wt16.CDF" ftype="netcdf" /> - </conditional> - </section> - </xml> - - <token name="@COMMAND_PEAKLIST@"> - #if $peaklist.peaklistBool - convertRTMinute $peaklist.convertRTMinute - numDigitsMZ $peaklist.numDigitsMZ - numDigitsRT $peaklist.numDigitsRT - intval $peaklist.intval - #end if - </token> - - <xml name="input_peaklist"> - <conditional name="peaklist"> - <param name="peaklistBool" type="boolean" label="Get a Peak List" /> - <when value="true"> - <param name="convertRTMinute" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Convert retention time (seconds) into minutes" help="Convert the columns rtmed, rtmin and rtmax into minutes"/> - <param name="numDigitsMZ" type="integer" value="4" label="Number of decimal places for mass values reported in ions' identifiers." help="A minimum of 4 decimal places is recommended. Useful to avoid duplicates within identifiers" /> - <param name="numDigitsRT" type="integer" value="0" label="Number of decimal places for retention time values reported in ions' identifiers." help="Useful to avoid duplicates within identifiers" /> - <param name="intval" type="select" label="Reported intensity values" help="[intval] See the help section below"> - <option value="into" selected="true">into</option> - <option value="maxo">maxo</option> - <option value="intb">intb</option> - </param> - </when> - <when value="false" /> - </conditional> - </xml> - - <xml name="output_peaklist" token_function=""> - <data name="variableMetadata" format="tabular" label="${image.name[:-6]}.@FUNCTION@.variableMetadata.tsv" from_work_dir="variableMetadata.tsv" > - <filter>(peaklist['peaklistBool'])</filter> - </data> - <data name="dataMatrix" format="tabular" label="${image.name[:-6]}.@FUNCTION@.dataMatrix.tsv" from_work_dir="dataMatrix.tsv" > - <filter>(peaklist['peaklistBool'])</filter> - </data> - </xml> - - <token name="@HELP_AUTHORS@"> -.. class:: infomark - -**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu + <!-- MISC --> + <token name="@HELP_AUTHORS_WRAPPERS@"> .. class:: infomark -**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M] +**Galaxy integration** ABiMS TEAM - SU/CNRS - Station biologique de Roscoff and Yann Guitton - LABERCA +Part of Workflow4Metabolomics.org [W4M] | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool. ---------------------------------------------------- - - </token> - - <token name="@HELP_XCMS_MANUAL@"> - -For details and explanations for all the parameters and the workflow of xcms_ package, see its manual_ and this example_ - -.. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html -.. _manual: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf -.. _example: https://bioconductor.org/packages/release/bioc/vignettes/xcms/inst/doc/xcms.html - </token> - <token name="@HELP_PEAKLIST@"> - -Get a Peak List ---------------- - -If 'true', the module generates two additional files corresponding to the peak list: -- the variable metadata file (corresponding to information about extracted ions such as mass or retention time) -- the data matrix (corresponding to related intensities) - -**decimal places for [mass or retention time] values in identifiers** - - | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time. - | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively. - | Theses parameters do not affect decimal places in columns other than the identifier one. - -**Reported intensity values** - - | This parameter determines which values should be reported as intensities in the dataMatrix table; it correspond to xcms 'intval' parameter: - | - into: integrated area of original (raw) peak - | - maxo: maximum intensity of original (raw) peak - | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’) - - </token> - - <xml name="citation"> - <citations> - <citation type="doi">10.1021/ac051437y</citation> + <xml name="citation_w4m"> <citation type="doi">10.1093/bioinformatics/btu813</citation> - </citations> </xml> </macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros_xcms.xml Tue Apr 03 11:40:32 2018 -0400 @@ -0,0 +1,242 @@ +<?xml version="1.0"?> +<macros> + + <token name="@WRAPPER_VERSION@">3.0.0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@WRAPPER_VERSION@">bioconductor-xcms</requirement> + <requirement type="package" version="1.1_4">r-batch</requirement> + <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement> + <requirement type="package" version="6.0">unzip</requirement> + <yield /> + </requirements> + </xml> + + <!-- FILE_LOAD for planemo test --> + <token name="@COMMAND_FILE_LOAD@"> + #if $file_load_section.file_load_conditional.file_load_select == "yes": + #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"): + #set singlefile_galaxyPath = '|'.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] ) + #set singlefile_sampleName = '|'.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] ) + + singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName' + #else + zipfile '$file_load_section.file_load_conditional.input' + #end if + #end if + </token> + + <xml name="input_file_load"> + <section name="file_load_section" title="Resubmit your raw dataset or your zip file"> + <conditional name="file_load_conditional"> + <param name="file_load_select" type="select" label="Resubmit your dataset or your zip file" help="Use only if you get a message which say that your original dataset or zip file have been deleted on the server." > + <option value="no" >no need</option> + <option value="yes" >yes</option> + </param> + <when value="no"> + </when> + <when value="yes"> + <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" multiple="true" label="File(s) from your history containing your chromatograms" help="Single file mode for the format: mzxml, mzml, mzdata and netcdf. Zip file mode for the format: no_unzip.zip, zip. See the help section below." /> + </when> + </conditional> + </section> + </xml> + + <xml name="test_file_load_zip"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="faahKO_reduce.zip" ftype="zip" /> + </conditional> + </section> + </xml> + + <xml name="test_file_load_zip_sacuri"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="sacuri_dir_root.zip" ftype="zip" /> + </conditional> + </section> + </xml> + + <xml name="test_file_load_single"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="wt15.CDF,ko16.CDF,ko15.CDF,wt16.CDF" ftype="netcdf" /> + </conditional> + </section> + </xml> + + <xml name="test_file_load_single_ko15"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="ko15.CDF" ftype="netcdf" /> + </conditional> + </section> + </xml> + + <!-- PEAKLIST --> + <token name="@COMMAND_PEAKLIST@"> + #if $peaklist.peaklistBool + convertRTMinute $peaklist.convertRTMinute + numDigitsMZ $peaklist.numDigitsMZ + numDigitsRT $peaklist.numDigitsRT + intval $peaklist.intval + #end if + </token> + + <xml name="input_peaklist"> + <conditional name="peaklist"> + <param name="peaklistBool" type="boolean" label="Get a Peak List" /> + <when value="true"> + <param name="convertRTMinute" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Convert retention time (seconds) into minutes" help="Convert the columns rtmed, rtmin and rtmax into minutes"/> + <param name="numDigitsMZ" type="integer" value="4" label="Number of decimal places for mass values reported in ions' identifiers." help="A minimum of 4 decimal places is recommended. Useful to avoid duplicates within identifiers" /> + <param name="numDigitsRT" type="integer" value="0" label="Number of decimal places for retention time values reported in ions' identifiers." help="Useful to avoid duplicates within identifiers" /> + <param name="intval" type="select" label="Reported intensity values" help="[intval] See the help section below"> + <option value="into" selected="true">into</option> + <option value="maxo">maxo</option> + <option value="intb">intb</option> + </param> + </when> + <when value="false" /> + </conditional> + </xml> + + <xml name="output_peaklist" token_function=""> + <data name="variableMetadata" format="tabular" label="${image.name[:-6]}.@FUNCTION@.variableMetadata.tsv" from_work_dir="variableMetadata.tsv" > + <filter>(peaklist['peaklistBool'])</filter> + </data> + <data name="dataMatrix" format="tabular" label="${image.name[:-6]}.@FUNCTION@.dataMatrix.tsv" from_work_dir="dataMatrix.tsv" > + <filter>(peaklist['peaklistBool'])</filter> + </data> + </xml> + + <token name="@HELP_PEAKLIST@"> + +Get a Peak List +--------------- + +If 'true', the module generates two additional files corresponding to the peak list: +- the variable metadata file (corresponding to information about extracted ions such as mass or retention time) +- the data matrix (corresponding to related intensities) + +**decimal places for [mass or retention time] values in identifiers** + + | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time. + | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively. + | Theses parameters do not affect decimal places in columns other than the identifier one. + +**Reported intensity values** + + | This parameter determines which values should be reported as intensities in the dataMatrix table; it correspond to xcms 'intval' parameter: + | - into: integrated area of original (raw) peak + | - maxo: maximum intensity of original (raw) peak + | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’) + + </token> + + <token name="@HELP_PEAKLIST_OUTPUT@"> +xset.variableMetadata.tsv : tabular format + + | Table containing information about ions; can be used as one input of **Quality_Metrics** or **Generic_filter** modules. + +xset.dataMatrix.tsv : tabular format + + | Table containing ions' intensities; can be used as one input of **Quality_Metrics** or **Generic_filter** modules. + </token> + + <!-- CENTWAVE --> + <token name="@COMMAND_CENTWAVE@"> + ppm $methods.ppm + peakwidth "c($methods.peakwidth)" + + ## Advanced + snthresh $methods.CentWaveAdv.snthresh + prefilter "c($methods.CentWaveAdv.prefilter)" + mzCenterFun $methods.CentWaveAdv.mzCenterFun + integrate $methods.CentWaveAdv.integrate + mzdiff $methods.CentWaveAdv.mzdiff + fitgauss $methods.CentWaveAdv.fitgauss + noise $methods.CentWaveAdv.noise + verboseColumns $methods.CentWaveAdv.verboseColumns + </token> + + <xml name="input_centwave"> + <param argument="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="for the initial ROI definition." /> + <param argument="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="with the expected approximate peak width in chromatographic space."> + <expand macro="input_validator_range_float"/> + </param> + </xml> + + <xml name="input_centwaveAdv"> + <param argument="snthresh" type="integer" value="10" label="Signal to Noise ratio cutoff" /> + <param argument="prefilter" type="text" value="3,100" label="Prefilter step for for the first analysis step (ROI detection)" help="Separate by coma k, I. Mass traces are only retained if they contain at least ‘k‘ peaks with intensity ‘>= I‘."> + <expand macro="input_validator_range_integer"/> + </param> + <param argument="mzCenterFun" type="select" label="Name of the function to calculate the m/z center of the chromatographic peak" > + <option value="wMean">intensity weighted mean of the peak's m/z values</option> + <option value="mean">mean of the peak's m/z values</option> + <option value="apex">use the m/z value at the peak apex</option> + <option value="wMeanApex3">ntensity weighted mean of the m/z value at the peak apex and the m/z values left and right of it</option> + <option value="meanApex3">mean of the m/z value of the peak apex and the m/z values left and right of it</option> + </param> + <param argument="integrate" type="select" label="Integration method" > + <option value="1">peak limits are found through descent on the mexican hat filtered data (more robust, but less exact)</option> + <option value="2">peak limits based on real data (more accurate but prone to noise)</option> + </param> + <param argument="mzdiff" type="float" value="-0.001" label="Minimum difference in m/z for peaks with overlapping retention times" help="can be negative to allow overlap" /> + <param argument="fitgauss" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="fitgauss" help="whether or not a Gaussian should be fitted to each peak" /> + <param argument="noise" type="integer" value="0" label="Noise filter" help="allowing to set a minimum intensity required for centroids to be considered in the first analysis step (centroids with intensity lower than ‘noise’ are omitted from ROI detection)." /> + <param argument="verboseColumns" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="verbose Columns" help="whether additional peak meta data columns should be returned" /> + </xml> + + <token name="@COMMAND_CENTWAVEADVROI@"> + #if $sectionROI.roiList: + roiList '$sectionROI.roiList' + firstBaselineCheck $sectionROI.firstBaselineCheck + #if $sectionROI.roiScales != "": + roiScales "c($sectionROI.roiScales)" + #end if + #end if + </token> + + <xml name="input_centwaveAdvROI" token_optional="true"> + <param argument="roiList" type="data" format="tabular" optional="@OPTIONAL@" label="List of regions-of-interest (ROI) representing detected mass traces" help="If ROIs are submitted the first analysis step is omitted and chromatographic peak detection is performed on the submitted ROIs. Each ROI is expected to have the following elements specified: ‘scmin’ (start scan index), ‘scmax’ (end scan index), ‘mzmin’ (minimum m/z), ‘mzmax’ (maximum m/z), ‘length’ (number of scans), ‘intensity’ (summed intensity)." /> + <param argument="firstBaselineCheck" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Is continuous data within regions of interest is checked to be above the first baseline." /> + <param argument="roiScales" type="text" value="" optional="true" label="Numeric vector defining the scale for each region of interest in ‘roiList’" help="Length equal to ‘roiList’ - Should be used for the centWave-wavelets (format 0.9,1,0.2)"> + <expand macro="input_validator_range_float"/> + </param> + </xml> + + <!-- MISC --> + <token name="@HELP_AUTHORS@"> +.. class:: infomark + +**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu + +@HELP_AUTHORS_WRAPPERS@ + +--------------------------------------------------- + + </token> + + <token name="@HELP_XCMS_MANUAL@"> + +For details and explanations for all the parameters and the workflow of xcms_ package, see its manual_ and this example_ + +.. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html +.. _manual: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf +.. _example: https://bioconductor.org/packages/release/bioc/vignettes/xcms/inst/doc/xcms.html + + </token> + + <xml name="citation"> + <citations> + <citation type="doi">10.1021/ac051437y</citation> + <expand macro="citation_w4m"/> + </citations> + </xml> +</macros>
--- a/repository_dependencies.xml Thu Mar 08 05:55:04 2018 -0500 +++ b/repository_dependencies.xml Tue Apr 03 11:40:32 2018 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0"?> <repositories> <repository changeset_revision="91815b6d07fe" name="no_unzip_datatype" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - <repository changeset_revision="bff835d58914" name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="8ce71291b600" name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </repositories>
--- a/xcms_fillpeaks.r Thu Mar 08 05:55:04 2018 -0500 +++ b/xcms_fillpeaks.r Tue Apr 03 11:40:32 2018 -0400 @@ -31,18 +31,10 @@ #saving the specific parameters method <- "FillChromPeaks" -if (!is.null(args$convertRTMinute)){ - convertRTMinute <- args$convertRTMinute; args$convertRTMinute <- NULL -} -if (!is.null(args$numDigitsMZ)){ - numDigitsMZ <- args$numDigitsMZ; args$numDigitsMZ <- NULL -} -if (!is.null(args$numDigitsRT)){ - numDigitsRT <- args$numDigitsRT; args$numDigitsRT <- NULL -} -if (!is.null(args$intval)){ - intval <- args$intval; args$intval <- NULL -} +if (!is.null(args$convertRTMinute)) convertRTMinute <- args$convertRTMinute +if (!is.null(args$numDigitsMZ)) numDigitsMZ <- args$numDigitsMZ +if (!is.null(args$numDigitsRT)) numDigitsRT <- args$numDigitsRT +if (!is.null(args$intval)) intval <- args$intval cat("\n\n") @@ -51,7 +43,7 @@ cat("\tINFILE PROCESSING INFO\n") #image is an .RData file necessary to use xset variable given by previous tools -load(args$image); args$image=NULL +load(args$image) if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") #Verification of a group step before doing the fillpeaks job. @@ -63,7 +55,6 @@ rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) zipfile <- rawFilePath$zipfile singlefile <- rawFilePath$singlefile -args <- rawFilePath$args directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) # Check some character issues @@ -82,6 +73,9 @@ cat("\t\tCOMPUTE\n") cat("\t\t\tFilling missing peaks using default settings\n") +# clear the arguement list to remove unexpected key/value as singlefile_galaxyPath or method ... +args <- args[names(args) %in% slotNames(do.call(paste0(method,"Param"), list()))] + fillChromPeaksParam <- do.call(paste0(method,"Param"), args) print(fillChromPeaksParam) xdata <- fillChromPeaks(xdata, param=fillChromPeaksParam)
