Mercurial > repos > lecorguille > xcms_group
changeset 27:36b470f3ebcf draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit bff1445c9b00ccdbe05ee3dc6ed24221033384b9
author | lecorguille |
---|---|
date | Tue, 24 Oct 2017 11:47:07 -0400 |
parents | e121f6d76dfb |
children | a18fc7554c6d |
files | abims_xcms_group.xml lib.r macros.xml xcms.r |
diffstat | 4 files changed, 169 insertions(+), 62 deletions(-) [+] |
line wrap: on
line diff
--- a/abims_xcms_group.xml Fri May 19 09:35:20 2017 -0400 +++ b/abims_xcms_group.xml Tue Oct 24 11:47:07 2017 -0400 @@ -15,9 +15,8 @@ image '$image' xsetRdataOutput '$xsetRData' - rplotspdf '$rplotsPdf' - method $methods.method + method $methods.method #if $methods.method == "density": ## minsamp $methods.minsamp minfrac $methods.minfrac @@ -53,7 +52,7 @@ <option value="nearest" >nearest</option> </param> <when value="density"> - <param name="bw" type="integer" value="30" label="Bandwidth" help="[bw] bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel to apply to the peak density chromatogram" /> + <param name="bw" type="float" value="30" label="Bandwidth" help="[bw] bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel to apply to the peak density chromatogram" /> <param name="minfrac" type="float" value="0.5" label="Minimum fraction of samples necessary" help="[minfrac] in at least one of the sample groups for it to be a valid group" /> <param name="mzwid" type="float" value="0.25" label="Width of overlapping m/z slices" help="[mzwid] to use for creating peak density chromatograms and grouping peaks across samples " /> <!-- @@ -98,7 +97,7 @@ <outputs> <data name="xsetRData" format="rdata.xcms.group" label="${image.name[:-6]}.group.RData"/> - <data name="rplotsPdf" format="pdf" label="${image.name[:-6]}.group.Rplots.pdf"/> + <data name="rplotsPdf" format="pdf" from_work_dir="Rplots.pdf" label="${image.name[:-6]}.group.Rplots.pdf"/> <expand macro="output_peaklist" function="group"/> <data name="log" format="txt" label="xset.log.txt" hidden="true" /> </outputs> @@ -282,29 +281,26 @@ Xcms.Group ========== ------------ Description ----------- After peak identification with xcmsSet, this tool groups the peaks which represent the same analyte across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time. 
Allows rejection of features, which are only partially detected within the replicates of a sample class. - ------------------ Workflow position ----------------- **Upstream tools** -========================= ================= =================== ========== -Name output file format parameter -========================= ================= =================== ========== -xcms.xcmsSet xset.RData rdata.xcms.raw RData file -------------------------- ----------------- ------------------- ---------- -xcms.xcmsSet Merger xset.RData rdata.xcms.raw RData file -------------------------- ----------------- ------------------- ---------- -xcms.retcor xset.RData rdata.xcms.retcor RData file -========================= ================= =================== ========== ++------------------------+-----------------+--------------------+------------+ +| Name | output file | format | parameter | ++========================+=================+====================+============+ +| xcms.xcmsSet | xset.RData | rdata.xcms.raw | RData file | ++------------------------+-----------------+--------------------+------------+ +| xcms.xcmsSet Merger | xset.RData | rdata.xcms.raw | RData file | ++------------------------+-----------------+--------------------+------------+ +| xcms.retcor | xset.RData | rdata.xcms.retcor | RData file | ++------------------------+-----------------+--------------------+------------+ **Downstream tools** @@ -317,23 +313,23 @@ |xcms.fillPeaks | xset.RData | rdata.xcms.group | +---------------------------+--------------------------------------+ -The output file is an xcmsSet.RData file. You can continue your analysis using it in **xcms.retcor** tool as an next step and then **xcms.fillPeaks**. - **General schema of the metabolomic workflow** .. 
image:: xcms_group_workflow.png +--------------------------------------------------- + ----------- Input files ----------- +---------------------------+-----------------------+ -| Parameter : num + label | Format | +| Parameter | Format | +===========================+=======================+ -| Or : RData file | rdata.xcms.raw | +| OR : RData file | rdata.xcms.raw | +---------------------------+-----------------------+ -| Or : RData file | rdata.xcms.retcor | +| OR : RData file | rdata.xcms.retcor | +---------------------------+-----------------------+ @@ -357,22 +353,19 @@ | Groups peaks together across samples by creating a master peak list and assigning corresponding peaks from all samples. It is inspired by the alignment algorithm of mzMine. +@HELP_GET_PEAK_LIST@ + ------------ Output files ------------ -xset.group.Rplots.pdf - xset.group.RData: rdata.xcms.group format | Rdata file that will be necessary in the third and fourth step of the workflow (xcms.retcor and xcms.fillpeaks). - ------- +xset.group.Rplots.pdf -.. class:: infomark - -The output file is an xset.group.RData file. You can continue your analysis using it in **xcms.retcor** tool. +@HELP_GET_PEAK_LIST_OUTPUTS@ --------------------------------------------------- @@ -390,23 +383,12 @@ Parameters ---------- - | Method -> **density** - | bw -> **5** - | minfrac -> **0.3** - | mzwid -> **0.01** - | Advanced options: **show** - | max -> **50** - - -Output files ------------- - - | **1) xset.RData: RData file** - - | **2) Example of an xset.group.Rplots pdf file** - -.. image:: xcms_group.png - :width: 700 + | **Method**: density + | **bw**: 10 + | **minfrac**: 0.5 + | **mzwid**: 0.05 + | **Advanced options**: show + | **max**: 10 ---------------------------------------------------
--- a/lib.r Fri May 19 09:35:20 2017 -0400 +++ b/lib.r Tue Oct 24 11:47:07 2017 -0400 @@ -51,6 +51,42 @@ write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) } +#@author G. Le Corguille +exportTicBpcTabular <- function(dataset, filenameBase, ticORbpc, rt='raw') { + + rawORcorrected = '' + title = '' + if (rt=='corrected') { + rawORcorrected = '_corrected' + title = ' corrected by retcor' + } + + if (ticORbpc == "TIC") { + section_name = paste0('TIC',rawORcorrected) + title = paste0('Total Ion Current (TIC) chromatogram',title) + description = 'Sum of intensity (Y) of all ions detected at each retention time(X)' + } else if (ticORbpc == "BPC") { + section_name = paste0('BPC',rawORcorrected) + title = paste0('Base Peak Chromatogram (BPC)',title) + description = 'Sum of intensity (Y) of the most intense peaks at each retention time(X)' + } + + filename=paste0(basename(file_path_sans_ext(filenameBase)),"-",ticORbpc,rawORcorrected,"_mqc.out") + + # Headers for MultiQC + cat("# file_format: 'tsv'\n", sep="", file = filename) + cat("# section_name: '",section_name,"'\n", sep="", file = filename, append = T) + cat("# title: '",title,"'\n", sep="", file = filename, append = T) + cat("# description: '",description,"'\n", sep="", file = filename, append = T) + cat("# plot_type: 'linegraph'\n", sep="", file = filename, append = T) + cat("# pconfig:\n", sep="", file = filename, append = T) + cat("# id: '",ticORbpc,rawORcorrected,"_lineplot'\n", sep="", file = filename, append = T) + cat("# ylab: 'Base Peak Intensity'\n", sep="", file = filename, append = T) + cat("# xlab: 'Retention Time'\n", sep="", file = filename, append = T) + cat("Intensity\tRT\n", file = filename, append = T) + write.table(dataset, filename ,row.names = F, col.names = F, sep = "\t", append = T, quote = F) +} + #@author Y. Guitton getBPC <- function(file,rtcor=NULL, ...) 
{ object <- xcmsRaw(file) @@ -94,6 +130,9 @@ rtcor <- NULL TIC[[j]] <- getBPC(files[j],rtcor=rtcor) + + exportTicBpcTabular(TIC[[j]], files[j], "BPC", rt=rt) + # TIC[[j]][,1]<-rtcor } @@ -221,6 +260,8 @@ rtcor <- xcmsSet@rt$corrected[[i]] else rtcor <- NULL TIC[[i]] <- getTIC(files[i],rtcor=rtcor) + + exportTicBpcTabular(TIC[[i]], files[i], "TIC", rt=rt) } pdf(pdfname,w=16,h=10) @@ -476,7 +517,7 @@ # This function get the raw file path from the arguments -getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) { +getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) { if (!is.null(listArguments[["zipfile"]])) zipfile = listArguments[["zipfile"]] if (!is.null(listArguments[["zipfilePositive"]])) zipfile = listArguments[["zipfilePositive"]] if (!is.null(listArguments[["zipfileNegative"]])) zipfile = listArguments[["zipfileNegative"]]
--- a/macros.xml Fri May 19 09:35:20 2017 -0400 +++ b/macros.xml Tue Oct 24 11:47:07 2017 -0400 @@ -12,6 +12,7 @@ <requirement type="package" version="1.46.0">bioconductor-xcms</requirement> </requirements> </xml> + <xml name="stdio"> <stdio> <exit_code range="1" level="fatal" /> @@ -116,14 +117,55 @@ </data> </xml> + <xml name="input_tic_bpc_pdf"> + <param name="tic_bpc_pdf" type="boolean" checked="False" label="Do you want TIC and BPC in PDF Format" help="Whatever, you will be able to use MultiQC tools on the tabular files" /> + </xml> + + <xml name="test_retcor_param"> + <param name="methods|method" value="peakgroups"/> + <param name="methods|smooth" value="loess"/> + <param name="methods|extra" value="1"/> + <param name="methods|missing" value="1"/> + <param name="methods|options|option" value="show"/> + <param name="methods|options|span" value="0.2"/> + <param name="methods|options|family" value="gaussian"/> + <param name="methods|options|plottype" value="deviation"/> + </xml> + + <xml name="test_retcor_output" token_raworcorrected=""> + <output name="log"> + <assert_contents> + <has_text text="object with 4 samples" /> + <has_text text="Time range: 2507.7-4481.7 seconds (41.8-74.7 minutes)" /> + <has_text text="Mass range: 200.1-600 m/z" /> + <has_text text="Peaks: 9251 (about 2313 per sample)" /> + <has_text text="Peak Groups: 0" /> + <has_text text="Sample classes: KO, WT" /> + </assert_contents> + </output> + <output_collection name="ticsCorrectedTabCollection" type="list"> + <element name="ko15" value="ko15-TIC@RAWORCORRECTED@_mqc.out" /> + <element name="ko16" value="ko16-TIC@RAWORCORRECTED@_mqc.out" /> + <element name="wt15" value="wt15-TIC@RAWORCORRECTED@_mqc.out" /> + <element name="wt16" value="wt16-TIC@RAWORCORRECTED@_mqc.out" /> + </output_collection> + <output_collection name="bpcsCorrectedTabCollection" type="list"> + <element name="ko15" value="ko15-BPC@RAWORCORRECTED@_mqc.out" /> + <element name="ko16" value="ko16-BPC@RAWORCORRECTED@_mqc.out" 
/> + <element name="wt15" value="wt15-BPC@RAWORCORRECTED@_mqc.out" /> + <element name="wt16" value="wt16-BPC@RAWORCORRECTED@_mqc.out" /> + </output_collection> + </xml> + <token name="@HELP_AUTHORS@"> + .. class:: infomark **Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu .. class:: infomark -**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M] +**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station Biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M] | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool. @@ -131,6 +173,57 @@ </token> + <token name="@HELP_BCP_TIC@"> + +BPCs and TICs: tabular + + | "Base Peak Chromatograms" and "Total Ion Chromatograms" graphs + | Import BPC and TIC from xcmsSet and retcor [at once] within MultiQC_ (in or outside Galaxy) to display and navigate in the graphs. + | - In MultiQC: as tool, use the Custom Content + +.. _MultiQC: http://multiqc.info/ + +BPCs and TICs: pdf [if using zip] + + | "Base Peak Chromatograms" and "Total Ion Chromatograms" graphs in pdf format. + + </token> + + <token name="@HELP_GET_PEAK_LIST@"> + +Get a Peak List +--------------- + +If 'true', the module generates two additional files corresponding to the peak list: +- the variable metadata file (corresponding to information about extracted ions such as mass or retention time) +- the data matrix (corresponding to related intensities) + +**decimal places for [mass or retention time] values in identifiers** + + | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time. 
+ | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively. + | These parameters do not affect decimal places in columns other than the identifier one. + +**Reported intensity values** + + | This parameter determines which values should be reported as intensities in the dataMatrix table; it corresponds to the xcms 'intval' parameter: + | - into: integrated area of original (raw) peak + | - maxo: maximum intensity of original (raw) peak + | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’) + + </token> + + <token name="@HELP_GET_PEAK_LIST_OUTPUTS@"> + +xset.variableMetadata.tsv : tabular format [If Get a Peak List == Yes] + + | Table containing information about ions - Can be used in **Normalisation/Generic_filter** and **Statistics** tools. + +xset.dataMatrix.tsv : tabular format + + | Table containing ions' intensities - Can be used in **Normalisation/Generic_filter** and **Statistics** tools. + + </token> <xml name="citation"> <citations>
--- a/xcms.r Fri May 19 09:35:20 2017 -0400 +++ b/xcms.r Tue Oct 24 11:47:07 2017 -0400 @@ -13,7 +13,7 @@ # ----- PACKAGE ----- cat("\tPACKAGE INFO\n") #pkgs=c("xcms","batch") -pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch") +pkgs=c("tools","parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch") for(pkg in pkgs) { suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") @@ -65,10 +65,6 @@ } #saving the specific parameters -rplotspdf = "Rplots.pdf" -if (!is.null(listArguments[["rplotspdf"]])){ - rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL -} sampleMetadataOutput = "sampleMetadata.tsv" if (!is.null(listArguments[["sampleMetadataOutput"]])){ sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL @@ -94,11 +90,6 @@ intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL } -if (thefunction %in% c("xcmsSet","retcor")) { - ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL - bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL -} - if (thefunction %in% c("xcmsSet","retcor","fillPeaks")) { if (!exists("singlefile")) singlefile=NULL @@ -147,7 +138,7 @@ #change the default display settings #dev.new(file="Rplots.pdf", width=16, height=12) -pdf(file=rplotspdf, width=16, height=12) +pdf(file="Rplots.pdf", width=16, height=12) if (thefunction == "group") { par(mfrow=c(2,2)) } @@ -197,12 +188,12 @@ if (thefunction == "xcmsSet") { cat("\t\tGET TIC GRAPH\n") sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) - getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") - getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) + getTICs(xcmsSet=xset, rt="raw") + getBPCs(xcmsSet=xset, rt="raw") } else if (thefunction == "retcor") { cat("\t\tGET TIC GRAPH\n") - 
getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") - getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) + getTICs(xcmsSet=xset, rt="corrected") + getBPCs(xcmsSet=xset, rt="corrected") } if ((thefunction == "group" || thefunction == "fillPeaks") && exists("intval")) {