Mercurial > repos > lecorguille > xcms_group

--- a/abims_xcms_group.xml	Fri May 19 09:35:20 2017 -0400
+++ b/abims_xcms_group.xml	Tue Oct 24 11:47:07 2017 -0400
@@ -15,9 +15,8 @@
         image '$image'

         xsetRdataOutput '$xsetRData'
-        rplotspdf '$rplotsPdf'

-        method $methods.method
+        method $methods.method
         #if $methods.method == "density":
             ## minsamp $methods.minsamp
             minfrac $methods.minfrac
@@ -53,7 +52,7 @@
                 <option value="nearest" >nearest</option>
             </param>
             <when value="density">
-                <param name="bw" type="integer" value="30" label="Bandwidth" help="[bw] bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel to apply to the peak density chromatogram" />
+                <param name="bw" type="float" value="30" label="Bandwidth" help="[bw] bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel to apply to the peak density chromatogram" />
                 <param name="minfrac" type="float" value="0.5" label="Minimum fraction of samples necessary" help="[minfrac] in at least one of the sample groups for it to be a valid group" />
                 <param name="mzwid" type="float" value="0.25" label="Width of overlapping m/z slices" help="[mzwid] to use for creating peak density chromatograms and grouping peaks across samples " />
 <!--
@@ -98,7 +97,7 @@

     <outputs>
         <data name="xsetRData" format="rdata.xcms.group" label="${image.name[:-6]}.group.RData"/>
-        <data name="rplotsPdf" format="pdf" label="${image.name[:-6]}.group.Rplots.pdf"/>
+        <data name="rplotsPdf" format="pdf" from_work_dir="Rplots.pdf" label="${image.name[:-6]}.group.Rplots.pdf"/>
         <expand macro="output_peaklist" function="group"/>
         <data name="log" format="txt" label="xset.log.txt"  hidden="true" />
     </outputs>
@@ -282,29 +281,26 @@
 Xcms.Group
 ==========

------------
 Description
 -----------

 After peak identification with xcmsSet, this tool groups the peaks which represent the same analyte across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time. Allows rejection of features, which are only partially detected within the replicates of a sample class.


-
------------------
 Workflow position
 -----------------

 **Upstream tools**

-========================= ================= =================== ==========
-Name                      output file       format              parameter
-========================= ================= =================== ==========
-xcms.xcmsSet              xset.RData        rdata.xcms.raw      RData file
-------------------------- ----------------- ------------------- ----------
-xcms.xcmsSet Merger       xset.RData        rdata.xcms.raw      RData file
-------------------------- ----------------- ------------------- ----------
-xcms.retcor               xset.RData        rdata.xcms.retcor   RData file
-========================= ================= =================== ==========
++------------------------+-----------------+--------------------+------------+
+| Name                   | output file     | format             | parameter  |
++========================+=================+====================+============+
+| xcms.xcmsSet           |  xset.RData     |  rdata.xcms.raw    | RData file |
++------------------------+-----------------+--------------------+------------+
+| xcms.xcmsSet Merger    |  xset.RData     |  rdata.xcms.raw    | RData file |
++------------------------+-----------------+--------------------+------------+
+| xcms.retcor            |  xset.RData     |  rdata.xcms.retcor | RData file |
++------------------------+-----------------+--------------------+------------+


 **Downstream tools**
@@ -317,23 +313,23 @@
 |xcms.fillPeaks             | xset.RData      | rdata.xcms.group   |
 +---------------------------+--------------------------------------+

-The output file is an xcmsSet.RData file. You can continue your analysis using it in **xcms.retcor** tool as an next step and then **xcms.fillPeaks**.
-
 **General schema of the metabolomic workflow**

 .. image:: xcms_group_workflow.png


+---------------------------------------------------
+
 -----------
 Input files
 -----------

 +---------------------------+-----------------------+
-| Parameter : num + label   |   Format              |
+| Parameter                 |   Format              |
 +===========================+=======================+
-| Or : RData file            |   rdata.xcms.raw     |
+| OR : RData file           |   rdata.xcms.raw      |
 +---------------------------+-----------------------+
-| Or : RData file            |   rdata.xcms.retcor  |
+| OR : RData file           |   rdata.xcms.retcor   |
 +---------------------------+-----------------------+


@@ -357,22 +353,19 @@
     | Groups peaks together across samples by creating a master peak list and assigning corresponding peaks from all samples. It is inspired by the alignment algorithm of mzMine.


+@HELP_GET_PEAK_LIST@
+
 ------------
 Output files
 ------------

-xset.group.Rplots.pdf
-
 xset.group.RData: rdata.xcms.group format

     | Rdata file that will be necessary in the third and fourth step of the workflow (xcms.retcor and xcms.fillpeaks).

-
-------
+xset.group.Rplots.pdf

-.. class:: infomark
-
-The output file is an xset.group.RData file. You can continue your analysis using it in **xcms.retcor** tool.
+@HELP_GET_PEAK_LIST_OUTPUTS@


 ---------------------------------------------------
@@ -390,23 +383,12 @@
 Parameters
 ----------

-    | Method -> **density**
-    | bw     -> **5**
-    | minfrac -> **0.3**
-    | mzwid    -> **0.01**
-    | Advanced options: **show**
-    | max -> **50**
-
-
-Output files
-------------
-
-    | **1) xset.RData: RData file**
-
-    | **2) Example of an xset.group.Rplots pdf file**
-
-.. image:: xcms_group.png
-        :width: 700
+    | **Method**: density
+    | **bw**: 10
+    | **minfrac**: 0.5
+    | **mzwid**: 0.05
+    | **Advanced options**: show
+    | **max**: 10


 ---------------------------------------------------
--- a/lib.r	Fri May 19 09:35:20 2017 -0400
+++ b/lib.r	Tue Oct 24 11:47:07 2017 -0400
@@ -51,6 +51,42 @@
     write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F)
 }

+#@author G. Le Corguille
+exportTicBpcTabular <- function(dataset, filenameBase, ticORbpc, rt='raw') {
+        # Writes `dataset` as a tab-separated file preceded by '#'-prefixed MultiQC header lines; rt == 'corrected' adds a '_corrected' suffix to the section name, plot id and file name.
+        rawORcorrected = ''
+        title = ''
+        if (rt=='corrected') {
+            rawORcorrected = '_corrected'
+            title = ' corrected by retcor'
+        }
+        # Only "TIC" and "BPC" are handled here: any other ticORbpc value leaves section_name and description unassigned.
+        if (ticORbpc == "TIC") {
+            section_name = paste0('TIC',rawORcorrected)
+            title = paste0('Total Ion Current (TIC) chromatogram',title)
+            description = 'Sum of intensity (Y) of all ions detected at each retention time(X)'
+        } else if (ticORbpc == "BPC") {
+            section_name = paste0('BPC',rawORcorrected)
+            title = paste0('Base Peak Chromatogram (BPC)',title)
+            description = 'Sum of intensity (Y) of the most intense peaks at each retention time(X)'
+        }
+        # Output name (relative path): basename of filenameBase without its extension, then "-", ticORbpc, rawORcorrected and "_mqc.out".
+        filename=paste0(basename(file_path_sans_ext(filenameBase)),"-",ticORbpc,rawORcorrected,"_mqc.out")
+        # NOTE(review): ylab below is 'Base Peak Intensity' for TIC as well, and the column header reads Intensity then RT - confirm both against the column order of dataset.
+        # Headers for MultiQC
+        cat("# file_format: 'tsv'\n", sep="", file = filename)
+        cat("# section_name: '",section_name,"'\n", sep="", file = filename, append = T)
+        cat("# title: '",title,"'\n", sep="", file = filename, append = T)
+        cat("# description: '",description,"'\n", sep="", file = filename, append = T)
+        cat("# plot_type: 'linegraph'\n", sep="", file = filename, append = T)
+        cat("# pconfig:\n", sep="", file = filename, append = T)
+        cat("#     id: '",ticORbpc,rawORcorrected,"_lineplot'\n", sep="", file = filename, append = T)
+        cat("#     ylab: 'Base Peak Intensity'\n", sep="", file = filename, append = T)
+        cat("#     xlab: 'Retention Time'\n", sep="", file = filename, append = T)
+        cat("Intensity\tRT\n", file = filename, append = T)
+        write.table(dataset, filename ,row.names = F, col.names = F, sep = "\t", append = T, quote = F)
+}
+
 #@author Y. Guitton
 getBPC <- function(file,rtcor=NULL, ...) {
     object <- xcmsRaw(file)
@@ -94,6 +130,9 @@
             rtcor <- NULL

         TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
+
+        exportTicBpcTabular(TIC[[j]], files[j], "BPC", rt=rt)
+
         # TIC[[j]][,1]<-rtcor
     }

@@ -221,6 +260,8 @@
             rtcor <- xcmsSet@rt$corrected[[i]] else
         rtcor <- NULL
         TIC[[i]] <- getTIC(files[i],rtcor=rtcor)
+
+        exportTicBpcTabular(TIC[[i]], files[i], "TIC", rt=rt)
     }

     pdf(pdfname,w=16,h=10)
@@ -476,7 +517,7 @@


 # This function get the raw file path from the arguments
-getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) {
+getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) {
     if (!is.null(listArguments[["zipfile"]]))           zipfile = listArguments[["zipfile"]]
     if (!is.null(listArguments[["zipfilePositive"]]))   zipfile = listArguments[["zipfilePositive"]]
     if (!is.null(listArguments[["zipfileNegative"]]))   zipfile = listArguments[["zipfileNegative"]]
--- a/macros.xml	Fri May 19 09:35:20 2017 -0400
+++ b/macros.xml	Tue Oct 24 11:47:07 2017 -0400
@@ -12,6 +12,7 @@
             <requirement type="package" version="1.46.0">bioconductor-xcms</requirement>
         </requirements>
     </xml>
+
     <xml name="stdio">
         <stdio>
             <exit_code range="1" level="fatal" />
@@ -116,14 +117,55 @@
         </data>
     </xml>

+    <xml name="input_tic_bpc_pdf">
+        <param name="tic_bpc_pdf" type="boolean" checked="False" label="Do you want TIC and BPC in PDF format?" help="Either way, you will be able to use MultiQC on the tabular files" />
+    </xml>
+
+    <xml name="test_retcor_param">
+        <param name="methods|method" value="peakgroups"/>
+        <param name="methods|smooth" value="loess"/>
+        <param name="methods|extra" value="1"/>
+        <param name="methods|missing" value="1"/>
+        <param name="methods|options|option" value="show"/>
+        <param name="methods|options|span" value="0.2"/>
+        <param name="methods|options|family" value="gaussian"/>
+        <param name="methods|options|plottype" value="deviation"/>
+    </xml>
+
+    <xml name="test_retcor_output" token_raworcorrected="">
+        <output name="log">
+            <assert_contents>
+                <has_text text="object with 4 samples" />
+                <has_text text="Time range: 2507.7-4481.7 seconds (41.8-74.7 minutes)" />
+                <has_text text="Mass range: 200.1-600 m/z" />
+                <has_text text="Peaks: 9251 (about 2313 per sample)" />
+                <has_text text="Peak Groups: 0" />
+                <has_text text="Sample classes: KO, WT" />
+            </assert_contents>
+        </output>
+        <output_collection name="ticsCorrectedTabCollection" type="list">
+            <element name="ko15" value="ko15-TIC@RAWORCORRECTED@_mqc.out" />
+            <element name="ko16" value="ko16-TIC@RAWORCORRECTED@_mqc.out" />
+            <element name="wt15" value="wt15-TIC@RAWORCORRECTED@_mqc.out" />
+            <element name="wt16" value="wt16-TIC@RAWORCORRECTED@_mqc.out" />
+        </output_collection>
+        <output_collection name="bpcsCorrectedTabCollection" type="list">
+            <element name="ko15" value="ko15-BPC@RAWORCORRECTED@_mqc.out" />
+            <element name="ko16" value="ko16-BPC@RAWORCORRECTED@_mqc.out" />
+            <element name="wt15" value="wt15-BPC@RAWORCORRECTED@_mqc.out" />
+            <element name="wt16" value="wt16-BPC@RAWORCORRECTED@_mqc.out" />
+        </output_collection>
+    </xml>
+
     <token name="@HELP_AUTHORS@">
+
 .. class:: infomark

 **Authors**  Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu

 .. class:: infomark

-**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M]
+**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station Biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M]

  | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool.

@@ -131,6 +173,57 @@

     </token>

+    <token name="@HELP_BCP_TIC@">
+
+BPCs and TICs: tabular
+
+    | "Base Peak Chromatograms" and "Total Ion Chromatograms" graphs
+    | Import BPC and TIC from xcmsSet and retcor [at once] within MultiQC_ (in or outside Galaxy) to display and navigate in the graphs.
+    | - In MultiQC: as tool, use the Custom Content
+
+.. _MultiQC: http://multiqc.info/
+
+BPCs and TICs: pdf [if using zip]
+
+    | "Base Peak Chromatograms" and "Total Ion Chromatograms" graphs in pdf format.
+
+    </token>
+
+    <token name="@HELP_GET_PEAK_LIST@">
+
+Get a Peak List
+---------------
+
+If 'true', the module generates two additional files corresponding to the peak list:
+- the variable metadata file (corresponding to information about extracted ions such as mass or retention time)
+- the data matrix (corresponding to related intensities)
+
+**decimal places for [mass or retention time] values in identifiers**
+
+    | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time.
+    | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively.
+    | These parameters do not affect decimal places in columns other than the identifier one.
+
+**Reported intensity values**
+
+    | This parameter determines which values should be reported as intensities in the dataMatrix table; it corresponds to the xcms 'intval' parameter:
+    | - into: integrated area of original (raw) peak
+    | - maxo: maximum intensity of original (raw) peak
+    | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’)
+
+    </token>
+
+    <token name="@HELP_GET_PEAK_LIST_OUTPUTS@">
+
+xset.variableMetadata.tsv : tabular format [If Get a Peak List == Yes]
+
+    | Table containing information about ions - Can be used in **Normalisation/Generic_filter** and **Statistics** tools.
+
+xset.dataMatrix.tsv : tabular format
+
+    | Table containing ions' intensities - Can be used in **Normalisation/Generic_filter** and **Statistics** tools.
+
+    </token>

     <xml name="citation">
         <citations>
--- a/xcms.r	Fri May 19 09:35:20 2017 -0400
+++ b/xcms.r	Tue Oct 24 11:47:07 2017 -0400
@@ -13,7 +13,7 @@
 # ----- PACKAGE -----
 cat("\tPACKAGE INFO\n")
 #pkgs=c("xcms","batch")
-pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch")
+pkgs=c("tools","parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch")
 for(pkg in pkgs) {
     suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
     cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="")
@@ -65,10 +65,6 @@
 }

 #saving the specific parameters
-rplotspdf = "Rplots.pdf"
-if (!is.null(listArguments[["rplotspdf"]])){
-    rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL
-}
 sampleMetadataOutput = "sampleMetadata.tsv"
 if (!is.null(listArguments[["sampleMetadataOutput"]])){
     sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL
@@ -94,11 +90,6 @@
     intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL
 }

-if (thefunction %in% c("xcmsSet","retcor")) {
-    ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL
-    bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL
-}
-

 if (thefunction %in% c("xcmsSet","retcor","fillPeaks"))  {
     if (!exists("singlefile")) singlefile=NULL
@@ -147,7 +138,7 @@

 #change the default display settings
 #dev.new(file="Rplots.pdf", width=16, height=12)
-pdf(file=rplotspdf, width=16, height=12)
+pdf(file="Rplots.pdf", width=16, height=12)
 if (thefunction == "group") {
     par(mfrow=c(2,2))
 }
@@ -197,12 +188,12 @@
 if (thefunction == "xcmsSet") {
     cat("\t\tGET TIC GRAPH\n")
     sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput)
-    getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw")
-    getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf)
+    getTICs(xcmsSet=xset, rt="raw")
+    getBPCs(xcmsSet=xset, rt="raw")
 } else if (thefunction == "retcor") {
     cat("\t\tGET TIC GRAPH\n")
-    getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected")
-    getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf)
+    getTICs(xcmsSet=xset, rt="corrected")
+    getBPCs(xcmsSet=xset, rt="corrected")
 }

 if ((thefunction == "group" || thefunction == "fillPeaks") && exists("intval")) {