Mercurial > repos > mish > xcms_tests

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt	Wed Aug 28 10:48:49 2013 -0400
@@ -0,0 +1,31 @@
+
+-----------------------------------------------------------------------
+   XCMS Suite datatypes: A Workflow for metabolomics by ABIMS team
+-----------------------------------------------------------------------
+
+This package contains the tools for the XCMS suite.
+
+--------------------------------------------------------------------
+Instructions for integration of the XCMS Suite tools into the workflow-system
+Galaxy (http://getgalaxy.org)
+--------------------------------------------------------------------
+
+For installing the tools of the XCMS Suite into your Galaxy installation, please do the following:
+
+
+   - Download and follow the instructions for installing the XCMS Suite datatypes (http://misharl@toolsheddev.sb-roscoff.fr/repos/misharl/xcms_datatypes).
+
+   - R libraries needed for the script xcms.r used by all the XCMS tools:
+       library(xcms)
+       library(batch) #necessary for parseCommandArgs function
+       library(CAMERA)
+
+
+   - Set up the "--file" parameter (absolute path of the xcms.r used by all tools) in all XML tools (XCMS_R_DIR for the "--file" parameter)
+       --file=/XCMS_R_DIR/xcms.r
+
+   - Set up the tcis.r directory location (TCIS_R_DIR) in the script xcms.r:
+		source("/TCIS_R_DIR/tcis.r")
+
+
+Last but not least, restart Galaxy.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Wed Aug 28 10:48:49 2013 -0400
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="The XCMS Suite requires the Galaxy applicable XCMS data formats .">
+     <repository toolshed="http://toolsheddev.sb-roscoff.fr" name="xcms_datatypes" owner="misharl" changeset_revision="284ba0d136f9" />
+</repositories>
Binary file static/images/xcms_workflow.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms/abims_CAMERA_annotateDiffreport.xml	Wed Aug 28 10:48:49 2013 -0400
@@ -0,0 +1,152 @@
+<tool id="abims_CAMERA_annotateDiffreport_dev" name="CAMERA.annotateDiffreport_dev" version="20130607">
+
+    <description>Wrapper function for the xcms diffreport and the annotate function. Returns a diffreport within the annotation results.</description>
+
+    <command>
+        R --vanilla --no-site-file --file=/XCMS_R_DIR/xcms.r --args xfunction annotateDiffreport image $image input $input
+        eicmax $eicmax eicwidth $eicwidth metlin $metlin sortpval $sortpval
+        nSlaves $nSlaves
+        sigma $sigma perfwhm $perfwhm
+        maxcharge $maxcharge maxiso $maxiso minfrac $minfrac
+        ppm $ppm mzabs $mzabs
+        ## quick "TRUE" (disabled; two hashes keep this a Cheetah comment so it never reaches the command line)
+
+        quick $quick_block.quick
+        #if $quick_block.quick == "FALSE"
+			cor_eic_th $quick_block.cor_eic_th graphMethod $quick_block.graphMethod pval $quick_block.pval calcCiS $quick_block.calcCiS calcIso $quick_block.calcIso calcCaS $quick_block.calcCaS
+			polarity $quick_block.polarity multiplier $quick_block.multiplier
+        #end if
+
+        #if $options.option == "show":
+            value "$options.intval"
+            h $options.h
+            w $options.w
+            mzdec $options.mzdec
+            max_peaks $options.max_peaks
+        #end if
+    </command>
+
+    <inputs>
+        <param name="input" type="data" format="ms_zip" label="ms zip file" help="" />
+        <param name="image" type="data" label="RData file" format="rdata" help="output file from another function xcms (fillPeaks)" />
+        <param name="eicmax" type="integer" value="0" label="eicmax" help="diffreport: number of the most significantly different analytes to create EICs for" />
+        <param name="eicwidth" type="integer" value="200" label="eicwidth" help="diffreport: width (in seconds) of EICs produced" />
+        <param name="metlin" type="float" value="0" label="metlin" help="mass uncertainty to use for generating link to Metlin metabolite database. the sign of the uncertainty indicates negative or positive mode data for M+H or M-H calculation. a value of FALSE or 0 removes the column"/>
+		<param name="sortpval" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="sortpval" help="diffreport: logical indicating whether the reports should be sorted by p-value"/>
+        <param name="nSlaves" type="integer" value="9" label="MPI-slaves CPU" help="xsAnnotate: Use parallel CAMERA mode, require Rmpi" />
+        <param name="sigma" type="integer" value="6" label="sigma" help="groupFWHM: multiplier of the standard deviation" />
+        <param name="perfwhm" type="float" value="0.6" max="1" min="0" label="perfwhm" help="groupFWHM: percentage of FWHM width" />
+		<param name="maxcharge" type="integer" value="3" label="maxcharge" help="findIsotopes: max. ion charge" />
+		<param name="maxiso" type="integer" value="4" label="maxiso" help="findIsotopes: max. number of expected isotopes" />
+        <param name="minfrac" type="float" value="0.5" max="1" min="0" label="minfrac" help="findIsotopes: The percentage number of samples, which must satisfy the C12/C13 rule for isotope annotation" />
+        <param name="ppm" type="integer" value="5" label="ppm" help="General ppm error" />
+        <param name="mzabs" type="float" value="0.015" label="mzabs" help="General absolut error in m/z" />
+
+        <conditional name="quick_block">
+			<param name="quick" type="select" label="quick" help="Use only groupFWHM and findIsotopes">
+				<option value="TRUE" selected="true">TRUE</option>
+				<option value="FALSE" >FALSE</option>
+			</param>
+			<when value="FALSE">
+				<param name="cor_eic_th" type="float" value="0.75" max="1" min="0" label="cor_eic_th" help="groupCorr: correlation threshold (0..1)" />
+				<param name="graphMethod" type="select" label="graphMethod" help="groupCorr: Method selection for grouping peaks after correlation analysis into pseudospectra">
+					<option value="hcs" selected="true">hcs</option>
+					<option value="lpc">lpc</option>
+				</param>
+				<param name="pval" type="float" value="0.05" max="1" min="0" label="pval" help="groupCorr: significant correlation threshold" />
+				<param name="calcCiS" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="calcCiS" help="groupCorr: Use correlation inside samples for peak grouping"/>
+				<param name="calcIso" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="calcIso" help="groupCorr: Use isotopic relationship for peak grouping"/>
+				<param name="calcCaS" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="calcCaS" help="groupCorr: Use correlation across samples for peak grouping"/>
+				<param name="polarity" type="select" label="polarity" help="findAdducts: Which polarity mode was used for measuring of the ms sample">
+					<option value="positive" >positive</option>
+					<option value="negative" selected="true">negative</option>
+				</param>
+				<param name="multiplier" type="integer" value="3" label="multiplier" help="findAdducts: If no ruleset is provided, calculate ruleset with max. number n of [nM+x] clusterions" />
+			</when>
+		</conditional>
+
+        <conditional name="options">
+            <param name="option" type="select" label="Advanced options">
+                <option value="show">show</option>
+                <option value="hide" selected="true">hide</option>
+            </param>
+            <when value="show">
+                <param name="intval" type="select" label="intval" help="General used intensity value">
+                    <option value="into" selected="true">into</option>
+                    <option value="maxo" >maxo</option>
+                    <option value="intb">intb</option>
+                </param>
+                <param name="h" type="integer" value="480" label="height" help="Numeric variable for the height of the eic and boxplots that are printed out." />
+                <param name="w" type="integer" value="640" label="width" help="Numeric variable for the width of the eic and boxplots print out made." />
+                <param name="mzdec" type="integer" value="2" label="mzdec" help="Number of decimal places of title m/z values in the eic plot." />
+                <param name="max_peaks" type="integer" value="100" label="max_peaks" help="How much peaks will be calculated in every thread using the parallel mode" />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="output_tsv" format="tabular" from_work_dir="Xdiffreport.tsv" label="${image.name[:-6]}.annotateDiffreport.tsv" />
+        <data name="output_matrix" format="tabular" from_work_dir="Xdiffreport.data_matrix.tsv" label="${image.name[:-6]}.annotateDiffreport.data_matrix.tsv" />
+        <data name="output_zip" format="zip" from_work_dir="Xdiffreport.zip" label="${image.name[:-6]}.annotateDiffreport.zip" />
+        <data name="output_image" format="rdata" from_work_dir="annotateDiffreport.RData" label="${image.name[:-6]}.annotateDiffreport.Rdata" />
+        <data name="output_debug" format="rdata" from_work_dir="debug.RData" label="${image.name[:-6]}.annotateDiffreport.debug.Rdata" />
+    </outputs>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <help>
+**CAMERA package**
+
+The R-package CAMERA is a Collection of Algorithms for MEtabolite
+pRofile Annotation. Its primary purpose is the annotation and evaluation of
+LC-MS data. It includes algorithms for annotation of isotope peaks, adducts
+and fragments in peak lists. Additional methods cluster mass signals that
+originate from a single metabolite, based on rules for mass differences and
+peak shape comparison. To use the strength of already existing programs,
+CAMERA is designed to interact directly with processed peak data from the
+R-package **xcms**.
+
+**What it does?**
+
+The CAMERA annotation procedure can be split into two parts: first, we want to
+determine which peaks originate from the same molecule; second, we want to
+compute its exact mass and annotate the ion species. The CAMERA annotation
+workflow therefore contains the following primary functions:
+
+1. peak grouping after retention time (**groupFWHM**)
+2. peak group verification with peak shape correlation (**groupCorr**)
+
+Both methods separate peaks into different groups, which we define as
+"pseudospectra". A pseudospectrum can consist of one up to 100 ions, depending on the molecule's amount and ionizability. Afterwards, the determination of the ion species can be performed with:
+
+1. annotation of possible isotopes (**findIsotopes**)
+2. annotation of adducts and calculation of hypothetical masses for the group (**findAdducts**)
+
+This workflow results in a **data-frame** similar to an xcms peak table, which can
+be easily stored in a comma-separated table .csv (Excel-readable).
+
+The **annotateDiffreport** function returns a **diffreport** (see diffreport) with additional columns containing the annotation results.
+
+------
+
+CAMERA needs as input an xcmsSet object that is processed with your
+favorite parameters
+
+Put in the .RData file generated by **fillPeaks** and **.mz.zip** source file
+
+------
+
+.. class:: infomark
+
+The output file is an annotateDiffreport.RData file.
+
+You can continue your analysis using it in **CAMERA** package tools.
+
+diffreport.zip file contains filebase_eic and filebase_box directories and filebase.tsv file : results of analysis
+    </help>
+
+</tool>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms/abims_xcms_diffreport.xml	Wed Aug 28 10:48:49 2013 -0400
@@ -0,0 +1,115 @@
+<tool id="abims_xcms_diffreport_dev" name="xcms.diffreport_dev" version="20130418">
+
+	<description>A report showing the most statistically significant differences in analyte intensities</description>
+
+    <command>
+        R --vanilla --no-site-file --file=/XCMS_R_DIR/xcms.r --args xfunction diffreport image $image input $input eicmax $eicmax eicwidth $eicwidth metlin $metlin sortpval $sortpval
+        #if $options.option == "show":
+            value "$options.value"
+            h $options.h
+            w $options.w
+            mzdec $options.mzdec
+        #end if
+    </command>
+
+    <inputs>
+        <param name="input" type="data" format="ms_zip" label="ms zip file" help="" />
+        <param name="image" type="data" format="rdata" label="RData file" help="output file from another function xcms (fillPeaks)" />
+        <param name="eicmax" type="integer" value="0" label="eicmax" help="number of the most significantly different analytes to create EICs for" />
+        <param name="eicwidth" type="integer" value="200" label="eicwidth" help="width (in seconds) of EICs produced" />
+        <param name="metlin" type="float" value="0" label="metlin" help="mass uncertainty to use for generating link to Metlin metabolite database. the sign of the uncertainty indicates negative or positive mode data for M+H or M-H calculation. a value of FALSE or 0 removes the column"/>
+		<param name="sortpval" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="sortpval" help="logical indicating whether the reports should be sorted by p-value"/>
+        <conditional name="options">
+            <param name="option" type="select" label="Advanced options">
+                <option value="show">show</option>
+                <option value="hide" selected="true">hide</option>
+            </param>
+            <when value="show">
+                <param name="value" type="select" label="value" help="intensity values to be used for the diffreport">
+                    <option value="into" selected="true">into</option>
+                    <option value="maxo" >maxo</option>
+                    <option value="intb">intb</option>
+                </param>
+                <param name="h" type="integer" value="480" label="height" help="Numeric variable for the height of the eic and boxplots that are printed out." />
+                <param name="w" type="integer" value="640" label="width" help="Numeric variable for the width of the eic and boxplots print out made." />
+                <param name="mzdec" type="integer" value="2" label="mzdec" help="Number of decimal places of title m/z values in the eic plot." />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="output_tsv"   format="tabular" from_work_dir="Xdiffreport.tsv"   label="${image.name[:-6]}.diffreport.tsv" />
+        <data name="output_matrix" format="tabular" from_work_dir="Xdiffreport.data_matrix.tsv" label="${image.name[:-6]}.diffreport.data_matrix.tsv" />
+        <data name="output_zip"   format="zip"   from_work_dir="Xdiffreport.zip"   label="${image.name[:-6]}.diffreport.zip" />
+        <data name="output_image" format="rdata" from_work_dir="diffreport.RData" label="${image.name[:-6]}.diffreport.RData" />
+    </outputs>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <tests>
+    </tests>
+
+    <help>
+**What it does?**
+
+Create a report showing the most significant differences between
+two sets of samples. Optionally create extracted ion chromatograms
+for the most significant differences.
+
+This method handles creation of summary reports with statistics
+about which analytes were most significantly different between two
+sets of samples. It computes Welch's two-sample t-statistic for
+each analyte and ranks them by p-value. It returns a summary
+report that can optionally be written out to a tab-separated file.
+
+Additionally, it does all the heavy lifting involved in creating
+superimposed extracted ion chromatograms for a given number of
+analytes.  It does so by reading the raw data files associated
+with the samples of interest one at a time. As it does so, it
+prints the name of the sample it is currently reading. Depending
+on the number and size of the samples, this process can take a
+long time.
+
+If a base file name is provided, the report (see Value section)
+will be saved to a tab separated file. If EICs are generated, they
+will be saved as 640x480 PNG files in a newly created subdirectory.
+
+------
+
+**Parameters**
+
+**eicmax**
+
+It will automatically generate extracted ion chromatograms
+for the given number of most significantly different analytes.
+
+**metlin**
+
+If the metlin argument is set to a numeric value, the report will include links
+to the Metlin Metabolite Database (http://metlin.scripps.edu/) showing potential
+metabolite identities. A positive value indicates the data was acquired in positive ion
+mode and the neutral mass is calculated assuming all ions are M+H. A negative value
+does the opposite. The value itself indicates the uncertainty in mass accuracy.
+
+**value**
+
+If ‘value="into"’, integrated peak intensities are used.
+
+If ‘value="maxo"’, maximum peak intensities are used.
+
+If ‘value="intb"’, baseline corrected integrated peak intensities are used (only available if peak detection was done by ‘findPeaks.centWave’).
+
+------
+
+.. class:: infomark
+
+The output file is a diffreport.RData file.
+
+You can continue your analysis using it in **CAMERA** package tools.
+
+diffreport.zip file contains filebase_eic and filebase_box directories and filebase.tsv file : results of analysis
+    </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms/abims_xcms_fillPeaks.xml	Wed Aug 28 10:48:49 2013 -0400
@@ -0,0 +1,85 @@
+<tool id="abims_xcms_fillPeaks_dev" name="xcms.fillPeaks_dev" version="20130418">
+
+    <requirements>
+        <requirement type="binary">R</requirement>
+    </requirements>
+
+    <description>Integrate the signal in the region of that peak group not represented and create a new peak</description>
+
+    <command>
+        R --vanilla --no-site-file --file=/XCMS_R_DIR/xcms.r --args xfunction fillPeaks image $image input $input method $method
+    </command>
+
+    <inputs>
+        <param name="input" type="data" format="ms_zip" label="ms zip file" help="" />
+        <param name="image" type="data" format="rdata" label="RData file" help="output file from another function xcms (group)" />
+        <param name="method" type="select" label="method">
+            <option value="chrom" selected="true">chrom</option>
+            <option value="MSW" >MSW</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data name="output" format="rdata" from_work_dir="fillPeaks.RData" label="${image.name[:-6]}.fillPeaks.RData" />
+    </outputs>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <tests>
+    </tests>
+
+    <help>
+**What it does?**
+
+For each sample, identify peak groups where that sample is not
+represented. For each of those peak groups, integrate the signal
+in the region of that peak group and create a new peak.
+
+According to the type of raw-data there are 2
+different methods available. for filling gcms/lcms data the method
+"chrom" integrates raw-data in the chromatographic domain, whereas
+"MSW" is used for peaklists without retention-time information
+like those from direct-infusion spectra.
+
+------
+
+**Methods**
+
+-**chrom**
+
+This method produces intensity values for those missing samples
+by integrating raw data in peak
+group region. In a given group, the start and ending retention
+time points for integration are defined by the median start and
+end points of the other detected peaks. The start and end m/z
+values are similarly determined. Intensities can still be zero,
+which is a rather unusual intensity for a peak.  This is the case
+if e.g. the raw data was thresholded, and the integration area
+contains no actual raw intensities, or if one sample is
+miscalibrated, such that the raw data points are (just) outside
+the integration area.
+
+Importantly, if retention time correction data is available, the
+alignment information is used to more precisely integrate the
+proper region of the raw data. If the corrected retention time is
+beyond the end of the raw data, the value will be not-a-number (NaN).
+
+-**MSW**
+
+After peak grouping, there will always be peak groups that do not
+include peaks from every sample. This method produces intensity
+values for those missing samples by integrating raw data in peak
+group region. In a given group, the start and ending m/z values
+for integration are defined by the median start and end points of
+the other detected peaks
+
+------
+
+.. class:: infomark
+
+The output file is a fillPeaks.RData file. You can continue your analysis using it in the **diffreport** tool.
+    </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms/abims_xcms_group.xml	Wed Aug 28 10:48:49 2013 -0400
@@ -0,0 +1,142 @@
+<tool id="abims_xcms_group_dev" name="xcms.group_dev" version="20130418">
+
+    <requirements>
+        <requirement type="binary">R</requirement>
+    </requirements>
+
+    <description>Group peaks together across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time.</description>
+
+    <command>
+        R --vanilla --file=/XCMS_R_DIR/xcms.r --quiet --no-site-file --args xfunction group image $image method $methods.method sleep 0.001
+        #if $methods.method == "density":
+            ## minsamp $methods.minsamp
+            minfrac $methods.minfrac
+            bw $methods.bw
+            mzwid $methods.mzwid
+        #elif $methods.method == "mzClust":
+            mzppm $methods.mzppm
+            mzabs $methods.mzabs
+            minfrac $methods.minfrac
+            ## minsamp $methods.minsamp
+        #else:
+            mzVsRTbalance $methods.mzVsRTbalance
+            mzCheck $methods.mzCheck
+            rtCheck $methods.rtCheck
+            kNN $methods.kNN
+        #end if
+        #if $options.option == "show":
+            max $options.max
+        #end if
+    </command>
+
+    <inputs>
+        <param name="image" type="data" format="rdata" label="RData file" help="output file from another function xcms (xcmsSet, retcor etc.)" />
+        <conditional name="methods">
+            <param name="method" type="select" label="Method">
+                <option value="density" selected="true">density</option>
+                <option value="mzClust" >mzClust</option>
+                <option value="nearest" >nearest</option>
+            </param>
+            <when value="density">
+                <param name="bw" type="integer" value="30" label="bw" help="bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel to apply to the peak density chromatogram" />
+                <param name="minfrac" type="float" value="0.5" label="minfrac" help="minimum fraction of samples necessary in at least one of the sample groups for it to be a valid group" />
+                <param name="mzwid" type="float" value="0.25" label="mzwid" help="width of overlapping m/z slices to use for creating peak density chromatograms and grouping peaks across samples " />
+<!--
+                <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " />
+-->
+            </when>
+            <when value="mzClust">
+                <param name="mzppm" type="integer" value="20" label="mzppm" help="The relative error used for clustering/grouping in ppm (parts per million)" />
+                <param name="mzabs" type="float" value="0" label="mzabs" help="The absolute error used for clustering/grouping" />
+                <param name="minfrac" type="float" value="0" label="minfrac" help="minimum fraction of samples necessary in at least one of the sample groups for it to be a valid group" />
+<!--
+                <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " />
+-->
+            </when>
+            <when value="nearest">
+                <param name="mzVsRTbalance" type="integer" value="10" label="mzVsRTbalance" help="" />
+                <param name="mzCheck" type="float" value="0.2" label="mzCheck" help="" />
+                <param name="rtCheck" type="integer" value="15" label="rtCheck" help="" />
+                <param name="kNN" type="integer" value="10" label="kNN" help="" />
+            </when>
+        </conditional>
+<!--
+        <param name="sleepy" type="float" value="0.001" label="sleep" help="seconds to pause between plotting successive steps of the peak grouping algorithm. peaks are plotted as points showing relative intensity. identified groups are flanked by dotted vertical lines">
+            <validator type="in_range" message="Must be more than 0" min="0.001" max="inf"/>
+        </param>
+-->
+        <conditional name="options">
+            <param name="option" type="select" label="Advanced options">
+                <option value="show">show</option>
+                <option value="hide" selected="true">hide</option>
+            </param>
+            <when value="show">
+                <param name="max" type="integer" value="5" label="max" help="maximum number of groups to identify in a single m/z slice " />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="output" format="rdata" from_work_dir="group.RData" label="${image.name[:-6]}.group.RData"/>
+        <data name="rplots" format="pdf"   from_work_dir="Rplots.pdf"  label="${image.name[:-6]}.group.Rplots.pdf"/>
+    </outputs>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <tests>
+        <test>
+            <param name="image" value="test_xcmsSet.RData"/>
+            <param name="method" value="density"/>
+            <param name="bw" value="45"/>
+            <param name="option" value="show"/> <!-- "max" only exists when the advanced options are shown -->
+            <param name="max" value="200"/>
+            <param name="mzwid" value="0.05"/>
+            <param name="minfrac" value="0.5"/>
+            <output name="output" file="test_group.RData" />
+        </test>
+        <!--test>
+            <param name="image" value="xcmsSet.RData"/>
+            <param name="method" value="mzClust"/>
+            <param name="mzppm" value="20"/>
+            <param name="minfrac" value="0"/>
+            <param name="minsamp" value="1"/>
+            <output name="rdata" file="group2.RData" />
+        </test-->
+    </tests>
+
+    <help>
+**What it does?**
+
+Returns a new xcmsSet object with the additional group information.
+
+Allows rejection of features, which are only partially detected within the replicates of a sample class. In addition to the intensity matrix, separate columns containing information related to the presence of a feature within a sample class are generated and have proven useful in further filtering steps. It should be noted that each alignment has to be evaluated in terms of quality prior to any further analysis.
+
+------
+
+**Parameters**
+
+There are several grouping parameters to consider optimizing for your chromatography and mass spectrometer.
+
+**method**
+
+"mzClust" Runs high resolution alignment on single spectra samples stored in a given xcmsSet
+
+"density" groups peaks together across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time
+
+"nearest" groups peaks together across samples by creating a master peak list and assigning corresponding peaks from all samples. It is inspired by the alignment algorithm of mzMine.
+
+
+Use the **sleep** argument to specify a time (in seconds) to pause and plot each iteration. That can be quite useful for
+visualizing parameter effects.
+
+
+------
+
+.. class:: infomark
+
+The output file is a group.RData file. You can continue your analysis using it in the **retcor** or **fillPeaks** tool.
+    </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms/abims_xcms_retcor.xml	Wed Aug 28 10:48:49 2013 -0400
@@ -0,0 +1,115 @@
+<tool id="abims_xcms_retcor_dev" name="xcms.retcor_dev" version="20130418">
+
+    <requirements>
+        <requirement type="binary">R</requirement>
+    </requirements>
+
+    <description>Retention Time Correction using retcor function from xcms R package </description>
+
+    <command>
+        R --vanilla --slave --no-site-file --file=/XCMS_R_DIR/xcms.r --args input $input image $image xfunction retcor method $methods.method
+        #if $methods.method == "obiwarp":
+            profStep $methods.profStep
+        #else
+            smooth $methods.smooth
+            extra $methods.extra
+            missing $methods.missing
+            #if $methods.options.option == "show":
+                span $methods.options.span
+                family $methods.options.family
+                plottype $methods.options.plottype
+            #end if
+        #end if
+    </command>
+
+    <inputs>
+        <param name="input" type="data" format="ms_zip" label="ms zip file" help="" />
+        <param name="image" type="data" format="rdata" label="RData file" help="output file from another function xcms (xcmsSet, retcor etc.)" />
+        <conditional name="methods">
+            <param name="method" type="select" label="Method" help="Choose the method" >
+                <option value="obiwarp" >obiwarp</option>
+                <option value="peakgroups" selected="true">peakgroups</option>
+            </param>
+            <when value="obiwarp">
+                <param name="profStep" type="float" value="1" label="profStep" help="step size (in m/z) to use for profile generation from the raw data files" />
+            </when>
+            <when value="peakgroups">
+                <param name="smooth" type="select" label="smooth" help=" either 'loess’ for non-linear alignment or ‘linear’ for linear alignment" >
+                    <option value="loess">loess</option>
+                    <option value="linear">linear</option>
+                </param>
+                <param name="extra" type="integer" value="1" label="extra" help="number of extra peaks to allow in retention time correction correction groups" />
+                <param name="missing" type="integer" value="1" label="missing" help="number of missing samples to allow in retention time correction groups" />
+                <conditional name="options">
+                    <param name="option" type="select" label="Advanced options">
+                        <option value="show">show</option>
+                        <option value="hide" selected="true">hide</option>
+                    </param>
+                    <when value="show">
+                        <param name="span" type="float" value="0.2" label="span" help="degree of smoothing for local polynomial regression fitting"/>
+                        <param name="family" type="text" value="gaussian" label="family" help="if gaussian fitting is by least-squares with no outlier removal, and if symmetric a re descending M estimator is used with Tukey's biweight function, allowing outlier removal " />
+                        <param name="plottype" type="text" value="none" label="plottype" help="if deviation plot retention time deviation points and regression fit, and if mdevden also plot peak overall peak density and retention time correction peak density " />
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="output" format="rdata" from_work_dir="retcor.RData" label="${image.name[:-6]}.retcor.RData" />
+        <data name="rplots" format="pdf"   from_work_dir="Rplots.pdf"  label="${image.name[:-6]}.retcor.Rplots.pdf"> <!-- filters reach nested params through their enclosing conditionals -->
+            <filter>(methods['method'] == 'peakgroups')</filter>
+            <filter>(methods['options']['option'] == 'show')</filter>
+            <filter>(methods['options']['family'] == 'symmetric')</filter>
+            <filter>(methods['options']['plottype'] != 'none')</filter>
+        </data>
+        <data name="tics_cor"   format="pdf"   from_work_dir="TICs_corrected.pdf"  label="${image.name[:-6]}.retcor.TICs_corrected.pdf" />
+    </outputs>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <tests>
+    </tests>
+
+    <help>
+**What it does?**
+
+After matching peaks into groups, xcms can use those groups to identify and correct
+correlated drifts in retention time from run to run. The aligned peaks can then be
+used for a second pass of peak grouping which will be more accurate than the first.
+The whole process can be repeated in an iterative fashion. Not all peak groups will be helpful
+for identifying retention time drifts. Some groups may be missing peaks from a large
+fraction of samples and thus provide an incomplete picture of the drift at that time point.
+Still others may contain multiple peaks from the same sample, which is a sign of improper grouping.
+
+It returns an xcms-Set object with corrected retention times
+
+------
+
+**Parameters**
+
+xcms ignores those groups by only considering well-behaved peak groups which are missing at most one sample and
+have at most one extra peak. (Those values can be changed with the **missing** and **extra** arguments.)
+
+For each of those well-behaved groups, the algorithm calculates a median retention
+time and, for every sample, a deviation from that median. Within a sample, the observed deviation
+generally changes over time in a nonlinear fashion. Those changes are
+approximated using a local polynomial regression technique implemented in the **loess**
+function. By default, the curve fitting is done using least-squares on all data points.
+However, it is possible to enable outlier detection and removal by setting the **family**
+argument to **symmetric**.
+
+------
+
+.. class:: infomark
+
+The output file is a retcor.RData file.
+
+After retention time correction, the initial peak grouping becomes invalid and is
+discarded. Therefore, the resulting object needs to be regrouped. Here, we decrease the
+inclusiveness of the grouping using the **bw** argument using the output file in **group**  tool.
+    </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms/abims_xcms_xcmsSet.xml	Wed Aug 28 10:48:49 2013 -0400
@@ -0,0 +1,188 @@
+<tool id="abims_xcms_xcmsSet_dev" name="xcms.xcmsSet_dev" version="20130418">
+
+    <requirements>
+        <requirement type="binary">R</requirement>
+    </requirements>
+
+    <description>Filtration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis </description>
+
+    <command>
+        ## Runs xcms.r with "name value" argument pairs; only the options that belong to
+        ## the selected peak-detection method are appended.
+        R --vanilla --slave --no-site-file --file=/XCMS_R_DIR/xcms.r --args input $input xfunction xcmsSet
+        ## profmethod $profmethod
+        nSlaves 9 method $methods.method
+        #if $methods.method == "centWave":
+            ppm $methods.ppm
+            peakwidth "c($methods.peakwidth)"
+            #if $methods.options_c.option == "show":
+                mzdiff $methods.options_c.mzdiff
+                snthresh $methods.options_c.snthresh
+                integrate $methods.options_c.integrate
+                noise $methods.options_c.noise
+                prefilter "c($methods.options_c.prefilter)"
+            #end if
+        #elif $methods.method == "matchedFilter":
+            step $methods.step
+            fwhm $methods.fwhm
+            #if $methods.options_m.option == "show":
+                ## sigma "$methods.options_m.sigma"
+                max $methods.options_m.max
+                snthresh $methods.options_m.snthresh
+                ## mzdiff $methods.options_m.mzdiff
+                steps $methods.options_m.steps
+                ## sleep $methods.options_m.sleep
+            #end if
+        #elif $methods.method == "MSW":
+            snthr $methods.snthr
+            nearbyPeak $methods.nearbyPeak
+            winSize.noise $methods.winSize_noise
+            amp.Th $methods.amp_Th
+            scales "c($methods.scales)"
+            ## No padding inside the quotes: a trailing blank would reach the R script
+            ## as part of the value.
+            ## NOTE(review): the sibling MSW options use dotted R names (winSize.noise,
+            ## amp.Th); confirm that SNR_method is the spelling the R function expects.
+            SNR_method "$methods.SNR_method"
+        #end if
+    </command>
+
+    <inputs>
+        <!-- Archive handed to xcms.r as the "input" argument -->
+        <param name="input" type="data" format="ms_zip" label="ms zip file" help="" />
+<!--
+        <param name="profmethod" type="select" label="Method to use for profile generation (profmethod)" >
+            <option value="bin" selected="true">bin</option>
+            <option value="binlin">binlin</option>
+            <option value="binlinbase">binlinbase</option>
+            <option value="intlin">intlin</option>
+        </param>
+        <param name="nSlaves" type="integer" value="9" label="MPI-slaves CPU" help="number of MPI-slaves to use for parallel peak detection" />
+-->
+        <!-- Peak-detection method; each "when" branch declares the options the command template forwards for that method -->
+        <conditional name="methods">
+            <param name="method" type="select" label="Method" help="Choose the method used for finding peaks" >
+                <option value="centWave" >centWave</option>
+                <option value="matchedFilter" selected="true">matchedFilter</option>
+                <option value="MSW">MSW</option>
+            </param>
+            <when value="centWave">
+                <param name="ppm" type="integer" value="25" label="ppm" help="max tolerated ppm deviation in consecutive scans" />
+                <param name="peakwidth" type="text" value="20,50" label="peakwidth" help="min peak width in seconds, max peak width in seconds" />
+                <conditional name="options_c">
+                    <param name="option" type="select" label="Advanced options" >
+                        <option value="show">show</option>
+                        <option value="hide" selected="true">hide</option>
+                    </param>
+                    <when value="show">
+                        <param name="snthresh" type="integer" value="10" label="Signal/Noise threshold" help="signal to noise ratio cutoff" />
+                        <param name="mzdiff" type="float" value="-0.001" label="m/z difference" help="min m/z difference for peaks with overlapping RT " />
+                        <param name="integrate" type="select" label="peak limits method" help="1 - peak limits based on smoothed 2nd derivative (less precise)  2 - peak limits based on real data (more sensitive to noise)" >
+                            <option value="1">1</option>
+                            <option value="2">2</option>
+                        </param>
+                        <param name="prefilter" type="text" value="3,100" label="prefilter" help="Prefilter step for the first phase. Enter the two values separated by a comma: k,I. Mass traces are only retained if they contain at least ‘k’ peaks with intensity >= ‘I’"/>
+                        <param name="noise" type="integer" value="0" label="noise filter" help="optional argument which is useful for data that was centroided without any intensity threshold, centroids with intensity smaller than ‘noise’ are omitted from ROI detection"/>
+                    </when>
+                </conditional>
+            </when>
+            <when value="matchedFilter">
+                <param name="step" type="float" value="0.01" label="step" help="the peak detection algorithm creates extracted ion base peak chromatograms (EIBPC) on a fixed step size defined by the step argument" />
+                <param name="fwhm" type="integer" value="30" label="fwhm" help="full width at half maximum" />
+                <conditional name="options_m">
+                    <param name="option" type="select" label="Advanced options" >
+                        <option value="show">show</option>
+                        <option value="hide" selected="true">hide</option>
+                    </param>
+                    <when value="show">
+<!--
+                        <param name="sigma" type="hidden" value="fwhm/2.3548" label="sigma" help="standard deviation (fwhm/2.3548)" />
+-->
+                        <param name="max" type="integer" value="5" label="max" help="maximum number of peaks per extracted ion chromatogram" />
+                        <param name="snthresh" type="integer" value="10" label="snthresh" help="signal to noise ratio cutoff" />
+                        <param name="steps" type="integer" value="2" label="steps" help="the peak identification algorithm combines a given number of EIBPCs prior to filtration and peak detection, as defined by the steps argument" />
+<!--
+                        <param name="mzdiff" type="text" size="20" value="0.8-step*steps" label="m/z difference" help="min m/z difference for peaks with overlapping RT " />
+-->
+                    </when>
+                </conditional>
+            </when>
+            <when value="MSW">
+                <param name="nearbyPeak" type="select" label="nearbyPeak" help="determine whether to include the nearby small peaks of major peaks" >
+                    <option value="TRUE">TRUE</option>
+                    <option value="FALSE">FALSE</option>
+                </param>
+                <param name="winSize_noise" type="integer" value="500" label="winSize.noise" help="The local window size to estimate the noise level" />
+                <param name="snthr" type="integer" value="3" label="snthr" help="SNR (Signal to Noise Ratio) threshold" />
+                <param name="amp_Th" type="float" value="0.002" label="amp.Th" help="the minimum required relative amplitude of the peak (ratio to the maximum of CWT coefficients)" />
+                <param name="scales" type="text" value="seq(1,22,3)" label="scales" help="Scales for the Continuous Wavelet Transform (CWT). Scales are linked to the width of the peaks that are to be detected. Type either seq(from, to, by) for a regular sequence or c(n,n) for an explicit vector of scales." />
+                <param name="SNR_method" type="text" value="data.mean" label="SNR method" help="Method to estimate noise level. Currently, only 95 percentage quantile is supported." />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="output" format="rdata" from_work_dir="xcmsSet.RData" label="${input.name[:-7]}.RData" />
+        <data name="output_info" format="tabular" from_work_dir="sample_info.tab" label="sample_info.tab" />
+        <data name="tics_raw"   format="pdf"   from_work_dir="TICs_raw.pdf"  label="${input.name[:-7]}.TICs_raw.pdf" />
+    </outputs>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <tests>
+    </tests>
+
+    <help>
+**The ms_zip input file**
+
+The input file to the metabolomic workflow is a zip file containing all your conditions as sub-directories, with its extension renamed to ".ms.zip".
+This file can then be loaded in Galaxy using the "Get Data" tool. The file datatype should be "ms_zip".
+
+**What it does?**
+The class of objects used for preprocessing analyte data from multiple LC/MS files is xcmsSet. It stores peak lists and provides methods for grouping and aligning those peaks.
+
+-----
+
+**Parameters**
+
+The default arguments for xcmsSet should work acceptably in most cases. However, there are a number of parameters that may need to be optimized for a particular instrument or group of samples.
+
++ **Profmethod**
+
+  One way of thinking about that process is as a transformation of the data from being separate
+  lists of mass/intensity pairs (one list for each scan) to a matrix with rows representing
+  equally spaced masses and a column for each scan. Data transformed into such a matrix
+  is usually referred to as being in profile mode. To do so, each scan of unequally spaced
+  masses must be mapped onto a column of the final matrix. The algorithm used to do so
+  is selected using the **profmethod** argument and can be either **“bin”**, **“binlin”**, **“binlinbase”**,
+  or **“intlin”**.
+
+  **bin** - bins the intensity into the matrix cell closest to it in mass
+
+  **binlin** - same as bin except that it uses linear interpolation to fill in cells that otherwise would have been left at zero
+
+  **binlinbase** - used when an intensity threshold is set below which no mass/intensity values are recorded in continuum mode and the mass spectral signal falls below that threshold, so that simple linear interpolation will create artificially high background
+
+  **intlin** - uses integration and linear interpolation between mass/intensity pairs to determine the equally spaced intensity values
+
++ **Method**
+
+  **Matched Filter**
+
+  One parameter to consider is the Gaussian model peak width used for matched filtration, an integral part of the peak detection algorithm.
+  For a discussion of how model peak width affects the signal to noise ratio, see Danielsson et al. (2002).
+
+  **centWave**
+
+  This algorithm is most suitable for high resolution LC/{TOF,OrbiTrap,FTICR}-MS data in centroid mode.
+  Due to the fact that peak centroids are used, a binning step is not necessary.
+  The method is capable of detecting close-by-peaks and also overlapping peaks. Some efforts are made to detect the exact peak boundaries to get precise
+  peak integrals.
+
+  **MSW**
+
+  Wavelet based, used for direct infusion data.
+  Continuous wavelet transform (CWT) can be used to locate chromatographic peaks on different scales.
+
+-----
+
+.. class:: infomark
+
+The output file is an xcmsSet.RData file. You can continue your analysis by using it in the **group** tool, which is the recommended next step.
+    </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms/tcis.r	Wed Aug 28 10:48:49 2013 -0400
@@ -0,0 +1,52 @@
+# tcis.r version 20130418
+
+# Build the chromatogram of one raw file as a two-column matrix:
+#   column 1 - the time axis: the file's own scan times, or `rtcor` when given
+#   column 2 - the intensities of rawEIC() taken over the file's whole m/z range
+getTIC <- function(file,rtcor=NULL) {
+  raw <- xcmsRaw(file)
+  time_axis <- if (is.null(rtcor)) raw@scantime else rtcor
+  whole_mz <- range(raw@env$mz)
+  trace <- rawEIC(raw, mzrange = whole_mz)
+  # deparse.level = 0 keeps the matrix free of column names
+  cbind(time_axis, trace$intensity, deparse.level = 0)
+}
+
+##
+##  Overlay the TIC of every raw file in one plot and write it to a PDF.
+##
+##  xcmsSet  object whose filepaths() give the files to read; when NULL the
+##           files are taken from `files` instead
+##  files    files and/or directories (directories are searched recursively
+##           for CDF, NC, XML, mzXML, mzData and mzML files); defaults to the
+##           current working directory
+##  pdfname  name of the PDF file to create
+##  rt       "raw" (default) or "corrected"; "corrected" plots against
+##           xcmsSet@rt$corrected and only has an effect when xcmsSet is given
+##
+##  Returns, invisibly, a list holding one two-column matrix (time, intensity)
+##  per file.
+##
+getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) {
+  # Collapse the choice vector to a single value: with the default left
+  # untouched, `rt == "corrected"` would be a length-2 condition, which `&&`
+  # rejects in recent R versions.
+  rt <- match.arg(rt)
+
+  if (is.null(xcmsSet)) {
+    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]",
+                      "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
+    filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
+    if (is.null(files))
+        files <- getwd()
+    info <- file.info(files)
+    # Directories are expanded to the raw files they contain; plain files are
+    # kept as given.
+    listed <- list.files(files[info$isdir], pattern = filepattern,
+                          recursive = TRUE, full.names = TRUE)
+    files <- c(files[!info$isdir], listed)
+  } else {
+    files <- filepaths(xcmsSet)
+  }
+
+  N <- length(files)
+  if (N == 0)
+    stop("getTICs: no raw data files to plot", call. = FALSE)
+
+  use_corrected <- !is.null(xcmsSet) && rt == "corrected"
+  TIC <- vector("list",N)
+
+  for (i in seq_len(N)) {
+      cat(files[i],"\n")
+      rtcor <- if (use_corrected) xcmsSet@rt$corrected[[i]] else NULL
+      TIC[[i]] <- getTIC(files[i],rtcor=rtcor)
+  }
+
+  pdf(pdfname, width = 16, height = 12)
+  # Close the device even when plotting fails, so the PDF is never left open.
+  on.exit(dev.off(), add = TRUE)
+
+  cols <- rainbow(N)
+  lty <- seq_len(N)
+  pch <- seq_len(N)
+  # Common axes: the overall range of the time and intensity columns.
+  xlim <- range(vapply(TIC, function(x) range(x[,1]), numeric(2)))
+  ylim <- range(vapply(TIC, function(x) range(x[,2]), numeric(2)))
+  plot(0, 0, type="n", xlim = xlim, ylim = ylim, main = "Total Ion Chromatograms", xlab = "Retention Time", ylab = "TIC")
+  for (i in seq_len(N)) {
+    tic <- TIC[[i]]
+    points(tic[,1], tic[,2], col = cols[i], pch = pch[i], type="l")
+  }
+  legend("topright", basename(files), col = cols, lty = lty, pch = pch)
+
+  invisible(TIC)
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms/xcms.r	Wed Aug 28 10:48:49 2013 -0400
@@ -0,0 +1,87 @@
+# xcms.r version 20130418
+
+library(xcms)
+library(batch) #necessary for parseCommandArgs function
+library(CAMERA)
+
+source("/w/galaxy/tools/prod/abims/xcms.20130418/tcis.r")
+
+# Command-line "name value" pairs, turned into an R list
+listArguments <- parseCommandArgs(evaluate=FALSE)
+print(listArguments)
+
+# "image" names the .RData workspace written by a previous tool; loading it
+# restores the objects it holds (among them xset) before anything else runs
+if (!is.null(listArguments[["image"]])) {
+    load(listArguments[["image"]])
+    listArguments[["image"]] <- NULL
+}
+
+# "xfunction" is the name of the function to run; it is taken out of the list
+# so that only real function arguments remain
+thefunction <- listArguments[["xfunction"]]
+listArguments[["xfunction"]] <- NULL
+
+# "input" is the .zip uploaded to Galaxy: zipping lets many files be uploaded
+# at once while keeping their directory tree. It is always extracted; the
+# extracted paths become the first argument only for xcmsSet.
+if (!is.null(listArguments[["input"]])) {
+    directory <- unzip(listArguments[["input"]])
+    if (thefunction == "xcmsSet") {
+        listArguments <- c(list(directory), listArguments)
+    }
+    listArguments[["input"]] <- NULL
+}
+
+# When an xset object exists in the workspace it goes first in the argument list
+if ("xset" %in% ls()) {
+    listArguments <- c(list(xset), listArguments)
+}
+
+# Graphics device with the display size used by the tools
+dev.new(width=16, height=12)
+if (thefunction == "group") {
+    par(mfrow=c(2,2))
+} else if (thefunction == "retcor") {
+# attempt to change the legend display (left disabled)
+#~     par(xpd=NA)
+#~     par(xpd=T, mar=par()$mar+c(0,0,0,4))
+}
+
+##################################
+# Run the requested function.
+#  - diffreport / annotateDiffreport: called once per pair of sample classes,
+#    in the order (1,2), (1,3), ..., (2,3), ...; each call gets class1, class2
+#    and a "<class1>_vs_<class2>" filebase.
+#  - anything else: called once, and its result becomes the new xset.
+if (thefunction %in% c("diffreport", "annotateDiffreport")) {
+    classes <- levels(sampclass(xset))
+    # A comparison needs two classes. Without this check no report would be
+    # computed and the write.table() calls below would work on whatever object
+    # happens to be bound to `diffreport`.
+    if (length(classes) < 2) {
+        stop("diffreport requires at least two sample classes, found ",
+             length(classes), call. = FALSE)
+    }
+    for (i in seq_len(length(classes) - 1)) {
+        for (n in seq_len(length(classes) - i)) {
+            listArguments[["class1"]] <- classes[i]
+            listArguments[["class2"]] <- classes[i+n]
+            listArguments[["filebase"]] <- paste(listArguments[["class1"]],listArguments[["class2"]], sep="_vs_")
+            # workspace snapshot taken right before each call
+            save.image(paste("debug","RData",sep="."))
+            diffreport <- do.call(thefunction, listArguments)
+        }
+    }
+    # Bundle the tsv/box/eic entries of the working directory into one archive
+    system('ls . | grep -e "tsv$" -e "box$" -e "eic$" | zip -r -@ "Xdiffreport.zip" ')
+    # `diffreport` is overwritten on every pass, so the two tables below are
+    # built from the last comparison only
+    write.table(diffreport, sep="\t", quote=FALSE, col.names=NA, file="Xdiffreport.tsv")
+    statmatrix <- diffreport[,(names(diffreport) %in% c("name", sampnames(xset)))]
+    write.table(statmatrix, sep="\t", quote=FALSE, row.names=FALSE, file="Xdiffreport.data_matrix.tsv")
+###
+} else {
+    # execution of the function "thefunction" with the parameters given in "listArguments"
+    xset <- do.call(thefunction, listArguments)
+    print(xset)
+    # strip the Galaxy job working directory prefix so the stored paths are relative
+    xset@filepaths <- sub("^.*/database/job_working_directory/[0123456789]+/[0123456789]+/" ,"", xset@filepaths)
+}
+
+# Extra outputs after peak detection: the sample table and the raw TIC overlay
+if (thefunction == "xcmsSet") {
+    write.table(xset@phenoData, sep="\t", quote=FALSE, col.names=NA, file="sample_info.tab")
+    getTICs(xcmsSet=xset, pdfname="TICs_raw.pdf", rt="raw")
+}
+# Extra output after retention time correction: the corrected TIC overlay
+if (thefunction == "retcor") {
+    getTICs(xcmsSet=xset, pdfname="TICs_corrected.pdf", rt="corrected")
+}
+
+# Drop the argument list so it is not handed to the next tool through the
+# saved workspace
+rm(listArguments)
+
+# Save the workspace as "<function name>.RData" for the tools that follow
+save.image(paste0(thefunction, ".RData"))