Mercurial > repos > lecorguille > xcms_xcmsset
diff abims_xcms_xcmsSet.xml @ 31:e93153c07be0 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 73791d74546087b2a872d9279df960f5bc207298
author | lecorguille |
---|---|
date | Tue, 13 Feb 2018 04:42:24 -0500 |
parents | d71827ecd22c |
children | 2bf1cb023c94 |
line wrap: on
line diff
--- a/abims_xcms_xcmsSet.xml Thu Oct 26 11:14:49 2017 -0400 +++ b/abims_xcms_xcmsSet.xml Tue Feb 13 04:42:24 2018 -0500 @@ -1,4 +1,4 @@ -<tool id="abims_xcms_xcmsSet" name="xcms.xcmsSet" version="2.2.0"> +<tool id="abims_xcms_xcmsSet" name="xcms.xcmsSet" version="2.1.1"> <description>Filtration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis </description> <macros> @@ -21,6 +21,9 @@ xsetRdataOutput '$xsetRData' sampleMetadataOutput '$sampleMetadata' + ticspdf '$ticsRawPdf' + bicspdf '$bpcsRawPdf' + #if $options_scanrange.option == "show": scanrange "c($options_scanrange.scanrange)" @@ -56,13 +59,7 @@ scales "c($methods.scales)" SNR.method "$methods.SNR_method" #end if - - #if $input.is_of_type("mzxml") or $input.is_of_type("mzml") or $input.is_of_type("mzdata") or $input.is_of_type("netcdf"): - && mv *-TIC_mqc.out $ticsRawTab - && mv *-BPC_mqc.out $bpcsRawTab - #end if - - @COMMAND_LOG_EXIT@; + @COMMAND_LOG_EXIT@ ]]></command> <inputs> @@ -158,47 +155,39 @@ <param name="SNR_method" type="text" value="data.mean" label="SNR (Signal to Noise Ratio) method" help="[SNR.method] Method to estimate noise level. Currently, only 95 percentage quantile is supported." 
/> </when> </conditional> - - - <expand macro="input_tic_bpc_pdf"/> </inputs> <outputs> <data name="xsetRData" format="rdata.xcms.raw" label="${input.name.rsplit('.',1)[0]}.xset.RData" /> - <data name="log" format="txt" label="${input.name.rsplit('.',1)[0]}.xset.log.txt" /> - - <!-- SINGLE MODE --> - <data name="ticsRawTab" format="tabular" label="${input.name.rsplit('.',1)[0]}.xset.TICs_raw.tabular"> - <filter>input.extension in ["mzxml","mzml","mzdata","netcdf"]</filter> - </data> - <data name="bpcsRawTab" format="tabular" label="${input.name.rsplit('.',1)[0]}.xset.BPCs_raw.tabular"> - <filter>input.extension in ["mzxml","mzml","mzdata","netcdf"]</filter> - </data> - - <!-- ZIP MODE --> <data name="sampleMetadata" format="tabular" label="${input.name.rsplit('.',1)[0]}.sampleMetadata.tsv"> <filter>input.extension not in ["mzxml","mzml","mzdata","netcdf"]</filter> </data> - <data name="ticsRawPdf" format="pdf" from_work_dir="TICs.pdf" label="${input.name.rsplit('.',1)[0]}.xset.TICs_raw.pdf"> - <filter>input.extension not in ["mzxml","mzml","mzdata","netcdf"]</filter> - <filter>tic_bpc_pdf</filter> - </data> - <data name="bpcsRawPdf" format="pdf" from_work_dir="BPCs.pdf" label="${input.name.rsplit('.',1)[0]}.xset.BPCs_raw.pdf"> - <filter>input.extension not in ["mzxml","mzml","mzdata","netcdf"]</filter> - <filter>tic_bpc_pdf</filter> - </data> - <collection name="ticsRawTabCollection" type="list" label="TIC raw tabular"> - <filter>input.extension not in ["mzxml","mzml","mzdata","netcdf"]</filter> - <discover_datasets pattern="(?P<designation>.+)-TIC_mqc\.out" ext="tabular" /> - </collection> - <collection name="bpcsRawTabCollection" type="list" label="BPC raw tabular"> - <filter>input.extension not in ["mzxml","mzml","mzdata","netcdf"]</filter> - <discover_datasets pattern="(?P<designation>.+)-BPC_mqc\.out" ext="tabular" /> - </collection> + <data name="ticsRawPdf" format="pdf" label="${input.name.rsplit('.',1)[0]}.xset.TICs_raw.pdf" /> + <data name="bpcsRawPdf" 
format="pdf" label="${input.name.rsplit('.',1)[0]}.xset.BPCs_raw.pdf" /> + <data name="log" format="txt" label="${input.name.rsplit('.',1)[0]}.xset.log.txt" /> </outputs> <tests> - <!-- + <!--<test> + <param name="input" value="sacuri_dir_root.zip" ftype="zip" /> + <param name="methods|method" value="matchedFilter" /> + <param name="methods|step" value="0.01" /> + <param name="methods|fwhm" value="4" /> + <param name="methods|options_m|option" value="show" /> + <param name="methods|options_m|max" value="50" /> + <param name="methods|options_m|snthresh" value="1" /> + <param name="methods|options_m|steps" value="2" /> + <output name="log"> + <assert_contents> + <has_text text="object with 4 samples" /> + <has_text text="Time range: 0.7-1139.7 seconds (0-19 minutes)" /> + <has_text text="Mass range: 50.0021-999.9863 m/z" /> + <has_text text="Peaks: 59359 (about 14840 per sample)" /> + <has_text text="Peak Groups: 0" /> + <has_text text="Sample classes: bio, blank" /> + </assert_contents> + </output> + </test> <test> <param name="input" value="sacuri_current_root.zip" ftype="zip" /> <param name="methods|method" value="centWave" /> @@ -230,21 +219,56 @@ <has_text text="Sample classes: KO, WT" /> </assert_contents> </output> - <output_collection name="ticsRawTabCollection" type="list"> - <element name="ko15" value="ko15-TIC_mqc.out" /> - <element name="ko16" value="ko16-TIC_mqc.out" /> - <element name="wt15" value="wt15-TIC_mqc.out" /> - <element name="wt16" value="wt16-TIC_mqc.out" /> - </output_collection> - <output_collection name="bpcsRawTabCollection" type="list"> - <element name="ko15" value="ko15-BPC_mqc.out" /> - <element name="ko16" value="ko16-BPC_mqc.out" /> - <element name="wt15" value="wt15-BPC_mqc.out" /> - <element name="wt16" value="wt16-BPC_mqc.out" /> - </output_collection> </test> <!-- Passed but disable to save time for Travis" --> - <!-- + <!--<test> + <param name="input" value="ko15.CDF" ftype="netcdf" /> + <param name="methods|method" 
value="centWave" /> + <param name="methods|ppm" value="25" /> + <param name="methods|peakwidth" value="20,50" /> + <output name="log"> + <assert_contents> + <has_text text="object with 1 samples" /> + <has_text text="Time range: 2506.1-4471.7 seconds (41.8-74.5 minutes)" /> + <has_text text="Mass range: 200.2-600 m/z" /> + <has_text text="Peaks: 2262 (about 2262 per sample)" /> + <has_text text="Peak Groups: 0" /> + <has_text text="Sample classes: ." /> + </assert_contents> + </output> + </test> + <test> + <param name="input" value="ko16.CDF" ftype="netcdf" /> + <param name="methods|method" value="centWave" /> + <param name="methods|ppm" value="25" /> + <param name="methods|peakwidth" value="20,50" /> + <output name="log"> + <assert_contents> + <has_text text="object with 1 samples" /> + <has_text text="Time range: 2521.7-4477.9 seconds (42-74.6 minutes)" /> + <has_text text="Mass range: 200.1-600 m/z" /> + <has_text text="Peaks: 2408 (about 2408 per sample)" /> + <has_text text="Peak Groups: 0" /> + <has_text text="Sample classes: ." /> + </assert_contents> + </output> + </test> + <test> + <param name="input" value="wt15.CDF" ftype="netcdf" /> + <param name="methods|method" value="centWave" /> + <param name="methods|ppm" value="25" /> + <param name="methods|peakwidth" value="20,50" /> + <output name="log"> + <assert_contents> + <has_text text="object with 1 samples" /> + <has_text text="Time range: 2517-4473.2 seconds (42-74.6 minutes)" /> + <has_text text="Mass range: 200.2-599.8 m/z" /> + <has_text text="Peaks: 2278 (about 2278 per sample)" /> + <has_text text="Peak Groups: 0" /> + <has_text text="Sample classes: ." /> + </assert_contents> + </output> + </test> <test> <param name="inputs|input" value="single_file" /> <param name="inputs|single_file" value="wt16.CDF" ftype="netcdf" /> @@ -277,8 +301,6 @@ <has_text text="Sample classes: ." 
/> </assert_contents> </output> - <output name="ticsRawTab" value="HU_neg_017-TIC_mqc.out" /> - <output name="bpcsRawTab" value="HU_neg_017-BPC_mqc.out" /> </test> <test> <param name="input" value="MM14.mzML" ftype="mzxml" /> @@ -295,8 +317,6 @@ <has_text text="Sample classes: ." /> </assert_contents> </output> - <output name="ticsRawTab" value="MM14-TIC_mqc.out" /> - <output name="bpcsRawTab" value="MM14-BPC_mqc.out" /> </test> </tests> @@ -308,6 +328,7 @@ Xcms.xcmsSet ============ +----------- Description ----------- @@ -317,16 +338,18 @@ .. _here: http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToPerformXcmsPreprocessing_v02.pdf +----------------- Workflow position ----------------- **Upstream tools** -+------------------------+--------------------+-----------------------------+-----------+ -| Name | output file | format | parameter | -+------------------------+--------------------+-----------------------------+-----------+ -| Upload File | Dataset Collection | mzXML, mzML, mzData, netCDF | NA | -+------------------------+--------------------+-----------------------------+-----------+ +========================= ================= ======= ========= +Name output file format parameter +========================= ================= ======= ========= +NA NA zip NA +========================= ================= ======= ========= + **Downstream tools** @@ -335,89 +358,90 @@ +===========================+====================+=================+ |xcms.group | xset.RData | rdata.xcms.raw | +---------------------------+--------------------+-----------------+ +|PCA ellipsoid by factors | sampleMetadata.tsv | Tabular | ++---------------------------+--------------------+-----------------+ +|Anova | sampleMetadata.tsv | Tabular | ++---------------------------+--------------------+-----------------+ **Example of a metabolomic workflow** .. image:: xcms_xcmsset_workflow.png + +------ + +.. class:: infomark + +The output file is an xset.RData file. 
You can continue your analysis using it in **xcms.group** tool. + --------------------------------------------------- + + ----------- Input files ----------- -Choose your inputs ------------------- - +---------------------------+----------------------------------+ -| Parameter | Format | +| Parameter : num + label | Format | +===========================+==================================+ +| OR : Zip file | zip | ++---------------------------+----------------------------------+ | OR : Single file | mzXML, mzML, mzData, netCDF | +---------------------------+----------------------------------+ -| OR : Zip file | zip | -+---------------------------+----------------------------------+ + +**Choose your inputs** You have two methods for your inputs: -* Single file (recommended): You can put a single file as input. That way, you will be able to launch several xcmsSet in parallel and use "xcms.xcmsSet Merger" before "xcms.group" - -* Zip file: You can put a zip file containing your inputs: myinputs.zip (containing all your conditions as sub-directories). - -Single file ------------ - -This method is recommended because: - -* Since files are uploaded indivudially, they are smaller. And so they should be able to be uploaded using the Get Data tools. + | Single file (recommended): You can put a single file as input. That way, you will be able to launch several xcmsSet in parallel and use "xcms.xcmsSet Merger" before "xcms.group" + | Zip file: You can put a zip file containing your inputs: myinputs.zip (containing all your conditions as sub-directories). -* It allow you to launch your xcmsSet in parallele. - -You just have to create a Dataset Collection as explain in this video_ - -.. 
_video: http://download.workflow4metabolomics.org/docs/170510_galaxy_xcms_dataset_collection.m4v - - -Zip file --------- - -This method isn't recommended because zip file aren't really well integrated - -Steps for creating the zip file +Zip file: Steps for creating the zip file +----------------------------------------- **Step1: Creating your directory and hierarchize the subdirectories** - | **VERY IMPORTANT**: If you zip your files under Windows, you must use the 7Zip software (http://www.7-zip.org/), otherwise your zip will not be well unzipped on the platform W4M (zip corrupted bug). - | Your zip should contain all your conditions as sub-directories. For example, two conditions (mutant and wild): - | - arabidopsis/wild/01.raw - | - arabidopsis/mutant/01.raw +VERY IMPORTANT: If you zip your files under Windows, you must use the 7Zip software (http://www.7-zip.org/), otherwise your zip will not be well unzipped on the platform W4M (zip corrupted bug). + +Your zip should contain all your conditions as sub-directories. For example, two conditions (mutant and wild): +arabidopsis/wild/01.raw +arabidopsis/mutant/01.raw **Step2: Creating a zip file** - | Create your zip file (e.g.: arabidopsis.zip). +Create your zip file (e.g.: arabidopsis.zip). **Step 3 : Uploading it to our Galaxy server** - | If your zip file is less than 2Gb, you get use the Get Data tool to upload it. - | Otherwise if your zip file is larger than 2Gb, please refer to the HOWTO on workflow4metabolomics.org (http://application.sb-roscoff.fr/download/w4m/howto/galaxy_upload_up_2Go.pdf). - | For more informations, don't hesitate to send us an email at supportATworkflow4metabolomics.org). - | Advices for converting your files for the XCMS input +If your zip file is less than 2Gb, you can use the Get Data tool to upload it.
+ +Otherwise if your zip file is larger than 2Gb, please refer to the HOWTO on workflow4metabolomics.org (http://application.sb-roscoff.fr/download/w4m/howto/galaxy_upload_up_2Go.pdf). -Raw file format ---------------- +For more information, don't hesitate to send us an email at supportATworkflow4metabolomics.org. -We recommend you to convert your raw files to **mzXML** in centroid mode (smaller files) and the files will be compatible with the xcms centWave method. +Advices for converting your files for the XCMS input +---------------------------------------------------- + +We recommend you to convert your raw files to **mzXML** in centroid mode (smaller files) and the files will be compatible with the xcms centWave method. **We recommend you the following parameters:** - | **Use Filtering**: True - | **Use Peak Picking**: True - | **Peak Peaking**: -Apply to MS Levels: All Levels (1-) : Centroid Mode - | **Use zlib**: 64 - | **Binary Encoding**: 64 - | **m/z Encoding**: 64 - | **Intensity Encoding**: 64 +Use Filtering: **True** + +Use Peak Picking: **True** + +Peak Peaking -Apply to MS Levels: **All Levels (1-)** : Centroid Mode + +Use zlib: **64** + +Binary Encoding: **64** + +m/z Encoding: **64** + +Intensity Encoding: **64** ---------- @@ -451,16 +475,28 @@ Output files ------------ +xset.TICs_raw.pdf + + | "Total Ion Chromatograms" graph in pdf format. + +xset.BPCs_raw.pdf + + | "Base Peak Chromatograms" graph in pdf format with each class samples opposed. + +sampleMetadata.tsv + + | Tabular file that contains for each sample, its associated class and polarity (positive,negative and mixed). + | This file is necessary in the Anova and PCA step of the workflow. + xset.RData: rdata.xcms.raw format | Rdata file that is necessary in the second step of the workflow "xcms.group". -@HELP_BCP_TIC@ +------ -sampleMetadata.tsv [if using zip] +..
class:: infomark - | Tabular file that contains for each sample, it's associated class and polarity (positive,negative and mixed). - | This file is necessary in the Batch correction and statictics steps of the workflow. +The output file is an xset.RData file. You can continue your analysis using it in **xcms.group** tool. --------------------------------------------------- @@ -471,20 +507,49 @@ Input files ----------- - | Dataset Collection -> build using this dataset_ - -.. _dataset: http://download.workflow4metabolomics.org/datasets/sacurine-neg-subset_mzXML.zip + | zip_file -> **sacuri.zip** Parameters ---------- - | **Method**: centWave - | **Max tolerated ppm m/z deviation in consecutive scans in ppm**: 25 - | **Min,Max peak width in seconds**: 10,40 - | **Advanced option**: show - | **Signal/Noise threshold**: 50 - | **Minimum difference in m/z for peaks with overlapping retention times**: 1 - | **Prefilter step for the first phase**: 3,100 + | Method -> **matchedFilter** + | step -> **0.01** + | fwhm -> **4** + | Advanced option -> **show** + | max: -> **50** + | snthresh -> **1** + | steps -> **2** + + +Output files +------------ + + | **1) xset.RData: RData file** + + | **2) Example of a sampleMetadata.tsv :** + + ++---------------------------+------------+---------+ +| sampleMetadata | class | polarity| ++===========================+============+=========+ +|HU_neg_017 | bio |negative | ++---------------------------+------------+---------+ +|HU_neg_028 | bio |negative | ++---------------------------+------------+---------+ +|HU_neg_034 | bio |negative | ++---------------------------+------------+---------+ +|Blanc04 | blank |negative | ++---------------------------+------------+---------+ +|Blanc06 | blank |negative | ++---------------------------+------------+---------+ +|Blanc09 | blank |negative | ++---------------------------+------------+---------+ + + + + | **3) Example of xset.TICs_raw.pdf (Total Ion Chromatograms) :** + +.. 
image:: xcms_tics.png --------------------------------------------------- @@ -492,9 +557,9 @@ Changelog/News -------------- -**Version 2.2.0 - 19/10/2017** +**Version 2.1.1 - 29/11/2017** -- NEW: The TIC and BPC is new exported as tabular files to be visualized using MultiQC. +- BUGFIX: To avoid issues with accented letters in the parentFile tag of the mzXML files, we changed a hidden mechanism to LC_ALL=C **Version 2.1.0 - 22/02/2017**