view thermo_converter.xml @ 8:b0eeb2dba9f7 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/ThermoRawfileParser commit a502d3fbf1f1ecfca1c9b115beab9d9c0e358fb5"
author galaxyp
date Sat, 25 Jul 2020 20:26:19 +0000
parents 0948d4618f3d
children d160679217db
line wrap: on
line source

<tool id="thermo_raw_file_converter" name="Thermo" version="1.2.3+galaxy0">
    <description>RAW file converter</description>
    <!-- Package dependency; its version matches the upstream part of the tool
         version (the part before "+galaxy"). -->
    <requirements>
        <requirement version="1.2.3" type="package">thermorawfileparser</requirement>
    </requirements>
    <command>
<![CDATA[
#import re

## Stage every input RAW file under a sanitized name inside ./raws_folder and
## convert the whole directory with one ThermoRawFileParser call; results are
## written to ./output_folder, where the output collections are discovered.
mkdir ./raws_folder &&
mkdir ./output_folder &&
#for $input_raw in $input_files:
    ## Base name: last path component of the element identifier.
    #set $raw_basename = $input_raw.element_identifier.split('/')[-1]
    ## Strip only a trailing ".raw" (any case). A ".raw" elsewhere in the name
    ## is kept, so names such as "my.rawdata.raw" are not mangled.
    #if $raw_basename.lower().endswith('.raw'):
        #set $raw_basename = $raw_basename[:-4]
    #end if
    ## Replace everything except word characters, "-" and "." with "_" and
    ## re-append a single lower-case ".raw" extension.
    #set $input_name = re.sub('[^\w\-\.]', '_', $raw_basename + '.raw')
    ln -s -f '${input_raw}' './raws_folder/${input_name}' &&
#end for

## The three boolean parameters expand to their flag (-z, -p, -e) or to an
## empty string, as defined by truevalue/falsevalue in the inputs section.
ThermoRawFileParser.sh
    -d=./raws_folder
    -o=./output_folder
    -f=$output_format
    #if $output_metadata_selector != "off":
        --metadata="${output_metadata_selector}"
    #end if
    $zlib_boolean
    $peakpicking_boolean
    $ignore_instrument_errors_boolean

]]>
    </command>
    <inputs>
        <!-- One or more RAW datasets; all of them are converted in the same job. -->
        <param name="input_files"
               type="data"
               format="thermo.raw"
               multiple="true"
               optional="False"
               label="Thermo RAW file"
               help="" />

        <!-- The selected value is passed verbatim to the converter's -f option. -->
        <param type="select" name="output_format" label="Output format">
            <option value="0">mgf</option>
            <option selected="True" value="1">mzml</option>
            <option value="2">Indexed mzml</option>
        </param>

        <!-- Inverted flag: checked emits nothing, unchecked emits -z. -->
        <param name="zlib_boolean"
               type="boolean"
               checked="true"
               truevalue=""
               falsevalue="-z"
               label="Use zlib compression for the m/z ratios and intensities"
               help="" />

        <!-- Inverted flag: checked emits nothing, unchecked emits -p. -->
        <param name="peakpicking_boolean"
               type="boolean"
               checked="true"
               truevalue=""
               falsevalue="-p"
               label="Use the peak picking provided by the native thermo library"
               help="" />

        <!-- Direct flag: checked emits -e, unchecked emits nothing. -->
        <param name="ignore_instrument_errors_boolean"
               type="boolean"
               checked="true"
               truevalue="-e"
               falsevalue=""
               label="Ignore missing instrument properties"
               help="If false, it stops the conversion if instrument properties are missing" />

        <!-- "off" suppresses the metadata option on the command line; any other
             value is passed through as the metadata format code. -->
        <param type="select" name="output_metadata_selector" label="Output metadata">
            <option selected="True" value="off">No</option>
            <option value="0">json</option>
            <option value="1">txt</option>
        </param>

    </inputs>
    <outputs>

        <!-- Exactly one of the three format collections is created, selected by
             output_format; files are discovered in output_folder and the part of
             the file name before the extension becomes the element identifier. -->
        <collection name="output_mgf_collection" type="list" label="${tool.name} on ${on_string}: MGF collection">
            <filter>output_format == "0"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.mgf" directory="output_folder" ext="mgf"/>
        </collection>

        <collection name="output_mzml_collection" type="list" label="${tool.name} on ${on_string}: mzML collection">
            <filter>output_format == "1"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.mzML" directory="output_folder" ext="mzml"/>
        </collection>

        <collection name="output_indexedmzml_collection" type="list" label="${tool.name} on ${on_string}: Indexed mzML collection">
            <filter>output_format == "2"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.mzML" directory="output_folder" ext="mzml"/>
        </collection>

        <!-- Metadata collection, present only when metadata output is requested.
             The dot before the extension is escaped so that it matches a literal
             "." only, consistent with the format patterns above. -->
        <collection name="output_metadata_collection" type="list" label="${tool.name} on ${on_string}: metadata collection">
            <filter>output_metadata_selector != "off"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)-metadata\.txt" directory="output_folder" ext="txt"/>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)-metadata\.json" directory="output_folder" ext="json"/>
        </collection>

    </outputs>

    <tests>
        <!-- Basic mgf test - 1 file -->
        <test expect_num_outputs="1">
            <param name="input_files" value="really_small.raw" ftype="thermo.raw"/>
            <param name="output_format" value="0"/>
            <output_collection name="output_mgf_collection" type="list" count="1"/>
        </test>

        <!-- Basic mzml test - 2 files -->
        <test expect_num_outputs="1">
            <param name="input_files" value="really_small.raw,really_small_2.raw" ftype="thermo.raw"/>
            <param name="output_format" value="1"/>
            <output_collection name="output_mzml_collection" type="list" count="2"/>
        </test>

        <!-- Testing contents of converted mgf file with txt metadata -->
        <test expect_num_outputs="2">
            <param name="input_files" value="really_small.raw" ftype="thermo.raw"/>
            <param name="output_format" value="0"/>
            <param name="output_metadata_selector" value="1"/>
            <output_collection name="output_mgf_collection" type="list" count="1">
                <element name="really_small" ftype="mgf">
                    <assert_contents>
                        <has_text text="SCANS=36"/>
                        <has_text text="RTINSECONDS=73.863181104"/>
                        <has_text text="PEPMASS=675.248779296875"/>
                        <has_text text="CHARGE=2+"/>
                        <has_text text="121.3116455 920.2367553711"/>
                        <has_text text="229.2241211 1137.6958007813"/>
                        <has_text text="1577.8967285 1487.9519042969"/>
                    </assert_contents>
                </element>
            </output_collection>
            <!-- One input file yields exactly one metadata element. -->
            <output_collection name="output_metadata_collection" type="list" count="1">
                <element name="really_small" ftype="txt">
                    <assert_contents>
                        <has_text text="Instrument model=[MS, MS:1000494, Thermo Scientific instrument model, Orbitrap Fusion]"/>
                        <has_text text="Instrument name=Orbitrap Fusion"/>
                        <has_text text="Instrument serial number=[MS, MS:1000529, instrument serial number, FSN10188]"/>
                        <has_text text="Software version=[NCIT, NCIT:C111093, Software Version, 3.1.2412.17]"/>
                        <has_text text="Mass resolution=[MS, MS:1000011, mass resolution, 0.500]"/>
                        <has_text text="Number of scans=101"/>
                        <has_text text="Scan range=1;101"/>
                        <has_text text="Scan start time=[MS, MS:1000016, scan start time, 0.89]"/>
                        <has_text text="Time range=0.89;1.59"/>
                        <has_text text="Mass range=120.0000;2000.0000"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>

        <!-- mgf collection test with json metadata - 3 files -->
        <test expect_num_outputs="2">
            <!-- ftype is set explicitly, as in the other tests, so the inputs do
                 not depend on datatype auto-detection at upload. -->
            <param name="input_files" value="really_small.raw,really_small_2.raw,really_small_3.raw" ftype="thermo.raw"/>
            <param name="output_format" value="0"/>
            <param name="output_metadata_selector" value="0"/>
            <output_collection name="output_mgf_collection" type="list" count="3"/>
            <output_collection name="output_metadata_collection" type="list" count="3"/>
        </test>
    </tests>
    <help>
<![CDATA[

Thermo RAW file converter based on the great `ThermoRawFileParser <https://github.com/compomics/ThermoRawFileParser>`_ project.

**Inputs**

One or more Thermo RAW files. All selected files are converted in a single run.

**Options**

- **Output format**: mgf, mzML or indexed mzML.
- **Use zlib compression for the m/z ratios and intensities**: enabled by default.
- **Use the peak picking provided by the native thermo library**: enabled by default.
- **Ignore missing instrument properties**: enabled by default. If disabled, the conversion stops when instrument properties are missing.
- **Output metadata**: optionally write a metadata file (json or txt) for each converted RAW file.

**Outputs**

A list collection with one converted file per input RAW file. Each element is named after its input file without the ``.raw`` extension. If metadata output is enabled, a second list collection with one metadata file per input RAW file is created.

]]>
    </help>
    <citations>
        <!-- ThermoRawFileParser publication (J. Proteome Res. 2020). -->
        <citation type="doi">10.1021/acs.jproteome.9b00328</citation>
        <!-- Source repository of the wrapped converter. The BibTeX key must not
             contain spaces and the author list uses BibTeX name syntax. -->
        <citation type="bibtex">@misc{thermorawfileparser_github,
        author = {Hulstaert, Niels and others},
        title = {ThermoRawFileParser},
        publisher = {GitHub},
        journal = {GitHub repository},
        year = {2017}, url = {https://github.com/compomics/ThermoRawFileParser}}
        </citation>
    </citations>
</tool>