Mercurial > repos > galaxyp > thermo_raw_file_converter

<tool id="thermo_raw_file_converter" name="Thermo" version="1.2.3">
    <!-- Short description of what the tool does. -->
    <description>RAW file converter</description>
    <!-- Dependency needed by the command section, which calls ThermoRawFileParser.sh. -->
    <requirements>
        <requirement version="1.2.3" type="package">thermorawfileparser</requirement>
    </requirements>
    <command>
<![CDATA[
#import re

## Stage every RAW input as a symlink under ./raws_folder; the converter is pointed at that folder.
#set $several_inputs = len($input) > 1
mkdir ./raws_folder &&
mkdir ./output_folder &&
#for $raw_dataset in $input
    #if $several_inputs
        ## Name the link after the element identifier: last path component, any ".raw" removed,
        ## ".raw" appended, then every character outside word chars, dash and dot replaced by "_".
        #set $raw_stem = $raw_dataset.element_identifier.split('/')[-1].replace(".raw", "")
        #set $link_name = re.sub('[^\w\-\.]', '_', $raw_stem + ".raw")
    #else
        ## A lone input gets a fixed name so the result files moved below are predictable.
        #set $link_name = 'input.raw'
    #end if
    ln -s -f '${raw_dataset}' './raws_folder/${link_name}' &&
#end for

ThermoRawFileParser.sh
    -d=./raws_folder
    -o=./output_folder
    -f=$output_format
    #if $output_metadata_selector != "off"
        --metadata="${output_metadata_selector}"
    #end if
    $zlib_boolean
    $peakpicking_boolean
    $ignore_instrument_errors_boolean

    #if len($input) == 1
        ## Single-input mode: move the results to the fixed paths the plain data outputs read.
        #if $output_format == "0"
           && mv ./output_folder/input.mgf ./output_file.out
        #else if $output_format == "1" or $output_format == "2"
           && mv ./output_folder/input.mzML ./output_file.out
        #end if

        ## Both metadata flavours end up at the same path.
        #if $output_metadata_selector == "0"
           && mv ./output_folder/input-metadata.json ./input-metadata.txt
        #else if $output_metadata_selector == "1"
           && mv ./output_folder/input-metadata.txt ./input-metadata.txt
        #end if
    #end if
]]>
    </command>
    <inputs>
        <!-- One or more RAW datasets; the output filters switch to collections when several are given. -->
        <param name="input" type="data" format="thermo.raw" multiple="true" optional="false"
            label="Thermo RAW file" help="" />

        <!-- The selected value is passed unchanged to the converter's -f option. -->
        <param name="output_format" type="select" label="Output format">
            <option value="0">mgf</option>
            <option value="1" selected="true">mzml</option>
            <option value="2">Indexed mzml</option>
        </param>

        <!-- Inverted flag: -z is only emitted when the box is unchecked. -->
        <param name="zlib_boolean" type="boolean" checked="true" truevalue="" falsevalue="-z"
            label="Use zlib compression for the m/z ratios and intensities" help="" />

        <!-- Inverted flag: -p is only emitted when the box is unchecked. -->
        <param name="peakpicking_boolean" type="boolean" checked="true" truevalue="" falsevalue="-p"
            label="Use the peak picking provided by the native thermo library" help="" />

        <!-- Direct flag: -e is emitted when the box is checked. -->
        <param name="ignore_instrument_errors_boolean" type="boolean" checked="true" truevalue="-e" falsevalue=""
            label="Ignore missing instrument properties"
            help="If false, it stops the conversion if instrument properties are missing" />

        <!-- "off" suppresses the metadata option entirely; any other value is passed to it as given. -->
        <param name="output_metadata_selector" type="select" label="Output metadata">
            <option value="off" selected="true">No</option>
            <option value="0">json</option>
            <option value="1">txt</option>
        </param>

    </inputs>
    <outputs>

        <!-- We use simple data outputs if we just have one file, for backwards compatibility -->

        <!-- Converted spectra for a single input. The command moves the result to output_file.out. -->
        <data name="output" format="mzml" from_work_dir="output_file.out" label="${tool.name} on ${on_string}">
            <filter>(str(input)).count(',') == 0</filter> <!-- funny way of counting the number of input files! -->
            <change_format>
                <!-- Only mgf needs a different datatype. Plain and indexed mzML (values 1 and 2) both keep
                     the default "mzml", matching the datatype used by the collection outputs below. -->
                <when input="output_format" value="0" format="mgf" />
            </change_format>
        </data>

        <!-- Metadata for a single input. The command moves either flavour to input-metadata.txt,
             so the datatype is switched to json here when that flavour was requested. -->
        <data name="output_metadata" format="txt" label="${tool.name} on ${on_string}: Metadata" from_work_dir="input-metadata.txt">
            <filter>str(output_metadata_selector) != "off"</filter>
            <filter>(str(input)).count(',') == 0</filter>
            <change_format>
                <when input="output_metadata_selector" value="0" format="json" />
            </change_format>
        </data>

        <!-- We use collections if we have multiple input files -->

        <collection name="output_mgf_collection" type="list" label="${tool.name} on ${on_string}: MGF">
            <filter>output_format == "0"</filter>
            <filter>(str(input)).count(',') > 0</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.mgf" directory="output_folder" ext="mgf"/>
        </collection>

        <collection name="output_mzml_collection" type="list" label="${tool.name} on ${on_string}: mzML">
            <filter>output_format == "1"</filter>
            <filter>(str(input)).count(',') > 0</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.mzML" ext="mzml" directory="output_folder"/>
        </collection>

        <collection name="output_indexedmzml_collection" type="list" label="${tool.name} on ${on_string}: Indexed mzML">
            <filter>output_format == "2"</filter>
            <filter>(str(input)).count(',') > 0</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.mzML" ext="mzml" directory="output_folder"/>
        </collection>

        <!-- Metadata files for multiple inputs. The dots before the extensions are escaped so they
             match a literal "." like the spectra patterns above. -->
        <collection name="output_metadata_collection" type="list" label="${tool.name} on ${on_string}: metadata">
            <filter>output_metadata_selector != "off"</filter>
            <filter>(str(input)).count(',') > 0</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)-metadata\.txt" ext="txt" directory="output_folder"/>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)-metadata\.json" ext="json" directory="output_folder"/>
        </collection>

    </outputs>

    <tests>
        <!-- Basic test: one RAW file converted to mzML, compared by approximate size -->
        <test expect_num_outputs="1">
            <param name="input" value="really_small.raw" ftype="thermo.raw"/>
            <param name="output_format" value="1"/>
            <output name="output" file="really_small.mzml" ftype="mzml" compare="sim_size" delta="3000" />
        </test>

        <!-- Testing contents of converted mgf file with txt metadata -->
        <test expect_num_outputs="2">
            <param name="input" value="really_small.raw" ftype="thermo.raw"/>
            <param name="output_format" value="0"/>
            <param name="output_metadata_selector" value="1"/>
            <output name="output" ftype="mgf">
                <assert_contents>
                    <has_text text="SCANS=36"/>
                    <has_text text="RTINSECONDS=73.863181104"/>
                    <has_text text="PEPMASS=675.248779296875"/>
                    <has_text text="CHARGE=2+"/>
                    <has_text text="121.3116455 920.2367553711"/>
                    <has_text text="229.2241211 1137.6958007813"/>
                    <has_text text="1577.8967285 1487.9519042969"/>
                </assert_contents>
            </output>
            <output name="output_metadata" ftype="txt">
                <assert_contents>
                    <has_text text="Instrument model=[MS, MS:1000494, Thermo Scientific instrument model, Orbitrap Fusion]"/>
                    <has_text text="Instrument name=Orbitrap Fusion"/>
                    <has_text text="Instrument serial number=[MS, MS:1000529, instrument serial number, FSN10188]"/>
                    <has_text text="Software version=[NCIT, NCIT:C111093, Software Version, 3.1.2412.17]"/>
                    <has_text text="Mass resolution=[MS, MS:1000011, mass resolution, 0.500]"/>
                    <has_text text="Number of scans=101"/>
                    <has_text text="Scan range=1;101"/>
                    <has_text text="Scan start time=[MS, MS:1000016, scan start time, 0.89]"/>
                    <has_text text="Time range=0.89;1.59"/>
                    <has_text text="Mass range=120.0000;2000.0000"/>
                </assert_contents>
            </output>

        </test>

        <!-- Basic mzml collection test -->
        <test expect_num_outputs="1">
            <param name="input" value="really_small.raw,really_small_2.raw" ftype="thermo.raw"/>
            <param name="output_format" value="1"/>
            <output_collection name="output_mzml_collection" type="list" count="2"/>
        </test>

        <!-- mgf collection test with metadata -->
        <test expect_num_outputs="2">
            <!-- ftype is given explicitly, as in the other tests, rather than relying on datatype detection -->
            <param name="input" value="really_small.raw,really_small_2.raw,really_small_3.raw" ftype="thermo.raw"/>
            <param name="output_format" value="0"/>
            <param name="output_metadata_selector" value="0"/>
            <output_collection name="output_mgf_collection" type="list" count="3"/>
            <output_collection name="output_metadata_collection" type="list" count="3"/>
        </test>
    </tests>
    <!-- Help text for the tool; the body uses reStructuredText-style link markup pointing at the upstream project. -->
    <help>
<![CDATA[

Thermo RAW file converter based on the great `ThermoRawFileParser <https://github.com/compomics/ThermoRawFileParser>`_ project.

]]>
    </help>
    <citations>
        <!-- BibTeX entry keys may not contain whitespace, so the key is written as a single token. -->
        <citation type="bibtex">@misc{GalaxyProteomicsTools,
        author = {Niels Hulstaert, et al.},
        title = {Galaxy Proteomics Tools},
        publisher = {GitHub},
        journal = {GitHub repository},
        year = {2017}, url = {https://github.com/compomics/ThermoRawFileParser}}
        </citation>
    </citations>
</tool>
author	galaxyp
date	Mon, 04 May 2020 12:00:10 +0000
parents	8c71281015ee
children	b0eeb2dba9f7