view sample_filter.xml @ 0:a338f7319055 draft default tip

"planemo upload for repository https://github.com/computational-metabolomics/dimspy-galaxy commit 80069808371b58f45da0c8133c27d67ac1a5b448"
author computational-metabolomics
date Wed, 17 Feb 2021 10:49:33 +0000
parents
children
line wrap: on
line source

<tool id="dimspy_sample_filter" name="Sample Filter" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
    <description>Remove peaks that fail to appear in x-out-of-n (biological) samples</description>
    <!-- Shared definitions used by the expand elements of this tool (requirements, inputs, outputs, citations) are imported from macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Command template (Cheetah). Runs "dimspy sample-filter" on the input
        HDF5 peak matrix and, when requested through the hdf5_to_txt
        parameters, chains the text-export commands with a shell AND operator.
        A non-zero exit code marks the job as failed.
    -->
    <command detect_errors="exit_code">
    <![CDATA[
        dimspy sample-filter
        --input '$hdf5_file_in'
        --output '$hdf5_file_out'
        --min-fraction $min_fraction
        ## Boolean flag: renders as its truevalue or as an empty string.
        ${within}
        ## Optional RSD filter, skipped while the value is empty.
        ## Quoted because the parameter is free text without a validator.
        #if $rsd_threshold
            --rsd-threshold '$rsd_threshold'
        #end if
        ## Optional QC class label, skipped while the value is empty.
        #if $qc_label
            --qc-label '$qc_label'
        #end if
        ## Optional export of the filtered matrix to a plain text table.
        #if $hdf5_to_txt.standard
            &&
            @HDF5_PM_TO_TXT@
        #end if
        ## Optional export of the comprehensive text table.
        #if $hdf5_to_txt.comprehensive
            &&
            @HDF5_PM_TO_TXT_COMPREHENSIVE@
        #end if
    ]]>
    </command>
    <!--
        Tool inputs. rsd_threshold, qc_label and delimiter are hidden
        parameters with fixed defaults, so they do not appear in the tool form.
        With their empty defaults the matching conditional branches of the
        command template are skipped.
    -->
    <inputs>
        <param name="hdf5_file_in" argument="--input" type="data" format="h5" label="Peak Intensity Matrix (HDF5 file)" help="" />
        <param name="within" argument="--within" type="boolean" checked="false" truevalue="--within" falsevalue="" label="Apply sample filter within each sample class."  help="" />
        <param name="min_fraction" argument="--min-fraction" type="float" min="0" max="1.0" value="0.5" label="Minimum fraction" help="Minimum fraction (i.e. percentage) of samples a peak has to be present" />
        <param name="rsd_threshold" type="hidden" value="" label="Relative standard deviation threshold" help="Leave empty to skip this step" argument="--rsd-threshold"/>
        <!-- The pattern is anchored at both ends: regex validators only match from the start of the value, so an unanchored "zero or more" pattern would accept any string. -->
        <param name="qc_label" argument="--qc-label" type="hidden" value="" label="Label for the QC sample (RSD filter only)" help="Peaks that have a larger RSD than the QC peaks are removed (Leave empty to skip this step)." >
            <validator type="regex" message="Value may include alphanumeric characters, underscores, dashes and spaces.">^[A-Za-z0-9_\- ]*$</validator>
        </param>
        <!-- Not referenced directly in the command template of this file; presumably consumed by the text-export macro tokens (confirm in macros.xml). -->
        <param name="delimiter" argument="--delimiter" type="hidden" value="tab" label="" help=""/>
        <expand macro="hdf5_pm_to_txt" />
    </inputs>
    <!-- Output datasets are supplied by a shared macro. The command template and the tests refer to hdf5_file_out, matrix_file_out and matrix_comprehensive_file_out, which are presumably declared there (confirm in macros.xml). -->
    <outputs>
        <expand macro="outputs_peak_intensity_matrix" />
    </outputs>
    <!--
        Functional tests. The boolean "within" parameter is set with the
        canonical true/false values rather than its truevalue/falsevalue
        strings, so every Galaxy version and the linter interpret it the same way.
    -->
    <tests>
        <!-- Filter across all classes; text export with samples in rows, intensity values. -->
        <test>
            <param name="hdf5_file_in" value="pm_as_bf.h5" ftype="h5"/>
            <param name="within" value="false"/>
            <param name="min_fraction" value="0.5"/>
            <param name="rsd_threshold" value=""/>
            <param name="qc_label" value=""/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt">
                <param name="standard" value="True"/>
                <param name="comprehensive" value="True"/>
                <param name="representation_samples" value="rows"/>
                <param name="matrix_attr" value="intensity"/>
            </conditional>
            <output name="hdf5_file_out" file="pm_as_bf_sf.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as_bf_sf.txt" ftype="tsv" compare="sim_size"/>
            <output name="matrix_comprehensive_file_out" file="peak_matrix_as_bf_sf_compr.txt" ftype="tsv"/>
        </test>
        <!-- Filter across all classes; text export with samples in columns, m/z values. -->
        <test>
            <param name="hdf5_file_in" value="pm_as_bf.h5" ftype="h5"/>
            <param name="within" value="false"/>
            <param name="min_fraction" value="0.5"/>
            <param name="rsd_threshold" value=""/>
            <param name="qc_label" value=""/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt">
                <param name="standard" value="True"/>
                <param name="comprehensive" value="True"/>
                <param name="representation_samples" value="columns"/>
                <param name="matrix_attr" value="mz"/>
            </conditional>
            <output name="hdf5_file_out" file="pm_as_bf_sf.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as_bf_sf_mz.txt" ftype="tsv"/>
            <output name="matrix_comprehensive_file_out" file="peak_matrix_as_bf_sf_mz_compr.txt" ftype="tsv"/>
        </test>
        <!-- Filter within each sample class; text export with samples in rows, intensity values. -->
        <test>
            <param name="hdf5_file_in" value="pm_as_bf.h5" ftype="h5"/>
            <param name="within" value="true"/>
            <param name="min_fraction" value="0.5"/>
            <param name="rsd_threshold" value=""/>
            <param name="qc_label" value=""/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt">
                <param name="standard" value="True"/>
                <param name="comprehensive" value="True"/>
                <param name="representation_samples" value="rows"/>
                <param name="matrix_attr" value="intensity"/>
            </conditional>
            <output name="hdf5_file_out" file="pm_as_bf_sf_within.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as_bf_sf_within.txt" ftype="tsv"/>
            <output name="matrix_comprehensive_file_out" file="peak_matrix_as_bf_sf_within_compr.txt" ftype="tsv"/>
        </test>
    </tests>
    <help>
-------------
Sample Filter
-------------

..

----------------------------------------------

Description
-----------

Standard DIMS processing workflow: Process Scans -> [Replicate Filter] -> Align Samples -> Blank Filter -> **Sample Filter** -> [Missing values sample filter] -> Pre-processing -> Statistics

|

There are many and varied reasons why a peak may not have been detected in all study samples, including: an intensity (concentration) close to the signal-to-noise limit of the system; presence in only one of the study classes (e.g. a drug administered to the ‘treatment’ class samples); ion suppression/enhancement effects in the mass spectrometer source region; etc. The Sample Filter tool allows users to remove peaks from the input peak intensity matrix that were detected in fewer than a user-defined minimum fraction of study samples.

------------------------------------

Parameters
-----------

**Peak Intensity Matrix (HDF5 file)** (REQUIRED) - an HDF5 file containing the peak intensity matrix to undergo sample filtering.


**Apply sample filter within each sample class** (REQUIRED; default **No**) 

    - **No** - check across ALL classes simultaneously whether at least the user-defined “Minimum fraction” of samples contained an intensity value for a specific mass spectral peak.

    - **Yes** - check within EACH class separately whether at least the user-defined “Minimum fraction” of samples contained an intensity value for a specific mass spectral peak.

      **IMPORTANT**: if in ANY class a peak is detected in at least the user-defined minimum fraction of samples, then the peak is retained in the output peak matrix. For classes in which this condition is not met, the peak intensity recorded for that peak (if any) will still be presented in the output peak matrix. If no peak intensity was recorded in a sample, then a ‘0’ is inserted into the peak matrix.


**Minimum fraction** (REQUIRED, default 0.5) - a numeric value between 0 and 1 indicating the proportion of study samples in which a peak must have a recorded intensity value in order for it to be retained in the output peak intensity matrix; e.g. 0.5 means that at least 50% of samples (whether assessed across all classes, or within each class individually) must have a recorded intensity value for a specific peak in order for it to be retained in the output peak matrix.

|

@help_options_addtional_output@

-------------------------------


Output file(s)
--------------

**IMPORTANT** - in all outputs except for the (optional) comprehensive output, if fewer than the user-defined “Minimum fraction” of samples (whether assessed within a class, or across all classes) had a recorded intensity value for a given peak, then that peak will be removed from the output peak matrix. Note that when assessing within classes, only one class needs to have recorded intensity values in at least the "Minimum fraction" of samples in order for the peak to be retained in the output peak matrix.

@help_outputs_matrix@

-------------------------------------

@github_developers_contributors@
@license@
    </help>
    <expand macro="citations" />
</tool>