view cwpair2.xml @ 10:b52d6705aed0 draft

Uploaded
author greg
date Wed, 02 Dec 2015 16:14:07 -0500
parents d455f14530dc
children 6383cae47688
line wrap: on
line source

<?xml version="1.0"?>
<tool id="cwpair2" name="CWPair2" version="@WRAPPER_VERSION@.0">
    <!-- Short description of what the tool does. -->
    <description>find matched pairs and unmatched orphans</description>
    <!-- Import the shared macro definitions. The "requirements" macro expanded
         below and the "citations" macro expanded at the end of this tool are
         expected to be defined there (that file is not visible from here). -->
    <macros>
        <import>cwpair2_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Cheetah template that builds the cwpair2.py command line from the tool parameters. -->
    <command>
        <![CDATA[
            ## The script path is quoted so that a tool directory containing
            ## spaces is still passed to python as a single argument.
            python "$__tool_directory__/cwpair2.py"
            ## One --input per selected dataset: its path followed by its hid.
            #for $i in $input:
                 --input "${i}" "${i.hid}"
            #end for
            --up_distance $up_distance
            --down_distance $down_distance
            --method $method
            --binsize $binsize
            --threshold_format $threshold_format_cond.threshold_format
            ## Only the threshold value that matches the selected format is passed.
            #if str($threshold_format_cond.threshold_format) == "absolute_threshold":
                --absolute_threshold $threshold_format_cond.absolute_threshold
            #elif str($threshold_format_cond.threshold_format) == "relative_threshold":
                --relative_threshold $threshold_format_cond.relative_threshold
            #end if
            --output_files $output_files
            --statistics_output "$statistics_output"
        ]]>
    </command>
    <inputs>
        <!-- One or more GFF datasets to search for matched pairs. -->
        <param name="input"
               type="data"
               format="gff"
               multiple="True"
               label="Find matched pairs on" />

        <!-- Upstream / downstream distances from a peak within which a pair is allowed. -->
        <param name="up_distance"
               type="integer"
               value="50"
               min="0"
               label="Distance upstream from a peak to allow a pair" />
        <param name="down_distance"
               type="integer"
               value="100"
               min="0"
               label="Distance downstream from a peak to allow a pair" />

        <!-- Matching strategy; "mode" is preselected. -->
        <param name="method"
               type="select"
               label="Method of finding a match">
            <option value="mode" selected="True">Mode</option>
            <option value="closest">Closest</option>
            <option value="largest">Largest</option>
            <option value="all">All</option>
        </param>

        <!-- NOTE(review): min is 0 although the help text treats 1 as the
             "no bins" value; confirm that the script tolerates a bin width of 0. -->
        <param name="binsize"
               type="integer"
               value="1"
               min="0"
               label="Width of bins for frequency plots and mode calculation"
               help="Value 1 implies no bins" />

        <!-- Filtering threshold: the selected format decides which of the two
             float parameters is shown and passed on the command line. -->
        <conditional name="threshold_format_cond">
            <param name="threshold_format"
                   type="select"
                   label="Filter using">
                <option value="relative_threshold" selected="True">Relative threshold</option>
                <option value="absolute_threshold">Absolute threshold</option>
            </param>
            <when value="relative_threshold">
                <param name="relative_threshold"
                       type="float"
                       value="0.0"
                       min="0.0"
                       label="Percentage of the 95 percentile value to filter below"
                       help="Value 0 results in no filtering" />
            </when>
            <when value="absolute_threshold">
                <param name="absolute_threshold"
                       type="float"
                       value="0.0"
                       min="0.0"
                       label="Absolute value to filter below" />
            </when>
        </conditional>

        <!-- Selects which output collections are produced; the filters in the
             outputs section test this value. -->
        <param name="output_files"
               type="select"
               label="Restrict output to"
               help="Statistics will always be generated.">
            <option value="all" selected="True">no restrictions (output everything: C,D,F,O,P,MP)</option>
            <option value="matched_pair">matched pairs only (MP)</option>
            <option value="matched_pair_orphan">matched pairs and orphans only (O,MP)</option>
            <option value="matched_pair_orphan_detail">matched pairs, orphans and details only (D,O,MP)</option>
        </param>
    </inputs>
    <outputs>
        <!-- Statistics table: declared without a filter, so it is always produced. -->
        <data name="statistics_output"
              format="tabular"
              label="Statistics Table: ${tool.name} on ${on_string}" />

        <!-- Histogram PDFs discovered in directory "H"; only when output is unrestricted. -->
        <collection name="H"
                    type="list"
                    label="Statistics Histogram: ${tool.name} on ${on_string}">
            <filter>output_files == "all"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)"
                               directory="H"
                               ext="pdf"
                               visible="false" />
        </collection>

        <!-- Detail tables discovered in directory "data_D". -->
        <collection name="D"
                    type="list"
                    label="Data D: ${tool.name} on ${on_string}">
            <filter>output_files in ["all", "matched_pair_orphan_detail"]</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)"
                               directory="data_D"
                               ext="tabular"
                               visible="false" />
        </collection>

        <!-- Orphan tables discovered in directory "data_O". -->
        <collection name="O"
                    type="list"
                    label="Data O: ${tool.name} on ${on_string}">
            <filter>output_files in ["all", "matched_pair_orphan", "matched_pair_orphan_detail"]</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)"
                               directory="data_O"
                               ext="tabular"
                               visible="false" />
        </collection>

        <!-- Matched-pair GFF files discovered in directory "data_MP"; no filter,
             so this collection is declared for every output_files choice. -->
        <collection name="MP"
                    type="list"
                    label="Data MP: ${tool.name} on ${on_string}">
            <discover_datasets pattern="(?P&lt;designation&gt;.*)"
                               directory="data_MP"
                               ext="gff"
                               visible="false" />
        </collection>
    </outputs>
    <tests>
        <!-- Test 1: run every method (method="all") with output restricted to
             matched pairs, so only the statistics table and the MP collection
             are checked. -->
        <test>
            <param name="input" value="cwpair2_input1.gff" />
            <param name="up_distance" value="25" />
            <param name="down_distance" value="100" />
            <param name="method" value="all" />
            <param name="binsize" value="1" />
            <param name="threshold_format" value="relative_threshold" />
            <param name="relative_threshold" value="0.0" />
            <param name="output_files" value="matched_pair" />
            <output name="statistics_output" file="statistics1.tabular" ftype="tabular" />
            <output_collection name="MP" type="list">
                <!-- Each element needs its own identifier. The names follow the
                     data_MP_METHOD_... pattern asserted by the second test, with this
                     test's up_distance of 25.
                     NOTE(review): confirm against the names the script actually writes. -->
                <element name="data_MP_closest_f0u25d100_on_data_1" file="closest_mp_output1.gff" ftype="gff" />
                <element name="data_MP_largest_f0u25d100_on_data_1" file="largest_mp_output1.gff" ftype="gff" />
                <element name="data_MP_mode_f0u25d100_on_data_1" file="mode_mp_output1.gff" ftype="gff" />
            </output_collection>
        </test>
        <!-- Test 2: run every method (method="all") with unrestricted output, so
             the statistics table and all four collections (H, D, O, MP) are checked. -->
        <test>
            <param name="input" value="cwpair2_input1.gff" />
            <param name="up_distance" value="50" />
            <param name="down_distance" value="100" />
            <param name="method" value="all" />
            <param name="binsize" value="1" />
            <param name="threshold_format" value="relative_threshold" />
            <param name="relative_threshold" value="0.0" />
            <param name="output_files" value="all" />

            <output name="statistics_output" file="statistics2.tabular" ftype="tabular" />

            <!-- Histogram PDFs are compared with sim_size rather than by exact content. -->
            <output_collection name="H" type="list">
                <element name="histogram_C_mode_f0u50d100_on_data_1"
                         file="mode_c_output2.pdf" ftype="pdf" compare="sim_size" />
                <element name="histogram_F_closest_f0u50d100_on_data_1"
                         file="closest_f_output2.pdf" ftype="pdf" compare="sim_size" />
                <element name="histogram_F_largest_f0u50d100_on_data_1"
                         file="largest_f_output2.pdf" ftype="pdf" compare="sim_size" />
                <element name="histogram_F_mode_f0u50d100_on_data_1"
                         file="mode_f_output2.pdf" ftype="pdf" compare="sim_size" />
                <element name="histogram_P_mode_f0u50d100_on_data_1"
                         file="mode_p_output2.pdf" ftype="pdf" compare="sim_size" />
            </output_collection>

            <!-- Detail tables, one per method. -->
            <output_collection name="D" type="list">
                <element name="data_D_closest_f0u50d100_on_data_1"
                         file="closest_d_output2.tabular" ftype="tabular" />
                <element name="data_D_largest_f0u50d100_on_data_1"
                         file="largest_d_output2.tabular" ftype="tabular" />
                <element name="data_D_mode_f0u50d100_on_data_1"
                         file="mode_d_output2.tabular" ftype="tabular" />
            </output_collection>

            <!-- Orphan tables, one per method. -->
            <output_collection name="O" type="list">
                <element name="data_O_closest_f0u50d100_on_data_1"
                         file="closest_o_output2.tabular" ftype="tabular" />
                <element name="data_O_largest_f0u50d100_on_data_1"
                         file="largest_o_output2.tabular" ftype="tabular" />
                <element name="data_O_mode_f0u50d100_on_data_1"
                         file="mode_o_output2.tabular" ftype="tabular" />
            </output_collection>

            <!-- Matched-pair GFF files, one per method. -->
            <output_collection name="MP" type="list">
                <element name="data_MP_closest_f0u50d100_on_data_1"
                         file="closest_mp_output2.gff" ftype="gff" />
                <element name="data_MP_largest_f0u50d100_on_data_1"
                         file="largest_mp_output2.gff" ftype="gff" />
                <element name="data_MP_mode_f0u50d100_on_data_1"
                         file="mode_mp_output2.gff" ftype="gff" />
            </output_collection>
        </test>
    </tests>
    <help>
**What it does**

Takes a list of called peaks on both strands and produces lists of matched pairs and unmatched peaks using a
specified method for finding matched pairs.  Methods for finding matched pairs are mode, closest, largest or
all (where the analysis is run for each method).  A statistics dataset is generated and a collection of datasets
is produced for each method as follows.

**Data Files**

* **closest/largest/mode MP** - the Matched Pairs in gff format
* **closest/largest/mode O** - the Orphans in tabular format
* **closest/largest/mode D** - the Details in tabular format

**Statistics Files**

* **closest/largest/mode C** - the statistics graph in pdf format
* **closest/largest/mode P** - the preview plots graph in pdf format
* **closest/largest/mode F** - the final plots graph in pdf format

-----

**Options**

* **Method of finding a match** - Method of finding a matched pair: mode, closest, largest, or all (run with each method).
* **Distance upstream from a peak to allow a pair** - Distance upstream from a Watson peak to allow a Crick pair.
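* **Distance downstream from a peak to allow a pair** - Distance downstream from a Watson peak to allow a Crick pair.
* **Width of bins for frequency plots and mode calculation** - Bin width used for the frequency plots and the mode calculation; a value of 1 implies no bins.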
* **Percentage of the 95 percentile value to filter below** - Percentage of the 95 percentile value below which to filter when using a relative threshold.
* **Absolute value to filter below** - Absolute value below which to filter when using an absolute threshold.
* **Restrict output to** - Restrict output dataset collections to matched pairs only or one of several combinations of collection types.  The statistics dataset is always generated.

    </help>
    <expand macro="citations" />
</tool>