<!--
view join.xml @ 5:ddf5484243d2 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/join commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
author devteam
date Thu, 22 Jun 2017 18:50:39 -0400
parents 860e7e4899b1
children
line wrap: on
line source
-->

<tool id="gops_join_1" name="Join" version="1.0.0">
    <description>the intervals of two datasets side-by-side</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <code file="operation_filter.py"/>
    <!--
        Command line for gops_join.py, which is looked up in this tool's own
        directory.
        Positional arguments, in order: first input dataset, second input
        dataset, output dataset.
          -1 / -2  comma-separated chromCol,startCol,endCol,strandCol values
                   taken from the metadata of input1 / input2 respectively
          -m       value of the "min" parameter
          -f       value of the "fill" select (none, right, left or both)
        NOTE(review): how gops_join.py interprets these flags is not visible
        from this file; confirm against the script before changing the flags
        or their order.
    -->
    <command><![CDATA[
python '$__tool_directory__/gops_join.py'
'$input1'
'$input2'
'$output'
-1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}
-2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol}
-m $min
-f $fill
    ]]></command>
    <!-- User-facing parameters; each one is substituted into the command template by name. -->
    <inputs>
        <!-- The two interval datasets to join. Their column metadata feeds the -1 and -2 arguments. -->
        <param name="input1"
               type="data"
               format="interval"
               label="Join"
               help="First dataset"/>
        <param name="input2"
               type="data"
               format="interval"
               label="With"
               help="Second dataset"/>

        <!-- Minimum overlap, passed to the script as -m. Defaults to 1. -->
        <param name="min"
               type="integer"
               value="1"
               label="With min overlap"
               help="(bp)"/>

        <!--
            Join mode, passed to the script as -f.
            NOTE(review): judging by the labels, the option values appear to
            name the side that is padded with "." rather than the dataset that
            is kept: "right" is labelled as keeping every record of the first
            dataset, and "left" every record of the second. Confirm against
            gops_join.py before "correcting" what looks like a swap.
        -->
        <param name="fill"
               type="select"
               label="Return">
            <option value="none">Only records that are joined (INNER JOIN)</option>
            <option value="right">All records of first dataset (fill null with ".")</option>
            <option value="left">All records of second dataset (fill null with ".")</option>
            <option value="both">All records of both datasets (fill nulls with ".")</option>
        </param>
    </inputs>
    <!-- Single result dataset; both its format and its metadata are sourced from input1. -->
    <outputs>
        <data name="output"
              format_source="input1"
              metadata_source="input1"/>
    </outputs>
    <!--
        Functional tests. Every case joins 1.bed with 2.bed and varies only
        the fill mode and the minimum overlap; the result is compared with a
        stored expected file.
    -->
    <tests>
        <!-- fill=none at the default minimum overlap of 1 -->
        <test>
            <param name="input1" value="1.bed"/>
            <param name="input2" value="2.bed"/>
            <param name="min" value="1"/>
            <param name="fill" value="none"/>
            <output name="output" file="gops-join-none.dat"/>
        </test>

        <!-- fill=right (labelled "All records of first dataset"), minimum overlap 1 -->
        <test>
            <param name="input1" value="1.bed"/>
            <param name="input2" value="2.bed"/>
            <param name="min" value="1"/>
            <param name="fill" value="right"/>
            <output name="output" file="gops-join-right.dat"/>
        </test>

        <!-- fill=left (labelled "All records of second dataset"), minimum overlap 1 -->
        <test>
            <param name="input1" value="1.bed"/>
            <param name="input2" value="2.bed"/>
            <param name="min" value="1"/>
            <param name="fill" value="left"/>
            <output name="output" file="gops-join-left.dat"/>
        </test>

        <!-- fill=both (labelled "All records of both datasets"), minimum overlap 1 -->
        <test>
            <param name="input1" value="1.bed"/>
            <param name="input2" value="2.bed"/>
            <param name="min" value="1"/>
            <param name="fill" value="both"/>
            <output name="output" file="gops-join-both.dat"/>
        </test>

        <!-- fill=none with the minimum overlap raised to 500 -->
        <test>
            <param name="input1" value="1.bed"/>
            <param name="input2" value="2.bed"/>
            <param name="min" value="500"/>
            <param name="fill" value="none"/>
            <output name="output" file="gops-join-none-500.dat"/>
        </test>

        <!-- fill=both with the minimum overlap raised to 100 -->
        <test>
            <param name="input1" value="1.bed"/>
            <param name="input2" value="2.bed"/>
            <param name="min" value="100"/>
            <param name="fill" value="both"/>
            <output name="output" file="gops-join-both-100.dat"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.

@SCREENCASTS@

**Syntax**

- **With min overlap** specifies the minimum overlap (in bp) between intervals that allows them to be joined.
- **Return only records that are joined** returns only the records of the first dataset that join to a record in the second dataset. This is analogous to an INNER JOIN.
- **Return all records of first dataset (fill null with ".")** returns all intervals of the first dataset, and any intervals that do not join an interval from the second dataset are filled in with a period (.). This is analogous to a LEFT JOIN.
- **Return all records of second dataset (fill null with ".")** returns all intervals of the second dataset, and any intervals that do not join an interval from the first dataset are filled in with a period (.). This is analogous to a RIGHT JOIN. **Note that this may produce an invalid interval file, since a period (.) is not a valid chrom, start, end or strand.**
- **Return all records of both datasets (fill nulls with ".")** returns all records from both datasets, filling whichever side (left or right) has no joining record with periods (.). This is analogous to a FULL OUTER JOIN. **Note that this may produce an invalid interval file, since a period (.) is not a valid chrom, start, end or strand.**

-----

**Examples**

.. image:: gops_joinRecordsList.gif

Only records that are joined (inner join):

.. image:: gops_joinInner.gif

All records of first dataset:

.. image:: gops_joinLeftOuter.gif

All records of second dataset:

.. image:: gops_joinRightOuter.gif

All records of both datasets:

.. image:: gops_joinFullOuter.gif
    ]]></help>
</tool>