view crossmap_bed.xml @ 9:aa85f6cf1d16 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crossmap commit e10d34b547557b2bd4f0be69e31204bd5703e422"
author iuc
date Thu, 20 Jan 2022 04:27:28 +0000
parents c664e42e75f8
children
line wrap: on
line source

<tool id="crossmap_bed" name="CrossMap BED" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
    <description>Convert genome coordinates or annotation files between genome assemblies</description>
    <!-- Shared definitions are imported from macros.xml, which is not part of this file. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expansions of the imported "requirements", "stdio" and "version_command" macros;
         their contents are defined outside this file. -->
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>

<!--
CrossMap's bed sub-command is invoked in one of two ways, selected by the
merge_unmapped_entries parameter:

1. CrossMap bed x.chain in.bed > out.bed
   No output file argument is passed; stdout is redirected into the
   output_combined dataset.
   stdout/out.bed: valid and invalid combined

2. CrossMap bed x.chain in.bed out.bed
   Results are written to the fixed working-directory names "output" and
   "output.unmap" (the latter is set through the unmap-file option). The
   output_valid and output_failed datasets collect these files through their
   from_work_dir attributes.
   out.bed: valid only
   out.bed.unmap: invalid only

The chromid option is appended after the conditional part in both modes.
-->
    <command><![CDATA[
CrossMap.py bed
'${chain_source.input_chain}'
'${input}'

#if $merge_unmapped_entries:
    > '${output_combined}'
#else:
    output
    --unmap-file output.unmap
#end if
--chromid $chromid
    ]]></command>


    <inputs>
        <!-- Annotation file whose coordinates are to be converted. -->
        <param name="input"
               type="data"
               format="bed"
               label="BED file"
               help="BED format file must have at least 3 columns (chrom, start, end) and no more than 12 columns"/>

        <!-- Chain file selection; the "chain" macro body is defined outside this file. -->
        <expand macro="chain"/>

        <!-- The selected value is passed verbatim as the chromid option of the command. -->
        <param name="chromid"
               type="select"
               label="Style of chromosome IDs">
            <option value="a" selected="true">As-is</option>
            <option value="l">Long style, e.g. chr1, chrX</option>
            <option value="s">Short style, e.g. 1, X</option>
        </param>

        <!-- Switches between one combined output and separate valid/failed outputs. -->
        <param name="merge_unmapped_entries"
               type="boolean"
               checked="false"
               truevalue="true"
               falsevalue="false"
               label="Merge failed and converted entries into single file"/>
    </inputs>

    <outputs>
        <!-- Separate-output mode: picked up from the working-directory file "output". -->
        <data name="output_valid"
              format="bed"
              label="${tool.name} (valid only) on ${on_string}"
              from_work_dir="output">
            <filter>merge_unmapped_entries is False</filter>
        </data>

        <!-- Separate-output mode: picked up from the working-directory file "output.unmap". -->
        <data name="output_failed"
              format="bed"
              label="${tool.name} (failed only) on ${on_string}"
              from_work_dir="output.unmap">
            <filter>merge_unmapped_entries is False</filter>
        </data>

        <!-- Merged mode: the command redirects stdout straight into this dataset. -->
        <data name="output_combined"
              format="bed"
              label="${tool.name} on ${on_string}">
            <filter>merge_unmapped_entries is True</filter>
        </data>
    </outputs>

    <tests>
        <test><!-- input contains only perfect entries that do get lifted over; history chain file, separate valid/failed outputs -->
            <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/>
            <param name="index_source" value="history"/>
            <!-- NOTE(review): the chain file is uploaded with ftype="csv" here but with ftype="txt" in the other history-based tests; confirm this is intentional -->
            <param name="input_chain" value="aToB.over.chain" ftype="csv"/>
            <param name="merge_unmapped_entries" value="false" />

            <output name="output_valid" file="test_bed_01_output_a__only-matches.bed"/>
            <output name="output_failed" file="test_bed_01_output_a__only_fails.bed"/>
        </test>
        <test><!-- same perfect-entries input as the first test, but with merged output -->
            <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/>
            <param name="index_source" value="history"/>
            <param name="input_chain" value="aToB.over.chain" ftype="txt"/>
            <param name="merge_unmapped_entries" value="true" />

            <output name="output_combined" file="test_bed_01_output_a__all.bed"/>
        </test>

        <test><!-- input contains imperfect entries; short-style chromosome IDs, separate valid/failed outputs -->
            <param name="input" value="test_bed_02_input_a.bed" ftype="bed"/>
            <param name="index_source" value="history"/>
            <param name="input_chain" value="aToB.over.chain" ftype="txt"/>
            <param name="merge_unmapped_entries" value="false" />
            <param name="chromid" value="s" />

            <output name="output_valid" file="test_bed_02_output_a__only-matches.bed"/>
            <output name="output_failed" file="test_bed_02_output_a__only_fails.bed"/>
        </test>
        <test><!-- same imperfect-entries input and short-style chromosome IDs, but with merged output -->
            <param name="input" value="test_bed_02_input_a.bed" ftype="bed"/>
            <param name="index_source" value="history"/>
            <param name="input_chain" value="aToB.over.chain" ftype="txt"/>
            <param name="merge_unmapped_entries" value="true" />
            <param name="chromid" value="s" />

            <output name="output_combined" file="test_bed_02_output_a__all.bed"/>
        </test>

        <test><!-- clone of the first test that uses a cached reference chain file (selected through dbkey) instead of a history file -->
            <param name="input" value="test_bed_01_input_a.bed" ftype="bed" dbkey="hg18"/>
            <param name="index_source" value="cached"/>
            <param name="merge_unmapped_entries" value="false" />

            <output name="output_valid" file="test_bed_01_output_a__only-matches.bed"/>
            <output name="output_failed" file="test_bed_01_output_a__only_fails.bed"/>
        </test>
    </tests>
    <help><![CDATA[
@HELP_GENERAL@

BED
---

BED format file must have at least 3 columns (chrom, start, end) and no more than 12 columns.
BED format: http://genome.ucsc.edu/FAQ/FAQformat.html#format1

A BED (Browser Extensible Data) file is a tab-delimited text file
describing genome regions or gene annotations. It is the standard file
format used by UCSC. It consists of one line per feature, each containing
3-12 columns. CrossMap converts BED files with less than 12 columns to a
different assembly by updating the chromosome and genome coordinates only;
all other columns remain unchanged. Regions from the old assembly that map to
multiple locations in the new assembly will be split. For 12-column BED
files, all columns will be updated accordingly except the 4th column (name
of bed line), 5th column (score value) and 9th column (RGB value describing
the display color). 12-column BED files usually define multiple blocks (e.g.
exons); if any of the exons fails to map to the new assembly, the whole BED
line is skipped.

Notes:

1. For BED-like formats mentioned above, CrossMap only updates “chrom (1st
   column)”, “start (2nd column)”, “end (3rd column)” and “strand” (if
   any). All other columns are kept as-is.
2. Lines starting with ‘#’, ‘browser’, ‘track’ will be skipped.
3. Lines with fewer than 3 columns will be skipped.
4. The 2nd and 3rd columns must be integers; otherwise the line is skipped.
5. “+” strand is assumed if no strand information was found.
6. For the standard BED format (12 columns), if any of the defined exon blocks
   cannot be uniquely mapped to the target assembly, the whole entry will be
   skipped.
7. If an input region cannot be consecutively mapped to the target assembly, it will be split.
8. \*.unmap file contains regions that cannot be unambiguously converted.
    ]]></help>

    <citations>
        <citation type="doi">10.1093/bioinformatics/btt730</citation>
    </citations>
</tool>