view fastx_collapser.xml @ 3:7bda0f25191f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/fastx_toolkit/fastx_collapser commit bbb2e6b6769b03602a8ab97001f88fbec52080a1
author iuc
date Tue, 08 May 2018 12:51:34 -0400
parents 17bfc147c9ea
children 1e76bd67fb73
line wrap: on
line source

<tool id="cshl_fastx_collapser" version="1.0.1" name="Collapse">
    <!-- Text shown after the tool name, so the tool reads "Collapse sequences". -->
    <description>sequences</description>
    <!-- macros.xml is not part of this file; the <expand macro="..."/> elements and
         the @TOKEN@ placeholders used below are expected to be defined there. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Command template. Job failure is decided by the exit code
         (detect_errors="exit_code").
         @CATS@ and @FQQUAL@ are placeholders that are not defined in this file
         (expected in macros.xml; confirm there). Judging by name and position,
         @CATS@ presumably feeds the input dataset to fastx_collapser and
         @FQQUAL@ presumably adds a FASTQ quality option when needed.
         -v is fastx_collapser's verbose flag; -o writes the collapsed sequences
         to the Galaxy dataset declared in the outputs section. -->
    <command detect_errors="exit_code"><![CDATA[
@CATS@ fastx_collapser -v
-o '$output'
@FQQUAL@
    ]]></command>

    <!-- All input parameters come from the fastx_input macro (not visible in this
         file). The output's metadata_source and the test below both refer to a
         parameter named "input", so the macro is expected to declare it. -->
    <inputs>
        <expand macro="fastx_input" />
    </inputs>
    <!-- One dataset, always typed as FASTA; this is the file the command writes
         through -o '$output'. metadata_source copies the metadata of the "input"
         dataset onto it. -->
    <outputs>
        <data name="output" format="fasta" metadata_source="input" />
    </outputs>
    <!-- Regression test: runs the tool on fasta_collapser1.fasta and compares the
         produced dataset with the stored fasta_collapser1.out. -->
    <tests>
        <test>
            <param name="input" value="fasta_collapser1.fasta" />
            <!-- The output is sorted differently depending on architecture,
                 so the sort attribute is needed here -->
            <output name="output" file="fasta_collapser1.out" sort="true" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool collapses identical sequences in a FASTA file into a single sequence and records how many times each sequence occurred.

--------

**Example**

Example Input File (Sequence "ATAT" appears multiple times)::

    >CSHL_2_FC0042AGLLOO_1_1_605_414
    TGCG
    >CSHL_2_FC0042AGLLOO_1_1_537_759
    ATAT
    >CSHL_2_FC0042AGLLOO_1_1_774_520
    TGGC
    >CSHL_2_FC0042AGLLOO_1_1_742_502
    ATAT
    >CSHL_2_FC0042AGLLOO_1_1_781_514
    TGAG
    >CSHL_2_FC0042AGLLOO_1_1_757_487
    TTCA
    >CSHL_2_FC0042AGLLOO_1_1_903_769
    ATAT
    >CSHL_2_FC0042AGLLOO_1_1_724_499
    ATAT

Example Output File::

    >1-1
    TGCG
    >2-4
    ATAT
    >3-1
    TGGC
    >4-1
    TGAG
    >5-1
    TTCA

.. class:: infomark

Original Sequence Names / Lane descriptions (e.g. "CSHL_2_FC0042AGLLOO_1_1_742_502") are discarded.

The output sequence name is composed of two numbers: the first is the sequence's number, the second is the multiplicity value.

The following output::

    >2-4
    ATAT

means that the sequence "ATAT" is the second sequence in the file, and it appeared 4 times in the input FASTA file.

------

This tool is based on `FASTX-toolkit`__ by Assaf Gordon.

.. __: http://hannonlab.cshl.edu/fastx_toolkit/
    ]]></help>
    <!-- Citation entries are supplied by the "citations" macro, which is not
         defined in this file (expected in macros.xml). -->
    <expand macro="citations" />
</tool>