view ribopicker.xml @ 0:98d76a28cc24 draft default tip

Uploaded
author jjohnson
date Thu, 25 Oct 2012 13:14:50 -0400
parents
children
line wrap: on
line source

<tool id="ribopicker" name="Ribopicker" version="1.0">
    <description>identify and remove rRNA-like sequences</description>
    <!--
        No dependencies are declared here. The command below runs
        ribopicker.pl through the perl interpreter, and the parameter labels
        refer to BWA-SW.
        NOTE(review): confirm whether perl / bwa should be listed as
        requirements so the tool's dependencies can be resolved.
    -->
    <requirements>
    </requirements>
    <!--
        Command template (Cheetah). ribopicker.pl must remain the first token
        on the command line. The fixed id prefix "results" makes the output
        file names predictable (results_rrna.* / results_nonrrna.*) so the
        outputs section can collect them from the working directory.

        Every optional parameter is forwarded only when the user supplied a
        value; otherwise the flag is omitted and ribopicker.pl applies its own
        default. Flag names follow the usage text kept in the help section:
          -i identity, -c coverage, -l length,
          -S chunk size, -z Z-best, -T score threshold.
        An unset optional integer may render as an empty string or as "None"
        depending on the Galaxy version, so both are treated as "not set".
    -->
    <command interpreter="perl">
       ribopicker.pl -f $input -dbs $dbs -id results
       #if str($identity).strip() not in ('', 'None')
           -i $identity
       #end if
       #if str($coverage).strip() not in ('', 'None')
           -c $coverage
       #end if
       #if str($length).strip() not in ('', 'None')
           -l $length
       #end if
       #if str($chunk_size).strip() not in ('', 'None')
           -S $chunk_size
       #end if
       #if str($z_best).strip() not in ('', 'None')
           -z $z_best
       #end if
       #if str($score).strip() not in ('', 'None')
           -T $score
       #end if
    </command>
    <!--
        Tool inputs: the query sequences, the reference database selection,
        and optional alignment thresholds / BWA-SW tuning values. All numeric
        parameters are optional; the labels and help texts state the
        defaults that apply when a field is left empty.
    -->
    <inputs>
        <param name="input" type="data" format="fastq,fasta" label="Input Sequences"/>
        <param name="dbs" type="select" label="Databases" multiple="true">
            <!-- from riboPickerConfig.pm -->
            <option value="rrnadb">rrnadb - Non-redundant Ribosomal RNA Database</option>
            <option value="slr">slr - SILVA Large subunit reference database (23S/28S)</option>
            <option value="ssr">ssr - SILVA Small subunit reference database (16S/18S)</option>
            <option value="test">test - Example test case db</option>
        </param>
        <param name="identity" type="integer" optional="true" label="Alignment identity threshold percentage (1-100)">
            <help>
            Alignment identity threshold in percentage (integer from 1-100
            without %) used to define matching sequences as similar. The
            identity is calculated for the part of the query sequence that
            is aligned to a reference sequence. For example, a query
            sequence of 100 bp that aligns to a reference sequence over the
            first 50 bp with 40 matching positions has an identity value of
            80%.
            </help>
            <!-- message now agrees with the enforced lower bound of 1 -->
            <validator type="in_range" message="between 1 and 100" min="1" max="100"/>
        </param>
        <param name="coverage" type="integer" optional="true" label="Alignment coverage threshold percentage (1-100)">
            <help>
            Alignment coverage threshold in percentage (integer from 1-100
            without %) used to define matching sequences as similar. The
            coverage is calculated for the part of the query sequence that
            is aligned to a reference sequence. For example, a query
            sequence of 100 bp that aligns to a reference sequence over the
            first 50 bp with 40 matching positions has a coverage value of
            50%.
            </help>
            <!-- message now agrees with the enforced lower bound of 1 -->
            <validator type="in_range" message="between 1 and 100" min="1" max="100"/>
        </param>

        <param name="length" type="integer" optional="true" label="Alignment length threshold">
            <help>
            Alignment length threshold used to define matching sequences as
            similar. For example, a query sequence of 100 bp that aligns to
            a reference sequence over the first 50 bp with 40 matching
            positions has an alignment length of 50.
            </help>
            <validator type="in_range" message="length needs to be positive" min="1"/>
        </param>

        <param name="chunk_size" type="integer" optional="true" label="Chunk size of reads in bp as used by BWA-SW (default: 10000000)">
            <!--
                The previous bounds (min 10000, max 100) described an empty
                range, so every entered value was rejected. Only the lower
                bound is kept; the documented default (10000000) lies far
                above the old max, so no upper bound is imposed.
            -->
            <validator type="in_range" message="chunk size must be at least 10000" min="10000"/>
        </param>
        <param name="z_best" type="integer" optional="true" label="Z-best value as used by BWA-SW (default: 1)">
            <validator type="in_range" message="between 1 and 10" min="1" max="10"/>
        </param>
        <param name="score" type="integer" optional="true" label="Alignment score threshold as used by BWA-SW (default: 30)">
            <!--
                The previous upper bound of 10 excluded the documented default
                of 30; only positivity is enforced now.
            -->
            <validator type="in_range" message="score threshold must be at least 1" min="1"/>
        </param>

    </inputs>
    <!--
        Two datasets are produced per run: the rRNA-like reads and the
        remaining reads. Each is declared twice, once per file suffix (.fa or
        .fq) found in the working directory, and the filters keep only the
        pair whose suffix corresponds to the input's extension. The output
        datatype is copied from the input dataset.
    -->
    <outputs>
        <!-- FASTA input: collect the .fa result files -->
        <data name="rrna_fa" format_source="input" from_work_dir="results_rrna.fa" label="rrna from ${on_string}">
            <filter>'fasta' in input.extension</filter>
        </data>
        <data name="nonrrna_fa" format_source="input" from_work_dir="results_nonrrna.fa" label="non rrna from ${on_string}">
            <filter>'fasta' in input.extension</filter>
        </data>
        <!-- FASTQ input: collect the .fq result files -->
        <data name="rrna_fq" format_source="input" from_work_dir="results_rrna.fq" label="rrna from ${on_string}">
            <filter>'fastq' in input.extension</filter>
        </data>
        <data name="nonrrna_fq" format_source="input" from_work_dir="results_nonrrna.fq" label="non rrna from ${on_string}">
            <filter>'fastq' in input.extension</filter>
        </data>
    </outputs>
    <!--
    <stdio>
      <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
      <exit_code range="1:"  level="fatal"   description="Error" />
    </stdio>
    -->

    <help>
**What it does**

Ribopicker identifies rRNA-like sequences in a FASTA or FASTQ dataset by
comparing the input sequences against the selected reference database(s),
and separates them from the remaining sequences.

**Inputs**

- A FASTA or FASTQ dataset containing the query sequences.
- One or more reference databases.

**Outputs**

- *rrna*: the sequences identified as rRNA-like.
- *non rrna*: all remaining sequences.

Both outputs have the same datatype as the input dataset.

<!--
Maintainer reference: command line usage of ribopicker.pl. This comment is
not shown to users. Not every option listed here is exposed by this wrapper.

Usage:
    ribopicker [options] -f <file> -dbs <list> ...

Options:
    -help | -h
            Prints the help message and exits.

    -man    Prints the full documentation.

    -version
            Prints the version of the program.

    -show_dbs
            Prints a list of available databases.

    -f <file>
            Input file in FASTA or FASTQ format that contains the query
            sequences.

    -dbs <list>
            Name of database(s) to use (default: ssr). Names are according
            to their definition in the config file (riboPickerConfig.pm).
            Separate multiple database names by comma without spaces.

            Example: -dbs ssr,hmp16s,ncbi16s

    -out_dir <dir>
            Directory where the results should be written (default: .). If
            the directory does not exist, it will be created.

    -i <integer>
            Alignment identity threshold in percentage (integer from 1-100
            without %) used to define matching sequences as similar. The
            identity is calculated for the part of the query sequence that
            is aligned to a reference sequence. For example, a query
            sequence of 100 bp that aligns to a reference sequence over the
            first 50 bp with 40 matching positions has an identity value of
            80%.

    -c <integer>
            Alignment coverage threshold in percentage (integer from 1-100
            without %) used to define matching sequences as similar. The
            coverage is calculated for the part of the query sequence that
            is aligned to a reference sequence. For example, a query
            sequence of 100 bp that aligns to a reference sequence over the
            first 50 bp with 40 matching positions has a coverage value of
            50%.

    -l <integer>
            Alignment length threshold used to define matching sequences as
            similar. For example, a query sequence of 100 bp that aligns to
            a reference sequence over the first 50 bp with 40 matching
            positions has an alignment length of 50.

    -no_seq_out
            Prevents the generation of the fasta/fastq output file for the
            given coverage and identity thresholds. This feature is e.g.
            useful for the web-version since -i and -c are set interactively
            and not yet defined at the data processing step.

    -keep_tmp_files
            Prevents from unlinking the generated tmp files. These usually
            include the id file and the .tsv file(s). This feature is e.g.
            useful for the web-version since .tsv files are used to
            dynamically generate the output files.

    -id <string>
            Optional parameter. If not set, ID will be automatically
            generated to prevent from overwriting previous results. This
            option is useful if integrated into other tools and the output
            filenames need to be known. (Use this option to define the
            output filename prefix. Output files will end in _rrna.fa and
            _nonrrna.fa, respectively.)

    -S <integer>
            Chunk size of reads in bp as used by BWA-SW (default: 10000000).

    -z <integer>
            Z-best value as used by BWA-SW (default: 1).

    -T <integer>
            Alignment score threshold as used by BWA-SW (default: 30).

-->

    </help>
</tool>