changeset 0:98d76a28cc24 draft default tip

Uploaded
author jjohnson
date Thu, 25 Oct 2012 13:14:50 -0400
parents
children
files ribopicker.xml
diffstat 1 files changed, 173 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ribopicker.xml	Thu Oct 25 13:14:50 2012 -0400
@@ -0,0 +1,173 @@
+<tool id="ribopicker" name="Ribopicker" version="1.0">
+    <description>identify and remove rRNA-like sequences</description>
+    <requirements>
+    </requirements>
+    <!--
+      Command template (Cheetah). Runs ribopicker.pl on the selected input
+      against the chosen database(s). The fixed "-id results" prefix makes
+      the output filenames predictable (results_rrna.* / results_nonrrna.*)
+      so the outputs section can collect them from the working directory.
+      Every optional threshold is appended only when the user supplied a
+      value; otherwise the flag is omitted and ribopicker falls back to its
+      own default. Flags follow the ribopicker usage text:
+      -i identity, -c coverage, -l length, -S chunk size, -z Z-best,
+      -T score threshold.
+      NOTE(review): an unset optional integer is assumed to stringify as
+      '' or 'None' depending on the Galaxy release; both are treated as
+      "not set" here. Confirm against the target Galaxy version.
+    -->
+    <command interpreter="perl">
+       ribopicker.pl -f $input -dbs $dbs -id results
+       #if str($identity) not in ('', 'None')
+           -i $identity
+       #end if
+       #if str($coverage) not in ('', 'None')
+           -c $coverage
+       #end if
+       #if str($length) not in ('', 'None')
+           -l $length
+       #end if
+       #if str($chunk_size) not in ('', 'None')
+           -S $chunk_size
+       #end if
+       #if str($z_best) not in ('', 'None')
+           -z $z_best
+       #end if
+       #if str($score) not in ('', 'None')
+           -T $score
+       #end if
+    </command>
+    <inputs>
+        <!-- Query sequences, handed to ribopicker.pl through -f; FASTQ and FASTA datasets are accepted. -->
+        <param name="input"
+               type="data"
+               format="fastq,fasta"
+               label="Input Sequences"/>
+        <!-- Reference database(s) to screen against, handed to ribopicker.pl through -dbs; several may be selected. -->
+        <param name="dbs"
+               type="select"
+               multiple="true"
+               label="Databases">
+            <!-- option values are the database names taken from riboPickerConfig.pm -->
+            <option value="rrnadb">rrnadb - Non-redundant Ribosomal RNA Database</option>
+            <option value="slr">slr - SILVA Large subunit reference database (23S/28S)</option>
+            <option value="ssr">ssr - SILVA Small subunit reference database (16S/18S)</option>
+            <option value="test">test - Example test case db</option>
+        </param>
+        <!-- Identity threshold in percent, intended for ribopicker's -i option. Optional; when given it must be 1..100. -->
+        <param name="identity" type="integer" optional="true" label="Alignment identity threshold percentage (1-100)">
+            <help>
+            Alignment identity threshold in percentage (integer from 1-100
+            without %) used to define matching sequences as similar. The
+            identity is calculated for the part of the query sequence that
+            is aligned to a reference sequence. For example, a query
+            sequence of 100 bp that aligns to a reference sequence over the
+            first 50 bp with 40 matching positions has an identity value of
+            80%.
+            </help>
+            <!-- the message states the same bounds the validator enforces (min=1, not 0) -->
+            <validator type="in_range" message="between 1 and 100" min="1" max="100"/>
+        </param>
+        <!-- Coverage threshold in percent, intended for ribopicker's -c option. Optional; when given it must be 1..100. -->
+        <param name="coverage" type="integer" optional="true" label="Alignment coverage threshold percentage (1-100)">
+            <help>
+            Alignment coverage threshold in percentage (integer from 1-100
+            without %) used to define matching sequences as similar. The
+            coverage is calculated for the part of the query sequence that
+            is aligned to a reference sequence. For example, a query
+            sequence of 100 bp that aligns to a reference sequence over the
+            first 50 bp with 40 matching positions has a coverage value of
+            50%.
+            </help>
+            <!-- the message states the same bounds the validator enforces (min=1, not 0) -->
+            <validator type="in_range" message="between 1 and 100" min="1" max="100"/>
+        </param>
+
+        <!-- Alignment length threshold, intended for ribopicker's -l option. Optional; when given it must be at least 1. -->
+        <param name="length"
+               type="integer"
+               optional="true"
+               label="Alignment length threshold">
+            <validator type="in_range" min="1" message="length needs to be positive"/>
+            <help>
+            Alignment length threshold used to define matching sequences as
+            similar. For example, a query sequence of 100 bp that aligns to
+            a reference sequence over the first 50 bp with 40 matching
+            positions has an alignment length of 50.
+            </help>
+        </param>
+
+        <!--
+          BWA-SW chunk size in bp, intended for ribopicker's -S option. Optional.
+          Only a lower bound is enforced: an upper bound below the lower bound
+          can never be satisfied, and the documented default (10000000) has to
+          remain a valid value.
+        -->
+        <param name="chunk_size" type="integer" optional="true" label="Chunk size of reads in bp as used by BWA-SW (default: 10000000)">
+            <validator type="in_range" message="chunk size must be at least 10000" min="10000"/>
+        </param>
+        <!-- Z-best value for BWA-SW, intended for ribopicker's -z option. Optional; when given it must be 1..10. -->
+        <param name="z_best"
+               type="integer"
+               optional="true"
+               label="Z-best value as used by BWA-SW (default: 1)">
+            <validator type="in_range" min="1" max="10" message=""/>
+        </param>
+        <!--
+          BWA-SW alignment score threshold, intended for ribopicker's -T option.
+          Optional. Only a lower bound is enforced so that the documented
+          default (30) and larger thresholds are accepted.
+        -->
+        <param name="score" type="integer" optional="true" label="Alignment score threshold as used by BWA-SW (default: 30)">
+            <validator type="in_range" message="score threshold needs to be positive" min="1"/>
+        </param>
+
+    </inputs>
+    <!--
+      Four candidate outputs, collected from the job working directory under
+      the fixed "results" prefix set on the command line. Each one takes its
+      datatype from the input dataset, and a filter on the input's extension
+      keeps only the FASTA pair (.fa) or the FASTQ pair (.fq).
+    -->
+    <outputs>
+        <!-- FASTA input: rRNA-like sequences -->
+        <data name="rrna_fa"
+              format_source="input"
+              from_work_dir="results_rrna.fa"
+              label="rrna from ${on_string}">
+          <filter>input.extension.find('fasta') >= 0</filter>
+        </data>
+        <!-- FASTA input: remaining (non-rRNA) sequences -->
+        <data name="nonrrna_fa"
+              format_source="input"
+              from_work_dir="results_nonrrna.fa"
+              label="non rrna from ${on_string}">
+          <filter>input.extension.find('fasta') >= 0</filter>
+        </data>
+        <!-- FASTQ input: rRNA-like sequences -->
+        <data name="rrna_fq"
+              format_source="input"
+              from_work_dir="results_rrna.fq"
+              label="rrna from ${on_string}">
+          <filter>input.extension.find('fastq') >= 0</filter>
+        </data>
+        <!-- FASTQ input: remaining (non-rRNA) sequences -->
+        <data name="nonrrna_fq"
+              format_source="input"
+              from_work_dir="results_nonrrna.fq"
+              label="non rrna from ${on_string}">
+          <filter>input.extension.find('fastq') >= 0</filter>
+        </data>
+    </outputs>
+    <!--
+    <stdio>
+      <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
+      <exit_code range="1:"  level="fatal"   description="Error" />
+    </stdio>
+    -->
+
+    <help>
+
+<!--
+Usage:
+    ribopicker [options] -f <file> -dbs <list> ...
+
+Options:
+    -help | -h
+            Prints the help message and exits.
+
+    -man    Prints the full documentation.
+
+    -version
+            Prints the version of the program.
+
+    -show_dbs
+            Prints a list of available databases.
+
+    -f <file>
+            Input file in FASTA or FASTQ format that contains the query
+            sequences.
+
+    -dbs <list>
+            Name of database(s) to use (default: ssr). Names are according
+            to their definition in the config file (riboPickerConfig.pm).
+            Separate multiple database names by comma without spaces.
+
+            Example: -dbs ssr,hmp16s,ncbi16s
+
+    -out_dir <dir>
+            Directory where the results should be written (default: .). If
+            the directory does not exist, it will be created.
+
+    -i <integer>
+            Alignment identity threshold in percentage (integer from 1-100
+            without %) used to define matching sequences as similar. The
+            identity is calculated for the part of the query sequence that
+            is aligned to a reference sequence. For example, a query
+            sequence of 100 bp that aligns to a reference sequence over the
+            first 50 bp with 40 matching positions has an identity value of
+            80%.
+
+    -c <integer>
+            Alignment coverage threshold in percentage (integer from 1-100
+            without %) used to define matching sequences as similar. The
+            coverage is calculated for the part of the query sequence that
+            is aligned to a reference sequence. For example, a query
+            sequence of 100 bp that aligns to a reference sequence over the
+            first 50 bp with 40 matching positions has a coverage value of
+            50%.
+
+    -l <integer>
+            Alignment length threshold used to define matching sequences as
+            similar. For example, a query sequence of 100 bp that aligns to
+            a reference sequence over the first 50 bp with 40 matching
+            positions has an alignment length of 50.
+
+    -no_seq_out
+            Prevents the generation of the fasta/fastq output file for the
+            given coverage and identity thresholds. This feature is e.g.
+            useful for the web-version since -i and -c are set interactively
+            and not yet defined at the data processing step.
+
+    -keep_tmp_files
+            Prevents from unlinking the generated tmp files. These usually
+            include the id file and the .tsv file(s). This feature is e.g.
+            useful for the web-version since .tsv files are used to
+            dynamically generate the output files.
+
+    -id <string>
+            Optional parameter. If not set, ID will be automatically
+            generated to prevent from overwriting previous results. This
+            option is useful if integrated into other tools and the output
+            filenames need to be known. (Use this option to define the
+            output filename prefix. Output files will end in _rrna.fa and
+            _nonrrna.fa, respectively.)
+
+    -S <integer>
+            Chunk size of reads in bp as used by BWA-SW (default: 10000000).
+
+    -z <integer>
+            Z-best value as used by BWA-SW (default: 1).
+
+    -T <integer>
+            Alignment score threshold as used by BWA-SW (default: 30).
+
+-->
+
+    </help>
+</tool>