view tools/primers/seq_primer_clip.xml @ 3:f9ddb43ce28d draft

Uploaded v0.0.10a, fixes Biopython dependency setup
author peterjc
date Fri, 13 Sep 2013 06:09:54 -0400
parents bd151574bbf3
children
line wrap: on
line source

<tool id="seq_primer_clip" name="Primer clip sequences" version="0.0.10">
    <description>Trim off 5' or 3' primers</description>
    <!-- Declares the Biopython dependency: version 1.61 is requested as a
         package, and the "Bio" Python module is listed as well. -->
    <requirements>
        <requirement type="package" version="1.61">biopython</requirement>
        <requirement type="python-module">Bio</requirement>
    </requirements>
    <version_command interpreter="python">seq_primer_clip.py --version</version_command>
    <!-- Positional arguments, in order: input path, input datatype extension,
         primer FASTA path, primer type, allowed mismatches, minimum length,
         keep-unmatched flag, output path. The three dataset paths are
         single-quoted so that a path containing spaces or shell
         metacharacters is still passed as one argument. -->
    <command interpreter="python">
seq_primer_clip.py '$input_file' $input_file.ext '$primer_fasta' $primer_type $mm $min_len $keep_negatives '$output_file'
    </command>
    <stdio>
        <!-- Zero is the only successful exit status: every negative and
             every positive return code is reported as an error. -->
        <exit_code range=":-1" />
        <exit_code range="1:" />
    </stdio>
    <inputs>
        <!-- Reads to clip. The format hint is carried in "help" (the attribute
             Galaxy renders on the tool form); "description" is not a param
             attribute, so the hint was previously never shown. -->
        <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to clip" help="FASTA, FASTQ, or SFF format."/>
        <!-- Primer sequences to search for, supplied as a FASTA dataset. -->
        <param name="primer_fasta" type="data" format="fasta" label="FASTA file containing primer(s)"/>
        <!-- Which end the primers belong to, and for 3' primers which strand
             they are written on. The option value is passed to the script. -->
        <param name="primer_type" type="select" label="Type of primers">
            <option value="Forward">Forward (5') primers</option>
            <option value="Reverse">Reverse (3') primers (given with respect to the forward strand)</option>
            <option value="Reverse-complement">Reverse (3') primers (given with respect to the reverse strand)</option>
        </param>
        <!-- Mismatch tolerance; the validator restricts it to 0, 1 or 2. -->
        <param name="mm" type="integer" value="0" label="How many mismatches to allow? (0, 1 or 2)">
            <validator type="in_range" min="0" max="2" />
        </param>
        <param name="keep_negatives" type="boolean" value="false" label="Keep reads with no matched primer"/>
        <param name="min_len" type="integer" label="Minimum length for (clipped) sequences " value="1"/>
    </inputs>
    <outputs>
        <data name="output_file" format="data" label="$primer_type primer clipped">
            <!-- The output takes the same datatype as the input dataset: each
                 rule below maps one input extension onto the identical output
                 format.
                 TODO - Replace these rules with format="input:input_file"
                 if/when that works. -->
            <change_format>
                <!-- FASTA -->
                <when input_dataset="input_file" attribute="extension" value="fasta" format="fasta" />
                <!-- FASTQ and its quality-encoding variants -->
                <when input_dataset="input_file" attribute="extension" value="fastq" format="fastq" />
                <when input_dataset="input_file" attribute="extension" value="fastqsanger" format="fastqsanger" />
                <when input_dataset="input_file" attribute="extension" value="fastqsolexa" format="fastqsolexa" />
                <when input_dataset="input_file" attribute="extension" value="fastqillumina" format="fastqillumina" />
                <when input_dataset="input_file" attribute="extension" value="fastqcssanger" format="fastqcssanger" />
                <!-- SFF -->
                <when input_dataset="input_file" attribute="extension" value="sff" format="sff" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Tests 1-3: forward primer clipping with up to 2 mismatches, a
             minimum length of 35 and unmatched reads discarded, run once per
             input format (FASTA, FASTQ Sanger, SFF). -->
        <test>
            <param name="input_file" value="MID4_GLZRM4E04_rnd30.fasta" ftype="fasta" />
            <param name="primer_fasta" value="dop_primers.fasta" />
            <param name="primer_type" value="Forward" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="false" />
            <param name="min_len" value="35" />
            <output name="output_file" file="MID4_GLZRM4E04_rnd30_fclip.fasta" ftype="fasta" />
        </test>
        <test>
            <param name="input_file" value="MID4_GLZRM4E04_rnd30.fastqsanger" ftype="fastqsanger" />
            <param name="primer_fasta" value="dop_primers.fasta" />
            <param name="primer_type" value="Forward" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="false" />
            <param name="min_len" value="35" />
            <output name="output_file" file="MID4_GLZRM4E04_rnd30_fclip.fastqsanger" ftype="fastqsanger" />
        </test>
        <test>
            <param name="input_file" value="MID4_GLZRM4E04_rnd30.sff" ftype="sff" />
            <param name="primer_fasta" value="dop_primers.fasta" />
            <param name="primer_type" value="Forward" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="false" />
            <param name="min_len" value="35" />
            <output name="output_file" file="MID4_GLZRM4E04_rnd30_fclip.sff" ftype="sff" />
        </test>
        <!-- Tests 4-6: reverse primer clipping with the same mismatch and
             length settings, this time keeping unmatched reads. Each test
             takes as input the file named as the expected output of the
             corresponding forward-clipping test above. -->
        <test>
            <param name="input_file" value="MID4_GLZRM4E04_rnd30_fclip.fasta" ftype="fasta" />
            <param name="primer_fasta" value="dop_primers.fasta" />
            <param name="primer_type" value="Reverse" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="true" />
            <param name="min_len" value="35" />
            <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.fasta" ftype="fasta" />
        </test>
        <test>
            <param name="input_file" value="MID4_GLZRM4E04_rnd30_fclip.fastqsanger" ftype="fastqsanger" />
            <param name="primer_fasta" value="dop_primers.fasta" />
            <param name="primer_type" value="Reverse" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="true" />
            <param name="min_len" value="35" />
            <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.fastqsanger" ftype="fastqsanger" />
        </test>
        <test>
            <param name="input_file" value="MID4_GLZRM4E04_rnd30_fclip.sff" ftype="sff" />
            <param name="primer_fasta" value="dop_primers.fasta" />
            <param name="primer_type" value="Reverse" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="true" />
            <param name="min_len" value="35" />
            <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" />
        </test>
    </tests>
    <!-- Dependencies are declared once, in the requirements element that follows the tool description. -->
    <help>

**What it does**

Looks for the given primer sequences (within the existing clipped sequence) and
further clips the reads to remove the primers and any preceding/trailing sequence.

Reads containing a forward primer are reduced to just the sequence after (and
excluding) the forward primer.

Reads containing a reverse primer are reduced to just the sequence before (and
excluding) the reverse primer.

Degenerate primers can be specified using the standard IUPAC ambiguity codes,
thus a primer with an N would match A, C, T or G (or any of the IUPAC ambiguity
codes) and so on.

Note that for SFF files only the clip/trim positions are edited - you will still
be able to extract the original full read (with any adapter sequence and poor
quality sequence) if you need to.

.. class:: warningmark

**Note**. This tool was initially written for Roche 454 data, and should also
work fine on Sanger or Ion Torrent data. However, it is probably too slow
for use on large Illumina datasets.


**Citation**

This tool uses Biopython. If you use this tool in scientific work leading to a
publication, please cite:

Cock et al 2009. Biopython: freely available Python tools for computational
molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.

This tool is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_primer_clip 
    </help>
</tool>