view prinseq.xml @ 0:9790cfb46d03 draft default tip

Uploaded
author bgruening
date Mon, 07 Oct 2013 15:34:32 -0400
parents
children
line wrap: on
line source

<tool id="prinseq_trimmer" name="FASTQ trimmer" version="0.1">
    <!-- Short text shown next to the tool name. -->
    <description>(prinseq)</description>
    <!-- Reports the wrapped PRINSEQ version by running prinseq-lite.pl through the perl interpreter. -->
    <version_command interpreter="perl">prinseq-lite.pl --version</version_command>
    <!--
        Dependencies: the PRINSEQ Perl dependency package, plus the
        PRINSEQ_SCRIPT_PATH environment variable that the command section
        uses to locate the PRINSEQ and helper scripts.
    -->
    <requirements>
        <requirement type="package" version="0.20.3">prinseq_perl_dependencies</requirement>
        <requirement type="set_environment">PRINSEQ_SCRIPT_PATH</requirement>
    </requirements>
    <!--
        Command template (Cheetah rendered into a shell command line).
        Three steps are chained with ';':
          1. prinseq-lite.pl trims the reads and writes graph data to a temp file,
          2. prinseq-graphs-noPCA.pl renders that graph data as HTML,
          3. create_index.py writes an index page for the HTML output.
        Optional trim parameters are only passed on when the user supplied a value.
    -->
    <command>
        #import os
        ## Shell assignments must not have spaces around '='. The shell variable is
        ## referenced with an escaped dollar sign below so that Cheetah leaves it alone.
        temp_graph_file=`mktemp`;

        perl \$PRINSEQ_SCRIPT_PATH/prinseq-lite.pl
            #if $seq_type.seq_type_opt == 'single':
                -fastq $seq_type.input_singles
                #if $seq_type.input_singles.ext == 'fastqillumina':
                    -phred64
                #end if
            #else:
                -fastq $seq_type.input_mate1
                -fastq2 $seq_type.input_mate2
                #if $seq_type.input_mate1.ext != $seq_type.input_mate2.ext:
                    #import sys
                    #silent sys.stderr.write( 'Both pairs from your paired-end library need to be from the same filetype.' )
                #end if
                #if $seq_type.input_mate1.ext == 'fastqillumina':
                    -phred64
                #end if
            #end if

            -out_good 'trimmed_reads'
            ## we do not use the filter options in prinseq, so we are not interested in reads
            ## that do not pass the filters
            -out_bad null
            ## NOTE(review): nothing in this template copies the 'trimmed_reads*' files to the
            ## declared read outputs (ofile_single / outfile_r1 / outfile_r2) - confirm how
            ## those datasets are expected to be populated.

            ## Trim options
            #if $trim_to_len:
                -trim_to_len $trim_to_len
            #end if

            #if $trim_left:
                -trim_left $trim_left
            #end if

            #if $trim_right:
                -trim_right $trim_right
            #end if

            #if $trim_left_p:
                -trim_left_p $trim_left_p
            #end if

            #if $trim_right_p:
                -trim_right_p $trim_right_p
            #end if

            ## The quality-window settings are only meaningful together with a quality threshold
            #if $trim_qual_left or $trim_qual_right:
                -trim_qual_type $trim_qual_type
                -trim_qual_rule $trim_qual_rule
                -trim_qual_window $trim_qual_window
                -trim_qual_step $trim_qual_step
            #end if

            #if $trim_qual_left:
                -trim_qual_left $trim_qual_left
            #end if

            #if $trim_qual_right:
                -trim_qual_right $trim_qual_right
            #end if

            #if $trim_tail_left:
                -trim_tail_left $trim_tail_left
            #end if

            #if $trim_tail_right:
                -trim_tail_right $trim_tail_right
            #end if

            #if $trim_ns_left:
                -trim_ns_left $trim_ns_left
            #end if

            #if $trim_ns_right:
                -trim_ns_right $trim_ns_right
            #end if


            -graph_stats #echo ','.join( $graph_stats )#

            ## summary are written to stdout
            -stats_all


            -graph_data \$temp_graph_file

            ;

        perl \$PRINSEQ_SCRIPT_PATH/prinseq-graphs-noPCA.pl -i \$temp_graph_file -html_all -o #echo os.path.join( $html_file.files_path, 'graphs' )#

            ;

        ## the graph data is no longer needed once the HTML graphs have been rendered
        rm -f \$temp_graph_file

            ;

        python \$PRINSEQ_SCRIPT_PATH/create_index.py $html_file.files_path > $html_file


    </command>
    <!--
        Tool form. Integer trim parameters default to empty and are optional:
        an empty field means the corresponding trimming step is skipped.
    -->
    <inputs>
        <conditional name="seq_type">
            <param name="seq_type_opt" type="select" label="Is this library paired- or single-end?">
              <option value="single">Single-end</option>
              <option value="paired">Paired-end</option>
            </param>
            <when value="single">
                <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
            </when>
            <when value="paired">
                <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
                <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
            </when>
        </conditional>

        <param name="trim_to_len" type="integer" value="" optional="True"
            label="Trim all sequence from the 3'-end to result in sequence with this length" 
            help="(-trim_to_len)"/>

        <param name="trim_left" type="integer" value="" optional="True"
            label="Trim sequence at the 5'-end by trim_left positions" 
            help="(-trim_left)"/>

        <param name="trim_right" type="integer" value="" optional="True"
            label="Trim sequence at the 3'-end by trim_right positions" 
            help="(-trim_right)"/>

        <param name="trim_left_p" type="integer" value="" optional="True"
            label="Trim sequence at the 5'-end by trim_left_p percentage of read length." 
            help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_left_p)"/>

        <param name="trim_right_p" type="integer" value="" optional="True"
            label="Trim sequence at the 3'-end by trim_right_p percentage of read length" 
            help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_right_p)"/>

        <param name="trim_tail_left" type="integer" value="" optional="True"
            label="Trim poly-A/T tail with a minimum length of trim_tail_left at the 5'-end" 
            help="(-trim_tail_left)"/>

        <param name="trim_tail_right" type="integer" value="" optional="True"
            label="Trim poly-A/T tail with a minimum length of trim_tail_right at the 3'-end" 
            help="(-trim_tail_right)"/>

        <param name="trim_ns_left" type="integer" value="" optional="True"
            label="Trim poly-N tail with a minimum length of trim_ns_left at the 5'-end" 
            help="(-trim_ns_left)"/>

        <param name="trim_ns_right" type="integer" value="" optional="True"
            label="Trim poly-N tail with a minimum length of trim_ns_right at the 3'-end." 
            help="(-trim_ns_right)"/>


        <param name="trim_qual_left" type="integer" value="" optional="True"
            label="Trim sequence by quality score from the 5'-end with this threshold score" 
            help="(-trim_qual_left)"/>

        <param name="trim_qual_right" type="integer" value="" optional="True"
            label="Trim sequence by quality score from the 3'-end with this threshold score" 
            help="(-trim_qual_right)"/>

        <!-- The four quality-window settings below are only passed on when a quality threshold is set. -->
        <param name="trim_qual_type" type="select" label="Type of quality score calculation to use">
            <option value="min" selected="True">min</option>
            <option value="mean">mean</option>
            <option value="max">max</option>
            <option value="sum">sum</option>
        </param>

        <param name="trim_qual_rule" type="select" label="Rule to use to compare quality score to calculated value.">
            <option value="gt">greater than quality score</option>
            <option value="lt" selected="True">less than quality score</option>
            <option value="et">equal to quality score</option>
        </param>

        <param name="trim_qual_window" type="integer" value="1"
            label="The sliding window size used to calculate quality score by type" 
            help="(-trim_qual_window)"/>

        <param name="trim_qual_step" type="integer" value="1"
            label="Step size used to move the sliding window" 
            help="To move the window over all quality scores without missing any, the step size should be less or equal to the window size (-trim_qual_step)"/>

        <param name="graph_stats" type="select" multiple="True" label="Which statistics should be calculated and included in the graph_data file">
            <option value="ld" selected="True">Length distribution</option>
            <option value="gc" selected="True">GC content distribution</option>
            <option value="qd" selected="True">Base quality distribution</option>
            <option value="ns" selected="True">Occurrence of N</option>
            <option value="pt" selected="True">Poly-A/T tails</option>
            <option value="ts" selected="True">Tag sequence check</option>
            <option value="as" selected="True">Assembly quality measure</option>
            <option value="de" selected="True">Sequence duplication - exact only</option>
            <option value="da" selected="True">Sequence duplication - exact + 5'/3'</option>
            <option value="sc" selected="True">Sequence complexity</option>
            <option value="dn" selected="True">Dinucleotide odds ratios, includes the PCA plots</option>
        </param>


        <!-- TODO
        -log <file>
            Log file to keep track of parameters, errors, etc. The log file
            name is optional. If no file name is given, the log file name
            will be "inputname.log". If the log file already exists, new
            content will be added to the file.
        -->
    </inputs>


    <!--
        Outputs. Exactly one set of read outputs is created, selected by the
        library type: ofile_single for single-end input, outfile_r1/outfile_r2
        for paired-end input. Each read output takes over the datatype of the
        corresponding input so that downstream tools see the specific FASTQ/FASTA
        variant instead of generic "fastq". The HTML summary is always produced.
    -->
    <outputs>
        <data format="fastq" name="ofile_single" metadata_source="seq_type.input_singles" label="${tool.name} on ${on_string}">
            <filter>seq_type['seq_type_opt'] == "single"</filter>
            <actions>
                <conditional name="seq_type.seq_type_opt">
                  <when value="single">
                    <action type="format">
                      <option type="from_param" name="seq_type.input_singles" param_attribute="ext" />
                    </action>
                  </when>
                </conditional>
            </actions>
        </data>

        <data format="fastq" name="outfile_r1" label="${tool.name} on ${on_string}">
            <filter>seq_type['seq_type_opt'] == "paired"</filter>
            <actions>
                <conditional name="seq_type.seq_type_opt">
                  <when value="paired">
                    <action type="format">
                      <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" />
                    </action>
                  </when>
                </conditional>
            </actions>
        </data>
        <!-- Both mates are required to share one datatype, so mate 1 determines the format here as well. -->
        <data format="fastq" name="outfile_r2" label="${tool.name} on ${on_string}">
            <filter>seq_type['seq_type_opt'] == "paired"</filter>
            <actions>
                <conditional name="seq_type.seq_type_opt">
                  <when value="paired">
                    <action type="format">
                      <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" />
                    </action>
                  </when>
                </conditional>
            </actions>
        </data>

        <data format="html" name="html_file" label="${tool.name} on ${on_string} summary" />
    </outputs>
    <tests>
        <test>
            <!-- Single-end run: trim 10 positions from the 3'-end of every read -->
            <param name="seq_type_opt" value="single" />
            <param name="input_singles" value="example1.fastq" />
            <param name="trim_right" value="10" />
            <output name="ofile_single" file="example1_trim_right_10.fastq" />
        </test>
    </tests>
    <help>


.. class:: warningmark

**TIP** 

-----

**What it does**


PRINSEQ is a tool that generates summary statistics of sequence and quality data and that is used to filter, reformat and trim next-generation sequence data.


http://prinseq.sourceforge.net/manual.html


**ORDER OF PROCESSING**

The available options are processed in the following order::

            seq_num, trim_left, trim_right, trim_left_p, trim_right_p,
            trim_qual_left, trim_qual_right, trim_tail_left,
            trim_tail_right, trim_ns_left, trim_ns_right, trim_to_len,
            min_len, max_len, range_len, min_qual_score, max_qual_score,
            min_qual_mean, max_qual_mean, min_gc, max_gc, range_gc,
            ns_max_p, ns_max_n, noniupac, lc_method, derep, seq_id,
            seq_case, dna_rna, out_format




    </help>
</tool>