diff prinseq.xml @ 0:9790cfb46d03 draft default tip

Uploaded
author bgruening
date Mon, 07 Oct 2013 15:34:32 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prinseq.xml	Mon Oct 07 15:34:32 2013 -0400
@@ -0,0 +1,270 @@
+<tool id="prinseq_trimmer" name="FASTQ trimmer" version="0.1">
+    <description>(prinseq)</description>
+    <version_command interpreter="perl">prinseq-lite.pl --version</version_command>
+    <requirements>
+        <requirement type="package" version="0.20.3">prinseq_perl_dependencies</requirement>
+        <requirement type="set_environment">PRINSEQ_SCRIPT_PATH</requirement>
+    </requirements>
+    <command>
+        #import os
+        temp_graph_file = `mktemp`;
+        
+        perl \$PRINSEQ_SCRIPT_PATH/prinseq-lite.pl
+            #if $seq_type.seq_type_opt == 'single':
+                -fastq $seq_type.input_singles
+                #if $seq_type.input_singles.ext == 'fastqillumina':
+                    -phred64
+                #end if
+            #else:
+                -fastq $seq_type.input_mate1
+                -fastq2 $seq_type.input_mate2
+                #if $seq_type.input_mate1.ext != $seq_type.input_mate2.ext:
+                    #import sys
+                    #silent sys.stderr.write( 'Both pairs from your paired-end library need to be from the same filetype.' )
+                #end if
+                #if $seq_type.input_mate1.ext == 'fastqillumina':
+                    -phred64
+                -endif
+            #end if
+            
+            -out_good 'trimmed_reads'
+            ## we do not use the filter options in prinseq, so we are not interested in reads
+            ## that do not pass the filters
+            -out_bad null
+
+            ## Trim options
+            #if $trim_to_len:
+                -trim_to_len $trim_to_len
+            #end if
+            
+            #if $trim_left:
+                -trim_left $trim_left
+            #end if
+
+            #if $trim_right:
+                -trim_right
+            #end if
+
+            #if $trim_qual_left or $trim_qual_right:
+                -trim_qual_type $trim_qual_type
+                -trim_qual_rule $trim_qual_rule
+                -trim_qual_window $trim_qual_window
+                -trim_qual_step $trim_qual_step
+            #end if
+
+            #if $trim_qual_left:
+                -trim_qual_left $trim_qual_left
+            #end if
+            
+            #if $trim_qual_right:
+                -trim_qual_right $trim_qual_right
+            #end if
+
+
+            -graph_stats #echo ','.join( $graph_stats )#
+
+            ## summary are written to stdout
+            -stats_all
+
+
+            -graph_data $temp_graph_file
+
+            ;
+
+        perl \$PRINSEQ_SCRIPT_PATH/prinseq-graphs-noPCA.pl -i $temp_graph_file -html_all -o #echo os.path.join( $html_file.files_path, 'graphs' )#
+
+            ;
+
+        python \$PRINSEQ_SCRIPT_PATH/create_index.py $html_file.files_path > $html_file
+
+
+    </command>
+    <inputs>
+        <conditional name="seq_type">
+            <param name="seq_type_opt" type="select" label="Is this library paired- or single-end?">
+              <option value="single">Single-end</option>
+              <option value="paired">Paired-end</option>
+            </param>
+            <when value="single">
+                <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
+            </when>
+            <when value="paired">
+                <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
+                <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
+            </when>
+        </conditional>
+
+        <param name="trim_to_len" type="integer" value=""
+            label="Trim all sequence from the 3'-end to result in sequence with this length" 
+            help="(-trim_to_len)"/>
+
+        <param name="trim_left" type="integer" value=""
+            label="Trim sequence at the 5'-end by trim_left positions" 
+            help="(-trim_left)"/>
+
+        <param name="trim_right" type="integer" value=""
+            label="Trim sequence at the 3'-end by trim_right positions" 
+            help="(-trim_right)"/>
+
+        <param name="trim_left_p" type="integer" value=""
+            label="Trim sequence at the 5'-end by trim_left_p percentage of read length." 
+            help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_left_p)"/>
+
+        <param name="trim_right_p" type="integer" value=""
+            label="Trim sequence at the 3'-end by trim_right_p percentage of read length" 
+            help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_right_p)"/>
+
+        <param name="trim_tail_left" type="integer" value=""
+            label="Trim poly-A/T tail with a minimum length of trim_tail_left at the 5'-end" 
+            help="(-trim_tail_left)"/>
+
+        <param name="trim_tail_right" type="integer" value=""
+            label="Trim poly-A/T tail with a minimum length of trim_tail_right at the 3'-end" 
+            help="(-trim_tail_right)"/>
+
+        <param name="trim_ns_left" type="integer" value=""
+            label="Trim poly-N tail with a minimum length of trim_ns_left at the 5'-end" 
+            help="(-trim_left)"/>
+
+        <param name="trim_ns_right" type="integer" value=""
+            label="Trim poly-N tail with a minimum length of trim_ns_right at the 3'-end." 
+            help="(-trim_ns_right)"/>
+
+
+        <param name="trim_qual_left" type="integer" value=""
+            label=" Trim sequence by quality score from the 5'-end with this threshold score" 
+            help="(-trim_qual_left)"/>
+
+        <param name="trim_qual_right" type="integer" value=""
+            label="Trim sequence by quality score from the 3'-end with this threshold score" 
+            help="(-trim_qual_right)"/>
+
+        <param name="trim_qual_type" type="select" label="Type of quality score calculation to use">
+            <option value="min" selected="True">min</option>
+            <option value="mean">mean</option>
+            <option value="max">max</option>
+            <option value="sum">sum</option>
+        </param>
+
+        <param name="trim_qual_rule" type="select" label="Rule to use to compare quality score to calculated value.">
+            <option value="gt">greater than quality score</option>
+            <option value="lt" selected="True">less than quality score</option>
+            <option value="et">equal to quality score</option>
+        </param>
+
+        <param name="trim_qual_window" type="integer" value="1"
+            label="The sliding window size used to calculate quality score by type" 
+            help="(-trim_qual_window)"/>
+
+        <param name="trim_qual_step" type="integer" value="1"
+            label="Step size used to move the sliding window" 
+            help="To move the window over all quality scores without missing any, the step size should be less or equal to the window size(-trim_qual_step)"/>
+
+        <param name="graph_stats" type="select" multiple="True" label="Which statistics should be calculated included in the graph_data file">
+            <option value="ld" selected="True">Length distribution</option>
+            <option value="gc" selected="True">GC content distribution</option>
+            <option value="qd" selected="True">Base quality distribution</option>
+            <option value="ns" selected="True">Occurence of N</option>
+            <option value="pt" selected="True">Poly-A/T tails</option>
+            <option value="ts" selected="True">Tag sequence check</option>
+            <option value="as" selected="True">Assembly quality measure</option>
+            <option value="de" selected="True">Sequence duplication - exact only</option>
+            <option value="da" selected="True">Sequence duplication - exact + 5'/3'</option>
+            <option value="sc" selected="True">Sequence complexity</option>
+            <option value="dn" selected="True">Dinucleotide odds ratios, includes the PCA plots</option>
+        </param>
+
+
+        <!-- TODO
+        -log <file>
+            Log file to keep track of parameters, errors, etc. The log file
+            name is optional. If no file name is given, the log file name
+            will be "inputname.log". If the log file already exists, new
+            content will be added to the file.
+        -->
+
+
+    <outputs>
+        <data format="fastq" name="ofile_single" metadata_source="seq_type.input_singles" label="${tool.name} on ${on_string}">
+            <filter>seq_type['seq_type_opt'] == "single"</filter>
+        </data>
+
+        <data format="fastq" name="outfile_r1" label="${tool.name} on ${on_string}">
+            <filter>seq_type['seq_type_opt'] == "paired"</filter>
+            <actions>
+                <conditional name="seq_type.seq_type_opt">
+                  <when value="single">
+                    <action type="format">
+                      <option type="from_param" name="seq_type.input_singles" param_attribute="ext" />
+                    </action>
+                  </when>
+                  <when value="paired">
+                    <action type="format">
+                      <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" />
+                    </action>
+                  </when>
+                </conditional>
+            </actions>
+        </data>
+        <data format="fastq" name="outfile_r2" label="${tool.name} on ${on_string}">
+            <filter>seq_type['seq_type_opt'] == "paired"</filter>
+            <actions>
+                <conditional name="seq_type.seq_type_opt">
+                  <when value="single">
+                    <action type="format">
+                      <option type="from_param" name="seq_type.input_singles" param_attribute="ext" />
+                    </action>
+                  </when>
+                  <when value="paired">
+                    <action type="format">
+                      <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" />
+                    </action>
+                  </when>
+                </conditional>
+            </actions>
+        </data>
+
+        <data format="html" name="html_file" label="${tool.name} on ${on_string} summary" />
+    </outputs>
+    <tests>
+        <test>
+            <!-- grep a FASTA file for sequences with specific motif -->
+            <param name="seq_type.input_singles" value="example1.fastq" />
+            <output name="ofile_single" file="example1_trim_right_10.fastq" />
+            <param name="trim_right" value="10" />
+        </test>
+    </tests>
+    <help>
+
+
+.. class:: warningmark
+
+**TIP** 
+
+-----
+
+**What it does**
+
+
+PRINSEQ is a tool that generates summary statistics of sequence and quality data and that is used to filter, reformat and trim next-generation sequence data.
+
+
+http://prinseq.sourceforge.net/manual.html
+
+
+    ***** ORDER OF PROCESSING *****
+            The available options are processed in the following order:
+
+            seq_num, trim_left, trim_right, trim_left_p, trim_right_p,
+            trim_qual_left, trim_qual_right, trim_tail_left,
+            trim_tail_right, trim_ns_left, trim_ns_right, trim_to_len,
+            min_len, max_len, range_len, min_qual_score, max_qual_score,
+            min_qual_mean, max_qual_mean, min_gc, max_gc, range_gc,
+            ns_max_p, ns_max_n, noniupac, lc_method, derep, seq_id,
+            seq_case, dna_rna, out_format
+
+
+
+
+    </help>
+</tool>