Mercurial > repos > bgruening > prinseq
diff prinseq.xml @ 0:9790cfb46d03 draft default tip
Uploaded
author | bgruening |
---|---|
date | Mon, 07 Oct 2013 15:34:32 -0400 |
parents | |
children |
line wrap: on
line diff
<tool id="prinseq_trimmer" name="FASTQ trimmer" version="0.1">
    <!--
        Galaxy wrapper that runs prinseq-lite.pl as a read trimmer.

        The command section is a Cheetah template that is rendered to a shell
        command line. It runs three steps:
          1. prinseq-lite.pl trims the reads and writes graph statistics to a
             scratch file,
          2. prinseq-graphs-noPCA.pl turns that scratch file into HTML graphs,
          3. create_index.py writes the HTML summary page.

        Conventions inside the command section:
          - "\$NAME" is a shell variable (the backslash keeps Cheetah from
            resolving it),
          - "$name" is a Galaxy parameter resolved by Cheetah,
          - lines starting with "##" are Cheetah comments and never reach the shell.
    -->
    <description>(prinseq)</description>
    <version_command interpreter="perl">prinseq-lite.pl --version</version_command>
    <requirements>
        <requirement type="package" version="0.20.3">prinseq_perl_dependencies</requirement>
        <requirement type="set_environment">PRINSEQ_SCRIPT_PATH</requirement>
    </requirements>
    <command>
        #import os
        ## Scratch file that carries the statistics from prinseq-lite.pl to the graph script.
        ## A shell assignment must not have spaces around the equals sign.
        temp_graph_file=`mktemp`;

        perl \$PRINSEQ_SCRIPT_PATH/prinseq-lite.pl
        #if $seq_type.seq_type_opt == 'single':
            -fastq $seq_type.input_singles
            #if $seq_type.input_singles.ext == 'fastqillumina':
                -phred64
            #end if
        #else:
            -fastq $seq_type.input_mate1
            -fastq2 $seq_type.input_mate2
            ## NOTE(review): this only writes a warning to stderr while the template is
            ## rendered; the command is still built and executed afterwards.
            #if $seq_type.input_mate1.ext != $seq_type.input_mate2.ext:
                #import sys
                #silent sys.stderr.write( 'Both pairs from your paired-end library need to be from the same filetype.' )
            #end if
            #if $seq_type.input_mate1.ext == 'fastqillumina':
                -phred64
            #end if
        #end if

        -out_good 'trimmed_reads'
        ## we do not use the filter options in prinseq, so we are not interested in reads
        ## that do not pass the filters
        -out_bad null

        ## Trim options. Each flag is only emitted when the user supplied a value.
        #if $trim_to_len:
            -trim_to_len $trim_to_len
        #end if

        #if $trim_left:
            -trim_left $trim_left
        #end if

        #if $trim_right:
            -trim_right $trim_right
        #end if

        #if $trim_left_p:
            -trim_left_p $trim_left_p
        #end if

        #if $trim_right_p:
            -trim_right_p $trim_right_p
        #end if

        #if $trim_tail_left:
            -trim_tail_left $trim_tail_left
        #end if

        #if $trim_tail_right:
            -trim_tail_right $trim_tail_right
        #end if

        #if $trim_ns_left:
            -trim_ns_left $trim_ns_left
        #end if

        #if $trim_ns_right:
            -trim_ns_right $trim_ns_right
        #end if

        ## The quality-window settings are only passed when quality trimming is requested.
        #if $trim_qual_left or $trim_qual_right:
            -trim_qual_type $trim_qual_type
            -trim_qual_rule $trim_qual_rule
            -trim_qual_window $trim_qual_window
            -trim_qual_step $trim_qual_step
        #end if

        #if $trim_qual_left:
            -trim_qual_left $trim_qual_left
        #end if

        #if $trim_qual_right:
            -trim_qual_right $trim_qual_right
        #end if

        ## A multiple select renders as a comma separated list of the selected values.
        -graph_stats $graph_stats

        ## summary are written to stdout
        -stats_all

        -graph_data \$temp_graph_file

        ;

        ## Make sure the directory for the HTML extra files exists (no-op when it already does).
        mkdir -p $html_file.files_path

        ;

        perl \$PRINSEQ_SCRIPT_PATH/prinseq-graphs-noPCA.pl -i \$temp_graph_file -html_all -o
        #echo os.path.join( $html_file.files_path, 'graphs' )#

        ;

        ## The scratch file is not needed once the graphs have been rendered.
        rm -f \$temp_graph_file

        ;

        python \$PRINSEQ_SCRIPT_PATH/create_index.py $html_file.files_path > $html_file
    </command>
    <inputs>
        <conditional name="seq_type">
            <param name="seq_type_opt" type="select" label="Is this library paired- or single-end?">
                <option value="single">Single-end</option>
                <option value="paired">Paired-end</option>
            </param>
            <when value="single">
                <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
            </when>
            <when value="paired">
                <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
                <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
            </when>
        </conditional>

        <!-- All trim values below may be left empty; an empty value disables that trimming step. -->
        <param name="trim_to_len" type="integer" value="" optional="True"
            label="Trim all sequence from the 3'-end to result in sequence with this length"
            help="(-trim_to_len)"/>

        <param name="trim_left" type="integer" value="" optional="True"
            label="Trim sequence at the 5'-end by trim_left positions"
            help="(-trim_left)"/>

        <param name="trim_right" type="integer" value="" optional="True"
            label="Trim sequence at the 3'-end by trim_right positions"
            help="(-trim_right)"/>

        <param name="trim_left_p" type="integer" value="" optional="True"
            label="Trim sequence at the 5'-end by trim_left_p percentage of read length."
            help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_left_p)"/>

        <param name="trim_right_p" type="integer" value="" optional="True"
            label="Trim sequence at the 3'-end by trim_right_p percentage of read length"
            help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_right_p)"/>

        <param name="trim_tail_left" type="integer" value="" optional="True"
            label="Trim poly-A/T tail with a minimum length of trim_tail_left at the 5'-end"
            help="(-trim_tail_left)"/>

        <param name="trim_tail_right" type="integer" value="" optional="True"
            label="Trim poly-A/T tail with a minimum length of trim_tail_right at the 3'-end"
            help="(-trim_tail_right)"/>

        <param name="trim_ns_left" type="integer" value="" optional="True"
            label="Trim poly-N tail with a minimum length of trim_ns_left at the 5'-end"
            help="(-trim_ns_left)"/>

        <param name="trim_ns_right" type="integer" value="" optional="True"
            label="Trim poly-N tail with a minimum length of trim_ns_right at the 3'-end."
            help="(-trim_ns_right)"/>


        <param name="trim_qual_left" type="integer" value="" optional="True"
            label="Trim sequence by quality score from the 5'-end with this threshold score"
            help="(-trim_qual_left)"/>

        <param name="trim_qual_right" type="integer" value="" optional="True"
            label="Trim sequence by quality score from the 3'-end with this threshold score"
            help="(-trim_qual_right)"/>

        <param name="trim_qual_type" type="select" label="Type of quality score calculation to use">
            <option value="min" selected="True">min</option>
            <option value="mean">mean</option>
            <option value="max">max</option>
            <option value="sum">sum</option>
        </param>

        <param name="trim_qual_rule" type="select" label="Rule to use to compare quality score to calculated value.">
            <option value="gt">greater than quality score</option>
            <option value="lt" selected="True">less than quality score</option>
            <option value="et">equal to quality score</option>
        </param>

        <param name="trim_qual_window" type="integer" value="1"
            label="The sliding window size used to calculate quality score by type"
            help="(-trim_qual_window)"/>

        <param name="trim_qual_step" type="integer" value="1"
            label="Step size used to move the sliding window"
            help="To move the window over all quality scores without missing any, the step size should be less than or equal to the window size. (-trim_qual_step)"/>

        <param name="graph_stats" type="select" multiple="True" label="Which statistics should be calculated and included in the graph_data file">
            <option value="ld" selected="True">Length distribution</option>
            <option value="gc" selected="True">GC content distribution</option>
            <option value="qd" selected="True">Base quality distribution</option>
            <option value="ns" selected="True">Occurrence of N</option>
            <option value="pt" selected="True">Poly-A/T tails</option>
            <option value="ts" selected="True">Tag sequence check</option>
            <option value="as" selected="True">Assembly quality measure</option>
            <option value="de" selected="True">Sequence duplication - exact only</option>
            <option value="da" selected="True">Sequence duplication - exact + 5'/3'</option>
            <option value="sc" selected="True">Sequence complexity</option>
            <option value="dn" selected="True">Dinucleotide odds ratios, includes the PCA plots</option>
        </param>


        <!-- TODO
            -log <file>
                Log file to keep track of parameters, errors, etc. The log file
                name is optional. If no file name is given, the log file name
                will be "inputname.log". If the log file already exists, new
                content will be added to the file.
        -->
    </inputs>

    <!--
        NOTE(review): the command writes the trimmed reads through
        -out_good 'trimmed_reads', but none of the read outputs below refers to
        that prefix (no from_work_dir, no move or copy in the command). It looks
        like these datasets are never filled; confirm the file names PRINSEQ
        derives from the prefix and connect them.
    -->
    <outputs>
        <!-- Single-end result; only created when the library is single-end. -->
        <data format="fastq" name="ofile_single" metadata_source="seq_type.input_singles" label="${tool.name} on ${on_string}">
            <filter>seq_type['seq_type_opt'] == "single"</filter>
        </data>

        <!-- Paired-end results; the format follows the extension of the input dataset. -->
        <data format="fastq" name="outfile_r1" label="${tool.name} on ${on_string}">
            <filter>seq_type['seq_type_opt'] == "paired"</filter>
            <actions>
                <conditional name="seq_type.seq_type_opt">
                    <when value="single">
                        <action type="format">
                            <option type="from_param" name="seq_type.input_singles" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired">
                        <action type="format">
                            <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" />
                        </action>
                    </when>
                </conditional>
            </actions>
        </data>
        <data format="fastq" name="outfile_r2" label="${tool.name} on ${on_string}">
            <filter>seq_type['seq_type_opt'] == "paired"</filter>
            <actions>
                <conditional name="seq_type.seq_type_opt">
                    <when value="single">
                        <action type="format">
                            <option type="from_param" name="seq_type.input_singles" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired">
                        <action type="format">
                            <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" />
                        </action>
                    </when>
                </conditional>
            </actions>
        </data>

        <!-- HTML summary page written by create_index.py; the graphs live in its files_path. -->
        <data format="html" name="html_file" label="${tool.name} on ${on_string} summary" />
    </outputs>
    <tests>
        <test>
            <!-- trim 10 positions from the 3'-end of a single-end FASTQ file -->
            <param name="seq_type.input_singles" value="example1.fastq" />
            <output name="ofile_single" file="example1_trim_right_10.fastq" />
            <param name="trim_right" value="10" />
        </test>
    </tests>
    <help>


.. class:: warningmark

**TIP**

-----

**What it does**


PRINSEQ is a tool that generates summary statistics of sequence and quality data and that is used to filter, reformat and trim next-generation sequence data.


http://prinseq.sourceforge.net/manual.html


    ***** ORDER OF PROCESSING *****
    The available options are processed in the following order:

    seq_num, trim_left, trim_right, trim_left_p, trim_right_p,
    trim_qual_left, trim_qual_right, trim_tail_left,
    trim_tail_right, trim_ns_left, trim_ns_right, trim_to_len,
    min_len, max_len, range_len, min_qual_score, max_qual_score,
    min_qual_mean, max_qual_mean, min_gc, max_gc, range_gc,
    ns_max_p, ns_max_n, noniupac, lc_method, derep, seq_id,
    seq_case, dna_rna, out_format




    </help>
</tool>