comparison prinseq.xml @ 0:9790cfb46d03 draft default tip

Uploaded
author bgruening
date Mon, 07 Oct 2013 15:34:32 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9790cfb46d03
1 <tool id="prinseq_trimmer" name="FASTQ trimmer" version="0.1">
2 <description>(prinseq)</description>
3 <version_command interpreter="perl">prinseq-lite.pl --version</version_command>
4 <requirements>
5 <requirement type="package" version="0.20.3">prinseq_perl_dependencies</requirement>
6 <requirement type="set_environment">PRINSEQ_SCRIPT_PATH</requirement>
7 </requirements>
8 <command>
9 #import os
10 temp_graph_file=`mktemp`;
11 ## temp_graph_file is a shell variable (scratch file for the graph data), so its dollar sign is escaped from Cheetah below
12 perl \$PRINSEQ_SCRIPT_PATH/prinseq-lite.pl
13 #if $seq_type.seq_type_opt == 'single':
14 -fastq $seq_type.input_singles
15 #if $seq_type.input_singles.ext == 'fastqillumina':
16 -phred64
17 #end if
18 #else:
19 -fastq $seq_type.input_mate1
20 -fastq2 $seq_type.input_mate2
21 #if $seq_type.input_mate1.ext != $seq_type.input_mate2.ext:
22 #import sys
23 #silent sys.stderr.write( 'Both pairs from your paired-end library need to be from the same filetype.' )
24 #end if
25 #if $seq_type.input_mate1.ext == 'fastqillumina':
26 -phred64
27 #end if
28 #end if
29
30 -out_good 'trimmed_reads'
31 ## we do not use the filter options in prinseq, so we are not interested in reads
32 ## that do not pass the filters
33 -out_bad null
34
35 ## Trim options
36 #if $trim_to_len:
37 -trim_to_len $trim_to_len
38 #end if
39
40 #if $trim_left:
41 -trim_left $trim_left
42 #end if
43
44 #if $trim_right:
45 -trim_right $trim_right
46 #end if
47
48 #if $trim_qual_left or $trim_qual_right:
49 -trim_qual_type $trim_qual_type
50 -trim_qual_rule $trim_qual_rule
51 -trim_qual_window $trim_qual_window
52 -trim_qual_step $trim_qual_step
53 #end if
54
55 #if $trim_qual_left:
56 -trim_qual_left $trim_qual_left
57 #end if
58
59 #if $trim_qual_right:
60 -trim_qual_right $trim_qual_right
61 #end if
62
63 ## the selected statistics are passed as one comma-separated list
64 -graph_stats #echo ','.join( $graph_stats )#
65
66 ## summary are written to stdout
67 -stats_all
68
69 ## graph data goes to the scratch file created by mktemp above
70 -graph_data \$temp_graph_file
71
72 ;
73 ## render the graph data as HTML into the 'graphs' folder of the HTML output's files_path
74 perl \$PRINSEQ_SCRIPT_PATH/prinseq-graphs-noPCA.pl -i \$temp_graph_file -html_all -o #echo os.path.join( $html_file.files_path, 'graphs' )#
75
76 ;
77 ## create_index.py writes the index page for files_path to stdout, which becomes the HTML output
78 python \$PRINSEQ_SCRIPT_PATH/create_index.py $html_file.files_path > $html_file
79
80
81 </command>
82 <inputs>
83 <conditional name="seq_type">
84 <param name="seq_type_opt" type="select" label="Is this library paired- or single-end?">
85 <option value="single">Single-end</option>
86 <option value="paired">Paired-end</option>
87 </param>
88 <when value="single">
89 <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
90 </when>
91 <when value="paired">
92 <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
93 <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
94 </when>
95 </conditional>
96
97 <param name="trim_to_len" type="integer" value=""
98 label="Trim all sequence from the 3'-end to result in sequence with this length"
99 help="(-trim_to_len)"/>
100
101 <param name="trim_left" type="integer" value=""
102 label="Trim sequence at the 5'-end by trim_left positions"
103 help="(-trim_left)"/>
104
105 <param name="trim_right" type="integer" value=""
106 label="Trim sequence at the 3'-end by trim_right positions"
107 help="(-trim_right)"/>
108
109 <param name="trim_left_p" type="integer" value=""
110 label="Trim sequence at the 5'-end by trim_left_p percentage of read length."
111 help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_left_p)"/>
112
113 <param name="trim_right_p" type="integer" value=""
114 label="Trim sequence at the 3'-end by trim_right_p percentage of read length"
115 help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_right_p)"/>
116
117 <param name="trim_tail_left" type="integer" value=""
118 label="Trim poly-A/T tail with a minimum length of trim_tail_left at the 5'-end"
119 help="(-trim_tail_left)"/>
120
121 <param name="trim_tail_right" type="integer" value=""
122 label="Trim poly-A/T tail with a minimum length of trim_tail_right at the 3'-end"
123 help="(-trim_tail_right)"/>
124
125 <param name="trim_ns_left" type="integer" value=""
126 label="Trim poly-N tail with a minimum length of trim_ns_left at the 5'-end"
127 help="(-trim_ns_left)"/>
128
129 <param name="trim_ns_right" type="integer" value=""
130 label="Trim poly-N tail with a minimum length of trim_ns_right at the 3'-end."
131 help="(-trim_ns_right)"/>
132
133
134 <param name="trim_qual_left" type="integer" value=""
135 label="Trim sequence by quality score from the 5'-end with this threshold score"
136 help="(-trim_qual_left)"/>
137
138 <param name="trim_qual_right" type="integer" value=""
139 label="Trim sequence by quality score from the 3'-end with this threshold score"
140 help="(-trim_qual_right)"/>
141
142 <param name="trim_qual_type" type="select" label="Type of quality score calculation to use">
143 <option value="min" selected="True">min</option>
144 <option value="mean">mean</option>
145 <option value="max">max</option>
146 <option value="sum">sum</option>
147 </param>
148
149 <param name="trim_qual_rule" type="select" label="Rule to use to compare quality score to calculated value.">
150 <option value="gt">greater than quality score</option>
151 <option value="lt" selected="True">less than quality score</option>
152 <option value="et">equal to quality score</option>
153 </param>
154
155 <param name="trim_qual_window" type="integer" value="1"
156 label="The sliding window size used to calculate quality score by type"
157 help="(-trim_qual_window)"/>
158
159 <param name="trim_qual_step" type="integer" value="1"
160 label="Step size used to move the sliding window"
161 help="To move the window over all quality scores without missing any, the step size should be less than or equal to the window size. (-trim_qual_step)"/>
162
163 <param name="graph_stats" type="select" multiple="True" label="Which statistics should be calculated and included in the graph_data file">
164 <option value="ld" selected="True">Length distribution</option>
165 <option value="gc" selected="True">GC content distribution</option>
166 <option value="qd" selected="True">Base quality distribution</option>
167 <option value="ns" selected="True">Occurrence of N</option>
168 <option value="pt" selected="True">Poly-A/T tails</option>
169 <option value="ts" selected="True">Tag sequence check</option>
170 <option value="as" selected="True">Assembly quality measure</option>
171 <option value="de" selected="True">Sequence duplication - exact only</option>
172 <option value="da" selected="True">Sequence duplication - exact + 5'/3'</option>
173 <option value="sc" selected="True">Sequence complexity</option>
174 <option value="dn" selected="True">Dinucleotide odds ratios, includes the PCA plots</option>
175 </param>
176
177
178 <!-- TODO
179 -log <file>
180 Log file to keep track of parameters, errors, etc. The log file
181 name is optional. If no file name is given, the log file name
182 will be "inputname.log". If the log file already exists, new
183 content will be added to the file.
184 -->
185
186 </inputs>
187 <outputs>
188 <data format="fastq" name="ofile_single" metadata_source="seq_type.input_singles" label="${tool.name} on ${on_string}">
189 <filter>seq_type['seq_type_opt'] == "single"</filter>
190 </data>
191
192 <data format="fastq" name="outfile_r1" label="${tool.name} on ${on_string}">
193 <filter>seq_type['seq_type_opt'] == "paired"</filter>
194 <actions>
195 <conditional name="seq_type.seq_type_opt">
196 <when value="single">
197 <action type="format">
198 <option type="from_param" name="seq_type.input_singles" param_attribute="ext" />
199 </action>
200 </when>
201 <when value="paired">
202 <action type="format">
203 <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" />
204 </action>
205 </when>
206 </conditional>
207 </actions>
208 </data>
209 <data format="fastq" name="outfile_r2" label="${tool.name} on ${on_string}">
210 <filter>seq_type['seq_type_opt'] == "paired"</filter>
211 <actions>
212 <conditional name="seq_type.seq_type_opt">
213 <when value="single">
214 <action type="format">
215 <option type="from_param" name="seq_type.input_singles" param_attribute="ext" />
216 </action>
217 </when>
218 <when value="paired">
219 <action type="format">
220 <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" />
221 </action>
222 </when>
223 </conditional>
224 </actions>
225 </data>
226
227 <data format="html" name="html_file" label="${tool.name} on ${on_string} summary" />
228 </outputs>
229 <tests>
230 <test>
231 <!-- trim 10 positions from the 3'-end of a single-end FASTQ file -->
232 <param name="seq_type.input_singles" value="example1.fastq" />
233 <output name="ofile_single" file="example1_trim_right_10.fastq" />
234 <param name="trim_right" value="10" />
235 </test>
236 </tests>
237 <help>
238
239
240 .. class:: warningmark
241
242 **TIP**
243
244 -----
245
246 **What it does**
247
248
249 PRINSEQ is a tool that generates summary statistics of sequence and quality data and that is used to filter, reformat and trim next-generation sequence data.
250
251
252 http://prinseq.sourceforge.net/manual.html
253
254
255 ***** ORDER OF PROCESSING *****
256 The available options are processed in the following order:
257
258 seq_num, trim_left, trim_right, trim_left_p, trim_right_p,
259 trim_qual_left, trim_qual_right, trim_tail_left,
260 trim_tail_right, trim_ns_left, trim_ns_right, trim_to_len,
261 min_len, max_len, range_len, min_qual_score, max_qual_score,
262 min_qual_mean, max_qual_mean, min_gc, max_gc, range_gc,
263 ns_max_p, ns_max_n, noniupac, lc_method, derep, seq_id,
264 seq_case, dna_rna, out_format
265
266
267
268
269 </help>
270 </tool>