0
|
1 <tool id="prinseq_trimmer" name="FASTQ trimmer" version="0.1">
|
|
2 <description>(prinseq)</description>
|
|
3 <version_command interpreter="perl">prinseq-lite.pl --version</version_command>
|
|
4 <requirements>
|
|
5 <requirement type="package" version="0.20.3">prinseq_perl_dependencies</requirement>
|
|
6 <requirement type="set_environment">PRINSEQ_SCRIPT_PATH</requirement>
|
|
7 </requirements>
|
|
<command>
    ## Cheetah template: Galaxy renders it into the shell command line below.
    ## Steps: (1) run prinseq-lite to trim the reads and collect graph data,
    ## (2) move the trimmed reads into the Galaxy output datasets,
    ## (3) render the HTML graphs and (4) build the index page of the report.
    #import os

    ## Scratch file for the prinseq graph data. It is a *shell* variable, so the
    ## assignment must not contain spaces, and every later use is written with an
    ## escaped dollar sign so Cheetah does not try to resolve it as a template
    ## variable.
    temp_graph_file=`mktemp`;

    perl \$PRINSEQ_SCRIPT_PATH/prinseq-lite.pl
    #if $seq_type.seq_type_opt == 'single':
        -fastq $seq_type.input_singles
        #if $seq_type.input_singles.ext == 'fastqillumina':
            -phred64
        #end if
    #else:
        -fastq $seq_type.input_mate1
        -fastq2 $seq_type.input_mate2
        #if $seq_type.input_mate1.ext != $seq_type.input_mate2.ext:
            #import sys
            #silent sys.stderr.write( 'Both pairs from your paired-end library need to be from the same filetype.' )
        #end if
        #if $seq_type.input_mate1.ext == 'fastqillumina':
            -phred64
        #end if
    #end if

    -out_good 'trimmed_reads'
    ## we do not use the filter options in prinseq, so we are not interested in reads
    ## that do not pass the filters
    -out_bad null

    ## Trim options: each flag is only emitted when the user supplied a value.
    #if $trim_to_len:
        -trim_to_len $trim_to_len
    #end if

    #if $trim_left:
        -trim_left $trim_left
    #end if

    #if $trim_right:
        -trim_right $trim_right
    #end if

    #if $trim_left_p:
        -trim_left_p $trim_left_p
    #end if

    #if $trim_right_p:
        -trim_right_p $trim_right_p
    #end if

    #if $trim_tail_left:
        -trim_tail_left $trim_tail_left
    #end if

    #if $trim_tail_right:
        -trim_tail_right $trim_tail_right
    #end if

    #if $trim_ns_left:
        -trim_ns_left $trim_ns_left
    #end if

    #if $trim_ns_right:
        -trim_ns_right $trim_ns_right
    #end if

    ## The quality-trimming settings only matter when a quality threshold is set.
    #if $trim_qual_left or $trim_qual_right:
        -trim_qual_type $trim_qual_type
        -trim_qual_rule $trim_qual_rule
        -trim_qual_window $trim_qual_window
        -trim_qual_step $trim_qual_step
    #end if

    #if $trim_qual_left:
        -trim_qual_left $trim_qual_left
    #end if

    #if $trim_qual_right:
        -trim_qual_right $trim_qual_right
    #end if

    -graph_stats #echo ','.join( $graph_stats )#

    ## summary are written to stdout
    -stats_all

    -graph_data \$temp_graph_file

    ;

    ## prinseq appends the file extension (and _1 / _2 for paired-end data) to the
    ## -out_good prefix; move the results into the datasets Galaxy expects.
    ## NOTE(review): file names follow the PRINSEQ manual for -out_good; confirm
    ## against the installed prinseq-lite version.
    #if $seq_type.seq_type_opt == 'single':
        mv trimmed_reads.fastq $ofile_single
    #else:
        mv trimmed_reads_1.fastq $outfile_r1
        ;
        mv trimmed_reads_2.fastq $outfile_r2
    #end if

    ;

    ## Make sure the extra-files directory of the HTML dataset exists before writing into it.
    mkdir -p $html_file.files_path

    ;

    perl \$PRINSEQ_SCRIPT_PATH/prinseq-graphs-noPCA.pl -i \$temp_graph_file -html_all -o #echo os.path.join( $html_file.files_path, 'graphs' )#

    ;

    python \$PRINSEQ_SCRIPT_PATH/create_index.py $html_file.files_path > $html_file

    ;

    ## Remove the scratch file created by mktemp above.
    rm -f \$temp_graph_file

</command>
|
|
<inputs>
    <!-- Library layout: selects which input dataset parameters are shown. -->
    <conditional name="seq_type">
        <param name="seq_type_opt" type="select" label="Is this library paired- or single-end?">
            <option value="single">Single-end</option>
            <option value="paired">Paired-end</option>
        </param>
        <when value="single">
            <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
        </when>
        <when value="paired">
            <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
            <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
        </when>
    </conditional>

    <!-- Trim options. They default to empty and are marked optional so that an
         option left blank is simply not applied. -->
    <param name="trim_to_len" type="integer" value="" optional="True"
        label="Trim all sequence from the 3'-end to result in sequence with this length"
        help="(-trim_to_len)"/>

    <param name="trim_left" type="integer" value="" optional="True"
        label="Trim sequence at the 5'-end by trim_left positions"
        help="(-trim_left)"/>

    <param name="trim_right" type="integer" value="" optional="True"
        label="Trim sequence at the 3'-end by trim_right positions"
        help="(-trim_right)"/>

    <param name="trim_left_p" type="integer" value="" optional="True"
        label="Trim sequence at the 5'-end by trim_left_p percentage of read length."
        help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_left_p)"/>

    <param name="trim_right_p" type="integer" value="" optional="True"
        label="Trim sequence at the 3'-end by trim_right_p percentage of read length"
        help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_right_p)"/>

    <param name="trim_tail_left" type="integer" value="" optional="True"
        label="Trim poly-A/T tail with a minimum length of trim_tail_left at the 5'-end"
        help="(-trim_tail_left)"/>

    <param name="trim_tail_right" type="integer" value="" optional="True"
        label="Trim poly-A/T tail with a minimum length of trim_tail_right at the 3'-end"
        help="(-trim_tail_right)"/>

    <param name="trim_ns_left" type="integer" value="" optional="True"
        label="Trim poly-N tail with a minimum length of trim_ns_left at the 5'-end"
        help="(-trim_ns_left)"/>

    <param name="trim_ns_right" type="integer" value="" optional="True"
        label="Trim poly-N tail with a minimum length of trim_ns_right at the 3'-end."
        help="(-trim_ns_right)"/>

    <!-- Quality trimming: thresholds plus the settings describing how the
         quality value is calculated over a sliding window. -->
    <param name="trim_qual_left" type="integer" value="" optional="True"
        label="Trim sequence by quality score from the 5'-end with this threshold score"
        help="(-trim_qual_left)"/>

    <param name="trim_qual_right" type="integer" value="" optional="True"
        label="Trim sequence by quality score from the 3'-end with this threshold score"
        help="(-trim_qual_right)"/>

    <param name="trim_qual_type" type="select" label="Type of quality score calculation to use">
        <option value="min" selected="True">min</option>
        <option value="mean">mean</option>
        <option value="max">max</option>
        <option value="sum">sum</option>
    </param>

    <param name="trim_qual_rule" type="select" label="Rule to use to compare quality score to calculated value.">
        <option value="gt">greater than quality score</option>
        <option value="lt" selected="True">less than quality score</option>
        <option value="et">equal to quality score</option>
    </param>

    <param name="trim_qual_window" type="integer" value="1"
        label="The sliding window size used to calculate quality score by type"
        help="(-trim_qual_window)"/>

    <param name="trim_qual_step" type="integer" value="1"
        label="Step size used to move the sliding window"
        help="To move the window over all quality scores without missing any, the step size should be less or equal to the window size (-trim_qual_step)"/>

    <!-- Statistics written to the graph data file that feeds the HTML report. -->
    <param name="graph_stats" type="select" multiple="True" label="Which statistics should be calculated and included in the graph_data file">
        <option value="ld" selected="True">Length distribution</option>
        <option value="gc" selected="True">GC content distribution</option>
        <option value="qd" selected="True">Base quality distribution</option>
        <option value="ns" selected="True">Occurrence of N</option>
        <option value="pt" selected="True">Poly-A/T tails</option>
        <option value="ts" selected="True">Tag sequence check</option>
        <option value="as" selected="True">Assembly quality measure</option>
        <option value="de" selected="True">Sequence duplication - exact only</option>
        <option value="da" selected="True">Sequence duplication - exact + 5'/3'</option>
        <option value="sc" selected="True">Sequence complexity</option>
        <option value="dn" selected="True">Dinucleotide odds ratios, includes the PCA plots</option>
    </param>

    <!-- TODO
    -log <file>
    Log file to keep track of parameters, errors, etc. The log file
    name is optional. If no file name is given, the log file name
    will be "inputname.log". If the log file already exists, new
    content will be added to the file.
    -->
</inputs>
|
|
185
|
|
186
|
|
<outputs>
    <!-- Trimmed reads for single-end libraries. The datatype is taken from the
         input dataset, matching how the paired-end outputs are typed below. -->
    <data format="fastq" name="ofile_single" metadata_source="seq_type.input_singles" label="${tool.name} on ${on_string}">
        <filter>seq_type['seq_type_opt'] == "single"</filter>
        <actions>
            <conditional name="seq_type.seq_type_opt">
                <when value="single">
                    <action type="format">
                        <option type="from_param" name="seq_type.input_singles" param_attribute="ext" />
                    </action>
                </when>
            </conditional>
        </actions>
    </data>

    <!-- Trimmed forward reads; only created for paired-end libraries. -->
    <data format="fastq" name="outfile_r1" label="${tool.name} on ${on_string}">
        <filter>seq_type['seq_type_opt'] == "paired"</filter>
        <actions>
            <conditional name="seq_type.seq_type_opt">
                <when value="single">
                    <action type="format">
                        <option type="from_param" name="seq_type.input_singles" param_attribute="ext" />
                    </action>
                </when>
                <when value="paired">
                    <action type="format">
                        <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" />
                    </action>
                </when>
            </conditional>
        </actions>
    </data>

    <!-- Trimmed reverse reads; only created for paired-end libraries. The datatype
         follows the first mate, as in the original definition. -->
    <data format="fastq" name="outfile_r2" label="${tool.name} on ${on_string}">
        <filter>seq_type['seq_type_opt'] == "paired"</filter>
        <actions>
            <conditional name="seq_type.seq_type_opt">
                <when value="single">
                    <action type="format">
                        <option type="from_param" name="seq_type.input_singles" param_attribute="ext" />
                    </action>
                </when>
                <when value="paired">
                    <action type="format">
                        <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" />
                    </action>
                </when>
            </conditional>
        </actions>
    </data>

    <!-- HTML report: index page plus the graphs stored in its extra-files directory. -->
    <data format="html" name="html_file" label="${tool.name} on ${on_string} summary" />
</outputs>
|
|
<tests>
    <test>
        <!-- Single-end library: trim 10 positions from the 3'-end of every read. -->
        <param name="seq_type_opt" value="single" />
        <param name="input_singles" value="example1.fastq" />
        <param name="trim_right" value="10" />
        <output name="ofile_single" file="example1_trim_right_10.fastq" />
    </test>
</tests>
|
|
237 <help>
|
|
238
|
|
239
|
|
240 .. class:: warningmark
|
|
241
|
|
242 **TIP**
|
|
243
|
|
244 -----
|
|
245
|
|
246 **What it does**
|
|
247
|
|
248
|
|
249 PRINSEQ is a tool that generates summary statistics of sequence and quality data and that is used to filter, reformat and trim next-generation sequence data.
|
|
250
|
|
251
|
|
252 http://prinseq.sourceforge.net/manual.html
|
|
253
|
|
254
|
|
**Order of processing**

The available options are processed in the following order:
|
|
257
|
|
258 seq_num, trim_left, trim_right, trim_left_p, trim_right_p,
|
|
259 trim_qual_left, trim_qual_right, trim_tail_left,
|
|
260 trim_tail_right, trim_ns_left, trim_ns_right, trim_to_len,
|
|
261 min_len, max_len, range_len, min_qual_score, max_qual_score,
|
|
262 min_qual_mean, max_qual_mean, min_gc, max_gc, range_gc,
|
|
263 ns_max_p, ns_max_n, noniupac, lc_method, derep, seq_id,
|
|
264 seq_case, dna_rna, out_format
|
|
265
|
|
266
|
|
267
|
|
268
|
|
269 </help>
|
|
270 </tool>
|