comparison fastq_groomer.xml @ 0:06c42572d7c0 draft

Imported from capsule None
author devteam
date Thu, 23 Jan 2014 12:31:44 -0500
parents
children 6b294cefd2ae
comparison
equal deleted inserted replaced
-1:000000000000 0:06c42572d7c0
1 <tool id="fastq_groomer" name="FASTQ Groomer" version="1.0.4">
2 <description>convert between various FASTQ quality formats</description>
<!-- Dependency declaration: the tool requires version 1.0.0 of the galaxy_sequence_utils package. -->
3 <requirements>
4 <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement>
5 </requirements>
<!--
  Command line for fastq_groomer.py, run with the python interpreter.
  Fixed leading arguments: input_file, input_type, output_file (each single-quoted).
  Three further quoted arguments are chosen by the options_type conditional:
    * basic:    output type 'cssanger' when input_type is cssanger, otherwise 'sanger',
                followed by the fixed values 'ascii' and 'summarize_input'.
    * advanced: the selected output_type, force_quality_encoding and summarize_input
                values, passed through unchanged.
  The #if / #else / #end if lines are template directives, not shell text.
-->
6 <command interpreter="python">fastq_groomer.py '$input_file' '$input_type' '$output_file'
7 #if str( $options_type['options_type_selector'] ) == 'basic':
8 #if str( $input_type ) == 'cssanger':
9 'cssanger'
10 #else:
11 'sanger'
12 #end if
13 'ascii' 'summarize_input'
14 #else:
15 '${options_type.output_type}' '${options_type.force_quality_encoding}' '${options_type.summarize_input}'
16 #end if
17 </command>
<!--
  User-facing parameters.
    input_file   : the FASTQ dataset to groom (format "fastq").
    input_type   : quality-score variant of the input; sanger is preselected.
    options_type : conditional keyed on options_type_selector. The "basic" branch
                   defines no parameters; the "advanced" branch defines output_type,
                   force_quality_encoding and summarize_input.
-->
18 <inputs>
19 <param name="input_file" type="data" format="fastq" label="File to groom" />
20 <param name="input_type" type="select" label="Input FASTQ quality scores type">
21 <option value="solexa">Solexa</option>
22 <option value="illumina">Illumina 1.3-1.7</option>
23 <option value="sanger" selected="True">Sanger &amp; Illumina 1.8+</option>
24 <option value="cssanger">Color Space Sanger</option>
25 </param>
26 <conditional name="options_type">
27 <param name="options_type_selector" type="select" label="Advanced Options">
28 <option value="basic" selected="True">Hide Advanced Options</option>
29 <option value="advanced">Show Advanced Options</option>
30 </param>
31 <when value="basic">
32 <!-- no options -->
33 </when>
34 <when value="advanced">
<!-- Target quality-score variant; the same four choices as input_type, sanger preselected. -->
35 <param name="output_type" type="select" label="Output FASTQ quality scores type" help="Galaxy tools are designed to work with the Sanger Quality score format.">
36 <option value="solexa">Solexa</option>
37 <option value="illumina">Illumina 1.3-1.7</option>
38 <option value="sanger" selected="True">Sanger (recommended)</option>
39 <option value="cssanger">Color Space Sanger</option>
40 </param>
<!-- The selected value is inserted verbatim into the command line, so "Use Source Encoding" reaches the script as the string 'None'. -->
41 <param name="force_quality_encoding" type="select" label="Force Quality Score encoding">
42 <option value="None">Use Source Encoding</option>
43 <option value="ascii" selected="True">ASCII</option>
44 <option value="decimal">Decimal</option>
45 </param>
<!-- The selected value is passed as the last command-line argument. -->
46 <param name="summarize_input" type="select" label="Summarize input data">
47 <option value="summarize_input" selected="True">Summarize Input</option>
48 <option value="dont_summarize_input">Do not Summarize Input (faster)</option>
49 </param>
50 </when>
51 </conditional>
52 </inputs>
<!--
  Single output dataset, fastqsanger by default. change_format overrides the
  format from the selected parameters:
    * input_type is cssanger             -> fastqcssanger
    * options_type.output_type is solexa / illumina / sanger / cssanger
                                         -> fastqsolexa / fastqillumina / fastqsanger / fastqcssanger
  options_type.output_type is only defined in the "advanced" branch of the
  options_type conditional.
  NOTE(review): with cssanger input in advanced mode both the input_type rule and
  an output_type rule can match; which one takes effect depends on how Galaxy
  resolves multiple matching when entries. Confirm against Galaxy's change_format handling.
-->
53 <outputs>
54 <data name="output_file" format="fastqsanger">
55 <change_format>
56 <when input="input_type" value="cssanger" format="fastqcssanger" />
57 <when input="options_type.output_type" value="solexa" format="fastqsolexa" />
58 <when input="options_type.output_type" value="illumina" format="fastqillumina" />
59 <when input="options_type.output_type" value="sanger" format="fastqsanger" />
60 <when input="options_type.output_type" value="cssanger" format="fastqcssanger" />
61 </change_format>
62 </data>
63 </outputs>
64 <tests>
65 <!-- These tests include test files adapted from supplemental material in Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16. -->
66 <!-- Unfortunately, cannot test for expected failures -->
67 <!-- Test basic options -->
<!-- Basic mode sets no advanced parameters: the command emits 'sanger' output ('cssanger' for cssanger input) with 'ascii' and 'summarize_input'. -->
<!-- sanger input: expected output is the same file as the input. -->
68 <test>
69 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
70 <param name="input_type" value="sanger" />
71 <param name="options_type_selector" value="basic" />
72 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
73 </test>
<!-- cssanger input: expected output is the same file as the input. -->
74 <test>
75 <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" />
76 <param name="input_type" value="cssanger" />
77 <param name="options_type_selector" value="basic" />
78 <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" />
79 </test>
<!-- illumina input compared against the *_as_sanger expected file. -->
80 <test>
81 <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" />
82 <param name="input_type" value="illumina" />
83 <param name="options_type_selector" value="basic" />
84 <output name="output_file" file="illumina_full_range_as_sanger.fastqsanger" />
85 </test>
<!-- solexa input compared against the *_as_sanger expected file. -->
86 <test>
87 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
88 <param name="input_type" value="solexa" />
89 <param name="options_type_selector" value="basic" />
90 <output name="output_file" file="solexa_full_range_as_sanger.fastqsanger" />
91 </test>
<!-- Input file with the .fastqillumina extension declared as sanger: expected output is the same file as the input. -->
92 <test>
93 <param name="input_file" value="sanger_full_range_as_illumina.fastqillumina" ftype="fastq" />
94 <param name="input_type" value="sanger" />
95 <param name="options_type_selector" value="basic" />
96 <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" />
97 </test>
98 <!-- Test grooming from illumina -->
<!-- Advanced mode, one test per output_type (illumina, sanger, solexa, cssanger); force_quality_encoding is None ("Use Source Encoding") and input summarization is on. -->
99 <test>
100 <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" />
101 <param name="input_type" value="illumina" />
102 <param name="options_type_selector" value="advanced" />
103 <param name="output_type" value="illumina" />
104 <param name="force_quality_encoding" value="None" />
105 <param name="summarize_input" value="summarize_input" />
106 <output name="output_file" file="illumina_full_range_original_illumina.fastqillumina" />
107 </test>
108 <test>
109 <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" />
110 <param name="input_type" value="illumina" />
111 <param name="options_type_selector" value="advanced" />
112 <param name="output_type" value="sanger" />
113 <param name="force_quality_encoding" value="None" />
114 <param name="summarize_input" value="summarize_input" />
115 <output name="output_file" file="illumina_full_range_as_sanger.fastqsanger" />
116 </test>
117 <test>
118 <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" />
119 <param name="input_type" value="illumina" />
120 <param name="options_type_selector" value="advanced" />
121 <param name="output_type" value="solexa" />
122 <param name="force_quality_encoding" value="None" />
123 <param name="summarize_input" value="summarize_input" />
124 <output name="output_file" file="illumina_full_range_as_solexa.fastqsolexa" />
125 </test>
126 <test>
127 <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" />
128 <param name="input_type" value="illumina" />
129 <param name="options_type_selector" value="advanced" />
130 <param name="output_type" value="cssanger" />
131 <param name="force_quality_encoding" value="None" />
132 <param name="summarize_input" value="summarize_input" />
133 <output name="output_file" file="illumina_full_range_as_cssanger.fastqcssanger" />
134 </test>
135 <!-- Test grooming from sanger -->
<!-- Advanced mode, one test per output_type (sanger, illumina, solexa, cssanger); force_quality_encoding is None ("Use Source Encoding") and input summarization is on. -->
136 <test>
137 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
138 <param name="input_type" value="sanger" />
139 <param name="options_type_selector" value="advanced" />
140 <param name="output_type" value="sanger" />
141 <param name="force_quality_encoding" value="None" />
142 <param name="summarize_input" value="summarize_input" />
143 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
144 </test>
145 <test>
146 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
147 <param name="input_type" value="sanger" />
148 <param name="options_type_selector" value="advanced" />
149 <param name="output_type" value="illumina" />
150 <param name="force_quality_encoding" value="None" />
151 <param name="summarize_input" value="summarize_input" />
152 <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" />
153 </test>
154 <test>
155 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
156 <param name="input_type" value="sanger" />
157 <param name="options_type_selector" value="advanced" />
158 <param name="output_type" value="solexa" />
159 <param name="force_quality_encoding" value="None" />
160 <param name="summarize_input" value="summarize_input" />
161 <output name="output_file" file="sanger_full_range_as_solexa.fastqsolexa" />
162 </test>
163 <test>
164 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
165 <param name="input_type" value="sanger" />
166 <param name="options_type_selector" value="advanced" />
167 <param name="output_type" value="cssanger" />
168 <param name="force_quality_encoding" value="None" />
169 <param name="summarize_input" value="summarize_input" />
170 <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" />
171 </test>
172 <!-- Test grooming from solexa -->
<!-- Advanced mode, one test per output_type (solexa, illumina, sanger, cssanger); force_quality_encoding is None ("Use Source Encoding") and input summarization is on. -->
173 <test>
174 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
175 <param name="input_type" value="solexa" />
176 <param name="options_type_selector" value="advanced" />
177 <param name="output_type" value="solexa" />
178 <param name="force_quality_encoding" value="None" />
179 <param name="summarize_input" value="summarize_input" />
180 <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" />
181 </test>
182 <test>
183 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
184 <param name="input_type" value="solexa" />
185 <param name="options_type_selector" value="advanced" />
186 <param name="output_type" value="illumina" />
187 <param name="force_quality_encoding" value="None" />
188 <param name="summarize_input" value="summarize_input" />
189 <output name="output_file" file="solexa_full_range_as_illumina.fastqillumina" />
190 </test>
191 <test>
192 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
193 <param name="input_type" value="solexa" />
194 <param name="options_type_selector" value="advanced" />
195 <param name="output_type" value="sanger" />
196 <param name="force_quality_encoding" value="None" />
197 <param name="summarize_input" value="summarize_input" />
198 <output name="output_file" file="solexa_full_range_as_sanger.fastqsanger" />
199 </test>
200 <test>
201 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
202 <param name="input_type" value="solexa" />
203 <param name="options_type_selector" value="advanced" />
204 <param name="output_type" value="cssanger" />
205 <param name="force_quality_encoding" value="None" />
206 <param name="summarize_input" value="summarize_input" />
207 <output name="output_file" file="solexa_full_range_as_cssanger.fastqcssanger" />
208 </test>
209 <!-- Test grooming from cssanger -->
<!-- Advanced mode, one test per output_type (cssanger, sanger, illumina, solexa), plus one extra cssanger input variant at the end; force_quality_encoding is None ("Use Source Encoding"). -->
210 <test>
211 <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" />
212 <param name="input_type" value="cssanger" />
213 <param name="options_type_selector" value="advanced" />
214 <param name="output_type" value="cssanger" />
215 <param name="force_quality_encoding" value="None" />
216 <param name="summarize_input" value="summarize_input" />
217 <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" />
218 </test>
219 <test>
220 <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" />
221 <param name="input_type" value="cssanger" />
222 <param name="options_type_selector" value="advanced" />
223 <param name="output_type" value="sanger" />
224 <param name="force_quality_encoding" value="None" />
225 <param name="summarize_input" value="summarize_input" />
226 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
227 </test>
228 <test>
229 <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" />
230 <param name="input_type" value="cssanger" />
231 <param name="options_type_selector" value="advanced" />
232 <param name="output_type" value="illumina" />
233 <param name="force_quality_encoding" value="None" />
234 <param name="summarize_input" value="summarize_input" />
235 <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" />
236 </test>
237 <test>
238 <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" />
239 <param name="input_type" value="cssanger" />
240 <param name="options_type_selector" value="advanced" />
241 <param name="output_type" value="solexa" />
242 <param name="force_quality_encoding" value="None" />
243 <param name="summarize_input" value="summarize_input" />
244 <output name="output_file" file="sanger_full_range_as_solexa.fastqsolexa" />
245 </test>
<!-- Different cssanger input (adapter base carrying a quality score, judging by the file name); the expected output is the regular sanger_full_range_as_cssanger file. -->
246 <test>
247 <param name="input_file" value="sanger_full_range_as_cssanger_adapter_base_with_quality_score.fastqcssanger_fake_score" ftype="fastq" />
248 <param name="input_type" value="cssanger" />
249 <param name="options_type_selector" value="advanced" />
250 <param name="output_type" value="cssanger" />
251 <param name="force_quality_encoding" value="None" />
252 <param name="summarize_input" value="summarize_input" />
253 <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" />
254 </test>
255 <!-- Test fastq with line wrapping -->
<!-- The same wrapping_original_sanger input is groomed to sanger, illumina and solexa and compared against the matching wrapping_as_* expected file. -->
256 <test>
257 <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" />
258 <param name="input_type" value="sanger" />
259 <param name="options_type_selector" value="advanced" />
260 <param name="output_type" value="sanger" />
261 <param name="force_quality_encoding" value="None" />
262 <param name="summarize_input" value="summarize_input" />
263 <output name="output_file" file="wrapping_as_sanger.fastqsanger" />
264 </test>
265 <test>
266 <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" />
267 <param name="input_type" value="sanger" />
268 <param name="options_type_selector" value="advanced" />
269 <param name="output_type" value="illumina" />
270 <param name="force_quality_encoding" value="None" />
271 <param name="summarize_input" value="summarize_input" />
272 <output name="output_file" file="wrapping_as_illumina.fastqillumina" />
273 </test>
274 <test>
275 <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" />
276 <param name="input_type" value="sanger" />
277 <param name="options_type_selector" value="advanced" />
278 <param name="output_type" value="solexa" />
279 <param name="force_quality_encoding" value="None" />
280 <param name="summarize_input" value="summarize_input" />
281 <output name="output_file" file="wrapping_as_solexa.fastqsolexa" />
282 </test>
283 <!-- Test forcing quality score encoding -->
<!-- Input and output score types are kept equal in each test; only force_quality_encoding (ascii or decimal) varies. -->
284 <!-- Sanger, range 0 - 93 -->
<!-- *_as_decimal_sanger input forced to ascii: expected output is the original sanger file. -->
285 <test>
286 <param name="input_file" value="sanger_full_range_as_decimal_sanger.fastqsanger" ftype="fastq" />
287 <param name="input_type" value="sanger" />
288 <param name="options_type_selector" value="advanced" />
289 <param name="output_type" value="sanger" />
290 <param name="force_quality_encoding" value="ascii" />
291 <param name="summarize_input" value="summarize_input" />
292 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
293 </test>
<!-- Original sanger input forced to decimal: expected output is the *_as_decimal_sanger file. -->
294 <test>
295 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" />
296 <param name="input_type" value="sanger" />
297 <param name="options_type_selector" value="advanced" />
298 <param name="output_type" value="sanger" />
299 <param name="force_quality_encoding" value="decimal" />
300 <param name="summarize_input" value="summarize_input" />
301 <output name="output_file" file="sanger_full_range_as_decimal_sanger.fastqsanger" />
302 </test>
<!-- *_as_tab_decimal_sanger input (presumably tab-separated decimal scores, judging by the file name) forced to ascii. -->
303 <test>
304 <param name="input_file" value="sanger_full_range_as_tab_decimal_sanger.fastqsanger" ftype="fastq" />
305 <param name="input_type" value="sanger" />
306 <param name="options_type_selector" value="advanced" />
307 <param name="output_type" value="sanger" />
308 <param name="force_quality_encoding" value="ascii" />
309 <param name="summarize_input" value="summarize_input" />
310 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
311 </test>
312 <!-- Solexa, range -5 - 62 -->
<!-- *_as_decimal_solexa input forced to ascii: expected output is the original solexa file. -->
313 <test>
314 <param name="input_file" value="solexa_full_range_as_decimal_solexa.fastqsolexa" ftype="fastq" />
315 <param name="input_type" value="solexa" />
316 <param name="options_type_selector" value="advanced" />
317 <param name="output_type" value="solexa" />
318 <param name="force_quality_encoding" value="ascii" />
319 <param name="summarize_input" value="summarize_input" />
320 <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" />
321 </test>
<!-- Original solexa input forced to decimal: expected output is the *_as_decimal_solexa file. -->
322 <test>
323 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" />
324 <param name="input_type" value="solexa" />
325 <param name="options_type_selector" value="advanced" />
326 <param name="output_type" value="solexa" />
327 <param name="force_quality_encoding" value="decimal" />
328 <param name="summarize_input" value="summarize_input" />
329 <output name="output_file" file="solexa_full_range_as_decimal_solexa.fastqsolexa" />
330 </test>
331 </tests>
332 <help>
333 **What it does**
334
335 This tool offers several conversion options relating to the FASTQ format.
336
337 When using *Basic* options, the output will be *sanger* formatted or *cssanger* formatted (when the input is Color Space Sanger).
338
339 When converting, if a quality score falls outside of the target score range, it will be coerced to the closest available value (i.e. the minimum or maximum).
340
341 When converting between Solexa and the other formats, quality scores are mapped between Solexa and PHRED scales using the equations found in `Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.`_
342
343 When converting between color space (csSanger) and base/sequence space (Sanger, Illumina, Solexa) formats, adapter bases are lost or gained; if gained, the base 'G' is used as the adapter. You cannot convert a color space read to base space if there is no adapter present in the color space sequence. Any masked or ambiguous nucleotides in base space will be converted to 'N's when determining color space encoding.
344
345 -----
346
347 **Quality Score Comparison**
348
349 ::
350
351 SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS
352 ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
353 ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
354 !"#$%&amp;'()*+,-./0123456789:;&lt;=&gt;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
355 |                         |    |        |                              |                     |
356 33                        59   64       73                             104                   126
357
358 S - Sanger Phred+33, 94 values (0, 93) (0 to 60 expected in raw reads)
359 I - Illumina 1.3 Phred+64, 63 values (0, 62) (0 to 40 expected in raw reads)
360 X - Solexa Solexa+64, 68 values (-5, 62) (-5 to 40 expected in raw reads)
361
362 Diagram adapted from http://en.wikipedia.org/wiki/FASTQ_format
363
364 .. class:: infomark
365
366 Output from Illumina 1.8+ pipelines is Sanger encoded.
367
368 ------
369
370 **Citation**
371
372 If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
373
374
375 .. _Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.: http://www.ncbi.nlm.nih.gov/pubmed/20015970
376
377 </help>
378 </tool>