annotate sickle.xml @ 0:d883d8d86977 draft default tip

Uploaded
author jjohnson
date Mon, 13 Jan 2014 14:52:59 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
1 <tool id="sickle" name="Sickle">
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
2 <description>Windowed Adaptive Trimming of FastQ data</description>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
3 <requirements>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
4 <requirement version="1.210">sickle</requirement>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
5 </requirements>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
6 <command>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
7 sickle $readtype.single_or_paired --quiet
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
8
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
9 #if str($readtype.single_or_paired) == "se":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
10 -f $readtype.input_single -o $output_single
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
11
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
12 #if $readtype.input_single.ext == "fastq":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
13 -t sanger
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
14 #else if $readtype.input_single.ext == "fastqsanger":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
15 -t sanger
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
16 #else if $readtype.input_single.ext == "fastqillumina":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
17 -t illumina
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
18 #else if $readtype.input_single.ext == "fastqsolexa":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
19 -t solexa
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
20 #end if
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
21
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
22 #end if
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
23
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
24 #if str($readtype.single_or_paired) == "pe":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
25 -f $readtype.input_paired1 -r $readtype.input_paired2 -o $output_paired1 -p $output_paired2 -s $output_paired_single
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
26
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
27 #if $readtype.input_paired1.ext == "fastq":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
28 -t sanger
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
29 #else if $readtype.input_paired1.ext == "fastqsanger":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
30 -t sanger
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
31 #else if $readtype.input_paired1.ext == "fastqillumina":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
32 -t illumina
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
33 #else if $readtype.input_paired1.ext == "fastqsolexa":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
34 -t solexa
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
35 #end if
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
36
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
37 #end if
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
38
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
39 #if str($qual_threshold) != "":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
40 -q $qual_threshold
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
41 #end if
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
42
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
43 #if str($length_threshold) != "":
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
44 -l $length_threshold
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
45 #end if
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
46
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
47 #if $no_five_prime:
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
48 -x
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
49 #end if
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
50
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
51 #if $discard_n:
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
52 -n
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
53 #end if
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
54 </command>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
55
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
56 <inputs>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
57 <conditional name="readtype">
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
58 <param name="single_or_paired" type="select" optional="false" label="Single-End or Paired-End reads?" help="Note: Sickle will infer the quality type of the file from its datatype. I.e., if the datatype is fastqsanger, then the quality type is sanger. The default is fastqsanger.">
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
59 <option value="se" selected="true">Single-End</option>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
60 <option value="pe">Paired-End</option>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
61 </param>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
62
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
63 <when value="se">
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
64 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa" name="input_single" type="data" optional="false" label="Single-End FastQ Reads"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
65 </when>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
66
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
67 <when value="pe">
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
68 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa" name="input_paired1" type="data" optional="false" label="Paired-End Forward Strand FastQ Reads"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
69 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa" name="input_paired2" type="data" optional="false" label="Paired-End Reverse Strand FastQ Reads"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
70 </when>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
71 </conditional>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
72
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
73 <param name="qual_threshold" value="20" type="integer" optional="true" label="Quality Threshold">
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
74 <validator type="in_range" min="0" message="Minimum value is 0"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
75 </param>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
76
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
77 <param name="length_threshold" value="20" type="integer" optional="true" label="Length Threshold">
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
78 <validator type="in_range" min="0" message="Minimum value is 0"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
79 </param>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
80
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
81 <param name="no_five_prime" type="boolean" label="Don't do 5' trimming"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
82 <param name="discard_n" type="boolean" label="Discard sequences with Ns"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
83 </inputs>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
84
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
85 <outputs>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
86 <data format_source="input_single" name="output_single" label="Single-End output of ${tool.name} on ${on_string}">
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
87 <filter>(readtype['single_or_paired'] == 'se')</filter>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
88 </data>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
89
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
90 <data format_source="input_paired1" name="output_paired1" label="Paired-End forward strand output of ${tool.name} on ${on_string}">
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
91 <filter>(readtype['single_or_paired'] == 'pe')</filter>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
92 </data>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
93
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
94 <data format_source="input_paired2" name="output_paired2" label="Paired-End reverse strand output of ${tool.name} on ${on_string}">
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
95 <filter>(readtype['single_or_paired'] == 'pe')</filter>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
96 </data>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
97
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
98 <data format_source="input_paired1" name="output_paired_single" label="Singletons from Paired-End output of ${tool.name} on ${on_string}">
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
99 <filter>(readtype['single_or_paired'] == 'pe')</filter>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
100 </data>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
101 </outputs>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
102
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
103 <stdio>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
104 <exit_code range="1:" level="fatal" description="sickle failed" /> <!-- open-ended range: every non-zero exit status marks the job as failed, not only status 1 -->
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
105 </stdio>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
106
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
107 <tests>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
108 <test>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
109 <param name="single_or_paired" value="se"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
110 <param name="input_single" value="test.fastq" ftype="fastqillumina"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
111 <param name="qual_threshold" value="33"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
112 <param name="length_threshold" value="40"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
113 <param name="no_five_prime" value="False"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
114 <output name="output_single" file="trimmed_output_file.fastq" ftype="fastqillumina"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
115 </test>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
116
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
117 <test>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
118 <param name="single_or_paired" value="pe"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
119 <param name="input_paired1" value="test.f.fastq" ftype="fastqillumina"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
120 <param name="input_paired2" value="test.r.fastq" ftype="fastqillumina"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
121 <param name="qual_threshold" value="12"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
122 <param name="length_threshold" value="15"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
123 <param name="no_five_prime" value="False"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
124 <output name="output_paired1" file="trimmed_output_file1.fastq" ftype="fastqillumina"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
125 <output name="output_paired2" file="trimmed_output_file2.fastq" ftype="fastqillumina"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
126 <output name="output_paired_single" file="trimmed_singles_file.fastq" ftype="fastqillumina"/>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
127 </test>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
128 </tests>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
129
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
130 <help>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
131 Most modern sequencing technologies produce reads that have deteriorating quality towards the 3'-end and some towards the 5'-end as well. Incorrectly called bases in both regions negatively impact assemblies, mapping, and downstream bioinformatics analyses.
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
132
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
133 Sickle is a tool that uses sliding windows along with quality and length thresholds to determine when quality is sufficiently low to trim the 3'-end of reads and also determines when the quality is sufficiently high to trim the 5'-end of reads. It will also discard reads based upon the length threshold. It takes the quality values and slides a window across them whose length is 0.1 times the length of the read. If this length is less than 1, then the window is set to be equal to the length of the read. Otherwise, the window slides along the quality values until the average quality in the window rises above the threshold, at which point the algorithm determines where within the window the rise occurs and cuts the read and quality there for the 5'-end cut. Then when the average quality in the window drops below the threshold, the algorithm determines where in the window the drop occurs and cuts both the read and quality strings there for the 3'-end cut. However, if the length of the remaining sequence is less than the minimum length threshold, then the read is discarded entirely. 5'-end trimming can be disabled.
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
134
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
135 Sickle also has an option to discard reads with any Ns in them.
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
136
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
137 Sickle supports three types of quality values: Illumina, Solexa, and Sanger. Note that the Solexa quality setting is an approximation (the actual conversion is a non-linear transformation). The end approximation is close. Illumina quality refers to qualities encoded with the CASAVA pipeline between versions 1.3 and 1.7. Illumina quality using CASAVA >= 1.8 is Sanger encoded. Sickle will get the quality type from the datatype of the file.
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
138
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
139 Note that Sickle will remove the 2nd fastq record header (on the "+" line) and replace it with simply a "+". This is the default format for CASAVA >= 1.8.
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
140
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
141 Sickle also supports gzipped file inputs.
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
142 </help>
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
143
d883d8d86977 Uploaded
jjohnson
parents:
diff changeset
144 </tool>