Mercurial > repos > iuc > sickle
comparison sickle.xml @ 0:15cb7dc0ed41 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sickle commit 128d3f255f00c47fa2b16d9b7432d48a089660c1
| author | iuc |
|---|---|
| date | Thu, 12 Nov 2015 07:01:21 -0500 |
| parents | |
| children | b14d0191f2c8 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:15cb7dc0ed41 |
|---|---|
| 1 <tool id="sickle" name="Sickle" version="1.33"> | |
| 2 <description>windowed adaptive trimming of FASTQ data</description> | |
| 3 <requirements> | |
| 4 <requirement type="package" version="1.33">sickle</requirement> | |
| 5 </requirements> | |
| 6 <version_command>sickle --version | head -n 1</version_command> | |
| 7 <command> | |
| 8 sickle | |
| 9 ## The sub-command (se/pe), its input/output flags and the -t quality type all depend on the selected read layout. | |
| 10 #if str($readtype.single_or_paired) == "se": | |
| 11 se -f "${readtype.input_single}" -o "$output_single" | |
| 12 ## Quality type follows the input datatype; anything that is not fastqillumina or fastqsolexa falls back to sanger so that -t is always emitted. | |
| 13 #if $readtype.input_single.ext == "fastqillumina": | |
| 14 -t illumina | |
| 15 #else if $readtype.input_single.ext == "fastqsolexa": | |
| 16 -t solexa | |
| 17 #else | |
| 18 -t sanger | |
| 19 #end if | |
| 20 #end if | |
| 21 | |
| 22 #if str($readtype.single_or_paired) == "pe_combo": | |
| 23 #if $readtype.output_n: | |
| 24 pe -c "${readtype.input_combo}" -M "$output_combo" | |
| 25 #else | |
| 26 pe -c "${readtype.input_combo}" -m "$output_combo" -s "$output_combo_single" | |
| 27 #end if | |
| 28 ## Quality type follows the input datatype; anything that is not fastqillumina or fastqsolexa falls back to sanger so that -t is always emitted. | |
| 29 #if $readtype.input_combo.ext == "fastqillumina": | |
| 30 -t illumina | |
| 31 #else if $readtype.input_combo.ext == "fastqsolexa": | |
| 32 -t solexa | |
| 33 #else | |
| 34 -t sanger | |
| 35 #end if | |
| 36 #end if | |
| 37 | |
| 38 #if str($readtype.single_or_paired) == "pe_sep": | |
| 39 pe -f "${readtype.input_paired1}" -r "${readtype.input_paired2}" -o "$output_paired1" -p "$output_paired2" -s "$output_paired_single" | |
| 40 ## Quality type is taken from the forward-read datatype only; anything that is not fastqillumina or fastqsolexa falls back to sanger so that -t is always emitted. | |
| 41 #if $readtype.input_paired1.ext == "fastqillumina": | |
| 42 -t illumina | |
| 43 #else if $readtype.input_paired1.ext == "fastqsolexa": | |
| 44 -t solexa | |
| 45 #else | |
| 46 -t sanger | |
| 47 #end if | |
| 48 #end if | |
| 49 | |
| 50 #if str($qual_threshold) != "": | |
| 51 -q $qual_threshold | |
| 52 #end if | |
| 53 | |
| 54 #if str($length_threshold) != "": | |
| 55 -l $length_threshold | |
| 56 #end if | |
| 57 | |
| 58 #if $no_five_prime: | |
| 59 -x | |
| 60 #end if | |
| 61 | |
| 62 #if $trunc_n: | |
| 63 -n | |
| 64 #end if | |
| 65 </command> | |
| 66 | |
| 67 <inputs> | |
| 68 <conditional name="readtype"> | |
| 69 <param name="single_or_paired" type="select" label="Single-end or paired-end reads?" help="Note: Sickle will infer the quality type of the file from its datatype. I.e., if the datatype is fastqsanger, then the quality type is sanger. The default is fastqsanger."> | |
| 70 <option value="se" selected="true">Single-end</option> | |
| 71 <option value="pe_combo">Paired-end (one interleaved input file)</option> | |
| 72 <option value="pe_sep">Paired-end (two separate input files)</option> | |
| 73 </param> | |
| 74 | |
| 75 <when value="se"> | |
| 76 <param format="fastq" name="input_single" type="data" label="Single-end FASTQ reads" help="(-f)" /> | |
| 77 </when> | |
| 78 | |
| 79 <when value="pe_combo"> | |
| 80 <param format="fastq" name="input_combo" type="data" label="Paired-end interleaved FASTQ reads" help="(-c)" /> | |
| 81 <param name="output_n" type="boolean" label="Output only one file with all reads" help="This will output only one file with all the reads, where the reads that did not pass filter will be replaced with a single 'N', rather than discarded."/> | |
| 82 </when> | |
| 83 | |
| 84 <when value="pe_sep"> | |
| 85 <param format="fastq" name="input_paired1" type="data" label="Paired-end forward strand FASTQ reads" help="(-f)" /> | |
| 86 <param format="fastq" name="input_paired2" type="data" label="Paired-end reverse strand FASTQ reads" help="(-r)" /> | |
| 87 </when> | |
| 88 </conditional> | |
| 89 | |
| 90 <param name="qual_threshold" value="20" min="0" type="integer" optional="true" label="Quality threshold" help="Threshold for trimming based on average quality in a window (-q)" /> | |
| 91 | |
| 92 <param name="length_threshold" value="20" min="0" type="integer" optional="true" label="Length threshold" help="Threshold to keep a read based on length after trimming (-l)" /> | |
| 93 | |
| 94 <param name="no_five_prime" type="boolean" label="Don't do 5' trimming" help="(-x)" /> | |
| 95 <param name="trunc_n" type="boolean" label="Truncate sequences with Ns at first N position" help="(-n)" /> | |
| 96 </inputs> | |
| 97 | |
| 98 <outputs> | |
| 99 <data name="output_single" format_source="input_single" label="Single-end output of ${tool.name} on ${on_string}"> | |
| 100 <filter>readtype['single_or_paired'] == 'se'</filter> | |
| 101 </data> | |
| 102 | |
| 103 <data name="output_combo" format_source="input_combo" label="Paired-end interleaved output of ${tool.name} on ${on_string}"> | |
| 104 <filter>readtype['single_or_paired'] == 'pe_combo'</filter> | |
| 105 </data> | |
| 106 | |
| 107 <data name="output_combo_single" format_source="input_combo" label="Singletons from paired-end interleaved output of ${tool.name} on ${on_string}"> | |
| 108 <filter>readtype['single_or_paired'] == 'pe_combo' and not readtype['output_n']</filter> | |
| 109 </data> | |
| 110 | |
| 111 <data name="output_paired1" format_source="input_paired1" label="Paired-end forward strand output of ${tool.name} on ${on_string}"> | |
| 112 <filter>readtype['single_or_paired'] == 'pe_sep'</filter> | |
| 113 </data> | |
| 114 | |
| 115 <data name="output_paired2" format_source="input_paired2" label="Paired-end reverse strand output of ${tool.name} on ${on_string}"> | |
| 116 <filter>readtype['single_or_paired'] == 'pe_sep'</filter> | |
| 117 </data> | |
| 118 | |
| 119 <data name="output_paired_single" format_source="input_paired1" label="Singletons from paired-end output of ${tool.name} on ${on_string}"> | |
| 120 <filter>readtype['single_or_paired'] == 'pe_sep'</filter> | |
| 121 </data> | |
| 122 </outputs> | |
| 123 <tests> | |
| 124 <test> | |
| 125 <param name="single_or_paired" value="pe_combo" /> | |
| 126 <param name="input_combo" value="test.fastq" /> | |
| 127 <param name="qual_threshold" value="34" /> | |
| 128 <output name="output_combo" file="output.c1.fastq" /> | |
| 129 <output name="output_combo_single" file="output.s.fastq" /> | |
| 130 </test> | |
| 131 <test> | |
| 132 <param name="single_or_paired" value="pe_combo" /> | |
| 133 <param name="input_combo" value="test.fastq" /> | |
| 134 <param name="qual_threshold" value="34" /> | |
| 135 <param name="output_n" value="true" /> | |
| 136 <output name="output_combo" file="output.c2.fastq" /> | |
| 137 </test> | |
| 138 <test> | |
| 139 <param name="single_or_paired" value="pe_sep" /> | |
| 140 <param name="input_paired1" value="test.f.fastq" /> | |
| 141 <param name="input_paired2" value="test.r.fastq" /> | |
| 142 <param name="qual_threshold" value="34" /> | |
| 143 <output name="output_paired1" file="output.f.fastq" /> | |
| 144 <output name="output_paired2" file="output.r.fastq" /> | |
| 145 <output name="output_paired_single" file="output.s.fastq" /> | |
| 146 </test> | |
| 147 </tests> | |
| 148 <help> | |
| 149 **What it does** | |
| 150 | |
| 151 Most modern sequencing technologies produce reads that have | |
| 152 deteriorating quality towards the 3'-end and some towards the 5'-end | |
| 153 as well. Incorrectly called bases in both regions negatively impact | |
| 154 assemblies, mapping, and downstream bioinformatics analyses. | |
| 155 | |
| 156 Sickle is a tool that uses sliding windows along with quality and | |
| 157 length thresholds to determine when quality is sufficiently low to | |
| 158 trim the 3'-end of reads and also determines when the quality is | |
| 159 sufficiently high to trim the 5'-end of reads. It will also | |
| 160 discard reads based upon the length threshold. It takes the quality | |
| 161 values and slides a window across them whose length is 0.1 times the | |
| 162 length of the read. If this length is less than 1, then the window is | |
| 163 set to be equal to the length of the read. Otherwise, the window | |
| 164 slides along the quality values until the average quality in the | |
| 165 window rises above the threshold, at which point the algorithm | |
| 166 determines where within the window the rise occurs and cuts the read | |
| 167 and quality there for the 5'-end cut. Then when the average quality | |
| 168 in the window drops below the threshold, the algorithm determines | |
| 169 where in the window the drop occurs and cuts both the read and quality | |
| 170 strings there for the 3'-end cut. However, if the length of the | |
| 171 remaining sequence is less than the minimum length threshold, then the | |
| 172 read is discarded entirely (or replaced with an "N" record). 5'-end | |
| 173 trimming can be disabled. Sickle also has an option to truncate reads | |
| 174 with Ns at the first N position. | |
| 175 | |
| 176 Sickle supports three types of quality values: Illumina, Solexa, and | |
| 177 Sanger. Note that the Solexa quality setting is an approximation (the | |
| 178 actual conversion is a non-linear transformation). The end | |
| 179 approximation is close. Illumina quality refers to qualities encoded | |
| 180 with the CASAVA pipeline between versions 1.3 and 1.7. Illumina | |
| 181 quality using CASAVA >= 1.8 is Sanger encoded. The quality value will | |
| 182 be determined from the datatype of the data, i.e. a fastqsanger datatype | |
| 183 is assumed to be Sanger encoded. | |
| 184 | |
| 185 Note that Sickle will remove the 2nd FASTQ record header (on the "+" | |
| 186 line) and replace it with simply a "+". This is the default format for | |
| 187 CASAVA >= 1.8. | |
| 188 | |
| 189 ----- | |
| 190 | |
| 191 **Options** | |
| 192 | |
| 193 **Single-end** | |
| 194 | |
| 195 This option takes one single-end input file and outputs one single-end | |
| 196 output file of reads that passed the filters. | |
| 197 | |
| 198 **Paired-End (one interleaved input file)** | |
| 199 | |
| 200 This option takes as input one interleaved paired-end file. If you then | |
| 201 check the "Output only one file with all reads" checkbox, it will output | |
| 202 one interleaved file where any read that did not pass filter will be replaced | |
| 203 with a FASTQ record where the sequence is a single "N" and the quality is the | |
| 204 lowest quality possible for that quality type. This will preserve the paired | |
| 205 nature of the data. If you leave the checkbox unchecked, it will output two files, | |
| 206 one interleaved file with all the passed pairs and one singletons file where only | |
| 207 one of the pair passed filter. | |
| 208 | |
| 209 **Paired-End (two separate input files)** | |
| 210 | |
| 211 This option takes two separate (forward and reverse) paired-end files as input. | |
| 212 The output is three files: Two paired-end files with pairs that passed filter and | |
| 213 a singletons file where only one of the pair passed filter. | |
| 214 | |
| 215 **Quality threshold** | |
| 216 | |
| 217 Input your desired quality threshold. This threshold is Phred-scaled; Phred scores | |
| 218 typically range from 0 to 41 for FASTQ data. | |
| 219 | |
| 220 **Length threshold** | |
| 221 | |
| 222 Input your desired length threshold. This is the threshold to determine if a read is kept | |
| 223 after all the trimming steps are done. | |
| 224 | |
| 225 **Disable 5-prime trimming** | |
| 226 | |
| 227 An option to disable trimming the read on the 5-prime end. This trimming trims the read | |
| 228 if the average quality values dip below the quality threshold at the 5-prime end. | |
| 229 | |
| 230 **Truncate sequences with Ns** | |
| 231 | |
| 232 This option will trim a read at the first "N" base in the read after doing quality trimming. | |
| 233 It is then still subject to the length threshold. | |
| 234 | |
| 235 ----- | |
| 236 | |
| 237 Copyright: Nikhil Joshi | |
| 238 | |
| 239 http://bioinformatics.ucdavis.edu | |
| 240 | |
| 241 http://github.com/najoshi/sickle | |
| 242 </help> | |
| 243 <citations> | |
| 244 <citation type="bibtex"> | |
| 245 @unpublished{sickle_link, | |
| 246 author = {Joshi, Nikhil A. and Fass, Joseph N.}, | |
| 247 title = {Sickle: A windowed adaptive trimming tool for FASTQ files using quality}, | |
| 248 year = 2011, | |
| 249 url = { https://github.com/najoshi/sickle } | |
| 250 } | |
| 251 </citation> | |
| 252 </citations> | |
| 253 </tool> |
