|
1
|
1 <tool id="trimmomatic" name="Trimmomatic" version="1.1.0">
|
|
|
2
|
|
|
<!-- Short summary of what the tool does. -->
<description>Trim adapters and otherwise cleanup fastq files in a pair aware manner using trimmomatic 0.32</description>
<!-- JAR_PATH is declared as a set_environment requirement; the command
     section passes it to the wrapper script as the "tool-dir" argument. -->
<requirements>
    <requirement type="set_environment">JAR_PATH</requirement>
</requirements>
|
|
|
7
|
|
|
<!--
  Cheetah template for the command line given to trimmomatic_wrapper.pl.
  Arguments are emitted as flat "keyword value" pairs:
    * paired / fwdfile / revfile : read layout and input datasets
    * phred                      : quality encoding
    * one True/False switch per optional trimming step, followed by that
      step's settings only when the step is switched on
    * minlen, log / logfile, singles, fwdpairs / revpairs : remaining
      settings and output dataset paths
    * tool-dir, threads          : resolved by the shell at run time, which
      is why the dollar signs are backslash-escaped
  Standard output of the wrapper is redirected into the hidden dummy_out
  dataset.
-->
<command interpreter="perl">
    trimmomatic_wrapper.pl
    paired ${paired.is_paired}
    #if $paired.is_paired == "single":
        fwdfile ${paired.forwards_file}
    #end if
    #if $paired.is_paired == "paired":
        fwdfile ${paired.forwards_file}
        revfile ${paired.reverse_file}
    #end if
    #if $paired.is_paired == "collection":
        fwdfile ${paired.collection_data.forward}
        revfile ${paired.collection_data.reverse}
    #end if
    phred $phred
    cutadapt ${adapt.adapters}
    #if $adapt.adapters:
        adaptfile ${adapt.adapt_file}
        adaptseed ${adapt.adapt_seed}
        adaptpalindrome ${adapt.adapt_palindrome}
        adaptsimple ${adapt.adapt_simple}
    #end if
    slidingwindow ${sliding.slidingwindow}
    #if $sliding.slidingwindow:
        slidingsize ${sliding.slid_window}
        slidingqual ${sliding.slid_qual}
    #end if
    trimleading ${leading.trimleading}
    #if $leading.trimleading:
        leadingqual ${leading.lead_qual}
    #end if
    trimtrailing ${trailing.trimtrailing}
    #if $trailing.trimtrailing:
        trailingqual ${trailing.trail_qual}
    #end if
    crop ${crop.cropend}
    #if $crop.cropend:
        croplen ${crop.crop_len}
    #end if
    headcrop ${headcrop.headcrop_s}
    #if $headcrop.headcrop_s:
        headcroplen ${headcrop.headcrop_len}
    #end if
    minlen $minlen
    ## The boolean input is named logfile and the output dataset is named log.
    ## NOTE(review): assumes the wrapper reads log as the True/False switch and
    ## logfile as the path to write to; confirm against trimmomatic_wrapper.pl.
    log $logfile
    #if str($logfile) == "True":
        logfile $log
    #end if
    singles $singles
    ## is_paired is a select and never empty, so test the value explicitly;
    ## the pair outputs only apply to the two paired layouts.
    #if $paired.is_paired == "paired" or $paired.is_paired == "collection":
        fwdpairs $fwdpairs
        revpairs $revpairs
    #end if
    tool-dir \$JAR_PATH
    threads \${GALAXY_SLOTS:-4}
    > $dummy_out
</command>
|
|
|
65
|
|
|
<!--
  User-facing parameters. Each optional trimming step is a conditional keyed
  on a boolean whose truevalue/falsevalue are "True"/"False"; the when
  elements use exactly those spellings so that both states of every checkbox
  have a matching case.
-->
<inputs>

    <!-- Read layout: decides which input dataset parameters are shown. -->
    <conditional name="paired">
        <!-- <param name="is_paired" type="boolean" checked="true" truevalue="paired" falsevalue="single" label="Paired end reads?" help="Do you have 2 separate paired end read files?"/> -->
        <param name="is_paired" type="select" label="Read type selector" help="single file (not pair aware), two files (paired reads) or a dataset collection of pairs">
            <option value="single" selected="true">Single File</option>
            <option value="paired">Two Files</option>
            <option value="collection">Single dataset collection pair</option>
        </param>
        <when value="paired">
            <param name="forwards_file" type="data" format="fastq,fastqsanger,fastqsolexa,fastqillumina" label="Direction 1 fastq reads to trim"/>
            <param name="reverse_file" type="data" format="fastq,fastqsanger,fastqsolexa,fastqillumina" label="Direction 2 fastq reads to trim"/>
        </when>
        <when value="single">
            <param name="forwards_file" type="data" format="fastq,fastqsanger,fastqsolexa,fastqillumina" label="Fastq reads to trim"/>
        </when>
        <when value="collection">
            <param name="collection_data" type="data_collection" format="fastq,fastqsanger,fastqsolexa,fastqillumina" label="Paired end dataset collection" collection_type="paired"/>
        </when>
    </conditional>

    <!-- Quality score encoding; phred33 is preselected. -->
    <param name="phred" type="select" label="Quality encoding." help="Phred33 or Phred 64, probably Phred64">
        <option value="phred64">phred64</option>
        <option value="phred33" selected="True">phred33</option>
    </param>

    <!-- Adapter clipping: off by default, needs a FASTA of adapters when on. -->
    <conditional name="adapt">
        <param name="adapters" type="boolean" checked="false" falsevalue="False" truevalue="True" label="Clip Illumina adapters?" help="Cut adapter and other illumina-specific sequences from the read."/>
        <when value="True">
            <param name="adapt_file" type="data" format="fasta" label="Fasta of adapters to clip" help="Please supply a list of adapters to clip"/>
            <param name="adapt_seed" type="integer" value="2" label="Seed mismatches" help="Specifies the maximum mismatch count which will still allow a full match to be performed."/>
            <param name="adapt_palindrome" type="integer" value="40" label="Palindrome clip threshold" help="Specifies how accurate the match between the two 'adapter ligated' reads must be for PE palindrome read alignment."/>
            <param name="adapt_simple" type="integer" value="15" label="Simple clip threshold" help="Specifies how accurate the match between any adapter etc. sequence must be against a read."/>
        </when>
        <when value="False"/>
    </conditional>

    <!-- Sliding window trimming: on by default. -->
    <conditional name="sliding">
        <param name="slidingwindow" type="boolean" checked="true" falsevalue="False" truevalue="True" label="Perform Sliding Window trimming?" help="Perform a sliding window trimming, cutting once the average quality within the window falls below a threshold."/>
        <when value="True">
            <param name="slid_window" type="integer" value="4" label="Sliding window size" help="Specifies the number of bases to average across"/>
            <param name="slid_qual" type="integer" value="15" label="Average quality required" help="Specifies the average quality required."/>
        </when>
        <when value="False"/>
    </conditional>

    <!-- Leading low-quality base trimming: on by default. -->
    <conditional name="leading">
        <param name="trimleading" type="boolean" checked="true" falsevalue="False" truevalue="True" label="Trim leading bases?" help="Cut bases off the start of a read, if below a threshold quality."/>
        <when value="True">
            <param name="lead_qual" type="integer" value="3" label="Minimum quality" help="Specifies the minimum quality required to keep a base."/>
        </when>
        <when value="False"/>
    </conditional>

    <!-- Trailing low-quality base trimming: on by default. -->
    <conditional name="trailing">
        <param name="trimtrailing" type="boolean" checked="true" falsevalue="False" truevalue="True" label="Trim trailing bases?" help="Cut bases off the end of a read, if below a threshold quality."/>
        <when value="True">
            <param name="trail_qual" type="integer" value="3" label="Minimum quality" help="Specifies the minimum quality required to keep a base."/>
        </when>
        <when value="False"/>
    </conditional>

    <!-- Crop reads to a fixed length: off by default. -->
    <conditional name="crop">
        <param name="cropend" type="boolean" checked="false" falsevalue="False" truevalue="True" label="Crop reads?" help="Cut the read to a specified length."/>
        <when value="True">
            <param name="crop_len" type="integer" value="0" label="Number of bases" help="The number of bases to keep, from the start of the read."/>
        </when>
        <when value="False"/>
    </conditional>

    <!-- Remove a fixed number of bases from the read start: off by default. -->
    <conditional name="headcrop">
        <param name="headcrop_s" type="boolean" checked="false" falsevalue="False" truevalue="True" label="Crop starts?" help="Cut the specified number of bases from the start of the read."/>
        <when value="True">
            <param name="headcrop_len" type="integer" value="0" label="Number of bases" help="The number of bases to remove from the start of the read."/>
        </when>
        <when value="False"/>
    </conditional>

    <param name="minlen" type="integer" value="24" label="Minimum length read" help="Drop the read if it is below specified length"/>

    <!-- Boolean switch only; the log itself is the "log" output dataset. -->
    <param name="logfile" type="boolean" checked="false" falsevalue="False" truevalue="True" label="Turn on log?" help="Detailed log of trims on each read. (Very large file!)"/>
</inputs>
|
|
|
153
|
|
|
<!--
  Output datasets. fwdpairs and revpairs are the two halves of the surviving
  read pairs, so both carry the same filter and exist only for the "paired"
  and "collection" read layouts. singles, log and dummy_out have no filter.
-->
<outputs>
    <!-- Trim log dataset; hidden in the history. -->
    <data name="log" format="tabular" label="${tool.name} on ${on_string}: Trimmomatic Log" hidden="True"/>
    <data name="fwdpairs" format="input" label="${tool.name} on ${on_string}: Dir1 trimmed pairs">
        <filter>(paired['is_paired'] == "paired" or paired['is_paired'] == "collection")</filter>
    </data>
    <data name="revpairs" format="input" label="${tool.name} on ${on_string}: Dir2 trimmed pairs">
        <filter>(paired['is_paired'] == "paired" or paired['is_paired'] == "collection")</filter>
    </data>
    <data name="singles" format="input" label="${tool.name} on ${on_string}: trimmed reads"/>
    <!-- Receives the wrapper's standard output (see the redirect in the command); hidden. -->
    <data name="dummy_out" format="tabular" label="${tool.name} on ${on_string}: DEBUG OUTPUT" hidden="True"/>
</outputs>
|
|
5
|
166
|
|
|
<!-- Exit codes in the range "1:" are reported as a fatal "Trimmomatic error". -->
<stdio>
    <exit_code range="1:"
               level="fatal"
               description="Trimmomatic error"/>
</stdio>
|
|
1
|
170 <help>
|
|
|
171 ***Trimmomatic***
|
|
|
172
|
|
|
173 A flexible read trimming tool for Illumina NGS data
|
|
|
174
|
|
|
175
|
|
|
176
|
|
|
177 Trimmomatic performs a variety of useful trimming tasks for Illumina paired-end and single-ended data. The selection of trimming steps and their associated parameters is supplied on the command line.
|
|
|
178
|
|
|
179 The current trimming steps are:
|
|
|
180
|
|
|
181 ILLUMINACLIP: Cut adapter and other illumina-specific sequences from the read.
|
|
|
182
|
|
|
183 SLIDINGWINDOW: Perform a sliding window trimming, cutting once the average quality within the window falls below a threshold.
|
|
|
184
|
|
|
185 LEADING: Cut bases off the start of a read, if below a threshold quality
|
|
|
186
|
|
|
187 TRAILING: Cut bases off the end of a read, if below a threshold quality
|
|
|
188
|
|
|
189 CROP: Cut the read to a specified length
|
|
|
190
|
|
|
191 HEADCROP: Cut the specified number of bases from the start of the read
|
|
|
192
|
|
|
193 MINLEN: Drop the read if it is below a specified length
|
|
|
194
|
|
|
195 **The Adapter Fasta**
|
|
|
196
|
|
|
197 Illumina adapter and other technical sequences are copyrighted by Illumina, which makes it illegal for us to make them available without a licence. As such, you need to get access to the illumina sequences from another source, and create an appropriate adapter FASTA.
|
|
|
198
|
|
|
199 To make this fasta, you must first understand how it will be used. Trimmomatic uses two strategies for adapter trimming: Palindrome and Simple
|
|
|
200
|
|
|
201 With 'simple' trimming, each adapter sequence is tested against the reads, and if a sufficiently accurate match is detected, the read is clipped appropriately.
|
|
|
202
|
|
|
203 'Palindrome' trimming is specifically designed for the case of 'reading through' a short fragment into the adapter sequence on the other end. In this approach, the appropriate adapter sequences are 'in silico ligated' onto the start of the reads, and the combined adapter+read sequences, forward and reverse are aligned. If they align in a manner which indicates 'read-through', the forward read is clipped and the reverse read dropped (since it contains no new data).
|
|
|
204
|
|
|
205 Naming of the sequences indicates how they should be used. For 'Palindrome' clipping, the sequence names should both start with 'Prefix', and end in '/1' for the forward adapter and '/2' for the reverse adapter. All other sequences are checked using 'simple' mode. Sequences with names ending in '/1' or '/2' will be checked only against the forward or reverse read. All other sequences will be checked against both the forward and reverse read. If you want to check for the reverse-complement of a specific sequence, you need to specifically include the reverse-complemented form of the sequence as well, with another name.
|
|
|
206
|
|
|
207 The thresholds used are a simplified log-likelihood approach. Each matching base adds just over 0.6, while each mismatch reduces the alignment score by Q/10. Therefore, a perfect match of a 20 base sequence will score just over 12, while 25 bases are needed to score 15. As such we recommend values between 12 - 15 for this parameter. For palindromic matches, the entire read sequence plus (partial) adapter sequences can be used - therefore this threshold can be higher, in the range of 30-40. The 'seed mismatch' parameter is used to make alignments more efficient, specifying the maximum base mismatch count in the 'seed' (16 bases). Typical values here are 1 or 2.
|
|
|
208
|
|
|
209 **Citation**
|
|
|
210
|
|
|
211 Lohse M, Bolger AM, Nagel A, Fernie AR, Lunn JE, Stitt M, Usadel B. RobiNA: a
|
|
|
212 user-friendly, integrated software solution for RNA-Seq-based transcriptomics.
|
|
|
213 Nucleic Acids Res. 2012 Jul;40(Web Server issue):W622-7.
|
|
|
214
|
|
|
215
|
|
|
216
|
|
|
217 **Contacts**
|
|
|
218
|
|
|
219 Anthony Bolger bolger at mpimp-golm dot mpg dot de
|
|
|
220
|
|
|
221 Federico M. Giorgi federico dot giorgi at gmail dot com
|
|
|
222
|
|
|
223 Wrapped for Galaxy by:
|
|
|
224
|
|
|
225 Simon Gladman (VBC/VLSCI) simon dot gladman at monash dot edu
|
|
|
226 </help>
|
|
|
227 </tool>
|