annotate cutadapt.xml @ 1:47f068960327 draft default tip

Deleted selected files
author slegras
date Thu, 06 Aug 2015 09:15:42 -0400
parents baf52103977b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
baf52103977b Uploaded
slegras
parents:
diff changeset
1 <tool id="cutadapt" name="Cutadapt" version="1.8">
baf52103977b Uploaded
slegras
parents:
diff changeset
2 <description>Remove adapter sequences from Fastq/Fasta</description>
baf52103977b Uploaded
slegras
parents:
diff changeset
3 <requirements>
baf52103977b Uploaded
slegras
parents:
diff changeset
4 <requirement type="package" version="1.8">cutadapt</requirement>
baf52103977b Uploaded
slegras
parents:
diff changeset
5 </requirements>
baf52103977b Uploaded
slegras
parents:
diff changeset
6
baf52103977b Uploaded
slegras
parents:
diff changeset
7 <stdio>
baf52103977b Uploaded
slegras
parents:
diff changeset
8 <exit_code range="1" level="fatal" description="IOError, FormatError, or Interrupt" />
baf52103977b Uploaded
slegras
parents:
diff changeset
9 <exit_code range="2" level="fatal" description="Invalid options specified" />
baf52103977b Uploaded
slegras
parents:
diff changeset
10 <exit_code range="3:" level="fatal" description="Unknown error" />
baf52103977b Uploaded
slegras
parents:
diff changeset
11 </stdio>
baf52103977b Uploaded
slegras
parents:
diff changeset
12
baf52103977b Uploaded
slegras
parents:
diff changeset
13 <version_command>cutadapt --version</version_command>
baf52103977b Uploaded
slegras
parents:
diff changeset
14
baf52103977b Uploaded
slegras
parents:
diff changeset
15 <command>cutadapt
baf52103977b Uploaded
slegras
parents:
diff changeset
16 #if $input.extension.startswith( "fastq"):
baf52103977b Uploaded
slegras
parents:
diff changeset
17 --format=fastq
baf52103977b Uploaded
slegras
parents:
diff changeset
18 #if $input.extension == "fastqillumina":
baf52103977b Uploaded
slegras
parents:
diff changeset
19 --quality-base=64
baf52103977b Uploaded
slegras
parents:
diff changeset
20 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
21 #if $input.extension == "fastqsolexa":
baf52103977b Uploaded
slegras
parents:
diff changeset
22 --quality-base=64
baf52103977b Uploaded
slegras
parents:
diff changeset
23 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
24 #else
baf52103977b Uploaded
slegras
parents:
diff changeset
25 --format=$input.extension
baf52103977b Uploaded
slegras
parents:
diff changeset
26 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
27 #for $a in $adapters
baf52103977b Uploaded
slegras
parents:
diff changeset
28 #if $a.adapter_source.adapter_source_list == 'prebuilt':
baf52103977b Uploaded
slegras
parents:
diff changeset
29 --adapter="${a.adapter_source.adapter.fields.name}"='${a.adapter_source.adapter}'
baf52103977b Uploaded
slegras
parents:
diff changeset
30 #else if str($a.adapter_source.adapter_name) != "":
baf52103977b Uploaded
slegras
parents:
diff changeset
31 --adapter='${a.adapter_source.adapter_name}'='${a.adapter_source.adapter}'
baf52103977b Uploaded
slegras
parents:
diff changeset
32 #else
baf52103977b Uploaded
slegras
parents:
diff changeset
33 --adapter='${a.adapter_source.adapter}'
baf52103977b Uploaded
slegras
parents:
diff changeset
34 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
35 #end for
baf52103977b Uploaded
slegras
parents:
diff changeset
36 #for $aa in $anywhere_adapters
baf52103977b Uploaded
slegras
parents:
diff changeset
37 #if $aa.anywhere_adapter_source.anywhere_adapter_source_list == 'prebuilt':
baf52103977b Uploaded
slegras
parents:
diff changeset
38 --anywhere="${aa.anywhere_adapter_source.anywhere_adapter.fields.name}"='${aa.anywhere_adapter_source.anywhere_adapter}'
baf52103977b Uploaded
slegras
parents:
diff changeset
39 #else if str($aa.anywhere_adapter_source.anywhere_adapter_name) != "":
baf52103977b Uploaded
slegras
parents:
diff changeset
40 --anywhere='${aa.anywhere_adapter_source.anywhere_adapter_name}'='${aa.anywhere_adapter_source.anywhere_adapter}'
baf52103977b Uploaded
slegras
parents:
diff changeset
41 #else
baf52103977b Uploaded
slegras
parents:
diff changeset
42 --anywhere='${aa.anywhere_adapter_source.anywhere_adapter}'
baf52103977b Uploaded
slegras
parents:
diff changeset
43 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
44 #end for
baf52103977b Uploaded
slegras
parents:
diff changeset
45 #for $fa in $front_adapters
baf52103977b Uploaded
slegras
parents:
diff changeset
46 #if $fa.front_adapter_source.front_adapter_source_list == 'prebuilt':
baf52103977b Uploaded
slegras
parents:
diff changeset
47 --front="${fa.front_adapter_source.front_adapter.fields.name}"='${fa.front_adapter_source.front_adapter}'
baf52103977b Uploaded
slegras
parents:
diff changeset
48 #else if str($fa.front_adapter_source.front_adapter_name) != "":
baf52103977b Uploaded
slegras
parents:
diff changeset
49 --front='${fa.front_adapter_source.front_adapter_name}'='${fa.front_adapter_source.front_adapter}'
baf52103977b Uploaded
slegras
parents:
diff changeset
50 #else
baf52103977b Uploaded
slegras
parents:
diff changeset
51 --front='${fa.front_adapter_source.front_adapter}'
baf52103977b Uploaded
slegras
parents:
diff changeset
52 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
53 #end for
baf52103977b Uploaded
slegras
parents:
diff changeset
54 --error-rate=$error_rate
baf52103977b Uploaded
slegras
parents:
diff changeset
55 --times=$count
baf52103977b Uploaded
slegras
parents:
diff changeset
56 --overlap=$overlap
baf52103977b Uploaded
slegras
parents:
diff changeset
57 $no_indels
baf52103977b Uploaded
slegras
parents:
diff changeset
58 $match_read_wildcards
baf52103977b Uploaded
slegras
parents:
diff changeset
59
baf52103977b Uploaded
slegras
parents:
diff changeset
60 #if str( $output_filtering_options.output_filtering) == "filter":
baf52103977b Uploaded
slegras
parents:
diff changeset
## Output filtering: pass the boolean filter flags through as-is (each renders to its truevalue or to nothing).
61 $output_filtering_options.discard
baf52103977b Uploaded
slegras
parents:
diff changeset
62 $output_filtering_options.discard_untrimmed
baf52103977b Uploaded
slegras
parents:
diff changeset
63 $output_filtering_options.no_trim
baf52103977b Uploaded
slegras
parents:
diff changeset
64 $output_filtering_options.mask_adapter
baf52103977b Uploaded
slegras
parents:
diff changeset
## min/max are optional integers: a cleared field renders as '' (or 'None'), so skip those as well as 0
## instead of emitting an empty --minimum-length= / --maximum-length= value that cutadapt rejects.
65 #if str($output_filtering_options.min) not in ('', 'None', '0'):
baf52103977b Uploaded
slegras
parents:
diff changeset
66 --minimum-length=$output_filtering_options.min
baf52103977b Uploaded
slegras
parents:
diff changeset
67 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
68 #if str($output_filtering_options.max) not in ('', 'None', '0'):
baf52103977b Uploaded
slegras
parents:
diff changeset
69 --maximum-length=$output_filtering_options.max
baf52103977b Uploaded
slegras
parents:
diff changeset
70 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
71 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
72
baf52103977b Uploaded
slegras
parents:
diff changeset
73 --output='$output'
baf52103977b Uploaded
slegras
parents:
diff changeset
74
baf52103977b Uploaded
slegras
parents:
diff changeset
75 #if $paired_end.paired_end_boolean:
baf52103977b Uploaded
slegras
parents:
diff changeset
76 --paired-output='$paired_output'
baf52103977b Uploaded
slegras
parents:
diff changeset
77 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
78
baf52103977b Uploaded
slegras
parents:
diff changeset
79 #if str( $output_params.output_type ) == "additional":
baf52103977b Uploaded
slegras
parents:
diff changeset
## Optional extra output files. Paths are single-quoted, matching --output / --paired-output in this command,
## so a dataset path containing spaces or shell metacharacters is passed as one argument.
80 #if $output_params.rest_file:
baf52103977b Uploaded
slegras
parents:
diff changeset
81 --rest-file='$rest_output'
baf52103977b Uploaded
slegras
parents:
diff changeset
82 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
83 #if $output_params.wildcard_file:
baf52103977b Uploaded
slegras
parents:
diff changeset
84 --wildcard-file='$wild_output'
baf52103977b Uploaded
slegras
parents:
diff changeset
85 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
86 #if $output_params.too_short_file:
baf52103977b Uploaded
slegras
parents:
diff changeset
87 --too-short-output='$too_short_output'
baf52103977b Uploaded
slegras
parents:
diff changeset
88 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
89 #if $output_params.too_long_file:
baf52103977b Uploaded
slegras
parents:
diff changeset
90 --too-long-output='$too_long_output'
baf52103977b Uploaded
slegras
parents:
diff changeset
91 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
92 #if $output_params.untrimmed_file:
baf52103977b Uploaded
slegras
parents:
diff changeset
93 --untrimmed-output='$untrimmed_output'
baf52103977b Uploaded
slegras
parents:
diff changeset
## The paired untrimmed file is only requested when a second (paired) input is being tracked.
94 #if $paired_end.paired_end_boolean:
baf52103977b Uploaded
slegras
parents:
diff changeset
95 --untrimmed-paired-output='$untrimmed_paired_output'
baf52103977b Uploaded
slegras
parents:
diff changeset
96 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
97 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
98 #if $output_params.info_file:
baf52103977b Uploaded
slegras
parents:
diff changeset
## NOTE(review): $info_file must resolve to an output dataset declared in <outputs>; that declaration is not visible here - confirm.
99 --info-file='$info_file'
baf52103977b Uploaded
slegras
parents:
diff changeset
100 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
101
baf52103977b Uploaded
slegras
parents:
diff changeset
102 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
103
baf52103977b Uploaded
slegras
parents:
diff changeset
104 #if str( $read_modification_params.read_modification) == "modify":
baf52103977b Uploaded
slegras
parents:
diff changeset
## quality_cutoff and cut are optional integers: a cleared field renders as '' (or 'None'), so skip those
## as well as 0 instead of emitting an empty --quality-cutoff= / --cut= value.
105 #if str($read_modification_params.quality_cutoff) not in ('', 'None', '0'):
baf52103977b Uploaded
slegras
parents:
diff changeset
106 --quality-cutoff=$read_modification_params.quality_cutoff
baf52103977b Uploaded
slegras
parents:
diff changeset
107 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
108 #if str($read_modification_params.cut) not in ('', 'None', '0'):
baf52103977b Uploaded
slegras
parents:
diff changeset
109 --cut=$read_modification_params.cut
baf52103977b Uploaded
slegras
parents:
diff changeset
110 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
111 #if $read_modification_params.prefix != '':
baf52103977b Uploaded
slegras
parents:
diff changeset
112 --prefix="$read_modification_params.prefix"
baf52103977b Uploaded
slegras
parents:
diff changeset
113 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
114 #if $read_modification_params.suffix != '':
baf52103977b Uploaded
slegras
parents:
diff changeset
115 --suffix="$read_modification_params.suffix"
baf52103977b Uploaded
slegras
parents:
diff changeset
116 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
## strip_suffix is declared in <inputs> but was never passed to cutadapt, so the user's value was silently ignored.
#if $read_modification_params.strip_suffix != '':
--strip-suffix="$read_modification_params.strip_suffix"
#end if
117 #if $read_modification_params.length_tag != '':
baf52103977b Uploaded
slegras
parents:
diff changeset
118 --length-tag="$read_modification_params.length_tag"
baf52103977b Uploaded
slegras
parents:
diff changeset
119 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
120 $read_modification_params.zero_cap
baf52103977b Uploaded
slegras
parents:
diff changeset
121 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
122
baf52103977b Uploaded
slegras
parents:
diff changeset
123 '$input'
baf52103977b Uploaded
slegras
parents:
diff changeset
124
baf52103977b Uploaded
slegras
parents:
diff changeset
125 #if $paired_end.paired_end_boolean:
baf52103977b Uploaded
slegras
parents:
diff changeset
126 '$input2'
baf52103977b Uploaded
slegras
parents:
diff changeset
127 #end if
baf52103977b Uploaded
slegras
parents:
diff changeset
128
baf52103977b Uploaded
slegras
parents:
diff changeset
129 > $report
baf52103977b Uploaded
slegras
parents:
diff changeset
130 </command>
baf52103977b Uploaded
slegras
parents:
diff changeset
131
baf52103977b Uploaded
slegras
parents:
diff changeset
132 <inputs>
baf52103977b Uploaded
slegras
parents:
diff changeset
133 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa, fasta" name="input" type="data" optional="false" label="Fastq file to trim" length="100"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
134 <conditional name="paired_end">
baf52103977b Uploaded
slegras
parents:
diff changeset
135 <param name="paired_end_boolean" type="boolean" value="false" label="Track Paired Reads" help="This option will keep a second file synchronized if you use one of the filtering options that discards reads. It will NOT trim adapters off of the second read. You must run Cutadapt a second time on the output of the first run to trim adapters from both reads (see Cutadapt documentation for details)." />
baf52103977b Uploaded
slegras
parents:
diff changeset
136 <when value="true">
baf52103977b Uploaded
slegras
parents:
diff changeset
137 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa, fasta" name="input2" type="data" optional="false" label="Paired fastq file (NOT trimmed)" length="100"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
138 </when>
baf52103977b Uploaded
slegras
parents:
diff changeset
139 <when value="false" />
baf52103977b Uploaded
slegras
parents:
diff changeset
140 </conditional>
baf52103977b Uploaded
slegras
parents:
diff changeset
141
baf52103977b Uploaded
slegras
parents:
diff changeset
142 <repeat name="adapters" title="3' Adapters" help="Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed.">
baf52103977b Uploaded
slegras
parents:
diff changeset
143 <conditional name="adapter_source">
baf52103977b Uploaded
slegras
parents:
diff changeset
144 <param name="adapter_source_list" type="select" label="Source" >
baf52103977b Uploaded
slegras
parents:
diff changeset
145 <option value="prebuilt" selected="true">Standard (select from the list below)</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
146 <option value="user">Enter custom sequence</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
147 </param>
baf52103977b Uploaded
slegras
parents:
diff changeset
148
baf52103977b Uploaded
slegras
parents:
diff changeset
149 <when value="user">
baf52103977b Uploaded
slegras
parents:
diff changeset
150 <param name="adapter_name" size="30" label="Enter custom 3' adapter name (Optional)" type="text" value="" />
baf52103977b Uploaded
slegras
parents:
diff changeset
151 <param name="adapter" size="30" label="Enter custom 3' adapter sequence" type="text" value="AATTGGCC" />
baf52103977b Uploaded
slegras
parents:
diff changeset
152 </when>
baf52103977b Uploaded
slegras
parents:
diff changeset
153
baf52103977b Uploaded
slegras
parents:
diff changeset
154 <when value="prebuilt">
baf52103977b Uploaded
slegras
parents:
diff changeset
155 <param name="adapter" type="select" label="Choose 3' adapter">
baf52103977b Uploaded
slegras
parents:
diff changeset
156 <options from_file="cutadapt_adapters.txt">
baf52103977b Uploaded
slegras
parents:
diff changeset
157 <column name="name" index="1"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
158 <column name="value" index="0"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
159 </options>
baf52103977b Uploaded
slegras
parents:
diff changeset
160 </param>
baf52103977b Uploaded
slegras
parents:
diff changeset
161 </when>
baf52103977b Uploaded
slegras
parents:
diff changeset
162 </conditional>
baf52103977b Uploaded
slegras
parents:
diff changeset
163 </repeat>
baf52103977b Uploaded
slegras
parents:
diff changeset
164
baf52103977b Uploaded
slegras
parents:
diff changeset
165 <repeat name="anywhere_adapters" title="5' or 3' (Anywhere) Adapters" help="Sequence of an adapter that was ligated to the 5' or 3' end. If the adapter is found within the read or overlapping the 3' end of the read, the behavior is the same as for the -a option. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a or -b options are given, only the best matching adapter is trimmed.">
baf52103977b Uploaded
slegras
parents:
diff changeset
166 <conditional name="anywhere_adapter_source">
baf52103977b Uploaded
slegras
parents:
diff changeset
167 <param name="anywhere_adapter_source_list" type="select" label="Source">
baf52103977b Uploaded
slegras
parents:
diff changeset
168 <option value="prebuilt" selected="true">Standard (select from the list below)</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
169 <option value="user">Enter custom sequence</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
170 </param>
baf52103977b Uploaded
slegras
parents:
diff changeset
171
baf52103977b Uploaded
slegras
parents:
diff changeset
172 <when value="user">
baf52103977b Uploaded
slegras
parents:
diff changeset
173 <param name="anywhere_adapter_name" size="30" label="Enter custom 5' or 3' adapter name (Optional)" type="text" value="" />
baf52103977b Uploaded
slegras
parents:
diff changeset
174 <param name="anywhere_adapter" size="30" label="Enter custom 5' or 3' adapter sequence" type="text" value="AATTGGCC" />
baf52103977b Uploaded
slegras
parents:
diff changeset
175 </when>
baf52103977b Uploaded
slegras
parents:
diff changeset
176 <when value="prebuilt">
baf52103977b Uploaded
slegras
parents:
diff changeset
177 <param name="anywhere_adapter" type="select" label="Choose 5' or 3' adapter">
baf52103977b Uploaded
slegras
parents:
diff changeset
178 <options from_file="cutadapt_adapters.txt">
baf52103977b Uploaded
slegras
parents:
diff changeset
179 <column name="name" index="1"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
180 <column name="value" index="0"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
181 </options>
baf52103977b Uploaded
slegras
parents:
diff changeset
182 </param>
baf52103977b Uploaded
slegras
parents:
diff changeset
183 </when>
baf52103977b Uploaded
slegras
parents:
diff changeset
184 </conditional>
baf52103977b Uploaded
slegras
parents:
diff changeset
185 </repeat>
baf52103977b Uploaded
slegras
parents:
diff changeset
186
baf52103977b Uploaded
slegras
parents:
diff changeset
187 <repeat name="front_adapters" title="5' (Front) Adapters" help="Sequence of an adapter that was ligated to the 5' end. If the adapter sequence starts with the character '^', the adapter is 'anchored'. An anchored adapter must appear in its entirety at the 5' end of the read (it is a prefix of the read). A non-anchored adapter may appear partially at the 5' end, or it may occur within the read. If it is found within a read, the sequence preceding the adapter is also trimmed. In all cases the adapter itself is trimmed.">
baf52103977b Uploaded
slegras
parents:
diff changeset
188 <conditional name="front_adapter_source">
baf52103977b Uploaded
slegras
parents:
diff changeset
189 <param name="front_adapter_source_list" type="select" label="Source">
baf52103977b Uploaded
slegras
parents:
diff changeset
190 <option value="prebuilt" selected="true">Standard (select from the list below)</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
191 <option value="user">Enter custom sequence</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
192 </param>
baf52103977b Uploaded
slegras
parents:
diff changeset
193
baf52103977b Uploaded
slegras
parents:
diff changeset
194 <when value="user">
baf52103977b Uploaded
slegras
parents:
diff changeset
195 <param name="front_adapter_name" size="30" label="Enter custom 5' adapter name (Optional)" type="text" value="" />
baf52103977b Uploaded
slegras
parents:
diff changeset
196 <param name="front_adapter" size="30" label="Enter custom 5' adapter sequence" type="text" value="AATTGGCC" />
baf52103977b Uploaded
slegras
parents:
diff changeset
197 </when>
baf52103977b Uploaded
slegras
parents:
diff changeset
198 <when value="prebuilt">
baf52103977b Uploaded
slegras
parents:
diff changeset
199 <param name="front_adapter" type="select" label="Choose 5' adapter">
baf52103977b Uploaded
slegras
parents:
diff changeset
200 <options from_file="cutadapt_adapters.txt">
baf52103977b Uploaded
slegras
parents:
diff changeset
201 <column name="name" index="1"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
202 <column name="value" index="0"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
203 </options>
baf52103977b Uploaded
slegras
parents:
diff changeset
204 </param>
baf52103977b Uploaded
slegras
parents:
diff changeset
205 </when>
baf52103977b Uploaded
slegras
parents:
diff changeset
206 </conditional>
baf52103977b Uploaded
slegras
parents:
diff changeset
207 </repeat>
baf52103977b Uploaded
slegras
parents:
diff changeset
208
baf52103977b Uploaded
slegras
parents:
diff changeset
209 <param name="error_rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." />
baf52103977b Uploaded
slegras
parents:
diff changeset
210 <param name="no_indels" type="boolean" value="false" truevalue="--no-indels" falsevalue="" label="Do not allow indels (Use ONLY with anchored 5' (front) adapters)." help="Do not allow indels in the alignments. That is, allow only mismatches. This option is currently only supported for anchored 5' adapters ('^ADAPTER') (default: both mismatches and indels are allowed)." />
baf52103977b Uploaded
slegras
parents:
diff changeset
211 <param name="count" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." />
baf52103977b Uploaded
slegras
parents:
diff changeset
212 <param name="overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." />
baf52103977b Uploaded
slegras
parents:
diff changeset
213 <param name="match_read_wildcards" type="boolean" value="false" truevalue="--match-read-wildcards" falsevalue="" label="Match Read Wildcards" help="Allow 'N's in the read as matches to the adapter." />
baf52103977b Uploaded
slegras
parents:
diff changeset
214
baf52103977b Uploaded
slegras
parents:
diff changeset
215 <conditional name="output_filtering_options">
baf52103977b Uploaded
slegras
parents:
diff changeset
216 <param name="output_filtering" type="select" label="Output filtering options" help="Options for filtering processed reads by those that contain the adapter or by minimum or maximum length">
baf52103977b Uploaded
slegras
parents:
diff changeset
217 <option value="default">Default (no filtering)</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
218 <option value="filter">Set Filters</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
219 </param>
baf52103977b Uploaded
slegras
parents:
diff changeset
220 <when value="default" />
baf52103977b Uploaded
slegras
parents:
diff changeset
221 <when value="filter">
baf52103977b Uploaded
slegras
parents:
diff changeset
222 <param name="discard" type="boolean" value="false" truevalue="--discard" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" />
baf52103977b Uploaded
slegras
parents:
diff changeset
223 <param name="discard_untrimmed" type="boolean" value="false" truevalue="--discard-untrimmed" falsevalue="" label="Discard Untrimmed Reads" help="Discard reads that do not contain the adapter." />
baf52103977b Uploaded
slegras
parents:
diff changeset
224 <param name="min" type="integer" min="0" optional="true" value="0" label="Minimum length" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no minimum length." />
baf52103977b Uploaded
slegras
parents:
diff changeset
225 <param name="max" type="integer" min="0" optional="true" value="0" label="Maximum length" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no maximum length." />
baf52103977b Uploaded
slegras
parents:
diff changeset
226 <param name="no_trim" type="boolean" value="false" truevalue="--no-trim" falsevalue="" label="Do not trim adapters" help="Match and redirect reads to output/untrimmed-output as usual, but don't remove the adapters (default: trim the adapters)." />
baf52103977b Uploaded
slegras
parents:
diff changeset
227 <param name="mask_adapter" type="boolean" value="false" truevalue="--mask-adapter" falsevalue="" label="Mask Adapters" help="Mask adapter bases with 'N' instead of trimming them (default: trim adapters)." />
baf52103977b Uploaded
slegras
parents:
diff changeset
228 </when>
baf52103977b Uploaded
slegras
parents:
diff changeset
229 </conditional>
baf52103977b Uploaded
slegras
parents:
diff changeset
230
baf52103977b Uploaded
slegras
parents:
diff changeset
231 <conditional name="output_params">
baf52103977b Uploaded
slegras
parents:
diff changeset
232 <param name="output_type" type="select" label="Additional output options" help="By default all reads will be put in the same file. However, reads with adapters matching in the middle, unmatched reads, and too-short reads can be saved in separate files.">
baf52103977b Uploaded
slegras
parents:
diff changeset
233 <option value="default">Default</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
234 <option value="additional">Additional output files</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
235 </param>
baf52103977b Uploaded
slegras
parents:
diff changeset
236 <when value="default" />
baf52103977b Uploaded
slegras
parents:
diff changeset
237 <when value="additional">
baf52103977b Uploaded
slegras
parents:
diff changeset
238 <param name="info_file" type="boolean" value="false" label="Info File" help="Write information about each read and its adapter matches to a file."/>
baf52103977b Uploaded
slegras
parents:
diff changeset
239 <param name="rest_file" type="boolean" value="false" label="Rest of Read" help="When the adapter matches in the middle of a read, write the rest (after the adapter) into a file."/>
baf52103977b Uploaded
slegras
parents:
diff changeset
240 <param name="wildcard_file" type="boolean" value="false" label="Wildcard File" help="When the adapter has wildcard bases ('N's) write adapter bases matching wildcard positions to file."/>
baf52103977b Uploaded
slegras
parents:
diff changeset
241 <param name="too_short_file" type="boolean" value="false" label="Too Short Reads" help="Write reads that are too short (according to minimum length specified) to a file. (default: discard reads)"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
242 <param name="too_long_file" type="boolean" value="false" label="Too Long Reads" help="Write reads that are too long (according to maximum length specified) to a file. (default: discard reads)"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
243 <param name="untrimmed_file" type="boolean" value="false" label="Untrimmed Reads" help="Write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file. (default: output to same file as trimmed)"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
244 </when>
baf52103977b Uploaded
slegras
parents:
diff changeset
245 </conditional>
baf52103977b Uploaded
slegras
parents:
diff changeset
246
baf52103977b Uploaded
slegras
parents:
diff changeset
247 <conditional name="read_modification_params">
baf52103977b Uploaded
slegras
parents:
diff changeset
248 <param name="read_modification" type="select" label="Additional modifications to reads" help="Various options to trim reads based on quality, modify read names and quality scores">
baf52103977b Uploaded
slegras
parents:
diff changeset
249 <option value="none">No Read Modifications</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
250 <option value="modify">Set Modification Options</option>
baf52103977b Uploaded
slegras
parents:
diff changeset
251 </param>
baf52103977b Uploaded
slegras
parents:
diff changeset
252 <when value="none" />
baf52103977b Uploaded
slegras
parents:
diff changeset
253 <when value="modify">
baf52103977b Uploaded
slegras
parents:
diff changeset
254 <param name="cut" type="integer" optional="true" value="0" label="Cut bases from reads before adapter trimming" help="Remove bases from the beginning or end of each read before trimming adapters. If positive, the bases are removed from the beginning of each read. If negative, the bases are removed from the end of each read." />
baf52103977b Uploaded
slegras
parents:
diff changeset
255 <param name="quality_cutoff" type="integer" min="0" optional="true" value="0" label="Quality cutoff" help="Trim low-quality ends from reads before adapter removal. The algorithm is the same as the one used by BWA (Subtract CUTOFF from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal). Value of 0 means no quality trimming." />
baf52103977b Uploaded
slegras
parents:
diff changeset
256 <param name="prefix" label="Prefix" type="text" help="Add this prefix to read names" />
baf52103977b Uploaded
slegras
parents:
diff changeset
257 <param name="suffix" label="Suffix" type="text" help="Add this suffix to read names" />
baf52103977b Uploaded
slegras
parents:
diff changeset
258 <param name="strip_suffix" label="Strip suffix" type="text" help="Remove this suffix from read names if present." />
baf52103977b Uploaded
slegras
parents:
diff changeset
259 <param name="length_tag" label="Length Tag" type="text" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." />
baf52103977b Uploaded
slegras
parents:
diff changeset
260 <param name="zero_cap" type="boolean" value="false" label="Change negative quality values to zero (0)" truevalue="--zero-cap" falsevalue="" help="Workaround to avoid segmentation faults in BWA" />
baf52103977b Uploaded
slegras
parents:
diff changeset
261 </when>
baf52103977b Uploaded
slegras
parents:
diff changeset
262 </conditional>
baf52103977b Uploaded
slegras
parents:
diff changeset
263 </inputs>
baf52103977b Uploaded
slegras
parents:
diff changeset
264
baf52103977b Uploaded
slegras
parents:
diff changeset
265 <outputs>
baf52103977b Uploaded
slegras
parents:
diff changeset
266 <data format="txt" name="report" label="${tool.name} on ${on_string} (Report)" />
baf52103977b Uploaded
slegras
parents:
diff changeset
267 <data format_source="input" name="output" metadata_source="input" label="${tool.name} on ${on_string} (Reads)"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
268 <data format_source="input" name="paired_output" metadata_source="input" label="${tool.name} on ${on_string} (Paired Reads)" >
baf52103977b Uploaded
slegras
parents:
diff changeset
269 <filter>(paired_end['paired_end_boolean'] is True)</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
270 </data>
baf52103977b Uploaded
slegras
parents:
diff changeset
271 <data format_source="input" name="rest_output" metadata_source="input" label="${tool.name} on ${on_string} (Rest of Reads)" >
baf52103977b Uploaded
slegras
parents:
diff changeset
272 <filter>(output_params['output_type'] == "additional")</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
273 <filter>(output_params['rest_file'] is True)</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
274 </data>
baf52103977b Uploaded
slegras
parents:
diff changeset
275 <data format_source="txt" name="wild_output" metadata_source="input" label="${tool.name} on ${on_string} (Wildcard File)" >
baf52103977b Uploaded
slegras
parents:
diff changeset
276 <filter>(output_params['output_type'] == "additional")</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
277 <filter>(output_params['wildcard_file'] is True)</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
278 </data>
baf52103977b Uploaded
slegras
parents:
diff changeset
279 <data format_source="input" name="too_short_output" metadata_source="input" label="${tool.name} on ${on_string} (Too Short Reads)" >
baf52103977b Uploaded
slegras
parents:
diff changeset
280 <filter>(output_params['output_type'] == "additional")</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
281 <filter>(output_params['too_short_file'] is True)</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
282 </data>
baf52103977b Uploaded
slegras
parents:
diff changeset
283 <data format_source="input" name="too_long_output" metadata_source="input" label="${tool.name} on ${on_string} (Too Long Reads)" >
baf52103977b Uploaded
slegras
parents:
diff changeset
284 <filter>(output_params['output_type'] == "additional")</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
285 <filter>(output_params['too_long_file'] is True)</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
286 </data>
baf52103977b Uploaded
slegras
parents:
diff changeset
287 <data format_source="input" name="untrimmed_output" metadata_source="input" label="${tool.name} on ${on_string} (Untrimmed Reads)" >
baf52103977b Uploaded
slegras
parents:
diff changeset
288 <filter>(output_params['output_type'] == "additional")</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
289 <filter>(output_params['untrimmed_file'] is True)</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
290 </data>
baf52103977b Uploaded
slegras
parents:
diff changeset
291 <data format_source="input" name="untrimmed_paired_output" metadata_source="input" label="${tool.name} on ${on_string} (Untrimmed Paired Reads)" >
baf52103977b Uploaded
slegras
parents:
diff changeset
292 <filter>(paired_end['paired_end_boolean'] is True)</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
293 <filter>(output_params['output_type'] == "additional")</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
294 <filter>(output_params['untrimmed_file'] is True)</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
295 </data>
baf52103977b Uploaded
slegras
parents:
diff changeset
296 <data format="txt" name="info_file" metadata_source="input" label="${tool.name} on ${on_string} (Info File)" >
baf52103977b Uploaded
slegras
parents:
diff changeset
297 <filter>(output_params['output_type'] == "additional")</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
298 <filter>(output_params['info_file'] is True)</filter>
baf52103977b Uploaded
slegras
parents:
diff changeset
299 </data>
baf52103977b Uploaded
slegras
parents:
diff changeset
300 </outputs>
baf52103977b Uploaded
slegras
parents:
diff changeset
301
baf52103977b Uploaded
slegras
parents:
diff changeset
302 <tests>
baf52103977b Uploaded
slegras
parents:
diff changeset
303 <test>
baf52103977b Uploaded
slegras
parents:
diff changeset
304 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
305 <param name="anywhere_adapter_source_list" value="user"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
306 <param name="anywhere_adapter" value="TTAGACATATCTCCGTCG"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
307 <param name="output_filtering" value="default"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
308 <param name="read_modification" value="none"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
309 <param name="output_type" value="default"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
310 <output name="output" file="cutadapt_small.out"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
311 </test>
baf52103977b Uploaded
slegras
parents:
diff changeset
312 <test>
baf52103977b Uploaded
slegras
parents:
diff changeset
313 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
314 <param name="adapter_source_list" value="user"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
315 <param name="adapter" value="TTAGACATATCTCCGTCG"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
316 <param name="output_filtering" value="filter"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
317 <param name="discard" value="true"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
318 <param name="read_modification" value="none"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
319 <param name="output_type" value="default"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
320 <output name="output" file="cutadapt_discard.out"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
321 </test>
baf52103977b Uploaded
slegras
parents:
diff changeset
322 <test>
baf52103977b Uploaded
slegras
parents:
diff changeset
323 <param name="input" value="cutadapt_rest.fa" ftype="fasta"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
324 <param name="adapter_source_list" value="user"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
325 <param name="adapter" value="ADAPTER"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
326 <param name="output_filtering" value="default"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
327 <param name="read_modification" value="none"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
328 <param name="output_type" value="additional"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
329 <param name="rest_file" value="true"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
330 <output name="output" file="cutadapt_rest.out"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
331 <output name="rest_output" file="cutadapt_rest2.out"/>
baf52103977b Uploaded
slegras
parents:
diff changeset
332 </test>
baf52103977b Uploaded
slegras
parents:
diff changeset
333 </tests>
baf52103977b Uploaded
slegras
parents:
diff changeset
334
baf52103977b Uploaded
slegras
parents:
diff changeset
335 <help>
baf52103977b Uploaded
slegras
parents:
diff changeset
336 Summary
baf52103977b Uploaded
slegras
parents:
diff changeset
337 -------
baf52103977b Uploaded
slegras
parents:
diff changeset
338 This tool removes adapter sequences from DNA high-throughput
baf52103977b Uploaded
slegras
parents:
diff changeset
339 sequencing data. This is usually necessary when the read length of the
baf52103977b Uploaded
slegras
parents:
diff changeset
340 machine is longer than the molecule that is sequenced, such as in
baf52103977b Uploaded
slegras
parents:
diff changeset
341 microRNA data.
baf52103977b Uploaded
slegras
parents:
diff changeset
342
baf52103977b Uploaded
slegras
parents:
diff changeset
343 The tool is based on the open-source `cutadapt
baf52103977b Uploaded
slegras
parents:
diff changeset
344 &lt;http://code.google.com/p/cutadapt/>`_ tool. See the `complete cutadapt
baf52103977b Uploaded
slegras
parents:
diff changeset
345 documentation &lt;https://cutadapt.readthedocs.org/en/latest/index.html>`_ for additional details.
baf52103977b Uploaded
slegras
parents:
diff changeset
346
baf52103977b Uploaded
slegras
parents:
diff changeset
347 -----
baf52103977b Uploaded
slegras
parents:
diff changeset
348
baf52103977b Uploaded
slegras
parents:
diff changeset
349 Algorithm
baf52103977b Uploaded
slegras
parents:
diff changeset
350 ---------
baf52103977b Uploaded
slegras
parents:
diff changeset
351
baf52103977b Uploaded
slegras
parents:
diff changeset
352 cutadapt uses a simple semi-global alignment algorithm, without any special optimizations.
baf52103977b Uploaded
slegras
parents:
diff changeset
353 For speed, the algorithm is implemented as a Python extension module in ``calignmodule.c``.
baf52103977b Uploaded
slegras
parents:
diff changeset
354
baf52103977b Uploaded
slegras
parents:
diff changeset
355
baf52103977b Uploaded
slegras
parents:
diff changeset
356 Partial adapter matches
baf52103977b Uploaded
slegras
parents:
diff changeset
357 -----------------------
baf52103977b Uploaded
slegras
parents:
diff changeset
358
baf52103977b Uploaded
slegras
parents:
diff changeset
359 Cutadapt correctly deals with partial adapter matches. As an example, suppose
baf52103977b Uploaded
slegras
parents:
diff changeset
360 your adapter sequence is ``ADAPTER`` (specified via 3' Adapters parameter).
baf52103977b Uploaded
slegras
parents:
diff changeset
361 If you have these input sequences::
baf52103977b Uploaded
slegras
parents:
diff changeset
362
baf52103977b Uploaded
slegras
parents:
diff changeset
363 MYSEQUENCEADAPTER
baf52103977b Uploaded
slegras
parents:
diff changeset
364 MYSEQUENCEADAP
baf52103977b Uploaded
slegras
parents:
diff changeset
365 MYSEQUENCEADAPTERSOMETHINGELSE
baf52103977b Uploaded
slegras
parents:
diff changeset
366
baf52103977b Uploaded
slegras
parents:
diff changeset
367 All of them will be trimmed to ``MYSEQUENCE``. If the sequence starts with an
baf52103977b Uploaded
slegras
parents:
diff changeset
368 adapter, like this::
baf52103977b Uploaded
slegras
parents:
diff changeset
369
baf52103977b Uploaded
slegras
parents:
diff changeset
370 ADAPTERSOMETHING
baf52103977b Uploaded
slegras
parents:
diff changeset
371
baf52103977b Uploaded
slegras
parents:
diff changeset
372 It will be empty after trimming.
baf52103977b Uploaded
slegras
parents:
diff changeset
373
baf52103977b Uploaded
slegras
parents:
diff changeset
374 When the allowed error rate is sufficiently high, errors in
baf52103977b Uploaded
slegras
parents:
diff changeset
375 the adapter sequence are allowed. For example, ``ADABTER`` (1 mismatch), ``ADAPTR`` (1 deletion),
baf52103977b Uploaded
slegras
parents:
diff changeset
376 and ``ADAPPTER`` (1 insertion) will all be recognized if the error rate is set to 0.15.
baf52103977b Uploaded
slegras
parents:
diff changeset
377
baf52103977b Uploaded
slegras
parents:
diff changeset
378
baf52103977b Uploaded
slegras
parents:
diff changeset
379 Anchoring 5' adapters
baf52103977b Uploaded
slegras
parents:
diff changeset
380 ---------------------
baf52103977b Uploaded
slegras
parents:
diff changeset
381
baf52103977b Uploaded
slegras
parents:
diff changeset
382 If you specify a 5' (Front) adapter, the adapter may overlap the beginning of the read or
baf52103977b Uploaded
slegras
parents:
diff changeset
383 occur anywhere within it. If it appears within the read, the sequence that precedes it
baf52103977b Uploaded
slegras
parents:
diff changeset
384 will also be trimmed in addition to the adapter. For example, when the adapter sequence is
baf52103977b Uploaded
slegras
parents:
diff changeset
385 ``ADAPTER``::
baf52103977b Uploaded
slegras
parents:
diff changeset
386
baf52103977b Uploaded
slegras
parents:
diff changeset
387 HELLOADAPTERTHERE
baf52103977b Uploaded
slegras
parents:
diff changeset
388 APTERTHERE
baf52103977b Uploaded
slegras
parents:
diff changeset
389
baf52103977b Uploaded
slegras
parents:
diff changeset
390 will both be trimmed to ``THERE``. To avoid this, you can prefix the adapter with the character
baf52103977b Uploaded
slegras
parents:
diff changeset
391 ``^``. This will restrict the search, forcing the adapter to be a prefix of the read. With
baf52103977b Uploaded
slegras
parents:
diff changeset
392 the adapter sequence set to ``^ADAPTER``, only reads like this will be trimmed::
baf52103977b Uploaded
slegras
parents:
diff changeset
393
baf52103977b Uploaded
slegras
parents:
diff changeset
394 ADAPTERHELLO
baf52103977b Uploaded
slegras
parents:
diff changeset
395
baf52103977b Uploaded
slegras
parents:
diff changeset
396
baf52103977b Uploaded
slegras
parents:
diff changeset
397 Allowing adapters anywhere
baf52103977b Uploaded
slegras
parents:
diff changeset
398 --------------------------
baf52103977b Uploaded
slegras
parents:
diff changeset
399
baf52103977b Uploaded
slegras
parents:
diff changeset
400 Cutadapt assumes that any adapter specified via the 3' Adapter parameter
baf52103977b Uploaded
slegras
parents:
diff changeset
401 was ligated to the 3' end of the sequence. This is the correct assumption for
baf52103977b Uploaded
slegras
parents:
diff changeset
402 at least the SOLiD and Illumina small RNA protocols and probably others.
baf52103977b Uploaded
slegras
parents:
diff changeset
403 The assumption is enforced by the alignment algorithm, which only finds the adapter
baf52103977b Uploaded
slegras
parents:
diff changeset
404 when its starting position is within the read. In other words, the 5' base of
baf52103977b Uploaded
slegras
parents:
diff changeset
405 the adapter must appear within the read. The adapter and all bases following
baf52103977b Uploaded
slegras
parents:
diff changeset
406 it are removed.
baf52103977b Uploaded
slegras
parents:
diff changeset
407
baf52103977b Uploaded
slegras
parents:
diff changeset
408 If, on the other hand, your adapter can also be ligated to the 5' end (on
baf52103977b Uploaded
slegras
parents:
diff changeset
409 purpose or by accident), you should tell cutadapt so by using the Anywhere Adapter
baf52103977b Uploaded
slegras
parents:
diff changeset
410 parameter. It will then use a slightly different alignment algorithm
baf52103977b Uploaded
slegras
parents:
diff changeset
411 (so-called semiglobal alignment), which allows any type of overlap between the
baf52103977b Uploaded
slegras
parents:
diff changeset
412 adapter and the sequence. In particular, the adapter may appear only partially
baf52103977b Uploaded
slegras
parents:
diff changeset
413 in the beginning of the read, like this::
baf52103977b Uploaded
slegras
parents:
diff changeset
414
baf52103977b Uploaded
slegras
parents:
diff changeset
415 PTERMYSEQUENCE
baf52103977b Uploaded
slegras
parents:
diff changeset
416
baf52103977b Uploaded
slegras
parents:
diff changeset
417 The decision which part of the read to remove is made as follows: If there is at
baf52103977b Uploaded
slegras
parents:
diff changeset
418 least one base before the found adapter, then the adapter is considered to be
baf52103977b Uploaded
slegras
parents:
diff changeset
419 a 3' adapter and the adapter itself and everything following it is removed.
baf52103977b Uploaded
slegras
parents:
diff changeset
420 Otherwise, the adapter is considered to be a 5' adapter and it is removed from
baf52103977b Uploaded
slegras
parents:
diff changeset
421 the read.
baf52103977b Uploaded
slegras
parents:
diff changeset
422
baf52103977b Uploaded
slegras
parents:
diff changeset
423 Here are some examples, which may make this clearer (left: read, right: trimmed
baf52103977b Uploaded
slegras
parents:
diff changeset
424 read)::
baf52103977b Uploaded
slegras
parents:
diff changeset
425
baf52103977b Uploaded
slegras
parents:
diff changeset
426 MYSEQUENCEADAPTER -> MYSEQUENCE (3' adapter)
baf52103977b Uploaded
slegras
parents:
diff changeset
427 MADAPTER -> M (3' adapter)
baf52103977b Uploaded
slegras
parents:
diff changeset
428 ADAPTERMYSEQUENCE -> MYSEQUENCE (5' adapter)
baf52103977b Uploaded
slegras
parents:
diff changeset
429 PTERMYSEQUENCE -> MYSEQUENCE (5' adapter)
baf52103977b Uploaded
slegras
parents:
diff changeset
430
baf52103977b Uploaded
slegras
parents:
diff changeset
431 The regular algorithm (3' Adapter) would trim the first two examples in the same way,
baf52103977b Uploaded
slegras
parents:
diff changeset
432 but trim the third to an empty sequence and trim the fourth not at all.
baf52103977b Uploaded
slegras
parents:
diff changeset
433
baf52103977b Uploaded
slegras
parents:
diff changeset
434
baf52103977b Uploaded
slegras
parents:
diff changeset
435 Format of the info file
baf52103977b Uploaded
slegras
parents:
diff changeset
436 -----------------------
baf52103977b Uploaded
slegras
parents:
diff changeset
437 The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file. The fields are:
baf52103977b Uploaded
slegras
parents:
diff changeset
438
baf52103977b Uploaded
slegras
parents:
diff changeset
439 1. Read name
baf52103977b Uploaded
slegras
parents:
diff changeset
440 2. Number of errors
baf52103977b Uploaded
slegras
parents:
diff changeset
441 3. 0-based start coordinate of the adapter match
baf52103977b Uploaded
slegras
parents:
diff changeset
442 4. 0-based end coordinate of the adapter match
baf52103977b Uploaded
slegras
parents:
diff changeset
443 5. Sequence of the read to the left of the adapter match (can be empty)
baf52103977b Uploaded
slegras
parents:
diff changeset
444 6. Sequence of the read that was matched to the adapter
baf52103977b Uploaded
slegras
parents:
diff changeset
445 7. Sequence of the read to the right of the adapter match (can be empty)
baf52103977b Uploaded
slegras
parents:
diff changeset
446 8. Name of the found adapter.
baf52103977b Uploaded
slegras
parents:
diff changeset
447
baf52103977b Uploaded
slegras
parents:
diff changeset
448 The concatenation of the fields 5-7 yields the full read sequence. In column 8, adapters without a name are numbered starting from 1.
baf52103977b Uploaded
slegras
parents:
diff changeset
449
baf52103977b Uploaded
slegras
parents:
diff changeset
450 If no adapter was found, the format is as follows:
baf52103977b Uploaded
slegras
parents:
diff changeset
451
baf52103977b Uploaded
slegras
parents:
diff changeset
452 1. Read name
baf52103977b Uploaded
slegras
parents:
diff changeset
453 2. The value -1
baf52103977b Uploaded
slegras
parents:
diff changeset
454 3. The read sequence
baf52103977b Uploaded
slegras
parents:
diff changeset
455
baf52103977b Uploaded
slegras
parents:
diff changeset
456 When parsing that file, be aware that additional columns may be added in the future. Note also that some fields can be empty, resulting in consecutive tabs within a line. Also, in the current version, when the *Match times* option is set to a value other than 1 (the default value), multiple lines are written to the info file for each read.
baf52103977b Uploaded
slegras
parents:
diff changeset
457
baf52103977b Uploaded
slegras
parents:
diff changeset
458 .. _cutadapt: http://code.google.com/p/cutadapt/
baf52103977b Uploaded
slegras
parents:
diff changeset
459 </help>
baf52103977b Uploaded
slegras
parents:
diff changeset
460
baf52103977b Uploaded
slegras
parents:
diff changeset
461 <citations>
baf52103977b Uploaded
slegras
parents:
diff changeset
462 <citation type="bibtex">
baf52103977b Uploaded
slegras
parents:
diff changeset
463 @article{marcel_cutadapt_2011,
baf52103977b Uploaded
slegras
parents:
diff changeset
464 title = {Cutadapt removes adapter sequences from high-throughput sequencing reads},
baf52103977b Uploaded
slegras
parents:
diff changeset
465 volume = {17},
baf52103977b Uploaded
slegras
parents:
diff changeset
466 copyright = {Authors who publish with this journal agree to the following terms: Authors retain copyright and grant the journal right of first publication with the work simultaneously licensed under a Creative Commons Attribution License that allows others to share the work with an acknowledgement of the work's authorship and initial publication in this journal. Authors are able to enter into separate, additional contractual arrangements for the non-exclusive distribution of the journal's published version of the work (e.g., post it to an institutional repository or publish it in a book), with an acknowledgement of its initial publication in this journal. Authors are permitted and encouraged to post their work online (e.g., in institutional repositories or on their website) prior to and during the submission process, as it can lead to productive exchanges, as well as earlier and greater citation of published work (See The Effect of Open Access ).},
baf52103977b Uploaded
slegras
parents:
diff changeset
467 url = {http://journal.embnet.org/index.php/embnetjournal/article/view/200},
baf52103977b Uploaded
slegras
parents:
diff changeset
468 abstract = {When small RNA is sequenced on current sequencing machines, the resulting reads are usually longer than the RNA and therefore contain parts of the 3' adapter. That adapter must be found and removed error-tolerantly from each read before read mapping. Previous solutions are either hard to use or do not offer required features, in particular support for color space data. As an easy to use alternative, we developed the command-line tool cutadapt, which supports 454, Illumina and SOLiD (color space) data, offers two adapter trimming algorithms, and has other useful features.
baf52103977b Uploaded
slegras
parents:
diff changeset
469
baf52103977b Uploaded
slegras
parents:
diff changeset
470 Cutadapt, including its MIT-licensed source code, is available for download at http://code.google.com/p/cutadapt/},
baf52103977b Uploaded
slegras
parents:
diff changeset
471 number = {1},
baf52103977b Uploaded
slegras
parents:
diff changeset
472 urldate = {2011-08-02},
baf52103977b Uploaded
slegras
parents:
diff changeset
473 journal = {EMBnet.journal},
baf52103977b Uploaded
slegras
parents:
diff changeset
474 author = {Martin, Marcel},
baf52103977b Uploaded
slegras
parents:
diff changeset
475 year = {2011},
baf52103977b Uploaded
slegras
parents:
diff changeset
476 note = {When small RNA is sequenced on current sequencing machines, the resulting reads are usually longer than the RNA and therefore contain parts of the 3' adapter. That adapter must be found and removed error-tolerantly from each read before read mapping. Previous solutions are either hard to use or do not offer required features, in particular support for color space data. As an easy to use alternative, we developed the command-line tool cutadapt, which supports 454, Illumina and SOLiD (color space) data, offers two adapter trimming algorithms, and has other useful features. Cutadapt, including its MIT-licensed source code, is available for download at http://code.google.com/p/cutadapt/},
baf52103977b Uploaded
slegras
parents:
diff changeset
477 keywords = {Adapter removal, fastq, MicroRNA, Sequencing, Small RNA, software},
baf52103977b Uploaded
slegras
parents:
diff changeset
478 file = {Cutadapt removes adapter sequences from high-throughput sequencing reads | Martin | EMBnet.journal:/Users/lparsons/Library/Application Support/Firefox/Profiles/thd2t4je.default/zotero/storage/ZXZT4PSE/200.html:text/html}
baf52103977b Uploaded
slegras
parents:
diff changeset
479 }
baf52103977b Uploaded
slegras
parents:
diff changeset
480 </citation>
baf52103977b Uploaded
slegras
parents:
diff changeset
481 </citations>
baf52103977b Uploaded
slegras
parents:
diff changeset
482
baf52103977b Uploaded
slegras
parents:
diff changeset
483 </tool>