Mercurial > repos > devteam > tophat
annotate tophat_wrapper.xml @ 2:cc37e3dcc680 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/tophat commit de7140295cce07e1bc1697e51dab4271c8d7a8a6
author | devteam |
---|---|
date | Fri, 18 Dec 2015 19:17:22 -0500 |
parents | af089ca8b4ee |
children |
rev | line source |
---|---|
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
1 <tool id="tophat" name="TopHat for Illumina" version="1.5.0"> |
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
2 <!-- Wrapper compatible with TopHat versions 1.3.0 to 1.4.1 --> |
0 | 3 <description>Find splice junctions using RNA-seq data</description> |
4 <version_command>tophat --version</version_command> | |
5 <requirements> | |
6 <requirement type="package" version="0.1.18">samtools</requirement> | |
7 <requirement type="package" version="0.12.7">bowtie</requirement> | |
8 <requirement type="package" version="1.4.0">tophat</requirement> | |
9 </requirements> | |
10 <command interpreter="python"> | |
11 tophat_wrapper.py | |
12 ## Use the number of job slots provided in GALAXY_SLOTS, falling back to 4 threads when it is unset. | |
13 --num-threads="\${GALAXY_SLOTS:-4}" | |
14 ## Provide outputs. | |
15 --junctions-output=$junctions | |
16 --hits-output=$accepted_hits | |
17 | |
18 ## Handle reference file. | |
19 #if $refGenomeSource.genomeSource == "history": | |
20 --own-file=$refGenomeSource.ownFile | |
21 #else: | |
22 --indexes-path="${refGenomeSource.index.fields.path}" | |
23 #end if | |
24 | |
25 ## Are reads single-end or paired? | |
26 --single-paired=$singlePaired.sPaired | |
27 | |
28 ## First input file always required. | |
29 --input1=$input1 | |
30 | |
31 ## Set params based on whether reads are single-end or paired. | |
32 #if $singlePaired.sPaired == "single": | |
33 --settings=$singlePaired.sParams.sSettingsType | |
34 #if $singlePaired.sParams.sSettingsType == "full": | |
35 -a $singlePaired.sParams.anchor_length | |
36 -m $singlePaired.sParams.splice_mismatches | |
37 -i $singlePaired.sParams.min_intron_length | |
38 -I $singlePaired.sParams.max_intron_length | |
39 -g $singlePaired.sParams.max_multihits | |
40 --min-segment-intron $singlePaired.sParams.min_segment_intron | |
41 --max-segment-intron $singlePaired.sParams.max_segment_intron | |
42 --initial-read-mismatches=$singlePaired.sParams.initial_read_mismatches | |
43 --seg-mismatches=$singlePaired.sParams.seg_mismatches | |
44 --seg-length=$singlePaired.sParams.seg_length | |
45 --library-type=$singlePaired.sParams.library_type | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
46 |
0 | 47 ## Indel search. |
48 #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes": | |
49 ## --allow-indels | |
50 --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length | |
51 --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length | |
52 #else: | |
53 --no-novel-indels | |
54 #end if | |
55 | |
56 ## Supplying junctions parameters. | |
57 #if $singlePaired.sParams.own_junctions.use_junctions == "Yes": | |
58 #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes": | |
59 -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model | |
60 #end if | |
61 #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes": | |
62 -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs | |
63 #end if | |
64 ## TODO: No idea why a string cast is necessary, but it is: | |
65 #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes": | |
66 --no-novel-juncs | |
67 #end if | |
68 #end if | |
69 | |
70 #if $singlePaired.sParams.closure_search.use_search == "Yes": | |
71 --closure-search | |
72 --min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon | |
73 --min-closure-intron $singlePaired.sParams.closure_search.min_closure_intron | |
74 --max-closure-intron $singlePaired.sParams.closure_search.max_closure_intron | |
75 #else: | |
76 --no-closure-search | |
77 #end if | |
78 #if $singlePaired.sParams.coverage_search.use_search == "Yes": | |
79 --coverage-search | |
80 --min-coverage-intron $singlePaired.sParams.coverage_search.min_coverage_intron | |
81 --max-coverage-intron $singlePaired.sParams.coverage_search.max_coverage_intron | |
82 #else: | |
83 --no-coverage-search | |
84 #end if | |
85 ## TODO: No idea why the type conversion is necessary, but it seems to be. | |
86 #if str($singlePaired.sParams.microexon_search) == "Yes": | |
87 --microexon-search | |
88 #end if | |
89 #end if | |
90 #else: | |
91 --input2=$singlePaired.input2 | |
92 -r $singlePaired.mate_inner_distance | |
93 --settings=$singlePaired.pParams.pSettingsType | |
94 #if $singlePaired.pParams.pSettingsType == "full": | |
95 --mate-std-dev=$singlePaired.pParams.mate_std_dev | |
96 -a $singlePaired.pParams.anchor_length | |
97 -m $singlePaired.pParams.splice_mismatches | |
98 -i $singlePaired.pParams.min_intron_length | |
99 -I $singlePaired.pParams.max_intron_length | |
100 -g $singlePaired.pParams.max_multihits | |
101 --min-segment-intron $singlePaired.pParams.min_segment_intron | |
102 --max-segment-intron $singlePaired.pParams.max_segment_intron | |
103 --initial-read-mismatches=$singlePaired.pParams.initial_read_mismatches | |
104 --seg-mismatches=$singlePaired.pParams.seg_mismatches | |
105 --seg-length=$singlePaired.pParams.seg_length | |
106 --library-type=$singlePaired.pParams.library_type | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
107 |
0 | 108 ## Indel search. |
109 #if $singlePaired.pParams.indel_search.allow_indel_search == "Yes": | |
110 ## --allow-indels | |
111 --max-insertion-length $singlePaired.pParams.indel_search.max_insertion_length | |
112 --max-deletion-length $singlePaired.pParams.indel_search.max_deletion_length | |
113 #else: | |
114 --no-novel-indels | |
115 #end if | |
116 | |
117 ## Supplying junctions parameters. | |
118 #if $singlePaired.pParams.own_junctions.use_junctions == "Yes": | |
119 #if $singlePaired.pParams.own_junctions.gene_model_ann.use_annotations == "Yes": | |
120 -G $singlePaired.pParams.own_junctions.gene_model_ann.gene_annotation_model | |
121 #end if | |
122 #if $singlePaired.pParams.own_junctions.raw_juncs.use_juncs == "Yes": | |
123 -j $singlePaired.pParams.own_junctions.raw_juncs.raw_juncs | |
124 #end if | |
125 ## TODO: No idea why type cast is necessary, but it is: | |
126 #if str($singlePaired.pParams.own_junctions.no_novel_juncs) == "Yes": | |
127 --no-novel-juncs | |
128 #end if | |
129 #end if | |
130 | |
131 #if $singlePaired.pParams.closure_search.use_search == "Yes": | |
132 --closure-search | |
133 --min-closure-exon $singlePaired.pParams.closure_search.min_closure_exon | |
134 --min-closure-intron $singlePaired.pParams.closure_search.min_closure_intron | |
135 --max-closure-intron $singlePaired.pParams.closure_search.max_closure_intron | |
136 #else: | |
137 --no-closure-search | |
138 #end if | |
139 #if $singlePaired.pParams.coverage_search.use_search == "Yes": | |
140 --coverage-search | |
141 --min-coverage-intron $singlePaired.pParams.coverage_search.min_coverage_intron | |
142 --max-coverage-intron $singlePaired.pParams.coverage_search.max_coverage_intron | |
143 #else: | |
144 --no-coverage-search | |
145 #end if | |
146 ## TODO: No idea why the type conversion is necessary, but it seems to be. | |
147 #if str($singlePaired.pParams.microexon_search) == "Yes": | |
148 --microexon-search | |
149 #end if | |
150 #end if | |
151 #end if | |
152 </command> | |
153 <inputs> | |
154 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> | |
155 <expand macro="refGenomeSourceConditional"> | |
156 <options from_data_table="tophat_indexes"> | |
157 <filter type="sort_by" column="2"/> | |
158 <validator type="no_options" message="No genomes are available for the selected input dataset"/> | |
159 </options> | |
160 </expand> | |
161 <conditional name="singlePaired"> | |
162 <param name="sPaired" type="select" label="Is this library mate-paired?"> | |
163 <option value="single">Single-end</option> | |
164 <option value="paired">Paired-end</option> | |
165 </param> | |
166 <when value="single"> | |
167 <conditional name="sParams"> | |
168 <param name="sSettingsType" type="select" label="TopHat settings to use" help="Use the Full parameter list to change default settings."> | |
169 <option value="preSet">Default settings</option> | |
170 <option value="full">Full parameter list</option> | |
171 </param> | |
172 <when value="preSet" /> | |
173 <!-- Full/advanced params. --> | |
174 <when value="full"> | |
175 <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol."> | |
176 <option value="fr-unstranded">FR Unstranded</option> | |
177 <option value="fr-firststrand">FR First Strand</option> | |
178 <option value="fr-secondstrand">FR Second Strand</option> | |
179 </param> | |
180 <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /> | |
181 <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /> | |
182 <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /> | |
183 <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." /> | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
184 <expand macro="indel_searchConditional" /> |
0 | 185 <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" /> |
186 <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" /> | |
187 <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /> | |
188 <param name="initial_read_mismatches" type="integer" min="0" value="2" label="Number of mismatches allowed in the initial read mapping" /> | |
189 <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /> | |
190 <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" /> | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
191 |
0 | 192 <!-- Options for supplying own junctions. --> |
193 <conditional name="own_junctions"> | |
194 <param name="use_junctions" type="select" label="Use Own Junctions"> | |
195 <option value="No">No</option> | |
196 <option value="Yes">Yes</option> | |
197 </param> | |
198 <when value="Yes"> | |
199 <conditional name="gene_model_ann"> | |
200 <param name="use_annotations" type="select" label="Use Gene Annotation Model"> | |
201 <option value="No">No</option> | |
202 <option value="Yes">Yes</option> | |
203 </param> | |
204 <when value="No" /> | |
205 <when value="Yes"> | |
206 <param format="gtf,gff3" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/> | |
207 </when> | |
208 </conditional> | |
209 <conditional name="raw_juncs"> | |
210 <param name="use_juncs" type="select" label="Use Raw Junctions"> | |
211 <option value="No">No</option> | |
212 <option value="Yes">Yes</option> | |
213 </param> | |
214 <when value="No" /> | |
215 <when value="Yes"> | |
216 <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/> | |
217 </when> | |
218 </conditional> | |
219 <param name="no_novel_juncs" type="select" label="Only look for supplied junctions"> | |
220 <option value="No">No</option> | |
221 <option value="Yes">Yes</option> | |
222 </param> | |
223 </when> | |
224 <when value="No" /> | |
225 </conditional> <!-- /own_junctions --> | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
226 |
0 | 227 <!-- Closure search. --> |
228 <conditional name="closure_search"> | |
229 <param name="use_search" type="select" label="Use Closure Search"> | |
230 <option value="No">No</option> | |
231 <option value="Yes">Yes</option> | |
232 </param> | |
233 <when value="Yes"> | |
234 <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." /> | |
235 <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" /> | |
236 <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" /> | |
237 </when> | |
238 <when value="No" /> | |
239 </conditional> | |
240 <!-- Coverage search. --> | |
241 <conditional name="coverage_search"> | |
242 <param name="use_search" type="select" label="Use Coverage Search"> | |
243 <option selected="true" value="Yes">Yes</option> | |
244 <option value="No">No</option> | |
245 </param> | |
246 <when value="Yes"> | |
247 <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" /> | |
248 <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" /> | |
249 </when> | |
250 <when value="No" /> | |
251 </conditional> | |
252 <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer."> | |
253 <option value="No">No</option> | |
254 <option value="Yes">Yes</option> | |
255 </param> | |
256 </when> <!-- full --> | |
257 </conditional> <!-- sParams --> | |
258 </when> <!-- single --> | |
259 <when value="paired"> | |
260 <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> | |
261 <param name="mate_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs" /> | |
262 <conditional name="pParams"> | |
263 <param name="pSettingsType" type="select" label="TopHat settings to use" help="Use the Full parameter list to change default settings."> | |
264 <option value="preSet">Default settings</option> | |
265 <option value="full">Full parameter list</option> | |
266 </param> | |
267 <when value="preSet" /> | |
268 <!-- Full/advanced params. --> | |
269 <when value="full"> | |
270 <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol."> | |
271 <option value="fr-unstranded">FR Unstranded</option> | |
272 <option value="fr-firststrand">FR First Strand</option> | |
273 <option value="fr-secondstrand">FR Second Strand</option> | |
274 </param> | |
275 <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/> | |
276 <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /> | |
277 <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /> | |
278 <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /> | |
279 <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." /> | |
280 <expand macro="indel_searchConditional" /> | |
281 <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" /> | |
282 <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" /> | |
283 <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /> | |
284 <param name="initial_read_mismatches" type="integer" min="0" value="2" label="Number of mismatches allowed in the initial read mapping" /> | |
285 <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /> | |
286 <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" /> | |
287 <!-- Options for supplying own junctions. --> | |
288 <expand macro="own_junctionsConditional" /> | |
289 <!-- Closure search. --> | |
290 <conditional name="closure_search"> | |
291 <param name="use_search" type="select" label="Use Closure Search"> | |
292 <option value="No">No</option> | |
293 <option value="Yes">Yes</option> | |
294 </param> | |
295 <when value="Yes"> | |
296 <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." /> | |
297 <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" /> | |
298 <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" /> | |
299 </when> | |
300 <when value="No" /> | |
301 </conditional> | |
302 <!-- Coverage search. --> | |
303 <conditional name="coverage_search"> | |
304 <param name="use_search" type="select" label="Use Coverage Search"> | |
305 <option selected="true" value="Yes">Yes</option> | |
306 <option value="No">No</option> | |
307 </param> | |
308 <when value="Yes"> | |
309 <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" /> | |
310 <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" /> | |
311 </when> | |
312 <when value="No" /> | |
313 </conditional> | |
314 <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer."> | |
315 <option value="No">No</option> | |
316 <option value="Yes">Yes</option> | |
317 </param> | |
318 </when> <!-- full --> | |
319 </conditional> <!-- pParams --> | |
320 </when> <!-- paired --> | |
321 </conditional> | |
322 </inputs> | |
323 | |
324 <outputs> | |
325 <data format="bed" name="insertions" label="${tool.name} on ${on_string}: insertions" from_work_dir="tophat_out/insertions.bed"> | |
326 <expand macro="dbKeyActions" /> | |
327 </data> | |
328 <data format="bed" name="deletions" label="${tool.name} on ${on_string}: deletions" from_work_dir="tophat_out/deletions.bed"> | |
329 <expand macro="dbKeyActions" /> | |
330 </data> | |
331 <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions" from_work_dir="tophat_out/junctions.bed"> | |
332 <expand macro="dbKeyActions" /> | |
333 </data> | |
334 <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits" from_work_dir="tophat_out/accepted_hits.bam"> | |
335 <expand macro="dbKeyActions" /> | |
336 </data> | |
337 </outputs> | |
338 <macros> | |
339 <import>tophat_macros.xml</import> | |
340 <macro name="dbKeyActions"> | |
341 <actions> | |
342 <conditional name="refGenomeSource.genomeSource"> | |
343 <when value="indexed"> | |
344 <action type="metadata" name="dbkey"> | |
345 <option type="from_data_table" name="tophat_indexes" column="1" offset="0"> | |
346 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> | |
347 <filter type="param_value" ref="refGenomeSource.index" column="0"/> | |
348 </option> | |
349 </action> | |
350 </when> | |
351 <when value="history"> | |
352 <action type="metadata" name="dbkey"> | |
353 <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" /> | |
354 </action> | |
355 </when> | |
356 </conditional> | |
357 </actions> | |
358 </macro> | |
359 </macros> | |
360 <tests> | |
361 <!-- Test base-space single-end reads with pre-built index and preset parameters --> | |
362 <test> | |
363 <!-- TopHat commands: | |
364 tophat -o tmp_dir -p 1 tophat_in1 test-data/tophat_in2.fastqsanger | |
365 Rename the files in tmp_dir appropriately | |
366 --> | |
367 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" /> | |
368 <param name="genomeSource" value="indexed" /> | |
369 <param name="index" value="tophat_test" /> | |
370 <param name="sPaired" value="single" /> | |
371 <param name="sSettingsType" value="preSet" /> | |
372 <output name="junctions" file="tophat_out1j.bed" /> | |
373 <output name="accepted_hits" file="tophat_out1h.bam" compare="sim_size" /> | |
374 </test> | |
375 <!-- Test using base-space test data: paired-end reads, index from history. --> | |
376 <test> | |
377 <!-- TopHat commands: | |
378 bowtie-build -f test-data/tophat_in1.fasta tophat_in1 | |
379 tophat -o tmp_dir -p 1 -r 20 tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger | |
380 Rename the files in tmp_dir appropriately | |
381 --> | |
382 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" /> | |
383 <param name="genomeSource" value="history" /> | |
384 <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" /> | |
385 <param name="sPaired" value="paired" /> | |
386 <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger" /> | |
387 <param name="mate_inner_distance" value="20" /> | |
388 <param name="pSettingsType" value="preSet" /> | |
389 <output name="junctions" file="tophat_out2j.bed" /> | |
390 <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" /> | |
391 </test> | |
392 <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters --> | |
393 <test> | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
394 <!-- TopHat commands: |
0 | 395 bowtie-build -f test-data/tophat_in1.fasta tophat_in1 |
396 tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intron 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger | |
397 Replace the + with double-dash | |
398 Rename the files in tmp_dir appropriately | |
399 --> | |
400 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> | |
401 <param name="genomeSource" value="history"/> | |
402 <param name="ownFile" value="tophat_in1.fasta"/> | |
403 <param name="sPaired" value="single"/> | |
404 <param name="sSettingsType" value="full"/> | |
405 <param name="library_type" value="FR Unstranded"/> | |
406 <param name="anchor_length" value="8"/> | |
407 <param name="splice_mismatches" value="0"/> | |
408 <param name="min_intron_length" value="70"/> | |
409 <param name="max_intron_length" value="500000"/> | |
410 <param name="max_multihits" value="40"/> | |
411 <param name="min_segment_intron" value="50" /> | |
412 <param name="max_segment_intron" value="500000" /> | |
413 <param name="seg_mismatches" value="2"/> | |
414 <param name="seg_length" value="25"/> | |
415 <param name="allow_indel_search" value="Yes"/> | |
416 <param name="max_insertion_length" value="3"/> | |
417 <param name="max_deletion_length" value="3"/> | |
418 <param name="use_junctions" value="Yes" /> | |
419 <param name="use_annotations" value="No" /> | |
420 <param name="use_juncs" value="No" /> | |
421 <param name="no_novel_juncs" value="No" /> | |
422 <param name="use_search" value="Yes" /> | |
423 <param name="min_closure_exon" value="50" /> | |
424 <param name="min_closure_intron" value="50" /> | |
425 <param name="max_closure_intron" value="5000" /> | |
426 <param name="use_search" value="Yes" /> | |
427 <param name="min_coverage_intron" value="50" /> | |
428 <param name="max_coverage_intron" value="20000" /> | |
429 <param name="microexon_search" value="Yes" /> | |
430 <output name="insertions" file="tophat_out3i.bed" /> | |
431 <output name="deletions" file="tophat_out3d.bed" /> | |
432 <output name="junctions" file="tophat_out3j.bed" /> | |
433 <output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" /> | |
434 </test> | |
435 <!-- Test base-space paired-end reads with pre-built index and full parameters --> | |
436 <test> | |
437 <!-- TopHat commands: | |
438 tophat -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger | |
439 Replace the + with double-dash | |
440 Rename the files in tmp_dir appropriately | |
441 --> | |
442 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> | |
443 <param name="genomeSource" value="indexed"/> | |
444 <param name="index" value="tophat_test"/> | |
445 <param name="sPaired" value="paired"/> | |
446 <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/> | |
447 <param name="mate_inner_distance" value="20"/> | |
448 <param name="pSettingsType" value="full"/> | |
449 <param name="library_type" value="FR Unstranded"/> | |
450 <param name="mate_std_dev" value="20"/> | |
451 <param name="anchor_length" value="8"/> | |
452 <param name="splice_mismatches" value="0"/> | |
453 <param name="min_intron_length" value="70"/> | |
454 <param name="max_intron_length" value="500000"/> | |
455 <param name="max_multihits" value="40"/> | |
456 <param name="min_segment_intron" value="50" /> | |
457 <param name="max_segment_intron" value="500000" /> | |
458 <param name="seg_mismatches" value="2"/> | |
459 <param name="seg_length" value="25"/> | |
460 <param name="allow_indel_search" value="No"/> | |
461 <param name="use_junctions" value="Yes" /> | |
462 <param name="use_annotations" value="No" /> | |
463 <param name="use_juncs" value="No" /> | |
464 <param name="no_novel_juncs" value="No" /> | |
465 <param name="use_search" value="No" /> | |
466 <param name="microexon_search" value="Yes" /> | |
467 <output name="junctions" file="tophat_out4j.bed" /> | |
468 <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" /> | |
469 </test> | |
470 </tests> | |
471 | |
472 <help> | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
473 **TopHat Overview** |
0 | 474 |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
475 TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons. |
0 | 476 |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
477 .. _TopHat: http://ccb.jhu.edu/software/tophat/ |
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
478 |
0 | 479 ------ |
480 | |
481 **Know what you are doing** | |
482 | |
483 .. class:: warningmark | |
484 | |
485 There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. | |
486 | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
487 .. __: http://ccb.jhu.edu/software/tophat/manual.shtml |
0 | 488 |
489 ------ | |
490 | |
491 **Input formats** | |
492 | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
493 TopHat accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files. |
0 | 494 |
495 ------ | |
496 | |
497 **Outputs** | |
498 | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
499 TopHat produces two output files: |
0 | 500 |
501 - junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction. | |
502 - accepted_hits -- A list of read alignments in BAM_ format. | |
503 | |
504 .. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1 | |
505 .. _BAM: http://samtools.sourceforge.net/ | |
506 | |
507 Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format. | |
508 | |
509 ------- | |
510 | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
511 **TopHat settings** |
0 | 512 |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
513 Every option has a default value, and you can change any of them. Only a subset of TopHat's options is implemented here. |
0 | 514 |
515 ------ | |
516 | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
517 **TopHat parameter list** |
0 | 518 |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
519 This is a list of implemented TopHat options:: |
0 | 520 |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
521 -r This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments |
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
522 selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter |
0 | 523 is required for paired end runs. |
524 --mate-std-dev INT The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp. | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
525 -a/--min-anchor-length INT The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction. Note that individual spliced |
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
526 alignments may span a junction with fewer than this many bases on one side. However, every junction involved in spliced alignments is supported by at least one |
0 | 527 read with this many bases on each side. This must be at least 3 and the default is 8. |
528 -m/--splice-mismatches INT The maximum number of mismatches that may appear in the "anchor" region of a spliced alignment. The default is 0. | |
529 -i/--min-intron-length INT The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart. The default is 70. | |
530 -I/--max-intron-length INT The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000. | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
531 -g/--max-multihits INT Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many |
0 | 532 alignments. The default is 40. |
533 -G/--GTF [GTF 2.2 file] Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping. | |
534 -j/--raw-juncs [juncs file] Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], where left and right are zero-based coordinates that specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive. | |
535 --no-novel-juncs Only look for junctions indicated in the supplied GTF file. (ignored without -G) | |
536 --no-closure-search Disables the mate pair closure-based search for junctions. Currently has no effect, because closure search is off by default. | |
537 --closure-search Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about 50bp or less). | |
538 --no-coverage-search Disables the coverage based search for junctions. | |
539 --coverage-search Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity. | |
540 --microexon-search With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer. | |
541 --butterfly-search TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA, that fall within the introns of your transcripts. | |
542 --segment-mismatches Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2. | |
543 --segment-length Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25. | |
544 --min-closure-exon During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50. | |
545 --min-closure-intron The minimum intron length that may be found during closure search. The default is 50. | |
546 --max-closure-intron The maximum intron length that may be found during closure search. The default is 5000. | |
547 --min-coverage-intron The minimum intron length that may be found during coverage search. The default is 50. | |
548 --max-coverage-intron The maximum intron length that may be found during coverage search. The default is 20000. | |
549 --min-segment-intron The minimum intron length that may be found during split-segment search. The default is 50. | |
550 --max-segment-intron The maximum intron length that may be found during split-segment search. The default is 500000. | |
551 </help> | |
1
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
552 <citations> |
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
553 <citation type="doi">10.1093/bioinformatics/btp120</citation> |
af089ca8b4ee
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
554 </citations> |
0 | 555 </tool> |