Mercurial > repos > bgruening > flye
comparison flye.xml @ 10:7066276883d6 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flye commit 3507b06e5d0149d514ede3d1a56c082e89f14f89
author | bgruening |
---|---|
date | Mon, 26 Aug 2024 13:54:16 +0000 |
parents | 5f2671cec451 |
children | 6e5b72b4e1fb |
comparison
equal
deleted
inserted
replaced
9:5f2671cec451 | 10:7066276883d6 |
---|---|
1 <tool id="flye" name="Flye" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="20.01"> | 1 <tool id="flye" name="Flye" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="20.01"> |
2 <description>de novo assembler for single molecule sequencing reads</description> | 2 <description>de novo assembler for single molecule sequencing reads</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="edam_ontology"/> | |
7 <expand macro="xrefs"/> | |
6 <expand macro="requirements" /> | 8 <expand macro="requirements" /> |
7 <expand macro="edam_ontology"/> | |
8 <version_command>flye --version</version_command> | 9 <version_command>flye --version</version_command> |
9 <command detect_errors="exit_code"><![CDATA[ | 10 <command detect_errors="exit_code"><![CDATA[ |
10 #for $counter, $input in enumerate($inputs): | 11 #for $counter, $input in enumerate($inputs): |
11 #if $input.is_of_type('fastqsanger', 'fastq'): | 12 #if $input.is_of_type('fastqsanger', 'fastq'): |
12 #set $ext = 'fastq' | 13 #set $ext = 'fastq' |
15 #elif $input.is_of_type('fasta.gz'): | 16 #elif $input.is_of_type('fasta.gz'): |
16 #set $ext = 'fasta.gz' | 17 #set $ext = 'fasta.gz' |
17 #elif $input.is_of_type('fasta'): | 18 #elif $input.is_of_type('fasta'): |
18 #set $ext = 'fasta' | 19 #set $ext = 'fasta' |
19 #end if | 20 #end if |
20 ln -s '$input' ./input_${counter}.${ext} && | 21 ln -sf '$input' ./input_${counter}.${ext} && |
21 #end for | 22 #end for |
22 flye | 23 flye |
23 $mode_conditional.mode | 24 $mode_conditional.mode |
24 #for $counter, $input in enumerate($inputs): | 25 #for $counter, $input in enumerate($inputs): |
25 ./input_${counter}.$ext | 26 ./input_${counter}.$ext |
102 <param name="generate_log" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate a log file"/> | 103 <param name="generate_log" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate a log file"/> |
103 </inputs> | 104 </inputs> |
104 <outputs> | 105 <outputs> |
105 <data name="consensus" format="fasta" from_work_dir="out_dir/assembly.fasta" label="${tool.name} on ${on_string}: consensus"/> | 106 <data name="consensus" format="fasta" from_work_dir="out_dir/assembly.fasta" label="${tool.name} on ${on_string}: consensus"/> |
106 <data name="assembly_graph" format="graph_dot" from_work_dir="out_dir/assembly_graph.gv" label="${tool.name} on ${on_string}: assembly graph"/> | 107 <data name="assembly_graph" format="graph_dot" from_work_dir="out_dir/assembly_graph.gv" label="${tool.name} on ${on_string}: assembly graph"/> |
107 <data name="assembly_gfa" format="txt" from_work_dir="out_dir/assembly_graph.gfa" label="${tool.name} on ${on_string}: graphical fragment assembly"/> | 108 <data name="assembly_gfa" format="gfa" from_work_dir="out_dir/assembly_graph.gfa" label="${tool.name} on ${on_string}: graphical fragment assembly"/> |
108 <data name="assembly_info" format="tabular" from_work_dir="out_dir/assembly_info.txt" label="${tool.name} on ${on_string}: assembly info"/> | 109 <data name="assembly_info" format="tabular" from_work_dir="out_dir/assembly_info.txt" label="${tool.name} on ${on_string}: assembly info"/> |
109 <data name="flye_log" format="txt" from_work_dir="out_dir/flye.log" label="${tool.name} on ${on_string}: log"> | 110 <data name="flye_log" format="txt" from_work_dir="out_dir/flye.log" label="${tool.name} on ${on_string}: log"> |
110 <filter>generate_log</filter> | 111 <filter>generate_log</filter> |
111 </data> | 112 </data> |
112 </outputs> | 113 </outputs> |
117 <param name="mode" value="--pacbio-raw"/> | 118 <param name="mode" value="--pacbio-raw"/> |
118 <param name="iterations" value="0"/> | 119 <param name="iterations" value="0"/> |
119 <param name="generate_log" value="true"/> | 120 <param name="generate_log" value="true"/> |
120 <output name="assembly_info" file="result1_assembly_info.txt" ftype="tabular" compare="sim_size"/> | 121 <output name="assembly_info" file="result1_assembly_info.txt" ftype="tabular" compare="sim_size"/> |
121 <output name="assembly_graph" file="result1_assembly_graph.dot" ftype="graph_dot" compare="sim_size"/> | 122 <output name="assembly_graph" file="result1_assembly_graph.dot" ftype="graph_dot" compare="sim_size"/> |
122 <output name="assembly_gfa" file="result1_assembly_graph.gfa" ftype="txt" compare="sim_size"/> | 123 <output name="assembly_gfa" file="result1_assembly_graph.gfa" ftype="gfa" compare="diff" lines_diff="10"/> |
123 <output name="consensus" file="result1_assembly.fasta" ftype="fasta" compare="sim_size"/> | 124 <output name="consensus" ftype="fasta"> |
125 <assert_contents> | |
126 <has_line line=">contig_1"/> | |
127 </assert_contents> | |
128 </output> | |
124 <output name="flye_log" file="result1.log" ftype="txt" compare="sim_size"/> | 129 <output name="flye_log" file="result1.log" ftype="txt" compare="sim_size"/> |
125 </test> | 130 </test> |
126 <!--Test 02: nano raw--> | 131 <!--Test 02: nano raw--> |
127 <test expect_num_outputs="4"> | 132 <test expect_num_outputs="4"> |
128 <param name="inputs" ftype="fasta.gz" value="nanopore.fasta.gz"/> | 133 <param name="inputs" ftype="fasta.gz" value="nanopore.fasta.gz"/> |
136 <output name="assembly_graph" ftype="graph_dot"> | 141 <output name="assembly_graph" ftype="graph_dot"> |
137 <assert_contents> | 142 <assert_contents> |
138 <has_size value="803" delta="100"/> | 143 <has_size value="803" delta="100"/> |
139 </assert_contents> | 144 </assert_contents> |
140 </output> | 145 </output> |
141 <output name="assembly_gfa" ftype="txt"> | 146 <output name="assembly_gfa" ftype="gfa"> |
142 <assert_contents> | 147 <assert_contents> |
143 <has_size value="35047" delta="100"/> | 148 <has_size value="35047" delta="100"/> |
144 </assert_contents> | 149 </assert_contents> |
145 </output> | 150 </output> |
146 <output name="consensus" ftype="fasta"> | 151 <output name="consensus" ftype="fasta"> |
168 <output name="assembly_graph" ftype="graph_dot"> | 173 <output name="assembly_graph" ftype="graph_dot"> |
169 <assert_contents> | 174 <assert_contents> |
170 <has_size value="1840" delta="100"/> | 175 <has_size value="1840" delta="100"/> |
171 </assert_contents> | 176 </assert_contents> |
172 </output> | 177 </output> |
173 <output name="assembly_gfa" ftype="txt"> | 178 <output name="assembly_gfa" ftype="gfa"> |
174 <assert_contents> | 179 <assert_contents> |
175 <has_size value="420752" delta="100"/> | 180 <has_size value="420752" delta="100"/> |
176 </assert_contents> | 181 </assert_contents> |
177 </output> | 182 </output> |
178 <output name="consensus" ftype="fasta"> | 183 <output name="consensus" ftype="fasta"> |
196 <output name="assembly_graph" ftype="graph_dot"> | 201 <output name="assembly_graph" ftype="graph_dot"> |
197 <assert_contents> | 202 <assert_contents> |
198 <has_size value="367" delta="100"/> | 203 <has_size value="367" delta="100"/> |
199 </assert_contents> | 204 </assert_contents> |
200 </output> | 205 </output> |
201 <output name="assembly_gfa" ftype="txt"> | 206 <output name="assembly_gfa" ftype="gfa"> |
202 <assert_contents> | 207 <assert_contents> |
203 <has_size value="418729" delta="100"/> | 208 <has_size value="418729" delta="100"/> |
204 </assert_contents> | 209 </assert_contents> |
205 </output> | 210 </output> |
206 <output name="consensus" ftype="fasta"> | 211 <output name="consensus" ftype="fasta"> |
224 <output name="assembly_graph" ftype="graph_dot"> | 229 <output name="assembly_graph" ftype="graph_dot"> |
225 <assert_contents> | 230 <assert_contents> |
226 <has_size value="1248" delta="100"/> | 231 <has_size value="1248" delta="100"/> |
227 </assert_contents> | 232 </assert_contents> |
228 </output> | 233 </output> |
229 <output name="assembly_gfa" ftype="txt"> | 234 <output name="assembly_gfa" ftype="gfa"> |
230 <assert_contents> | 235 <assert_contents> |
231 <has_size value="420252" delta="100"/> | 236 <has_size value="419414" delta="1000"/> |
232 </assert_contents> | 237 </assert_contents> |
233 </output> | 238 </output> |
234 <output name="consensus" ftype="fasta"> | 239 <output name="consensus" ftype="fasta"> |
235 <assert_contents> | 240 <assert_contents> |
236 <has_size value="427129" delta="100"/> | 241 <has_size value="426277" delta="1000"/> |
237 </assert_contents> | 242 </assert_contents> |
238 </output> | 243 </output> |
239 </test> | 244 </test> |
240 <!--Test 06: hifi error option--> | 245 <!--Test 06: hifi error option--> |
241 <test expect_num_outputs="4"> | 246 <test expect_num_outputs="4"> |
250 <has_size value="286" delta="100"/> | 255 <has_size value="286" delta="100"/> |
251 </assert_contents> | 256 </assert_contents> |
252 </output> | 257 </output> |
253 <output name="assembly_graph" ftype="graph_dot"> | 258 <output name="assembly_graph" ftype="graph_dot"> |
254 <assert_contents> | 259 <assert_contents> |
255 <has_size value="1273" delta="100"/> | 260 <has_size value="1248" delta="500"/> |
256 </assert_contents> | 261 </assert_contents> |
257 </output> | 262 </output> |
258 <output name="assembly_gfa" ftype="txt"> | 263 <output name="assembly_gfa" ftype="gfa"> |
259 <assert_contents> | 264 <assert_contents> |
260 <has_size value="420252" delta="100"/> | 265 <has_size value="420254" delta="2000"/> |
261 </assert_contents> | 266 </assert_contents> |
262 </output> | 267 </output> |
263 <output name="consensus" ftype="fasta"> | 268 <output name="consensus" ftype="fasta"> |
264 <assert_contents> | 269 <assert_contents> |
265 <has_size value="427129" delta="100"/> | 270 <has_size value="427131" delta="2000"/> |
266 </assert_contents> | 271 </assert_contents> |
267 </output> | 272 </output> |
268 </test> | 273 </test> |
269 <!--Test 07: keep haplotypes--> | 274 <!--Test 07: keep haplotypes--> |
270 <test expect_num_outputs="4"> | 275 <test expect_num_outputs="4"> |
275 </conditional> | 280 </conditional> |
276 <param name="min_overlap" value="1000"/> | 281 <param name="min_overlap" value="1000"/> |
277 <param name="keep-haplotypes" value="true"/> | 282 <param name="keep-haplotypes" value="true"/> |
278 <output name="assembly_info" ftype="tabular"> | 283 <output name="assembly_info" ftype="tabular"> |
279 <assert_contents> | 284 <assert_contents> |
280 <has_size value="286" delta="100"/> | 285 <has_size value="286" delta="200"/> |
281 </assert_contents> | 286 </assert_contents> |
282 </output> | 287 </output> |
283 <output name="assembly_graph" ftype="graph_dot"> | 288 <output name="assembly_graph" ftype="graph_dot"> |
284 <assert_contents> | 289 <assert_contents> |
285 <has_size value="1273" delta="100"/> | 290 <has_size value="1273" delta="500"/> |
286 </assert_contents> | 291 </assert_contents> |
287 </output> | 292 </output> |
288 <output name="assembly_gfa" ftype="txt"> | 293 <output name="assembly_gfa" ftype="gfa"> |
289 <assert_contents> | 294 <assert_contents> |
290 <has_size value="420252" delta="100"/> | 295 <has_size value="420254" delta="3000"/> |
291 </assert_contents> | 296 </assert_contents> |
292 </output> | 297 </output> |
293 <output name="consensus" ftype="fasta"> | 298 <output name="consensus" ftype="fasta"> |
294 <assert_contents> | 299 <assert_contents> |
295 <has_size value="427129" delta="100"/> | 300 <has_size value="427131" delta="3000"/> |
296 </assert_contents> | 301 </assert_contents> |
297 </output> | 302 </output> |
298 </test> | 303 </test> |
299 <!--Test 08: scaffolding mode--> | 304 <!--Test 08: scaffolding mode--> |
300 <test expect_num_outputs="4"> | 305 <test expect_num_outputs="4"> |
301 <param name="inputs" ftype="fastq.gz" value="ecoli_hifi_01.fastq.gz,ecoli_hifi_02.fastq.gz,ecoli_hifi_03.fastq.gz,ecoli_hifi_04.fastq.gz,ecoli_hifi_05.fastq.gz,ecoli_hifi_06.fastq.gz,ecoli_hifi_07.fastq.gz,ecoli_hifi_08.fastq.gz,ecoli_hifi_09.fastq.gz"/> | 306 <param name="inputs" ftype="fastq.gz" value="ecoli_hifi_01.fastq.gz,ecoli_hifi_02.fastq.gz,ecoli_hifi_03.fastq.gz,ecoli_hifi_04.fastq.gz,ecoli_hifi_05.fastq.gz,ecoli_hifi_06.fastq.gz,ecoli_hifi_07.fastq.gz,ecoli_hifi_08.fastq.gz,ecoli_hifi_09.fastq.gz"/> |
302 <param name="mode" value="--nano-hq"/> | 307 <param name="mode" value="--nano-hq"/> |
303 <param name="min_overlap" value="1000"/> | 308 <param name="min_overlap" value="1000"/> |
304 <param name="scaffolding" value="true"/> | 309 <param name="scaffold" value="true"/> |
305 <output name="assembly_info" ftype="tabular"> | 310 <output name="assembly_info" ftype="tabular"> |
306 <assert_contents> | 311 <assert_contents> |
307 <has_size value="286" delta="100"/> | 312 <has_size value="286" delta="100"/> |
308 </assert_contents> | 313 </assert_contents> |
309 </output> | 314 </output> |
310 <output name="assembly_graph" ftype="graph_dot"> | 315 <output name="assembly_graph" ftype="graph_dot"> |
311 <assert_contents> | 316 <assert_contents> |
312 <has_size value="1248" delta="100"/> | 317 <has_size value="1248" delta="100"/> |
313 </assert_contents> | 318 </assert_contents> |
314 </output> | 319 </output> |
315 <output name="assembly_gfa" ftype="txt"> | 320 <output name="assembly_gfa" ftype="gfa"> |
316 <assert_contents> | 321 <assert_contents> |
317 <has_size value="420252" delta="100"/> | 322 <has_size value="419414" delta="2000"/> |
318 </assert_contents> | 323 </assert_contents> |
319 </output> | 324 </output> |
320 <output name="consensus" ftype="fasta"> | 325 <output name="consensus" ftype="fasta"> |
321 <assert_contents> | 326 <assert_contents> |
322 <has_size value="427129" delta="100"/> | 327 <has_size value="426277" delta="2000"/> |
323 </assert_contents> | 328 </assert_contents> |
324 </output> | 329 </output> |
325 </test> | 330 </test> |
326 <!--Test 09: test not-alt-contigs parameter w--> | 331 <!--Test 09: test not-alt-contigs parameter w--> |
327 <test expect_num_outputs="4"> | 332 <test expect_num_outputs="4"> |
337 <output name="assembly_graph" ftype="graph_dot"> | 342 <output name="assembly_graph" ftype="graph_dot"> |
338 <assert_contents> | 343 <assert_contents> |
339 <has_size value="217" delta="100"/> | 344 <has_size value="217" delta="100"/> |
340 </assert_contents> | 345 </assert_contents> |
341 </output> | 346 </output> |
342 <output name="assembly_gfa" ftype="txt"> | 347 <output name="assembly_gfa" ftype="gfa"> |
343 <assert_contents> | 348 <assert_contents> |
344 <has_size value="5110" delta="100"/> | 349 <has_size value="5110" delta="100"/> |
345 </assert_contents> | 350 </assert_contents> |
346 </output> | 351 </output> |
347 <output name="consensus" ftype="fasta"> | 352 <output name="consensus" ftype="fasta"> |
350 </assert_contents> | 355 </assert_contents> |
351 </output> | 356 </output> |
352 </test> | 357 </test> |
353 </tests> | 358 </tests> |
354 <help><![CDATA[ | 359 <help><![CDATA[ |
355 | |
356 .. class:: infomark | |
357 | 360 |
358 **Purpose** | 361 **Purpose** |
359 | 362 |
360 Flye is a de novo assembler for single molecule sequencing reads, such as those produced by PacBio and Oxford Nanopore Technologies. | 363 Flye is a de novo assembler for single molecule sequencing reads, such as those produced by PacBio and Oxford Nanopore Technologies. |
361 It is designed for a wide range of datasets, from small bacterial projects to large mammalian-scale assemblies. The package represents | 364 It is designed for a wide range of datasets, from small bacterial projects to large mammalian-scale assemblies. The package represents |
362 a complete pipeline: it takes raw PacBio/ONT reads as input and outputs polished contigs. Flye also has a special mode for metagenome | 365 a complete pipeline: it takes raw PacBio/ONT reads as input and outputs polished contigs. Flye also has a special mode for metagenome |
363 assembly. | 366 assembly. |
364 | 367 |
365 ---- | 368 ---- |
366 | 369 |
367 .. class:: infomark | |
368 | |
369 **Quick usage** | 370 **Quick usage** |
370 | 371 |
371 Input reads can be in FASTA or FASTQ format, uncompressed or compressed with gz. Currently, PacBio (raw, corrected, HiFi) and ONT reads | 372 Input reads can be in FASTA or FASTQ format, uncompressed or compressed with gz. Currently, PacBio (raw, corrected, HiFi) and ONT reads |
372 (raw, corrected) are supported. Expected error rates are <30% for raw, <3% for corrected, and <1% for HiFi. Note that Flye was primarily | 373 (raw, corrected) are supported. Expected error rates are <30% for raw, <3% for corrected, and <1% for HiFi. Note that Flye was primarily |
373 developed to run on raw reads. You may specify multiple files with reads (separated by spaces). Mixing different read types is not yet supported. The *--meta* o | 374 developed to run on raw reads. You may specify multiple files with reads (separated by spaces). Mixing different read types is not yet supported. The *--meta* o |
378 To reduce memory consumption for large genome assemblies, you can use a subset of the longest reads for initial disjointig assembly by | 379 To reduce memory consumption for large genome assemblies, you can use a subset of the longest reads for initial disjointig assembly by |
379 specifying *--asm-coverage* and *--genome-size* options. Typically, 40x coverage is enough to produce good disjointigs. | 380 specifying *--asm-coverage* and *--genome-size* options. Typically, 40x coverage is enough to produce good disjointigs. |
380 | 381 |
381 ---- | 382 ---- |
382 | 383 |
383 .. class:: infomark | |
384 | |
385 **Outputs** | 384 **Outputs** |
386 | 385 |
387 The main output files are: | 386 The main output files are: |
388 | 387 |
389 :: | 388 * Final assembly: contains contigs and possibly scaffolds (see below). |
390 | 389 * Final repeat graph: note that the edge sequences might be different (shorter) than contig sequences, because contigs might include multiple graph edges. |
391 - Final assembly: contains contigs and possibly scaffolds (see below). | 390 * Extra information about contigs (such as length or coverage). |
392 - Final repeat graph: note that the edge sequences might be different (shorter) than contig sequences, because contigs might include multiple graph edges. | |
393 - Extra information about contigs (such as length or coverage). | |
394 | 391 |
395 Each contig is formed by a single unique graph edge. If possible, unique contigs are extended with the sequence from flanking unresolved repeats on the graph. Thus, | 392 Each contig is formed by a single unique graph edge. If possible, unique contigs are extended with the sequence from flanking unresolved repeats on the graph. Thus, |
396 a contig fully contains the corresponding graph edge (with the same id), but might be longer than this edge. This is somewhat similar to unitig-contig relation in | 393 a contig fully contains the corresponding graph edge (with the same id), but might be longer than this edge. This is somewhat similar to unitig-contig relation in |
397 OLC assemblers. In a rare case when a repetitive graph edge is not covered by the set of "extended" contigs, it will also be output in the assembly file. | 394 OLC assemblers. In a rare case when a repetitive graph edge is not covered by the set of "extended" contigs, it will also be output in the assembly file. |
398 | 395 |
400 the assembly file (with a scaffold prefix). Since it is hard to give a reliable estimate of the gap size, those gaps are represented with the default 100 Ns. | 397 the assembly file (with a scaffold prefix). Since it is hard to give a reliable estimate of the gap size, those gaps are represented with the default 100 Ns. |
401 assembly_info.txt file (below) contains additional information about how scaffolds were formed. | 398 assembly_info.txt file (below) contains additional information about how scaffolds were formed. |
402 | 399 |
403 Extra information about contigs/scaffolds is output into the assembly_info.txt file. It is a tab-delimited table with the columns as follows: | 400 Extra information about contigs/scaffolds is output into the assembly_info.txt file. It is a tab-delimited table with the columns as follows: |
404 | 401 |
405 :: | 402 * Contig/scaffold id |
406 | 403 * Length |
407 - Contig/scaffold id | 404 * Coverage |
408 - Length | 405 * Is circular, (Y)es or (N)o |
409 - Coverage | 406 * Is repetitive, (Y)es or (N)o |
410 - Is circular, (Y)es or (N)o | 407 * Multiplicity (based on coverage) |
411 - Is repetitive, (Y)es or (N)o | 408 * Alternative group |
412 - Multiplicity (based on coverage) | 409 * Graph path (graph path corresponding to this contig/scaffold). |
413 - Alternative group | 410 |
414 - Graph path (graph path corresponding to this contig/scaffold). | 411 Scaffold gaps are marked with `??` symbols, and `*` symbol denotes a terminal graph node. Alternative contigs (representing alternative haplotypes) will have the same alt. |
415 | 412 group ID. Primary contigs are marked by `*`. |
416 Scaffold gaps are marked with ?? symbols, and * symbol denotes a terminal graph node. Alternative contigs (representing alternative haplotypes) will have the same alt. | |
417 group ID. Primary contigs are marked by *. | |
418 | 413 |
419 ---- | 414 ---- |
420 | 415 |
421 .. class:: infomark | |
422 | |
423 **Algorithm Description** | 416 **Algorithm Description** |
424 | 417 |
425 This is a brief description of the Flye algorithm. Please refer to the manuscript for more detailed information. The draft contig extension is organized as follows: | 418 This is a brief description of the Flye algorithm. Please refer to the manuscript for more detailed information. The draft contig extension is organized as follows: |
426 | 419 |
427 :: | 420 * K-mer counting / erroneous k-mer pre-filtering |
428 | 421 * Solid k-mer selection (k-mers with sufficient frequency, which are unlikely to be erroneous) |
429 - K-mer counting / erroneous k-mer pre-filtering | 422 * Contig extension. The algorithm starts from a single read and extends it with a next overlapping read (overlaps are dynamically detected using the selected solid k-mers). |
430 - Solid k-mer selection (k-mers with sufficient frequency, which are unlikely to be erroneous) | |
431 - Contig extension. The algorithm starts from a single read and extends it with a next overlapping read (overlaps are dynamically detected using the selected solid k-mers). | |
432 | 423 |
433 Note that we do not attempt to resolve repeats at this stage, thus the reconstructed contigs might contain misassemblies. Flye then aligns the reads on these draft | 424 Note that we do not attempt to resolve repeats at this stage, thus the reconstructed contigs might contain misassemblies. Flye then aligns the reads on these draft |
434 contigs using minimap2 and calls a consensus. Afterwards, Flye performs repeat analysis as follows: | 425 contigs using minimap2 and calls a consensus. Afterwards, Flye performs repeat analysis as follows: |
435 | 426 |
436 :: | 427 * Repeat graph is constructed from the (possibly misassembled) contigs |
437 | 428 * In this graph all repeats longer than minimum overlap are collapsed |
438 - Repeat graph is constructed from the (possibly misassembled) contigs | 429 * The algorithm resolves repeats using the read information and graph structure |
439 - In this graph all repeats longer than minimum overlap are collapsed | 430 * The unbranching paths in the graph are output as contigs |
440 - The algorithm resolves repeats using the read information and graph structure | |
441 - The unbranching paths in the graph are output as contigs | |
442 | 431 |
443 If enabled, after resolving bridged repeats, Trestle module attempts to resolve simple unbridged repeats (of multiplicity 2) using the heterogeneities between repeat copies. | 432 If enabled, after resolving bridged repeats, Trestle module attempts to resolve simple unbridged repeats (of multiplicity 2) using the heterogeneities between repeat copies. |
444 Finally, Flye performs polishing of the resulting assembly to correct the remaining errors: | 433 Finally, Flye performs polishing of the resulting assembly to correct the remaining errors: |
445 | 434 |
446 :: | 435 * Alignment of all reads to the current assembly using minimap2 |
447 | 436 * Partition the alignment into mini-alignments (bubbles) |
448 - Alignment of all reads to the current assembly using minimap2 | 437 * Error correction of each bubble using a maximum likelihood approach |
449 - Partition the alignment into mini-alignments (bubbles) | |
450 - Error correction of each bubble using a maximum likelihood approach | |
451 | |
452 | 438 |
453 The polishing steps could be repeated, which might slightly increase quality for some datasets. | 439 The polishing steps could be repeated, which might slightly increase quality for some datasets. |
454 | 440 |
455 | 441 |
456 ]]></help> | 442 ]]></help> |