comparison flye.xml @ 13:0a59ae153827 draft

"planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/flye commit 98d55b7009fa22312b117a0138a17e2037d0cdb3-dirty"
author thanhlv
date Thu, 06 Aug 2020 13:42:00 +0000
parents af4c47bfdd37
children 13ed2ccb0fb2
comparison
equal deleted inserted replaced
12:af4c47bfdd37 13:0a59ae153827
1 <tool id="flye" name="Flye assembler" version="@VERSION@+galaxy1"> 1 <tool id="flye" name="Flye assembler" version="2.8">
2 <description>of long and error-prone reads</description> 2 <description>of long and error-prone reads</description>
3 <macros> 3 <macros>
4 <token name="@VERSION@">2.7</token> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <requirements> 6 <expand macro="requirements" />
7 <requirement type="package" version="@VERSION@">flye</requirement>
8 </requirements>
9 <version_command>flye --version</version_command> 7 <version_command>flye --version</version_command>
10 <command detect_errors="exit_code"> 8 <command detect_errors="exit_code">
11 <![CDATA[ 9 <![CDATA[
10
11 #for $counter, $input in enumerate($inputs):
12
12 #if $input.is_of_type('fastqsanger', 'fastq'): 13 #if $input.is_of_type('fastqsanger', 'fastq'):
13 #set $ext = 'fastq' 14 #set $ext = 'fastq'
14 #elif $input.is_of_type('fastqsanger.gz'): 15 #elif $input.is_of_type('fastqsanger.gz'):
15 #set $ext = 'fastq.gz' 16 #set $ext = 'fastq.gz'
16 #elif $input.is_of_type('fastq'): 17 #elif $input.is_of_type('fastq'):
20 #elif $input.is_of_type('fasta.gz'): 21 #elif $input.is_of_type('fasta.gz'):
21 #set $ext = 'fasta.gz' 22 #set $ext = 'fasta.gz'
22 #elif $input.is_of_type('fasta'): 23 #elif $input.is_of_type('fasta'):
23 #set $ext = 'fasta' 24 #set $ext = 'fasta'
24 #end if 25 #end if
25 ln -s '$input' ./input.${ext} && 26 ln -s '$input' ./input_${counter}.${ext} &&
27 #end for
28
26 flye 29 flye
27 $mode 30 $mode
28 ./input.$ext 31 #for $counter, $input in enumerate($inputs):
32 ./input_${counter}.$ext
33 #end for
34
29 -o out_dir 35 -o out_dir
30 -g '$g' 36 #if $g:
37 -g '$g'
38 #end if
31 -t \${GALAXY_SLOTS:-4} 39 -t \${GALAXY_SLOTS:-4}
32 -i $i 40 -i $i
33 #if $m: 41 #if $m:
34 -m '$m' 42 -m '$m'
35 #end if 43 #end if
36 #if $asm_coverage: 44 #if $asm_coverage:
37 --asm-coverage '$asm_coverage' 45 --asm-coverage '$asm_coverage'
38 #end if 46 #end if
39 #if $plasmid: 47 #if $plasmid:
40 $plasmid 48 '$plasmid'
41 #end if 49 #end if
42 #if $meta: 50 #if $meta:
43 $meta 51 '$meta'
44 #end if 52 #end if
45 $keep_haplotypes 53 #if $no_trestle:
46 $trestle 54 '$no_trestle'
55 #end if
47 2>&1 56 2>&1
48 ]]> </command> 57 ]]> </command>
49 <inputs> 58 <inputs>
50 <param name="input" type="data" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger.gz,fastqsanger" multiple="false" label="Input reads"> 59 <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger.gz,fastqsanger" multiple="true" label="Input reads">
51 <help><![CDATA[ 60 <help><![CDATA[
52 61
53 Input reads could be in FASTA or FASTQ format, uncompressed 62 Input reads could be in FASTA or FASTQ format, uncompressed
54 or compressed with gz. Currently, raw and corrected reads 63 or compressed with gz. Currently, raw and corrected reads
55 from PacBio and ONT are supported. The expected error rates are 64 from PacBio and ONT are supported. The expected error rates are
65 <option value="--nano-corr">Nanopore corrected</option> 74 <option value="--nano-corr">Nanopore corrected</option>
66 <option value="--pacbio-raw">PacBio raw</option> 75 <option value="--pacbio-raw">PacBio raw</option>
67 <option value="--pacbio-corr">PacBio corrected</option> 76 <option value="--pacbio-corr">PacBio corrected</option>
68 <option value="--subassemblies">high-quality contig-like input</option> 77 <option value="--subassemblies">high-quality contig-like input</option>
69 </param> 78 </param>
70 <param argument="-g" type="text" label="estimated genome size (for example, 5m or 2.6g)"> 79 <param argument="-g" optional="true" type="text" label="estimated genome size (for example, 5m or 2.6g)">
71 <help> 80 <help>
72 <![CDATA[ 81 <![CDATA[
82 <span>Since version 2.8, the genome size is no longer required as input. However, it must still be provided when the --asm-coverage option is used. </span>
83 <br>
73 <span>The genome size estimate is used for solid k-mer selection in the 84 <span>The genome size estimate is used for solid k-mer selection in the
74 initial disjointig assembly stage. <b>Flye is not very sensitive to this 85 initial disjointig assembly stage. <b>Flye is not very sensitive to this
75 parameter, and the estimate could be rough</b>. It is ok if the parameter is 86 parameter, and the estimate could be rough</b>. It is ok if the parameter is
76 within 0.5x-2x of the actual genome size. If the final assembly size is 87 within 0.5x-2x of the actual genome size. If the final assembly size is
77 very different from the initial guess, consider re-running the pipeline 88 very different from the initial guess, consider re-running the pipeline
78 with an updated estimate for better results.</span> 89 with an updated estimate for better results.</span>
79 <br> 90 <br>
80 <span>An alternative option is to run Flye in <b>--meta</b> mode, which uses a different 91 <span>An alternative option is to run Flye in <b>--meta</b> mode, which uses a different
81 approach for solid k-mer selection. This mode is almost independent from the 92 approach for solid k-mer selection. This mode is almost independent from the
82 genome size parameter (you still need to provide an estimate for the selection 93 genome size parameter (you still need to provide an estimate for the selection
83 of some other parameters). When assembly is completed, you can re-run in the 94 of some other parameters). When assembly is completed, you can re-run in the
84 normal mode with the inferred genome size.</span> 95 normal mode with the inferred genome size.</span>
85 ]]> 96 ]]>
89 <param argument="-i" type="integer" value="1" label="number of polishing iterations" /> 100 <param argument="-i" type="integer" value="1" label="number of polishing iterations" />
90 <param argument="-m" type="integer" optional="true" label="minimum overlap between reads (default: auto)" help="This sets a minimum overlap length for two reads to be considered overlapping. In the latest Flye versions, this parameter is chosen automatically based on the read length distribution (reads N90) and does not require manual setting. Typical value is 3k-5k (and down to 1k for datasets with shorter read length). Intuitively, we want to set this parameter as high as possible, so the repeat graph is less tangled. However, higher values might lead to assembly gaps. In some rare cases (for example in case of biased read length distribution) it makes sense to set this parameter manually."/> 101 <param argument="-m" type="integer" optional="true" label="minimum overlap between reads (default: auto)" help="This sets a minimum overlap length for two reads to be considered overlapping. In the latest Flye versions, this parameter is chosen automatically based on the read length distribution (reads N90) and does not require manual setting. Typical value is 3k-5k (and down to 1k for datasets with shorter read length). Intuitively, we want to set this parameter as high as possible, so the repeat graph is less tangled. However, higher values might lead to assembly gaps. In some rare cases (for example in case of biased read length distribution) it makes sense to set this parameter manually."/>
91 <param argument="--asm_coverage" type="integer" optional="true" label="reduced coverage for initial contig assembly (default: not set)" /> 102 <param argument="--asm_coverage" type="integer" optional="true" label="reduced coverage for initial contig assembly (default: not set)" />
92 <param argument="--plasmid" type="boolean" truevalue="--plasmid" falsevalue="" checked="False" label="rescue short unassembled plasmids" /> 103 <param argument="--plasmid" type="boolean" truevalue="--plasmid" falsevalue="" checked="False" label="rescue short unassembled plasmids" />
93 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="False" label="metagenome / uneven coverage mode" /> 104 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="False" label="metagenome / uneven coverage mode" />
94 <param argument="--keep_haplotypes" type="boolean" truevalue="--keep-haplotypes" falsevalue="" checked="False" label="Do not collapse alternative haplotypes"/> 105 <param argument="--no_trestle" type="boolean" truevalue="--no-trestle" falsevalue="" checked="False" label="skip Trestle stage" help="After resolving bridged repeats, Trestle module attempts to resolve simple unbridged repeats (of multiplicity 2) using the heterogeneities between repeat copies"/>
95 <param argument="--trestle" type="boolean" truevalue="--trestle" falsevalue="" checked="False" label="Enable Trestle" help="After resolving bridged repeats, Trestle module attempts to resolve simple unbridged repeats (of multiplicity 2) using the heterogeneities between repeat copies"/>
96 </inputs> 106 </inputs>
97 <outputs> 107 <outputs>
108 <data name="assembly" format="fasta" from_work_dir="out_dir/assembly.fasta" label="${tool.name} on ${on_string} (scaffolds)"/>
98 <data name="assembly_info" format="tabular" from_work_dir="out_dir/assembly_info.txt" label="${tool.name} on ${on_string} (assembly_info)"/> 109 <data name="assembly_info" format="tabular" from_work_dir="out_dir/assembly_info.txt" label="${tool.name} on ${on_string} (assembly_info)"/>
99 <data name="assembly_graph" format="graph_dot" from_work_dir="out_dir/assembly_graph.gv" label="${tool.name} on ${on_string} (assembly_graph)"/> 110 <data name="assembly_graph" format="graph_dot" from_work_dir="out_dir/assembly_graph.gv" label="${tool.name} on ${on_string} (assembly_graph)"/>
100 <data name="assembly_gfa" format="txt" from_work_dir="out_dir/assembly_graph.gfa" label="${tool.name} on ${on_string} (Graphical Fragment Assembly)"/> 111 <data name="assembly_gfa" format="txt" from_work_dir="out_dir/assembly_graph.gfa" label="${tool.name} on ${on_string} (Graphical Fragment Assembly)"/>
101 <data name="flye_log" format="txt" from_work_dir="out_dir/flye.log" label="${tool.name} on ${on_string} (log)"/> 112 <data name="flye_log" format="txt" from_work_dir="out_dir/flye.log" label="${tool.name} on ${on_string} (log)"/>
102 <data name="scaffolds" format="fasta" from_work_dir="out_dir/assembly.fasta" label="${tool.name} on ${on_string} (scaffolds)"/>
103 </outputs> 113 </outputs>
104 <tests> 114 <tests>
105 <test> 115 <test>
106 <param name="inputs" ftype="fasta" value="nanopore.fasta"/> 116 <param name="inputs" ftype="fasta" value="nanopore.fasta"/>
107 <param name="mode" value="--pacbio-raw"/> 117 <param name="mode" value="--pacbio-raw"/>
161 - Graph path (repeat graph path corresponding to this contig/scaffold). Scaffold gaps are marked with ?? symbols, and * symbol denotes a terminal graph node. 171 - Graph path (repeat graph path corresponding to this contig/scaffold). Scaffold gaps are marked with ?? symbols, and * symbol denotes a terminal graph node.
162 172
163 scaffolds.fasta file is a symlink to assembly.fasta, which is retained for backward compatibility. 173 scaffolds.fasta file is a symlink to assembly.fasta, which is retained for backward compatibility.
164 ]]> 174 ]]>
165 </help> 175 </help>
166 <citations> 176 <expand macro="citations" />
167 <citation type="doi">10.1073/pnas.1604560113</citation>
168 <citation type="doi">10.1038/s41587-019-0072-8</citation>
169 </citations>
170 </tool> 177 </tool>