Mercurial > repos > thanhlv > flye
comparison flye.xml @ 13:0a59ae153827 draft
"planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/flye commit 98d55b7009fa22312b117a0138a17e2037d0cdb3-dirty"
| author | thanhlv |
|---|---|
| date | Thu, 06 Aug 2020 13:42:00 +0000 |
| parents | af4c47bfdd37 |
| children | 13ed2ccb0fb2 |
comparison
equal
deleted
inserted
replaced
| 12:af4c47bfdd37 | 13:0a59ae153827 |
|---|---|
| 1 <tool id="flye" name="Flye assembler" version="@VERSION@+galaxy1"> | 1 <tool id="flye" name="Flye assembler" version="2.8"> |
| 2 <description>of long and error-prone reads</description> | 2 <description>of long and error-prone reads</description> |
| 3 <macros> | 3 <macros> |
| 4 <token name="@VERSION@">2.7</token> | 4 <import>macros.xml</import> |
| 5 </macros> | 5 </macros> |
| 6 <requirements> | 6 <expand macro="requirements" /> |
| 7 <requirement type="package" version="@VERSION@">flye</requirement> | |
| 8 </requirements> | |
| 9 <version_command>flye --version</version_command> | 7 <version_command>flye --version</version_command> |
| 10 <command detect_errors="exit_code"> | 8 <command detect_errors="exit_code"> |
| 11 <![CDATA[ | 9 <![CDATA[ |
| 10 | |
| 11 #for $counter, $input in enumerate($inputs): | |
| 12 | |
| 12 #if $input.is_of_type('fastqsanger', 'fastq'): | 13 #if $input.is_of_type('fastqsanger', 'fastq'): |
| 13 #set $ext = 'fastq' | 14 #set $ext = 'fastq' |
| 14 #elif $input.is_of_type('fastqsanger.gz'): | 15 #elif $input.is_of_type('fastqsanger.gz'): |
| 15 #set $ext = 'fastq.gz' | 16 #set $ext = 'fastq.gz' |
| 16 #elif $input.is_of_type('fastq'): | 17 #elif $input.is_of_type('fastq'): |
| 20 #elif $input.is_of_type('fasta.gz'): | 21 #elif $input.is_of_type('fasta.gz'): |
| 21 #set $ext = 'fasta.gz' | 22 #set $ext = 'fasta.gz' |
| 22 #elif $input.is_of_type('fasta'): | 23 #elif $input.is_of_type('fasta'): |
| 23 #set $ext = 'fasta' | 24 #set $ext = 'fasta' |
| 24 #end if | 25 #end if |
| 25 ln -s '$input' ./input.${ext} && | 26 ln -s '$input' ./input_${counter}.${ext} && |
| 27 #end for | |
| 28 | |
| 26 flye | 29 flye |
| 27 $mode | 30 $mode |
| 28 ./input.$ext | 31 #for $counter, $input in enumerate($inputs): |
| 32 ./input_${counter}.$ext | |
| 33 #end for | |
| 34 | |
| 29 -o out_dir | 35 -o out_dir |
| 30 -g '$g' | 36 #if $g: |
| 37 -g '$g' | |
| 38 #end if | |
| 31 -t \${GALAXY_SLOTS:-4} | 39 -t \${GALAXY_SLOTS:-4} |
| 32 -i $i | 40 -i $i |
| 33 #if $m: | 41 #if $m: |
| 34 -m '$m' | 42 -m '$m' |
| 35 #end if | 43 #end if |
| 36 #if $asm_coverage: | 44 #if $asm_coverage: |
| 37 --asm-coverage '$asm_coverage' | 45 --asm-coverage '$asm_coverage' |
| 38 #end if | 46 #end if |
| 39 #if $plasmid: | 47 #if $plasmid: |
| 40 $plasmid | 48 '$plasmid' |
| 41 #end if | 49 #end if |
| 42 #if $meta: | 50 #if $meta: |
| 43 $meta | 51 '$meta' |
| 44 #end if | 52 #end if |
| 45 $keep_haplotypes | 53 #if $no_trestle: |
| 46 $trestle | 54 '$no_trestle' |
| 55 #end if | |
| 47 2>&1 | 56 2>&1 |
| 48 ]]> </command> | 57 ]]> </command> |
| 49 <inputs> | 58 <inputs> |
| 50 <param name="input" type="data" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger.gz,fastqsanger" multiple="false" label="Input reads"> | 59 <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger.gz,fastqsanger" multiple="true" label="Input reads"> |
| 51 <help><![CDATA[ | 60 <help><![CDATA[ |
| 52 | 61 |
| 53 Input reads could be in FASTA or FASTQ format, uncompressed | 62 Input reads could be in FASTA or FASTQ format, uncompressed |
| 54 or compressed with gz. Currently, raw and corrected reads | 63 or compressed with gz. Currently, raw and corrected reads |
| 55 from PacBio and ONT are supported. The expected error rates are | 64 from PacBio and ONT are supported. The expected error rates are |
| 65 <option value="--nano-corr">Nanopore corrected</option> | 74 <option value="--nano-corr">Nanopore corrected</option> |
| 66 <option value="--pacbio-raw">PacBio raw</option> | 75 <option value="--pacbio-raw">PacBio raw</option> |
| 67 <option value="--pacbio-corr">PacBio corrected</option> | 76 <option value="--pacbio-corr">PacBio corrected</option> |
| 68 <option value="--subassemblies">high-quality contig-like input</option> | 77 <option value="--subassemblies">high-quality contig-like input</option> |
| 69 </param> | 78 </param> |
| 70 <param argument="-g" type="text" label="estimated genome size (for example, 5m or 2.6g)"> | 79 <param argument="-g" optional="true" type="text" label="estimated genome size (for example, 5m or 2.6g)"> |
| 71 <help> | 80 <help> |
| 72 <![CDATA[ | 81 <![CDATA[ |
| 82 <span>Since version 2.8, the genome size estimate is no longer required as input. However, it must still be provided when the --asm-coverage option is used. </span> | |
| 83 <br> | |
| 73 <span>The genome size estimate is used for solid k-mer selection in the | 84 <span>The genome size estimate is used for solid k-mer selection in the |
| 74 initial disjointig assembly stage. <b>Flye is not very sensitive to this | 85 initial disjointig assembly stage. <b>Flye is not very sensitive to this |
| 75 parameter, and the estimate could be rough</b>. It is ok if the parameter is | 86 parameter, and the estimate could be rough</b>. It is ok if the parameter is |
| 76 within 0.5x-2x of the actual genome size. If the final assembly size is | 87 within 0.5x-2x of the actual genome size. If the final assembly size is |
| 77 very different from the initial guess, consider re-running the pipeline | 88 very different from the initial guess, consider re-running the pipeline |
| 78 with an updated estimate for better results.</span> | 89 with an updated estimate for better results.</span> |
| 79 <br> | 90 <br> |
| 80 <span>An alternative option is to run Flye in <b>--meta</b> mode, which uses a different | 91 <span>An alternative option is to run Flye in <b>--meta</b> mode, which uses a different |
| 81 approach for solid k-mer selection. This mode is almost independent from the | 92 approach for solid k-mer selection. This mode is almost independent from the |
| 82 genome size parameter (you still need to provide an estimate for the selection | 93 genome size parameter (you still need to provide an estimate for the selection |
| 83 of some other parameters). When assembly is completed, you can re-run in the | 94 of some other parameters). When assembly is completed, you can re-run in the |
| 84 normal mode with the inferred genome size.</span> | 95 normal mode with the inferred genome size.</span> |
| 85 ]]> | 96 ]]> |
| 89 <param argument="-i" type="integer" value="1" label="number of polishing iterations" /> | 100 <param argument="-i" type="integer" value="1" label="number of polishing iterations" /> |
| 90 <param argument="-m" type="integer" optional="true" label="minimum overlap between reads (default: auto)" help="This sets a minimum overlap length for two reads to be considered overlapping. In the latest Flye versions, this parameter is chosen automatically based on the read length distribution (reads N90) and does not require manual setting. Typical value is 3k-5k (and down to 1k for datasets with shorter read length). Intuitively, we want to set this parameter as high as possible, so the repeat graph is less tangled. However, higher values might lead to assembly gaps. In some rare cases (for example in case of biased read length distribution) it makes sense to set this parameter manually."/> | 101 <param argument="-m" type="integer" optional="true" label="minimum overlap between reads (default: auto)" help="This sets a minimum overlap length for two reads to be considered overlapping. In the latest Flye versions, this parameter is chosen automatically based on the read length distribution (reads N90) and does not require manual setting. Typical value is 3k-5k (and down to 1k for datasets with shorter read length). Intuitively, we want to set this parameter as high as possible, so the repeat graph is less tangled. However, higher values might lead to assembly gaps. In some rare cases (for example in case of biased read length distribution) it makes sense to set this parameter manually."/> |
| 91 <param argument="--asm_coverage" type="integer" optional="true" label="reduced coverage for initial contig assembly (default: not set)" /> | 102 <param argument="--asm_coverage" type="integer" optional="true" label="reduced coverage for initial contig assembly (default: not set)" /> |
| 92 <param argument="--plasmid" type="boolean" truevalue="--plasmid" falsevalue="" checked="False" label="rescue short unassembled plasmids" /> | 103 <param argument="--plasmid" type="boolean" truevalue="--plasmid" falsevalue="" checked="False" label="rescue short unassembled plasmids" /> |
| 93 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="False" label="metagenome / uneven coverage mode" /> | 104 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="False" label="metagenome / uneven coverage mode" /> |
| 94 <param argument="--keep_haplotypes" type="boolean" truevalue="--keep-haplotypes" falsevalue="" checked="False" label="Do not collapse alternative haplotypes"/> | 105 <param argument="--no_trestle" type="boolean" truevalue="--no-trestle" falsevalue="" checked="False" label="skip Trestle stage" help="After resolving bridged repeats, Trestle module attempts to resolve simple unbridged repeats (of multiplicity 2) using the heterogeneities between repeat copies"/> |
| 95 <param argument="--trestle" type="boolean" truevalue="--trestle" falsevalue="" checked="False" label="Enable Trestle" help="After resolving bridged repeats, Trestle module attempts to resolve simple unbridged repeats (of multiplicity 2) using the heterogeneities between repeat copies"/> | |
| 96 </inputs> | 106 </inputs> |
| 97 <outputs> | 107 <outputs> |
| 108 <data name="assembly" format="fasta" from_work_dir="out_dir/assembly.fasta" label="${tool.name} on ${on_string} (scaffolds)"/> | |
| 98 <data name="assembly_info" format="tabular" from_work_dir="out_dir/assembly_info.txt" label="${tool.name} on ${on_string} (assembly_info)"/> | 109 <data name="assembly_info" format="tabular" from_work_dir="out_dir/assembly_info.txt" label="${tool.name} on ${on_string} (assembly_info)"/> |
| 99 <data name="assembly_graph" format="graph_dot" from_work_dir="out_dir/assembly_graph.gv" label="${tool.name} on ${on_string} (assembly_graph)"/> | 110 <data name="assembly_graph" format="graph_dot" from_work_dir="out_dir/assembly_graph.gv" label="${tool.name} on ${on_string} (assembly_graph)"/> |
| 100 <data name="assembly_gfa" format="txt" from_work_dir="out_dir/assembly_graph.gfa" label="${tool.name} on ${on_string} (Graphical Fragment Assembly)"/> | 111 <data name="assembly_gfa" format="txt" from_work_dir="out_dir/assembly_graph.gfa" label="${tool.name} on ${on_string} (Graphical Fragment Assembly)"/> |
| 101 <data name="flye_log" format="txt" from_work_dir="out_dir/flye.log" label="${tool.name} on ${on_string} (log)"/> | 112 <data name="flye_log" format="txt" from_work_dir="out_dir/flye.log" label="${tool.name} on ${on_string} (log)"/> |
| 102 <data name="scaffolds" format="fasta" from_work_dir="out_dir/assembly.fasta" label="${tool.name} on ${on_string} (scaffolds)"/> | |
| 103 </outputs> | 113 </outputs> |
| 104 <tests> | 114 <tests> |
| 105 <test> | 115 <test> |
| 106 <param name="inputs" ftype="fasta" value="nanopore.fasta"/> | 116 <param name="inputs" ftype="fasta" value="nanopore.fasta"/> |
| 107 <param name="mode" value="--pacbio-raw"/> | 117 <param name="mode" value="--pacbio-raw"/> |
| 161 - Graph path (repeat graph path corresponding to this contig/scaffold). Scaffold gaps are marked with ?? symbols, and * symbol denotes a terminal graph node. | 171 - Graph path (repeat graph path corresponding to this contig/scaffold). Scaffold gaps are marked with ?? symbols, and * symbol denotes a terminal graph node. |
| 162 | 172 |
| 163 The scaffolds.fasta file is a symlink to assembly.fasta, retained for backward compatibility. | 173 The scaffolds.fasta file is a symlink to assembly.fasta, retained for backward compatibility. |
| 164 ]]> | 174 ]]> |
| 165 </help> | 175 </help> |
| 166 <citations> | 176 <expand macro="citations" /> |
| 167 <citation type="doi">10.1073/pnas.1604560113</citation> | |
| 168 <citation type="doi">10.1038/s41587-019-0072-8</citation> | |
| 169 </citations> | |
| 170 </tool> | 177 </tool> |
