Mercurial > repos > bgruening > nextdenovo
comparison nextdenovo.xml @ 0:8a0b3887160d draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/nextdenovo commit 5890a5b5b51cfe2dc4a1bbe1866ce508a9f9ce9f
author | bgruening |
---|---|
date | Thu, 09 Feb 2023 21:24:46 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8a0b3887160d |
---|---|
1 <tool id="nextdenovo" name="NextDenovo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05"> | |
2 <description>string graph-based de novo assembler for long reads</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="biotools"/> | |
7 <expand macro="requirements" /> | |
8 <version_command>nextDenovo --version</version_command> | |
9 <!-- Stage the inputs under ./read_files, list them in input.fofn, fill the cores/memory placeholders of the config, then run nextDenovo. --> <command detect_errors="exit_code"><![CDATA[ | |
10 mkdir -p './read_files' && | |
11 #for $position, $reads in enumerate($input_reads) | |
12 #set $suffix = $reads.ext | |
13 ln -s '${reads}' './read_files/sample_${position}.${suffix}' && | |
14 #end for | |
15 ls './read_files' -1 | sed -e 's|^|read_files/|' > './input.fofn' && | |
16 sed -e "s/cores/\${GALAXY_SLOTS:-4}/g" -e "s|memory|\$((\${GALAXY_MEMORY_MB:-8192}/1024))|g" '${configfile}' > './configfile' && | |
17 nextDenovo './configfile' | |
18 ]]></command> | |
19 <configfiles> <!-- nextDenovo run configuration template. The command block replaces the literal words "cores" and "memory" with the Galaxy slot count and the memory in GB. The split/fast toggles of the correction_options section are appended to correction_options, and the G flag is emitted once in nextgraph_options. --> | |
20 <configfile name="configfile"><![CDATA[ | |
21 [General] | |
22 job_type = local | |
23 job_prefix = nextDenovo | |
24 task = $task | |
25 rewrite = yes | |
26 deltmp = yes | |
27 parallel_jobs = cores | |
28 input_type = $input_type | |
29 read_type = $read_type | |
30 input_fofn = input.fofn | |
31 | |
32 [correct_option] | |
33 read_cutoff = $read_cutoff | |
34 #if $genome_seed.selector == 'genome' | |
35 genome_size = $genome_seed.genome_size # estimated genome size | |
36 seed_depth = $genome_seed.seed_depth | |
37 #else | |
38 seed_cutoff = $genome_seed.seed_cutoff | |
39 #end if | |
40 blocksize = $blocksize | |
41 sort_options = -m memoryg -t cores -k $ovl_parameters.max_depth_overlap -l $ovl_parameters.max_over_hang_length | |
42 minimap2_options_raw = -t cores --minlen $minimap_parameters.minlen --minmatch $minimap_parameters.minmatch --minide $minimap_parameters.minide --kn $minimap_parameters.kn --wn $minimap_parameters.wn --cn $minimap_parameters.cn --maxhan1 $minimap_parameters.maxhan1 --maxhan2 $minimap_parameters.maxhan2 | |
43 pa_correction = cores | |
44 correction_options = -p cores $correction_options.split $correction_options.fast | |
45 | |
46 [assemble_option] | |
47 minimap2_options_cns = -t cores | |
48 nextgraph_options = -a $np.a $np.c $np.G $np.k $np.A -E $np.E -q $np.q -i $np.i -I $np.I -R $np.R -S $np.S -r $np.r -M $np.M -T $np.T -N $np.N -u $np.u -w $np.w -D $np.D -P $np.P -m $np.m -n $np.n -B $np.B -C $np.C -z $np.z -l $np.l -L $np.L -t $np.t -F $np.F | |
49 ]]></configfile> | |
50 </configfiles> | |
51 <inputs> | |
52 <!-- Long-read inputs; the command template symlinks each dataset into ./read_files and lists them in input.fofn. --> <param name="input_reads" type="data" format="fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz" multiple="true" label="Sequence reads"/> | |
53 <!-- Written to the "task" key of the [General] config section; the output filters also test this value. --> <param name="task" type="select" label="Task"> | |
54 <option value="all">All</option> | |
55 <option value="correct">Correct: only do the correction step</option> | |
56 <option value="assemble">Assemble: only do the assembly step (only work if input type = corrected or read type = HiFi)</option> | |
57 </param> | |
58 <!-- Written to the "input_type" key of the [General] config section. --> <param name="input_type" type="select" label="Input type" help="You can use raw or corrected reads."> | |
59 <option value="raw">Raw</option> | |
60 <option value="corrected">Corrected</option> | |
61 </param> | |
62 <!-- Written to the "read_type" key of the [General] config section. --> <param name="read_type" type="select" label="Read type"> | |
63 <option value="clr">CLR: continuous long read</option> | |
64 <option value="hifi">HiFi: PacBio highly accurate long reads</option> | |
65 <option value="ont">ONT: Nanopore 1D reads</option> | |
66 </param> | |
67 <!-- Written to the "read_cutoff" key of the [correct_option] config section. --> <param name="read_cutoff" type="integer" min="0" value="1000" optional="true" label="Read cutoff" help="filter reads with length smaller than read_cutoff"/> | |
68 <!-- Selects which keys the config template writes to [correct_option]: genome_size plus seed_depth, or seed_cutoff. --> <conditional name="genome_seed"> | |
69 <param name="selector" type="select" label="Provide estimated genome size or seed cutoff" help="It is required to perform some simple statistics (such as length distribution, total amount of data and sequencing depth) on the input data"> | |
70 <option value="genome">Estimated genome size</option> | |
71 <option value="seed">Seed cutoff</option> | |
72 </param> | |
73 <when value="genome"> | |
74 <param name="genome_size" type="text" value="" optional="true" label="Estimated genome size" help="Estimated genome size, suffix K/M/G recognized, used to | |
75 calculate seed_cutoff/seed_cutfiles/blocksize and average depth, it can be omitted when manually setting seed_cutoff. Spaces are not allowed."> | |
76 <sanitizer invalid_char=""> | |
77 <valid initial="string.letters,string.digits"> | |
78 <add value="."/> | |
79 </valid> | |
80 </sanitizer> | |
81 <!-- Anchored at both ends so the whole value must match; a regex validator otherwise only constrains the beginning of the value. --> <validator type="regex">^[0-9KMGkmg.]+$</validator> | |
82 </param> | |
83 <param name="seed_depth" type="integer" min="0" value="45" label="Seed depth" help="Expected seed depth, used to calculate seed_cutoff, co-use with | |
84 genome_size, you can try to set it 30-45 to get a better assembly result." /> | |
85 </when> | |
86 <when value="seed"> | |
87 <param name="seed_cutoff" type="integer" min="0" value="0" optional="true" label="Seed cutoff" help="Minimum seed length. Set it to 0 for calculating it automatically." /> | |
88 </when> | |
89 </conditional> | |
90 <!-- Written to the "blocksize" key of the [correct_option] config section. --> <param name="blocksize" type="text" value="10g" label="Block size" help="Block size for parallel running, split non-seed reads into small files, the maximum size of | |
91 each file is blocksize."> | |
92 <sanitizer invalid_char=""> | |
93 <valid initial="string.letters,string.digits"/> | |
94 </sanitizer> | |
95 <!-- Anchored at both ends so the whole value must match; a regex validator otherwise only constrains the beginning of the value. --> <validator type="regex">^[0-9KMGkmg]+$</validator> | |
96 </param> | |
97 <!-- These two values fill the -k and -l flags of sort_options in the config template. --> <section name="ovl_parameters" title="OVL sort parameters" expanded="true"> | |
98 <param name="max_depth_overlap" type="integer" min="0" value="40" label="Max depth of each overlap" help="This value should be equal or smaller than | |
99 the average sequencing depth." /> | |
100 <param name="max_over_hang_length" type="integer" min="0" value="300" label="Max over hang length to filter"/> | |
101 </section> | |
102 <!-- Each value fills the same-named long option of minimap2_options_raw in the config template. --> <section name="minimap_parameters" title="Minimap2 parameters" expanded="true"> | |
103 <param name="minlen" type="integer" min="0" value="500" label="Minimum overlap length"/> | |
104 <param name="minmatch" type="integer" min="0" value="100" label="Minimum match length"/> | |
105 <param name="minide" type="float" min="0" value="0.05" max="1" label="Minimum identity"/> | |
106 <param name="kn" type="integer" min="0" value="17" max="28" label="K-mer size"/> | |
107 <param name="wn" type="integer" min="0" value="10" label="Minimizer window size"/> | |
108 <param name="cn" type="integer" min="0" value="20" label="Re-align for every n reads"/> | |
109 <param name="maxhan1" type="integer" min="0" value="5000" label="Maximum over hang length for re-align"/> | |
110 <param name="maxhan2" type="integer" min="0" value="500" label="Maximum over hang length for filtering contained reads"/> | |
111 </section> | |
112 <!-- Boolean toggles: a checked box yields the flag text given in truevalue, an unchecked box yields an empty string. --> <section name="correction_options" title="Correction options" expanded="true"> | |
113 <param name="split" type="boolean" truevalue="--split" falsevalue="" checked="false" label="Split" help="Split the corrected seed with un-corrected regions" /> | |
114 <param name="fast" type="boolean" truevalue="-fast" falsevalue="" checked="false" label="Fast" help="0.5-1 times faster mode with a little lower accuracy." /> | |
115 </section> | |
116 <!-- NextGraph settings; the config template reads each one as $np.NAME when building nextgraph_options. --> <section name="np" title="NextGraph parameters"> | |
117 <param argument="-c" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Disable pre-filter chimeric reads."/> | |
118 <param argument="-G" type="boolean" truevalue="-G" falsevalue="" checked="false" label="Retain potential chimeric edges."/> | |
119 <param argument="-k" type="boolean" truevalue="-k" falsevalue="" checked="false" label="Delete complex bubble paths."/> | |
120 <param argument="-A" type="boolean" truevalue="-A" falsevalue="" checked="false" label="Output alternative contigs" | |
121 help="For highly heterozygous genomes, it will increase assembly size."/> | |
122 <param argument="-a" type="select" label="Output format"> | |
123 <option value="1">FASTA</option> | |
124 <option value="3">GFA</option> | |
125 </param> | |
126 <param argument="-E" type="integer" min="0" value="1000" label="Minimum contig length"/> | |
127 <param argument="-q" type="integer" min="0" value="0" label="Minimum short branch length" help="By default it is disabled (value = 0)." /> | |
128 <param argument="-i" type="float" min="0" max="1" value="0.1" label="Minimum identity of alignments"/> | |
129 <param argument="-I" type="float" min="0" max="1" value="0.7" label="Minimum test-to-best identity ratio"/> | |
130 <param argument="-R" type="float" min="0" max="1" value="0" label="Maximum test-to-best identity ratio"/> | |
131 <param argument="-S" type="float" min="0" max="1" value="0.4" label="Minimum test-to-best aligned length ratio"/> | |
132 <param argument="-r" type="float" min="0" max="1" value="0.5" label="Maximum test-to-best score ratio of a low quality edge"/> | |
133 <param argument="-M" type="float" min="0" max="1" value="0.9" label="Minimum test-to-best aligned matches ratio"/> | |
134 <param argument="-T" type="float" min="0" max="1" value="0.6" label="Minimum test-to-best depth ratio of an edge"/> | |
135 <param argument="-N" type="integer" min="1" max="2" value="2" label="Minimum valid nodes of a read"/> | |
136 <param argument="-u" type="integer" min="1" max="2" value="2" label="Minimum contained number to filter"/> | |
137 <param argument="-w" type="integer" min="1" value="3" label="Minimum depth of an edge"/> | |
138 <param argument="-D" type="integer" min="0" value="2" label="Depth of BFS to identify chimeric nodes"/> | |
139 <param argument="-P" type="integer" min="0" value="2" label="Maximum depth multiple of a node for BFS"/> | |
140 <param argument="-m" type="float" min="0" value="1.5" label="Minimum depth multiple of a repeat node"/> | |
141 <param argument="-n" type="float" min="0" value="2000" label="Maximum depth multiple of a node"/> | |
142 <param argument="-B" type="integer" min="0" value="500" label="Maximum length of a bubble"/> | |
143 <param argument="-C" type="integer" min="0" value="20" label="Maximum length of a compound path"/> | |
144 <param argument="-z" type="integer" min="0" value="8" label="Maximum length of a z branch"/> | |
145 <param argument="-l" type="integer" min="0" value="15" label="Maximum length of a short branch"/> | |
146 <param argument="-L" type="integer" min="0" value="5" label="Maximal length of a short loop"/> | |
147 <param argument="-t" type="integer" min="0" value="500" label="Maximal over hang length of dovetails"/> | |
148 <param argument="-F" type="integer" min="0" value="1000" label="Fuzz length for trans-reduction"/> | |
149 </section> | |
150 </inputs> | |
151 <outputs> | |
152 <!-- Assembly statistics; collected unless task is "correct". --> <data name="stats" format="txt" from_work_dir="03.ctg_graph/nd.asm.fasta.stat" label="${tool.name} on ${on_string}: stats"> | |
153 <filter>task != 'correct'</filter> | |
154 </data> | |
155 <data name="asmp" format="txt" from_work_dir="03.ctg_graph/nd.asm.p.fasta" label="${tool.name} on ${on_string}: nd.asm.p.fasta"> | |
156 <filter>task != 'correct'</filter> | |
157 </data> | |
158 <!-- Final assembly. This entry used to duplicate the name "asm" and the stats path, so the assembled contigs were never collected. --> <data name="assembly" format="fasta" from_work_dir="03.ctg_graph/nd.asm.fasta" label="${tool.name} on ${on_string}: nd.asm.fasta"> | |
159 <filter>task != 'correct'</filter> | |
160 </data> | |
161 <!-- Corrected reads; keeps the output name "asm" so existing references to it stay valid. --> <data name="asm" format="txt" from_work_dir="02.cns_align/01.seed_cns.sh.work/seed_cns3" label="${tool.name} on ${on_string}: corrected"> | |
162 <filter>task != 'assemble'</filter> | |
163 </data> | |
164 <!-- The rendered configuration file written by the command block; always collected. --> <data name="config" format="txt" from_work_dir="configfile" label="${tool.name} on ${on_string}: configuration file"/> | |
165 </outputs> | |
166 <tests> | |
167 <!-- Default parameters: correct mode. Only the rendered configuration file is inspected. --> | |
168 <test expect_num_outputs="2"> | |
169 <param name="input_reads" value="nanopore.fasta.gz"/> | |
170 <param name="task" value="correct"/> | |
171 <param name="input_type" value="raw"/> | |
172 <param name="read_type" value="ont"/> | |
173 <conditional name="genome_seed"> | |
174 <param name="selector" value="genome"/> | |
175 <param name="genome_size" value="2k"/> | |
176 <param name="seed_depth" value="45"/> | |
177 </conditional> | |
178 <output name="config"> | |
179 <assert_contents> | |
180 <has_text text="genome_size = 2k"/> | |
181 <has_n_lines n="26"/> | |
182 </assert_contents> | |
183 </output> | |
184 </test> | |
185 <!-- Expected failure with task "all". NOTE(review): read_type "nanopore" is not one of the declared select values (clr, hifi, ont); confirm the failure comes from the asserted stderr message and not from parameter validation. --> <test expect_failure="true"> | |
186 <param name="input_reads" value="nanopore.fasta.gz"/> | |
187 <param name="task" value="all"/> | |
188 <param name="input_type" value="raw"/> | |
189 <param name="read_type" value="nanopore"/> | |
190 <conditional name="genome_seed"> | |
191 <param name="selector" value="genome"/> | |
192 <param name="genome_size" value="2k"/> | |
193 <param name="seed_depth" value="45"/> | |
194 </conditional> | |
195 <assert_stderr> | |
196 <has_text text="The read/seed length is too short, and the assembly result is unexpected and please check the assembly quality carefully." /> | |
197 </assert_stderr> | |
198 </test> | |
199 <!-- Expected failure with task "assemble" on raw input. NOTE(review): read_type "nanopore" is not one of the declared select values (clr, hifi, ont); confirm the failure comes from the asserted stderr message and not from parameter validation. --> <test expect_failure="true"> | |
200 <param name="input_reads" value="nanopore.fasta.gz"/> | |
201 <param name="task" value="assemble"/> | |
202 <param name="input_type" value="raw"/> | |
203 <param name="read_type" value="nanopore"/> | |
204 <conditional name="genome_seed"> | |
205 <param name="selector" value="genome"/> | |
206 <param name="genome_size" value="2k"/> | |
207 <param name="seed_depth" value="45"/> | |
208 </conditional> | |
209 <assert_stderr> | |
210 <has_text text="The read/seed length is too short, and the assembly result is unexpected and please check the assembly quality carefully." /> | |
211 </assert_stderr> | |
212 </test> | |
213 </tests> | |
214 <help><![CDATA[ | |
215 NextDenovo is a string graph-based de novo assembler for long reads (CLR, HiFi and ONT). It uses | |
216 a "correct-then-assemble" strategy similar to canu (no correction step for PacBio HiFi reads), but | |
217 requires significantly fewer computing resources and less storage. | |
218 ]]></help> | |
219 <expand macro="citations" /> | |
220 </tool> |