comparison maxbin2.xml @ 0:614d53aa795f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/maxbin2/ commit f67400e30b69d2bb47bffed380ac547041b767ac"
author mbernt
date Tue, 29 Oct 2019 06:44:19 -0400
parents
children b2f46fab765e
comparison
equal deleted inserted replaced
-1:000000000000 0:614d53aa795f
1 <tool id="maxbin2" name="MaxBin2" version="@MAXBIN_VERSION@+galaxy1">
2 <description>clusters metagenomic contigs into bins</description>
3 <macros>
4 <token name="@MAXBIN_VERSION@">2.2.7</token> <!-- referenced by the tool version attribute and by the package requirement version -->
5 </macros>
6 <requirements>
7 <requirement type="package" version="@MAXBIN_VERSION@">maxbin2</requirement>
8 </requirements>
9 <version_command><![CDATA[run_MaxBin.pl -version | head -n 1]]></version_command> <!-- only the first line of the version output is kept -->
10 <command detect_errors="exit_code"><![CDATA[
11 ## generate the read or abundance list files (one dataset path per line)
12 #if $intype_cond.intype_select == 'rds':
13 #for $r in $intype_cond.reads
14 #if $r
15 echo '$r' >> reads_list &&
16 #end if
17 #end for
18 #else if $intype_cond.intype_select == 'abdc':
19 #for $a in $intype_cond.abund
20 #if $a
21 echo '$a' >> abund_list &&
22 #end if
23 #end for
24 #end if
25
26 ## in case of reassembly the IDBA out and err is appended;
27 ## to differentiate this a header is also added before the
28 ## MaxBin2 outputs
29 #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly != ""
30 echo "==== MaxBin2 stdout ====" &&
31 echo "==== MaxBin2 stderr ====" 1>&2 &&
32 #end if
33
34 run_MaxBin.pl
35 -contig '$contig'
36 -out out
37 #if $intype_cond.intype_select == 'rds':
38 -reads_list reads_list
39 $intype_cond.reassembly
40 #else if $intype_cond.intype_select == 'abdc':
41 -abund_list abund_list
42 #end if
43 #if $adv_cond.adv_select == 'yes':
44 -min_contig_length $adv_cond.min_contig_length
45 -max_iteration $adv_cond.max_iteration
46 -prob_threshold $adv_cond.prob_threshold
47 $adv_cond.plotmarker
48 -markerset $adv_cond.markerset
49 #end if
50 -thread \${GALAXY_SLOTS:-1}
51
52 && tar -xf out.marker_of_each_bin.tar.gz
53
54 ## redirect the IDBA out and err file content to stdout and stderr;
55 ## since this is also wanted in the error case ';' is used here to separate the commands.
56 ## The MaxBin2 exit code is saved first and restored at the end, so that dumping the logs cannot mask a MaxBin2 failure
57 #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly != ""
58 ; exit_code_for_galaxy=\$? ; echo "==== IDBA stdout ===="
59 && cat out.idba.out
60 && echo "==== IDBA stderr ====" 1>&2
61 && cat out.idba.err 1>&2 && (exit \$exit_code_for_galaxy)
62 #end if
63 ]]></command>
64 <inputs>
65 <param argument="-contig" type="data" format="fasta,fasta.gz" label="Contig file"/>
66 <conditional name="intype_cond"> <!-- the abundance information comes either from reads or from abundance tables -->
67 <param name="intype_select" type="select" label="Input type">
68 <option value="rds" selected="true">Sequencing Reads</option>
69 <option value="abdc">Abundances</option>
70 </param>
71 <when value="rds">
72 <param name="reads" type="data" format="fasta,fastq" multiple="true" label="Reads file(s)" help="(-read/-read2/...)"/>
73 <param name="output_abundances" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output abundances" help="" />
74 <param argument="-reassembly" type="boolean" truevalue="-reassembly" falsevalue="" checked="false" label="Reassembly" help="Reassembly option is still highly experimental. To use this function, you need to feed MaxBin interleaved paired-end fastq or fasta file." /> <!-- single dash, matching the truevalue that is passed on the command line -->
75 </when>
76 <when value="abdc">
77 <param name="abund" type="data" format="tabular" multiple="true" label="Abundance file(s)" help="(-abund/-abund2/...)" />
78 </when>
79 </conditional>
80 <conditional name="adv_cond"> <!-- the options below are only passed to run_MaxBin.pl when adv_select is 'yes' -->
81 <param name="adv_select" type="select" label="Advanced options">
82 <option value="yes">Yes</option>
83 <option value="no" selected="true">No</option>
84 </param>
85 <when value="no"/>
86 <when value="yes">
87 <param argument="-min_contig_length" type="integer" min="0" value="1000" label="minimum contig length" />
88 <param argument="-max_iteration" type="integer" min="0" value="50" label="Maximum Expectation-Maximization algorithm iteration number" />
89 <param argument="-prob_threshold" type="float" min="0" max="1.0" value="0.5" label="Probability threshold for EM final classification" />
90 <param argument="-plotmarker" type="boolean" truevalue="-plotmarker" falsevalue="" checked="false" label="Generate visualization of the marker gene presence numbers" />
91 <param name="output_marker" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output marker gene presence for bins table" /> <!-- output_* switches only control the output filters; they are not used in the command -->
92 <param name="output_markers" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output marker genes for each bin as fasta" />
93 <param name="output_log" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output log" />
94 <param argument="-markerset" type="select" label="Marker gene set">
95 <option value="107" selected="true">107 marker genes present in >95% of bacteria</option>
96 <option value="40">40 marker gene sets that are universal among bacteria and archaea</option>
97 </param>
98 </when>
99 </conditional>
100 </inputs>
101 <outputs>
102 <!-- default outputs -->
103 <collection name="bins" type="list" label="${tool.name} on ${on_string} (bins)">
104 <discover_datasets pattern="out.(?P&lt;designation&gt;[0-9]+).fasta" format="fasta" visible="false" />
105 </collection>
106 <data name="noclass" format="fasta" label="${tool.name} on ${on_string} (unclassified sequences)" from_work_dir="out.noclass"/>
107 <data name="toshort" format="fasta" label="${tool.name} on ${on_string} (too short sequences)" from_work_dir="out.tooshort"/> <!-- the output name is kept as is because the tests reference it -->
108 <data name="summary" format="tabular" label="${tool.name} on ${on_string} (summary)" from_work_dir="out.summary"/>
109
110 <!-- optional outputs, each enabled by the corresponding input switch -->
111 <data name="log" format="txt" label="${tool.name} on ${on_string} (log)" from_work_dir="out.log">
112 <filter>adv_cond['adv_select']=='yes' and adv_cond['output_log']</filter>
113 </data>
114 <data name="marker" format="tabular" label="${tool.name} on ${on_string} (marker gene presence)" from_work_dir="out.marker">
115 <filter>adv_cond['adv_select']=='yes' and adv_cond['output_marker']</filter>
116 </data>
117 <data name="abundout" format="tabular" label="${tool.name} on ${on_string} (abundances)" from_work_dir="out.abund1">
118 <filter>intype_cond['intype_select']=='rds' and intype_cond['output_abundances']</filter>
119 </data>
120 <data name="plot" format="pdf" label="${tool.name} on ${on_string} (marker gene presence plot)" from_work_dir="out.marker.pdf">
121 <filter>adv_cond['adv_select']=='yes' and adv_cond['plotmarker']</filter>
122 </data>
123 <collection name="markers" type="list" label="${tool.name} on ${on_string} (markers predicted for bins)">
124 <discover_datasets pattern="out.(?P&lt;designation&gt;[0-9]+).marker.fasta" format="fasta" visible="false" />
125 <filter>adv_cond['adv_select']=='yes' and adv_cond['output_markers']</filter>
126 </collection>
127
128 <!-- additional outputs in case of reassembly, all read from the out.reassem directory -->
129 <collection name="reassembly_bins" type="list" label="${tool.name} on ${on_string} (reassembly bins)">
130 <discover_datasets directory="out.reassem" pattern="out.(?P&lt;designation&gt;[0-9]+).fasta" format="fasta" visible="false" />
131 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter>
132 </collection>
133 <collection name="reassembly_reads" type="list" label="${tool.name} on ${on_string} (reassembly reads)">
134 <discover_datasets directory="out.reassem" pattern="out.reads.(?P&lt;designation&gt;[0-9]+)" format="fasta" visible="false" />
135 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter>
136 </collection>
137 <data name="reassembly_noclass" format="fasta" label="${tool.name} on ${on_string} (reassembly unclassified sequences)" from_work_dir="out.reassem/out.reads.noclass">
138 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter>
139 </data>
140 <data name="reassembly_n50" format="txt" label="${tool.name} on ${on_string} (reassembly N50)" from_work_dir="out.reassem/N50.txt">
141 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter>
142 </data>
143 </outputs>
144 <tests>
145 <test expect_num_outputs="4"><!-- test w contigs and reads as input -->
146 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" />
147 <conditional name="intype_cond">
148 <param name="intype_select" value="rds"/>
149 <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/>
150 </conditional>
151 <conditional name="adv_cond">
152 <param name="adv_select" value="no"/>
153 </conditional>
154 <output_collection name="bins" type="list" count="2">
155 <element name="001" file="1/out.001.fasta" ftype="fasta"/>
156 <element name="002" file="1/out.002.fasta" ftype="fasta"/>
157 </output_collection>
158 <output name="summary" file="1/out.summary" ftype="tabular" />
159 <output name="noclass" file="1/out.noclass" ftype="fasta" />
160 <output name="toshort" file="1/out.tooshort" ftype="fasta" />
161 </test>
162 <!-- test w contigs and reads as input test for optional outputs -->
163 <test expect_num_outputs="9">
164 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" />
165 <conditional name="intype_cond">
166 <param name="intype_select" value="rds"/>
167 <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/>
168 <param name="output_abundances" value="true" />
169 </conditional>
170 <conditional name="adv_cond">
171 <param name="adv_select" value="yes"/>
172 <param name="plotmarker" value="true" />
173 <param name="output_marker" value="true" />
174 <param name="output_markers" value="true" />
175 <param name="output_log" value="true" />
176 </conditional>
177 <output_collection name="bins" type="list" count="2">
178 <element name="001" file="1/out.001.fasta" ftype="fasta"/>
179 <element name="002" file="1/out.002.fasta" ftype="fasta"/>
180 </output_collection>
181 <output name="summary" file="1/out.summary" ftype="tabular" />
182 <output name="noclass" file="1/out.noclass" ftype="fasta" />
183 <output name="toshort" file="1/out.tooshort" ftype="fasta" />
184 <output name="log" file="1/out.log" ftype="txt" compare="diff" lines_diff="21" />
185 <output name="abundout" file="1/out.abund1" ftype="tabular" />
186 <output name="marker" file="1/out.marker" ftype="tabular" />
187 <output name="plot" file="1/out.marker.pdf" ftype="pdf" compare="sim_size" />
188 <output_collection name="markers" type="list" count="1">
189 <element name="001" file="1/out.001.marker.fasta" ftype="fasta"/>
190 </output_collection>
191 </test>
192 <!--test w contigs and abundances as input + advanced options -->
193 <test expect_num_outputs="5">
194 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" />
195 <conditional name="intype_cond">
196 <param name="intype_select" value="abdc"/>
197 <param name="abund" value="abundances.tsv" ftype="tabular"/>
198 </conditional>
199 <conditional name="adv_cond">
200 <param name="adv_select" value="yes"/>
201 <param name="min_contig_length" value="500"/>
202 <param name="max_iteration" value="10"/>
203 <param name="prob_threshold" value="0.95"/>
204 <param name="plotmarker" value="true"/> <!-- boolean params are set with true/false, as in the test above -->
205 <param name="markerset" value="107"/>
206 </conditional>
207 <output_collection name="bins" type="list" count="2">
208 <element name="001" file="2/out.001.fasta" ftype="fasta"/>
209 <element name="002" file="2/out.002.fasta" ftype="fasta"/>
210 </output_collection>
211 <output name="summary" file="2/out.summary" ftype="tabular" />
212 <output name="noclass" file="2/out.noclass" ftype="fasta" />
213 <output name="toshort" file="2/out.tooshort" ftype="fasta" />
214 <output name="plot" file="2/out.marker.pdf" ftype="pdf" compare="sim_size" />
215 </test>
216 <!-- test w contigs and reads as input + reassembly-->
217 <test expect_num_outputs="8">
218 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" />
219 <conditional name="intype_cond">
220 <param name="intype_select" value="rds"/>
221 <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/>
222 <param name="reassembly" value="true"/> <!-- boolean params are set with true/false, not with the truevalue string -->
223 </conditional>
224 <conditional name="adv_cond">
225 <param name="adv_select" value="no"/>
226 </conditional>
227 <output_collection name="bins" type="list" count="2">
228 <element name="001" file="3/out.001.fasta" ftype="fasta"/>
229 <element name="002" file="3/out.002.fasta" ftype="fasta"/>
230 </output_collection>
231 <output name="summary" file="3/out.summary" ftype="tabular" />
232 <output name="noclass" file="3/out.noclass" ftype="fasta" />
233 <output name="toshort" file="3/out.tooshort" ftype="fasta" />
234 <output_collection name="reassembly_bins" type="list" count="2">
235 <element name="001" file="3/out.reassem/out.001.fasta" ftype="fasta"/>
236 <element name="002" file="3/out.reassem/out.002.fasta" ftype="fasta"/>
237 </output_collection>
238 <output_collection name="reassembly_reads" type="list" count="2">
239 <element name="001" file="3/out.reassem/out.reads.001" ftype="fasta"/>
240 <element name="002" file="3/out.reassem/out.reads.002" ftype="fasta"/>
241 </output_collection>
242 <output name="reassembly_noclass" file="3/out.reassem/out.reads.noclass" ftype="fasta" />
243 <output name="reassembly_n50" file="3/out.reassem/N50.txt" ftype="txt" />
244 </test>
245 </tests>
246 <help><![CDATA[
247 MaxBin is a software that clusters metagenomic contigs into different bins,
248 each consisting (hopefully) of contigs from one species. MaxBin uses the
249 nucleotide composition information and contig abundance information to
250 achieve binning through an Expectation-Maximization algorithm.
251
252 **Input**:
253
254 MaxBin needs the contigs and contig abundance information. The contig abundance
255 information can be provided in two ways: the user can choose to provide
256
257 - the abundance file or
258 - the sequencing reads in fasta format (and MaxBin will use Bowtie2 to map the
259 sequencing reads against the contigs and generate the abundance information)
260
261 The abundance information can be provided as tabular file:
262
263 For example, assume I have three contigs named A0001, A0002, and A0003, then my abundance file will look like
264
265 A0001 30.89
266 A0002 20.02
267 A0003 78.93
268
269 Reads/abundance files can be given in multiple files.
270
271 By default MaxBin will look for 107 marker genes present in >95% of bacteria.
272 Alternatively you can also choose 40 marker gene sets that are universal among
273 bacteria and archaea (Wu et al., PLoS ONE 2013). This option may be better
274 suited for environments dominated by archaea; however it tends to split genomes
275 into more bins. You can choose between different marker gene sets and see which
276 one works better.
277
278 **Outputs**
279
280 - bins: binned sequences
281 - summary: a summary file describing which contigs are being classified into which bin.
282 - log: a log file recording the core steps of MaxBin algorithm
283 - abundances (only if reads are used as input): the contig abundance information that MaxBin generated by mapping the reads against the contigs
284 - marker: marker gene presence numbers for each bin. This table is ready to be plotted by R or other 3rd-party software.
285 - marker plot (only present if selected in the advanced options): visualization of the marker gene presence numbers using R. Will only appear if -plotmarker is specified.
286 - unclassified sequences: this file stores all sequences that pass the minimum length threshold but are not classified successfully.
287 - too short sequences: this file stores all sequences that do not meet the minimum length threshold.
288 - markers predicted for bins: these data sets store all markers predicted from the individual bins.
289
290 **Reassembly**
291
292 This is an experimental feature of MaxBin. For each read bin it calls IDBA_UD with the pre_correction parameter. Of course, this IDBA_UD call can also be done with the corresponding Galaxy tool.
293
294
295 **More information**
296
297 https://downloads.jbei.org/data/microbial_communities/MaxBin/MaxBin.html
298
299 ]]></help>
300 <citations>
301 <citation type="doi">10.1093/bioinformatics/btv638</citation> <!-- NOTE(review): presumably the MaxBin 2.0 publication; confirm the DOI -->
302 </citations>
303 </tool>