comparison bakta.xml @ 3:eea334d9988b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/bakta commit 73af464cc860250c3fa3dd433602283ab5a44f53-dirty
author pimarin
date Thu, 22 Dec 2022 15:01:43 +0000
parents ca9e2125c5de
children 591cae6ef29d
comparison
equal deleted inserted replaced
2:ca9e2125c5de 3:eea334d9988b
1 <?xml version="1.0" encoding="UTF-8"?> 1 <tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2
3 <tool id="bakta" name="Bakta genome annotation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
4 <description> 2 <description>
5 Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification 3 genome annotation via alignment-free sequence identification
6 </description> 4 </description>
7 <macros> 5 <macros>
8 <import>macro.xml</import> 6 <import>macro.xml</import>
9 </macros> 7 </macros>
10 <expand macro='edam'/> 8 <expand macro='edam'/>
11 <expand macro='xrefs'/> 9 <expand macro='xrefs'/>
12 <expand macro="requirements"/> 10 <expand macro="requirements"/>
13 <expand macro="version_command"/> 11 <expand macro="version_command"/>
14 12
15 <command detect_errors="aggressive"><![CDATA[ 13 <command detect_errors="aggressive"><![CDATA[
14 mkdir ./database_path &&
15 ln -s '$(input_option.bakta_db_select.fields.path)/'* database_path &&
16 ln -s '$(input_option.amrfinder_db_select.fields.path)' database_path &&
17
16 bakta 18 bakta
17 #*====================================== 19 #*======================================
18 CPU option 20 CPU option
19 ======================================*# 21 ======================================*#
20 --threads \${GALAXY_SLOTS:-1} 22 --threads \${GALAXY_SLOTS:-1}
21 #*====================================== 23 #*======================================
22 Bakta database 24 Bakta database
23 ======================================*# 25 ======================================*#
24 --db $input_option.db_select.fields.path 26 --db ./database_path
25 #if $input_option.min_contig_length 27 #if $input_option.min_contig_length
26 --min-contig-length $input_option.min_contig_length 28 --min-contig-length $input_option.min_contig_length
27 #else if $annotation.compliant 29 #else if $annotation.compliant
28 --min-contig-length 200 30 --min-contig-length 200
29 #else 31 #else
70 #end if 72 #end if
71 #*====================================== 73 #*======================================
72 Workflow OPTIONS 74 Workflow OPTIONS
73 skip some step of the bakta analysis 75 skip some step of the bakta analysis
74 ======================================*# 76 ======================================*#
75 $workflow.skip_trna 77
76 $workflow.skip_tmrna 78 #echo " ".join($workflow.skip_analysis)
77 $workflow.skip_rrna 79
78 $workflow.skip_ncrna
79 $workflow.skip_ncrna_region
80 $workflow.skip_crispr
81 $workflow.skip_cds
82 $workflow.skip_sorf
83 $workflow.skip_gap
84 $workflow.skip_ori
85 #*====================================== 80 #*======================================
86 Genome file 81 Genome file
87 ======================================*# 82 ======================================*#
88 '$input_option.input_file' 83 '$input_option.input_file'
89 #*====================================== 84 #*======================================
90 LOG file 85 LOG file
91 ======================================*# 86 ======================================*#
92 &> '$logfile' 87 | tee '$logfile'
93 ]]></command> 88 ]]></command>
94 <inputs> 89 <inputs>
95 <!-- DB and file INPUT --> 90 <!-- DB and file INPUT -->
96 <section name="input_option" title="Input/Output options" expanded="true"> 91 <section name="input_option" title="Input/Output options" expanded="true">
97 <param name="db_select" type="select" label="The bakta database"> 92 <param name="bakta_db_select" type="select" label="The bakta database">
98 <options from_data_table="bakta_database"> 93 <options from_data_table="bakta_database">
99 <validator message="No bakta database is available" type="no_options"/> 94 <filter type="static_value" value="@BAKTA_VERSION@" column="bakta_version"/>
95 <column name="dbkey" index="2"/>
96 <validator message="No bakta database is available" type="no_options"/>
100 </options> 97 </options>
101 </param> 98 </param>
99 <param name="amrfinder_db_select" type="select" label="The amrfinderplus database">
100 <options from_data_table="amrfinderplus_database">
101 <validator message="No amrfinderplus database is available" type="no_options"/>
102 </options>
103 </param>
104
102 <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/> 105 <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/>
103 <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/> 106 <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/>
104 </section> 107 </section>
105 <!-- Organism INFORMATION OPTIONS --> 108 <!-- Organism INFORMATION OPTIONS -->
106 <section name="organism" title="Optional organism options" expanded="false"> 109 <section name="organism" title="Optional organism options" expanded="false">
107 <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia"> 110 <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia">
108 <validator type="regex">^[A-Z]</validator> 111 <validator type="regex">^[a-zA-Z]+$</validator>
109 </param> 112 </param>
110 <param argument="--species" type="text" optional="true" label="Specify species name" help="ex. 'coli O157:H7'"/> 113 <param argument="--species" type="text" optional="true" label="Specify species name" help="ex. 'coli O157:H7'">
114 <validator type="regex">^[a-zA-Z0-9\s(:\-/)]+$</validator>
115 </param>
111 <param argument="--strain" type="text" optional="true" label="Specify strain name" help="ex. Sakai"> 116 <param argument="--strain" type="text" optional="true" label="Specify strain name" help="ex. Sakai">
112 <validator type="regex">^[A-Z]</validator> 117 <validator type="regex">^[a-zA-Z]+$</validator>
113 </param> 118 </param>
114 <param argument="--plasmid" type="text" optional="true" label="Specify plasmid name" help="ex. pOSAK1"/> 119 <param argument="--plasmid" type="text" optional="true" label="Specify plasmid name" help="ex. pOSAK1">
120 <validator type="regex">^[a-zA-Z0-9\s(:\-/)]+$</validator>
121 </param>
115 </section> 122 </section>
116 <!-- ANNOTATION --> 123 <!-- ANNOTATION -->
117 <section name="annotation" title="Optional annotation"> 124 <section name="annotation" title="Optional annotation">
118 <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/> 125 <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/>
119 <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/> 126 <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/>
125 <option value="+">Gram+</option> 132 <option value="+">Gram+</option>
126 <option value="-">Gram-</option> 133 <option value="-">Gram-</option>
127 <option value="?" selected="true">Unknown</option> 134 <option value="?" selected="true">Unknown</option>
128 </param> 135 </param>
129 <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/> 136 <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/>
130 <param argument="--replicons" type="data" format="tsv, csv" optional="true" label="Replicon information table (tsv/csv)" help=""/> 137 <param argument="--replicons" type="data" format="tsv,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/>
131 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/> 138 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/>
132 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/> 139 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/>
133 </section> 140 </section>
134 <!-- PARAMETER FOR WORKFLOW ANALYSIS --> 141 <!-- PARAMETER FOR WORKFLOW ANALYSIS -->
135 <section name="workflow" title="Workflow option to skip steps"> 142 <section name="workflow" title="Workflow option to skip steps">
136 <param name="skip_trna" type="boolean" truevalue="--skip-trna" falsevalue="" label="Skip tRNA detection and annotation" help="(--skip-trna)"/> 143 <param name="skip_analysis" type="select" display="checkboxes" multiple="true" label="Select steps to skip">
137 <param name="skip_tmrna" type="boolean" truevalue="--skip-tmrna" falsevalue="" label="Skip tmRNA detection and annotation" help="(--skip-tmrna)"/> 144 <option value="--skip-trna"> Skip tRNA detection and annotation </option>
138 <param name="skip_rrna" type="boolean" truevalue="--skip-rrna" falsevalue="" label=" Skip rRNA detection and annotation" help="(--skip-rrna)"/> 145 <option value="--skip-tmrna"> Skip tmRNA detection and annotation </option>
139 <param name="skip_ncrna" type="boolean" truevalue="--skip-ncrna" falsevalue="" label=" Skip ncRNA detection and annotation" help="(--skip-ncrna)"/> 146 <option value="--skip-rrna"> Skip rRNA detection and annotation </option>
140 <param name="skip_ncrna_region" type="boolean" truevalue="--skip-ncrna-region" falsevalue="" label="Skip ncRNA region detection and annotation" help="(--skip-ncrna-region)"/> 147 <option value="--skip-ncrna"> Skip ncRNA detection and annotation </option>
141 <param name="skip_crispr" type="boolean" truevalue="--skip-crispr" falsevalue="" label="Skip CRISPR array detection and annotation" help="(--skip-crispr)"/> 148 <option value="--skip-ncrna-region"> Skip ncRNA region detection and annotation </option>
142 <param name="skip_cds" type="boolean" truevalue="--skip-cds" falsevalue="" label="Skip CDS detection and annotation" help="(--skip-cds)"/> 149 <option value="--skip-crispr"> Skip CRISPR array detection and annotation </option>
143 <param name="skip_sorf" type="boolean" truevalue="--skip-sorf" falsevalue="" label="Skip sORF detection and annotation" help="(--skip-sorf)"/> 150 <option value="--skip-cds"> Skip CDS detection and annotation </option>
144 <param name="skip_gap" type="boolean" truevalue="--skip-gap" falsevalue="" label="Skip gap detection and annotation" help="(--skip-gap)"/> 151 <option value="--skip-pseudo"> Skip pseudogene detection and annotation </option>
145 <param name="skip_ori" type="boolean" truevalue="--skip-ori" falsevalue="" label="Skip oriC/oriT detection and annotation" help="(--skip_ori)"/> 152 <option value="--skip-sorf"> Skip sORF detection and annotation </option>
153 <option value="--skip-gap"> Skip gap detection and annotation </option>
154 <option value="--skip-ori"> Skip oriC/oriT detection and annotation </option>
155 </param>
146 </section> 156 </section>
157 <section name="output_files" title="Selection of the output files">
158 <param name="output_selection" type="select" display="checkboxes" multiple="true" label="Output files selection">
159 <option value="file_tsv" selected="true"> Annotation file in TSV </option>
160 <option value="file_gff3" selected="true"> Annotation and sequence in GFF3 </option>
161 <option value="file_gbff" selected="false"> Annotations and sequences in GenBank format </option>
162 <option value="file_embl" selected="false"> Annotations and sequences in EMBL format </option>
163 <option value="file_fna" selected="false"> Replicon/contig DNA sequences as FASTA </option>
164 <option value="file_ffn" selected="true"> Feature nucleotide sequences as FASTA </option>
165 <option value="file_faa" selected="false"> CDS/sORF amino acid sequences as FASTA </option>
166 <option value="hypo_tsv" selected="false"> Hypothetical protein CDS in TSV</option>
167 <option value="hypo_fa" selected="false"> Hypothetical protein CDS amino sequences as FASTA</option>
168 <option value="sum_txt" selected="false"> Summary as TXT</option>
169 <option value="file_json" selected="false"> Information on each annotated feature as JSON </option>
170 <option value="file_plot" selected="true"> Plot of the annotation result as SVG </option>
171 <option value="log_txt" selected="false"> Log file as TXT </option>
172 </param>
173 </section>
174
147 </inputs> 175 </inputs>
148 <outputs> 176 <outputs>
149 <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"/> 177 <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: annotation_summary">
150 <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: bakta_output.tsv"/> 178 <filter> output_files['output_selection'] and "file_tsv" in output_files['output_selection'] </filter>
151 <data name="annotation_gff3" format="tabular" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: bakta_output.gff3"/> 179 </data>
152 <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff"/> 180 <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: Annotation_and_sequences">
153 <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl"/> 181 <filter> output_files['output_selection'] and "file_gff3" in output_files['output_selection'] </filter>
154 <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: bakta_output.fna"/> 182 </data>
155 <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: bakta_output.ffn"/> 183 <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff">
156 <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: bakta_output.faa"/> 184 <filter> output_files['output_selection'] and "file_gbff" in output_files['output_selection'] </filter>
157 <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.tsv"> 185 </data>
158 <filter>workflow['skip_cds'] == False</filter> 186 <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl">
159 </data> 187 <filter> output_files['output_selection'] and "file_embl" in output_files['output_selection'] </filter>
160 <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.faa"> 188 </data>
161 <filter>workflow['skip_cds'] == False</filter> 189 <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: Contig_sequences">
162 </data> 190 <filter> output_files['output_selection'] and "file_fna" in output_files['output_selection'] </filter>
163 <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: bakta_output.txt"/> 191 </data>
164 <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: bakta_output.json"/> 192 <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: Nucleotide_sequences">
193 <filter> output_files['output_selection'] and "file_ffn" in output_files['output_selection'] </filter>
194 </data>
195 <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: Amino_acid_sequences">
196 <filter> output_files['output_selection'] and "file_faa" in output_files['output_selection'] </filter>
197 </data>
198 <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: hypothetical_annotation_summary">
199 <filter> output_files['output_selection'] and "hypo_tsv" in output_files['output_selection'] </filter>
200 </data>
201 <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: hypothetical_amino_acid_sequences">
202 <filter> output_files['output_selection'] and "hypo_fa" in output_files['output_selection'] </filter>
203 </data>
204 <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: Analysis_summary">
205 <filter> output_files['output_selection'] and "sum_txt" in output_files['output_selection'] </filter>
206 </data>
207 <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: annotation_machine_readable">
208 <filter> output_files['output_selection'] and "file_json" in output_files['output_selection'] </filter>
209 </data>
210 <data name="annotation_plot" format="svg" from_work_dir="bakta_output.svg" label="${tool.name} on ${on_string}: Plot of the annotation">
211 <filter> output_files['output_selection'] and "file_plot" in output_files['output_selection'] </filter>
212 </data>
213 <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file">
214 <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter>
215 </data>
165 </outputs> 216 </outputs>
166
167 <tests> 217 <tests>
168 <test expect_num_outputs="12"> <!-- TEST_1 database + input --> 218 <test expect_num_outputs="13"> <!-- TEST_1 database + input -->
169 <section name="input_option" > 219 <section name="input_option" >
170 <param name="db_select" value="test-db-bakta"/> 220 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
171 <param name="input_file" value="NC_002127.1.fna"/> 221 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
172 </section> 222 <param name="input_file" value="NC_002127.1.fna"/>
173 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="4"> 223 <param name="min_contig_length" value="250"/>
174 <assert_contents> 224 </section>
175 <has_text_matching n="1" expression="Genome size: 3,306 bp"/> 225 <section name="output_files">
176 <has_n_lines n="90" delta="1"/> 226 <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/>
177 </assert_contents> 227 </section>
178 </output> 228 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2"/>
179 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2"> 229 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"/>
180 <assert_contents> 230 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="8"/>
181 <has_text_matching n="3" expression="contig_1"/> 231 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="6"/>
182 <has_n_lines n="6" delta="1"/> 232 <output name="annotation_fna" value="TEST_1/TEST_1.fna"/>
183 </assert_contents> 233 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/>
184 </output> 234 <output name="annotation_faa" value="TEST_1/TEST_1.faa"/>
185 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"> 235 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv" lines_diff="4"/>
186 <assert_contents> 236 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/>
187 <has_text_matching expression="AGCTATTCCTGGTTTCATATGAAACAAACCATGCCTGTTCTCATGCCAGTAAGTGTAGCA"/> 237 <output name="summary_txt" value="TEST_1/TEST_1.txt" lines_diff="4"/>
188 <has_n_lines n="70" delta="1"/> 238 <output name="annotation_plot">
189 </assert_contents> 239 <assert_contents>
190 </output> 240 <has_size value="418991" delta="1000"/>
191 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="4"> 241 </assert_contents>
192 <assert_contents> 242 </output>
193 <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCL"/> 243
194 <has_n_lines n="133" delta="1"/> 244 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="6"/>
195 </assert_contents> 245 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="6"/>
196 </output>
197 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="2">
198 <assert_contents>
199 <has_text_matching expression="FIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTFQSDNTVTAKKQQCFVGNSNLQTGQ"/>
200 <has_n_lines n="137" delta="2"/>
201 </assert_contents>
202 </output>
203 <output name="annotation_fna" value="TEST_1/TEST_1.fna">
204 <assert_contents>
205 <has_text_matching expression="TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC"/>
206 <has_n_lines n="57"/>
207 </assert_contents>
208 </output>
209 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn">
210 <assert_contents>
211 <has_text_matching expression="TCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGC"/>
212 <has_n_lines n="6"/>
213 </assert_contents>
214 </output>
215 <output name="annotation_faa" value="TEST_1/TEST_1.faa">
216 <assert_contents>
217 <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI"/>
218 <has_n_lines n="6"/>
219 </assert_contents>
220 </output>
221 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv">
222 <assert_contents>
223 <has_text_matching expression="DOGAIA_00010"/>
224 <has_n_lines n="6"/>
225 </assert_contents>
226 </output>
227 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa">
228 <assert_contents>
229 <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCLFIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTF"/>
230 <has_n_lines n="6"/>
231 </assert_contents>
232 </output>
233 <output name="summary_txt" value="TEST_1/TEST_1.txt">
234 <assert_contents>
235 <has_text_matching expression="N50: 3306"/>
236 <has_n_lines n="29"/>
237 </assert_contents>
238 </output>
239 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="4">
240 <assert_contents>
241 <has_text_matching expression="0.6524500907441017"/>
242 <has_n_lines n="112" delta="1"/>
243 </assert_contents>
244 </output>
245 </test>
246 <test expect_num_outputs="12"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps -->
247 <section name="input_option" >
248 <param name="db_select" value="test-db-bakta"/>
249 <param name="input_file" value="NC_002127.1.fna"/>
250 <param name="min_contig_length" value="250"/>
251 </section>
252 <section name="organism">
253 <param name="genus" value="Escherichia"/>
254 <param name="species" value="coli O157:H7"/>
255 <param name="strain" value="Sakai"/>
256 <param name="plasmid" value="pOSAK1"/>
257 </section>
258 <section name="annotation">
259 <param name="--gram" value="-"/>
260 <param name="keep_contig_headers" value="true"/>
261 </section>
262 <section name="workflow">
263 <param name="skip_crispr" value="true"/>
264 <param name="skip_gap" value="true"/>
265 </section>
266 <output name="logfile" value="TEST_2/TEST_2.log" lines_diff="4">
267 <assert_contents>
268 <has_text_matching expression="Genome size: 3,306 bp"/>
269 </assert_contents>
270 </output>
271 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="2">
272 <assert_contents>
273 <has_text_matching expression="DOGAIA_00005"/>
274 </assert_contents>
275 </output>
276 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="2">
277 <assert_contents>
278 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/>
279 </assert_contents>
280 </output>
281 <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="5">
282 <assert_contents>
283 <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSV"/>
284 </assert_contents>
285 </output>
286 <output name="annotation_embl" value="TEST_2/TEST_2.embl" lines_diff="4">
287 <assert_contents>
288 <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKK"/>
289 </assert_contents>
290 </output>
291 <output name="annotation_fna" value="TEST_2/TEST_2.fna"/>
292 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
293 <output name="annotation_faa" value="TEST_2/TEST_2.faa"/>
294 <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv"/>
295 <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa"/>
296 <output name="summary_txt" value="TEST_2/TEST_2.txt">
297 <assert_contents>
298 <has_text_matching expression="N50: 3306"/>
299 </assert_contents>
300 </output>
301 <output name="annotation_json" value="TEST_2/TEST_2.json" lines_diff="4">
302 <assert_contents>
303 <has_text_matching expression="0.6524500907441017"/>
304 </assert_contents>
305 </output>
306 </test>
307 <test expect_num_outputs="10"> <!-- TEST_3 test all skip steps -->
308 <section name="input_option" >
309 <param name="db_select" value="test-db-bakta"/>
310 <param name="input_file" value="NC_002127.1.fna"/>
311 <param name="min_contig_length" value="250"/>
312 </section>
313 <section name="workflow">
314 <param name="skip_trna" value="true"/>
315 <param name="skip_tmrna" value="true"/>
316 <param name="skip_rrna" value="true"/>
317 <param name="skip_ncrna" value="true"/>
318 <param name="skip_ncrna_region" value="true"/>
319 <param name="skip_crispr" value="true"/>
320 <param name="skip_cds" value="true"/>
321 <param name="skip_sorf" value="true"/>
322 <param name="skip_gap" value="true"/>
323 <param name="skip_ori" value="true"/>
324 </section>
325 <output name="logfile" value="TEST_3/TEST_3.log" lines_diff="4">
326 <assert_contents>
327 <has_text_matching expression="Genome size: 3,306 bp"/>
328 </assert_contents>
329 </output>
330 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="1">
331 <assert_contents>
332 <has_n_lines n="3" delta="1"/>
333 </assert_contents>
334 </output>
335 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="2">
336 <assert_contents>
337 <has_n_lines n="67" delta="1"/>
338 </assert_contents>
339 </output>
340 <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="10"/>
341 <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4"/>
342 <output name="annotation_fna" value="TEST_3/TEST_3.fna"/>
343 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
344 <output name="annotation_faa" value="TEST_3/TEST_3.faa"/>
345 <output name="summary_txt" value="TEST_3/TEST_3.txt">
346 <assert_contents>
347 <has_text_matching expression="GC: 43.4"/>
348 </assert_contents>
349 </output>
350 <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4"/>
351 </test> 246 </test>
352 <test expect_num_outputs="12"> <!-- TEST_4 annotations --> 247 <test expect_num_outputs="4"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps -->
353 <section name="input_option" > 248 <section name="input_option" >
354 <param name="db_select" value="test-db-bakta"/> 249 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
250 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
355 <param name="input_file" value="NC_002127.1.fna"/> 251 <param name="input_file" value="NC_002127.1.fna"/>
252 <param name="min_contig_length" value="250"/>
253 </section>
254 <section name="organism">
255 <param name="genus" value="Escherichia"/>
256 <param name="species" value="coli O157:H7"/>
257 <param name="strain" value="Sakai"/>
258 <param name="plasmid" value="pOSAK1"/>
356 </section> 259 </section>
357 <section name="annotation"> 260 <section name="annotation">
358 <param name="complete" value="true"/> 261 <param name="--gram" value="-"/>
359 <param name="translation_table" value="4"/> 262 <param name="keep_contig_headers" value="true"/>
360 <param name="prodigal" value="prodigal.tf"/> 263 </section>
361 <param name="replicons" value="replicons.tsv"/> 264 <section name="workflow">
362 <param name="compliant" value="true"/> 265 <param name="skip_analysis" value="--skip-trna,--skip-tmrna"/>
363 <param name="proteins" value="user-proteins.faa"/> 266 </section>
364 </section> 267 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="4">
365 <output name="logfile" value="TEST_4/TEST_4.log" lines_diff="4"> 268 <assert_contents>
366 <assert_contents> 269 <has_text_matching expression="IHHALP_00005"/>
367 <has_text_matching expression="Genome size: 3,306 bp"/> 270 </assert_contents>
368 </assert_contents> 271 </output>
369 </output> 272 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="4">
370 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="2"> 273 <assert_contents>
371 <assert_contents> 274 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/>
372 <has_text_matching expression="mock1"/> 275 </assert_contents>
373 </assert_contents> 276 </output>
374 </output> 277 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
375 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="2"> 278 <output name="annotation_plot">
376 <assert_contents> 279 <assert_contents>
377 <has_text_matching expression="ID=DOGAIA_00005_gene;locus_tag=DOGAIA_00005"/> 280 <has_size value="418991" delta="1000"/>
378 </assert_contents>
379 </output>
380 <output name="annotation_gbff" value="TEST_4/TEST_4.gbff" lines_diff="4">
381 <assert_contents>
382 <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCL"/>
383 </assert_contents>
384 </output>
385 <output name="annotation_embl" value="TEST_4/TEST_4.embl" lines_diff="4">
386 <assert_contents>
387 <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKK"/>
388 </assert_contents>
389 </output>
390 <output name="annotation_fna" value="TEST_4/TEST_4.fna"/>
391 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
392 <output name="annotation_faa" value="TEST_4/TEST_4.faa"/>
393 <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv"/>
394 <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa"/>
395 <output name="summary_txt" value="TEST_4/TEST_4.txt">
396 <assert_contents>
397 <has_text_matching expression="CDSs: 3"/>
398 </assert_contents>
399 </output>
400 <output name="annotation_json" value="TEST_4/TEST_4.json" lines_diff="4">
401 <assert_contents>
402 <has_text_matching expression="0.4340592861464005"/>
403 </assert_contents> 281 </assert_contents>
404 </output> 282 </output>
405 </test> 283 </test>
284 <test expect_num_outputs="4"> <!-- TEST_3 test all skip steps -->
285 <section name="input_option" >
286 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
287 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
288 <param name="input_file" value="NC_002127.1.fna"/>
289 <param name="min_contig_length" value="350"/>
290 </section>
291 <section name="workflow">
292 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
293 </section>
294 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="4"/>
295 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="4"/>
296 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
297 <output name="annotation_plot">
298 <assert_contents>
299 <has_size value="418399" delta="1000"/>
300 </assert_contents>
301 </output>
302 </test>
303 <test expect_num_outputs="4"> <!-- TEST_4 annotations -->
304 <section name="input_option" >
305 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
306 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
307 <param name="input_file" value="NC_002127.1.fna"/>
308 </section>
309 <section name="annotation">
310 <param name="complete" value="true"/>
311 <param name="prodigal" value="prodigal.tf"/>
312 <param name="translation_table" value="4"/>
313 <param name="replicons" value="replicons.tsv"/>
314 <param name="compliant" value="true"/>
315 <param name="proteins" value="user-proteins.faa"/>
316 </section>
317 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="4"/>
318 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="4"/>
319 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
320 <output name="annotation_plot">
321 <assert_contents>
322 <has_size value="418399" delta="1000"/>
323 </assert_contents>
324 </output>
325 </test>
326 <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary -->
327 <section name="input_option" >
328 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
329 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
330 <param name="input_file" value="NC_002127.1.fna"/>
331 </section>
332 <section name="annotation">
333 <param name="complete" value="true"/>
334 <param name="translation_table" value="4"/>
335 </section>
336 <section name="workflow">
337 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
338 </section>
339 <section name="output_files">
340 <param name="output_selection" value="log_txt,sum_txt"/>
341 </section>
342 <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="6"/>
343 <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/>
344 </test>
406 </tests> 345 </tests>
407 346 <help><![CDATA[**What it does**
408 <help><![CDATA[ 347 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs.
409 usage: bakta [--db DB] [--min-contig-length MIN_CONTIG_LENGTH] 348
410 [--prefix PREFIX] [--output OUTPUT] [--genus GENUS] 349 *Comprehensive & taxonomy-independent database*
411 [--species SPECIES] [--strain STRAIN] [--plasmid PLASMID] 350 Bakta provides a large and taxonomy-independent database using UniProt's entire UniRef protein sequence cluster universe.
412 [--complete] [--prodigal-tf PRODIGAL_TF] 351
413 [--translation-table {11,4}] [--gram {+,-,?}] [--locus LOCUS] 352 *Protein sequence identification*
414 [--locus-tag LOCUS_TAG] [--keep-contig-headers] 353 Bakta exactly identifies known identical protein sequences (IPS) from RefSeq and UniProt
415 [--replicons REPLICONS] [--compliant] [--proteins PROTEINS] 354 allowing the fine-grained annotation of gene alleles (AMR) or closely related but distinct protein families.
416 [--skip-trna] [--skip-tmrna] [--skip-rrna] [--skip-ncrna] 355 This is achieved via an alignment-free sequence identification (AFSI) approach
417 [--skip-ncrna-region] [--skip-crispr] [--skip-cds] [--skip-sorf] 356 using full-length MD5 protein sequence hash digests.
418 [--skip-gap] [--skip-ori] [--help] [--verbose] 357 *Small proteins/short open reading frames*
419 [--threads THREADS] [--tmp-dir TMP_DIR] [--version] 358 Bakta detects and annotates small proteins/short open reading frames (sORF).
420 <genome> 359
421 360 *Expert annotation systems*
422 Rapid & standardized annotation of bacterial genomes, MAGs & plasmids 361 To provide high quality annotations for certain proteins of higher interest, e.g. AMR & VF genes,
423 362 Bakta includes & merges different expert annotation systems.
424 positional arguments: 363 Currently, Bakta uses NCBI's AMRFinderPlus for AMR gene annotations
425 <genome> Genome sequences in (zipped) fasta format 364 as well as a generalized protein sequence expert system with distinct
426 365 coverage, identity and priority values for each sequence, currently comprising the VFDB as well as NCBI's BlastRules.
427 Input / Output: 366
428 --db DB, -d DB Database path (default = <bakta_path>/db). Can also be 367 *Comprehensive workflow*
429 provided as BAKTA_DB environment variable. 368 Bakta annotates ncRNA cis-regulatory regions, oriC/oriV/oriT
430 --min-contig-length MIN_CONTIG_LENGTH, -m MIN_CONTIG_LENGTH 369 and assembly gaps as well as standard feature types: tRNA, tmRNA, rRNA, ncRNA genes, CRISPR, CDS.
431 Minimum contig size (default = 1; 200 in compliant 370
432 mode) 371 *GFF3 & INSDC conform annotations*
433 --prefix PREFIX, -p PREFIX 372 Bakta writes GFF3 and INSDC-compliant (Genbank & EMBL) annotation files ready for submission
434 Prefix for output files 373 (checked via GenomeTools GFF3Validator, table2asn_GFF and ENA Webin-CLI for GFF3 and EMBL file formats,
435 --output OUTPUT, -o OUTPUT 374 respectively for representative genomes of all ESKAPE species).
436 Output directory (default = current working directory) 375
437 376 *Bacteria & plasmids*
438 Organism: 377 Bakta was designed to annotate bacteria (isolates & MAGs) and plasmids, only.
439 --genus GENUS Genus name 378
440 --species SPECIES Species name 379 **Input options**
441 --strain STRAIN Strain name 380 1. Choose a genome or assembly in fasta format to use bakta annotations
442 --plasmid PLASMID Plasmid name 381 2. Choose a version of the Bakta database
443 382
444 Annotation: 383 **Organism options**
445 --complete All sequences are complete replicons (chromosome/plasmid[s]) 384 You can specify information about the analysed fasta as text input for:
446 --prodigal-tf PRODIGAL_TF Path to existing Prodigal training file to use for CDS prediction 385 - genus
447 386 - species
448 --translation-table {11,4} Translation table: 11/4 (default = 11) 387 - strain
449 --gram {+,-,?} Gram type for signal peptide predictions: +/-/? (default = ?) 388 - plasmid
450 --locus LOCUS Locus prefix (default = 'contig') 389
451 --locus-tag LOCUS_TAG Locus tag prefix (default = autogenerated) 390 **Annotation options**
452 --keep-contig-headers Keep original contig headers 391 1. You can specify if all sequences (chromosome or plasmids) are complete or not
453 --replicons REPLICONS Replicon information table (tsv/csv) 392 2. You can add your own prodigal training file for CDS prediction
454 --compliant Force Genbank/ENA/DDJB compliance 393 3. The translation table could be modified, default is the 11th for bacteria
455 --proteins PROTEINS Fasta file of trusted protein sequences for CDS annotation 394 4. You can specify if bacteria is gram -/+ or unknown (default value unknown)
456 395 5. You can keep the name of contig present in the input file
457 396 6. You can specify your own replicon table as a TSV/CSV file
458 Workflow: 397 7. The compliance option produces annotation files ready for submission to public databases
459 --skip-trna Skip tRNA detection & annotation 398 such as ENA, Genbank and EMBL
460 --skip-tmrna Skip tmRNA detection & annotation 399 8. You can specify a protein sequence file for annotation in GenBank or fasta formats
461 --skip-rrna Skip rRNA detection & annotation 400 Using the Fasta format, each reference sequence can be provided in a short or long format:
462 --skip-ncrna Skip ncRNA detection & annotation 401
463 --skip-ncrna-region Skip ncRNA region detection & annotation 402 # short:
464 --skip-crispr Skip CRISPR array detection & annotation 403 >id gene~~~product~~~dbxrefs
465 --skip-cds Skip CDS detection & annotation 404 MAQ...
466 --skip-sorf Skip sORF detection & annotation 405
467 --skip-gap Skip gap detection & annotation 406 # long:
468 --skip-ori Skip oriC/oriT detection & annotation 407 >id min_identity~~~min_query_cov~~~min_subject_cov~~~gene~~~product~~~dbxrefs
469 408 MAQ...
470 General: 409
471 --help, -h Show this help message and exit 410 **Skip steps**
472 --verbose, -v Print verbose information 411 Some steps can be skipped:
473 --threads THREADS, -t THREADS 412 - skip-trna Skip tRNA detection & annotation
474 Number of threads to use (default = number of 413 - skip-tmrna Skip tmRNA detection & annotation
475 available CPUs) 414 - skip-rrna Skip rRNA detection & annotation
476 --tmp-dir TMP_DIR Location for temporary files (default = system 415 - skip-ncrna Skip ncRNA detection & annotation
477 dependent auto detection) 416 - skip-ncrna-region Skip ncRNA region detection & annotation
478 --version show program's version number and exit 417 - skip-crispr Skip CRISPR array detection & annotation
479 418 - skip-cds Skip CDS detection & annotation
480 419 - skip-pseudo Skip pseudogene detection & annotation
420 - skip-sorf Skip sORF detection & annotation
421 - skip-gap Skip gap detection & annotation
422 - skip-ori Skip oriC/oriT detection & annotation
423
424 **Output options**
425 Bakta produces a number of output files; you can select which types of file you want:
426 - Summary of the annotation
427 - Annotated files
428 - Sequence files for nucleotide and/or amino acid
481 ]]></help> 429 ]]></help>
482 <expand macro="citations"/> 430 <expand macro="citations"/>
483 </tool> 431 </tool>