Mercurial > repos > pimarin > bakta
comparison bakta.xml @ 3:eea334d9988b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/bakta commit 73af464cc860250c3fa3dd433602283ab5a44f53-dirty
author | pimarin |
---|---|
date | Thu, 22 Dec 2022 15:01:43 +0000 |
parents | ca9e2125c5de |
children | 591cae6ef29d |
comparison
equal
deleted
inserted
replaced
2:ca9e2125c5de | 3:eea334d9988b |
---|---|
1 <?xml version="1.0" encoding="UTF-8"?> | 1 <tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> |
2 | |
3 <tool id="bakta" name="Bakta genome annotation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
4 <description> | 2 <description> |
5 Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification | 3 genome annotation via alignment-free sequence identification |
6 </description> | 4 </description> |
7 <macros> | 5 <macros> |
8 <import>macro.xml</import> | 6 <import>macro.xml</import> |
9 </macros> | 7 </macros> |
10 <expand macro='edam'/> | 8 <expand macro='edam'/> |
11 <expand macro='xrefs'/> | 9 <expand macro='xrefs'/> |
12 <expand macro="requirements"/> | 10 <expand macro="requirements"/> |
13 <expand macro="version_command"/> | 11 <expand macro="version_command"/> |
14 | 12 |
15 <command detect_errors="aggressive"><![CDATA[ | 13 <command detect_errors="aggressive"><![CDATA[ |
14 mkdir ./database_path && | |
15 ln -s '$(input_option.bakta_db_select.fields.path)/'* database_path && | |
16 ln -s '$(input_option.amrfinder_db_select.fields.path)' database_path && | |
17 | |
16 bakta | 18 bakta |
17 #*====================================== | 19 #*====================================== |
18 CPU option | 20 CPU option |
19 ======================================*# | 21 ======================================*# |
20 --threads \${GALAXY_SLOTS:-1} | 22 --threads \${GALAXY_SLOTS:-1} |
21 #*====================================== | 23 #*====================================== |
22 Bakta database | 24 Bakta database |
23 ======================================*# | 25 ======================================*# |
24 --db $input_option.db_select.fields.path | 26 --db ./database_path |
25 #if $input_option.min_contig_length | 27 #if $input_option.min_contig_length |
26 --min-contig-length $input_option.min_contig_length | 28 --min-contig-length $input_option.min_contig_length |
27 #else if $annotation.compliant | 29 #else if $annotation.compliant |
28 --min-contig-length 200 | 30 --min-contig-length 200 |
29 #else | 31 #else |
70 #end if | 72 #end if |
71 #*====================================== | 73 #*====================================== |
72 Workflow OPTIONS | 74 Workflow OPTIONS |
73 skip some step of the bakta analysis | 75 skip some step of the bakta analysis |
74 ======================================*# | 76 ======================================*# |
75 $workflow.skip_trna | 77 |
76 $workflow.skip_tmrna | 78 #echo " ".join($workflow.skip_analysis) |
77 $workflow.skip_rrna | 79 |
78 $workflow.skip_ncrna | |
79 $workflow.skip_ncrna_region | |
80 $workflow.skip_crispr | |
81 $workflow.skip_cds | |
82 $workflow.skip_sorf | |
83 $workflow.skip_gap | |
84 $workflow.skip_ori | |
85 #*====================================== | 80 #*====================================== |
86 Genome file | 81 Genome file |
87 ======================================*# | 82 ======================================*# |
88 '$input_option.input_file' | 83 '$input_option.input_file' |
89 #*====================================== | 84 #*====================================== |
90 LOG file | 85 LOG file |
91 ======================================*# | 86 ======================================*# |
92 &> '$logfile' | 87 | tee '$logfile' |
93 ]]></command> | 88 ]]></command> |
94 <inputs> | 89 <inputs> |
95 <!-- DB and file INPUT --> | 90 <!-- DB and file INPUT --> |
96 <section name="input_option" title="Input/Output options" expanded="true"> | 91 <section name="input_option" title="Input/Output options" expanded="true"> |
97 <param name="db_select" type="select" label="The bakta database"> | 92 <param name="bakta_db_select" type="select" label="The bakta database"> |
98 <options from_data_table="bakta_database"> | 93 <options from_data_table="bakta_database"> |
99 <validator message="No bakta database is available" type="no_options"/> | 94 <filter type="static_value" value="@BAKTA_VERSION@" column="bakta_version"/> |
95 <column name="dbkey" index="2"/> | |
96 <validator message="No bakta database is available" type="no_options"/> | |
100 </options> | 97 </options> |
101 </param> | 98 </param> |
99 <param name="amrfinder_db_select" type="select" label="The amrfinderplus database"> | |
100 <options from_data_table="amrfinderplus_database"> | |
101 <validator message="No amrfinderplus database is available" type="no_options"/> | |
102 </options> | |
103 </param> | |
104 | |
102 <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/> | 105 <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/> |
103 <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/> | 106 <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/> |
104 </section> | 107 </section> |
105 <!-- Organism INFORMATION OPTIONS --> | 108 <!-- Organism INFORMATION OPTIONS --> |
106 <section name="organism" title="Optional organism options" expanded="false"> | 109 <section name="organism" title="Optional organism options" expanded="false"> |
107 <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia"> | 110 <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia"> |
108 <validator type="regex">^[A-Z]</validator> | 111 <validator type="regex">^[a-zA-Z]+$</validator> |
109 </param> | 112 </param> |
110 <param argument="--species" type="text" optional="true" label="Specify species name" help="ex. 'coli O157:H7'"/> | 113 <param argument="--species" type="text" optional="true" label="Specify species name" help="ex. 'coli O157:H7'"> |
114 <validator type="regex">^[a-zA-Z0-9\s(:\-/)]+$</validator> | |
115 </param> | |
111 <param argument="--strain" type="text" optional="true" label="Specify strain name" help="ex. Sakai"> | 116 <param argument="--strain" type="text" optional="true" label="Specify strain name" help="ex. Sakai"> |
112 <validator type="regex">^[A-Z]</validator> | 117 <validator type="regex">^[a-zA-Z]+$</validator> |
113 </param> | 118 </param> |
114 <param argument="--plasmid" type="text" optional="true" label="Specify plasmid name" help="ex. pOSAK1"/> | 119 <param argument="--plasmid" type="text" optional="true" label="Specify plasmid name" help="ex. pOSAK1"> |
120 <validator type="regex">^[a-zA-Z0-9\s(:\-/)]+$</validator> | |
121 </param> | |
115 </section> | 122 </section> |
116 <!-- ANNOTATION --> | 123 <!-- ANNOTATION --> |
117 <section name="annotation" title="Optional annotation"> | 124 <section name="annotation" title="Optional annotation"> |
118 <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/> | 125 <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/> |
119 <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/> | 126 <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/> |
125 <option value="+">Gram+</option> | 132 <option value="+">Gram+</option> |
126 <option value="-">Gram-</option> | 133 <option value="-">Gram-</option> |
127 <option value="?" selected="true">Unknown</option> | 134 <option value="?" selected="true">Unknown</option> |
128 </param> | 135 </param> |
129 <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/> | 136 <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/> |
130 <param argument="--replicons" type="data" format="tsv, csv" optional="true" label="Replicon information table (tsv/csv)" help=""/> | 137 <param argument="--replicons" type="data" format="tsv,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/> |
131 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/> | 138 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/> |
132 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/> | 139 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/> |
133 </section> | 140 </section> |
134 <!-- PARAMETER FOR WORKFLOW ANALYSIS --> | 141 <!-- PARAMETER FOR WORKFLOW ANALYSIS --> |
135 <section name="workflow" title="Workflow option to skip steps"> | 142 <section name="workflow" title="Workflow option to skip steps"> |
136 <param name="skip_trna" type="boolean" truevalue="--skip-trna" falsevalue="" label="Skip tRNA detection and annotation" help="(--skip-trna)"/> | 143 <param name="skip_analysis" type="select" display="checkboxes" multiple="true" label="Select steps to skip"> |
137 <param name="skip_tmrna" type="boolean" truevalue="--skip-tmrna" falsevalue="" label="Skip tmRNA detection and annotation" help="(--skip-tmrna)"/> | 144 <option value="--skip-trna"> Skip tRNA detection and annotation </option> |
138 <param name="skip_rrna" type="boolean" truevalue="--skip-rrna" falsevalue="" label=" Skip rRNA detection and annotation" help="(--skip-rrna)"/> | 145 <option value="--skip-tmrna"> Skip tmRNA detection and annotation </option> |
139 <param name="skip_ncrna" type="boolean" truevalue="--skip-ncrna" falsevalue="" label=" Skip ncRNA detection and annotation" help="(--skip-ncrna)"/> | 146 <option value="--skip-rrna"> Skip rRNA detection and annotation </option> |
140 <param name="skip_ncrna_region" type="boolean" truevalue="--skip-ncrna-region" falsevalue="" label="Skip ncRNA region detection and annotation" help="(--skip-ncrna-region)"/> | 147 <option value="--skip-ncrna"> Skip ncRNA detection and annotation </option> |
141 <param name="skip_crispr" type="boolean" truevalue="--skip-crispr" falsevalue="" label="Skip CRISPR array detection and annotation" help="(--skip-crispr)"/> | 148 <option value="--skip-ncrna-region"> Skip ncRNA region detection and annotation </option> |
142 <param name="skip_cds" type="boolean" truevalue="--skip-cds" falsevalue="" label="Skip CDS detection and annotation" help="(--skip-cds)"/> | 149 <option value="--skip-crispr"> Skip CRISPR array detection and annotation </option> |
143 <param name="skip_sorf" type="boolean" truevalue="--skip-sorf" falsevalue="" label="Skip sORF detection and annotation" help="(--skip-sorf)"/> | 150 <option value="--skip-cds"> Skip CDS detection and annotation </option> |
144 <param name="skip_gap" type="boolean" truevalue="--skip-gap" falsevalue="" label="Skip gap detection and annotation" help="(--skip-gap)"/> | 151 <option value="--skip-pseudo"> Skip pseudogene detection and annotation </option> |
145 <param name="skip_ori" type="boolean" truevalue="--skip-ori" falsevalue="" label="Skip oriC/oriT detection and annotation" help="(--skip_ori)"/> | 152 <option value="--skip-sorf"> Skip sORF detection and annotation </option> |
153 <option value="--skip-gap"> Skip gap detection and annotation </option> | |
154 <option value="--skip-ori"> Skip oriC/oriT detection and annotation </option> | |
155 </param> | |
146 </section> | 156 </section> |
157 <section name="output_files" title="Selection of the output files"> | |
158 <param name="output_selection" type="select" display="checkboxes" multiple="true" label="Output files selection"> | |
159 <option value="file_tsv" selected="true"> Annotation file in TSV </option> | |
160 <option value="file_gff3" selected="true"> Annotation and sequence in GFF3 </option> | |
161 <option value="file_gbff" selected="false"> Annotations and sequences in GenBank format </option> | |
162 <option value="file_embl" selected="false"> Annotations and sequences in EMBL format </option> | |
163 <option value="file_fna" selected="false"> Replicon/contig DNA sequences as FASTA </option> | |
164 <option value="file_ffn" selected="true"> Feature nucleotide sequences as FASTA </option> | |
165 <option value="file_faa" selected="false"> CDS/sORF amino acid sequences as FASTA </option> | |
166 <option value="hypo_tsv" selected="false"> Hypothetical protein CDS in TSV</option> | |
167 <option value="hypo_fa" selected="false"> Hypothetical protein CDS amino sequences as FASTA</option> | |
168 <option value="sum_txt" selected="false"> Summary as TXT</option> | |
169 <option value="file_json" selected="false"> Information on each annotated feature as JSON </option> | |
170 <option value="file_plot" selected="true"> Plot of the annotation result as SVG </option> | |
171 <option value="log_txt" selected="false"> Log file as TXT </option> | |
172 </param> | |
173 </section> | |
174 | |
147 </inputs> | 175 </inputs> |
148 <outputs> | 176 <outputs> |
149 <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"/> | 177 <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: annotation_summary"> |
150 <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: bakta_output.tsv"/> | 178 <filter> output_files['output_selection'] and "file_tsv" in output_files['output_selection'] </filter> |
151 <data name="annotation_gff3" format="tabular" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: bakta_output.gff3"/> | 179 </data> |
152 <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff"/> | 180 <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: Annotation_and_sequences"> |
153 <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl"/> | 181 <filter> output_files['output_selection'] and "file_gff3" in output_files['output_selection'] </filter> |
154 <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: bakta_output.fna"/> | 182 </data> |
155 <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: bakta_output.ffn"/> | 183 <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff"> |
156 <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: bakta_output.faa"/> | 184 <filter> output_files['output_selection'] and "file_gbff" in output_files['output_selection'] </filter> |
157 <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.tsv"> | 185 </data> |
158 <filter>workflow['skip_cds'] == False</filter> | 186 <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl"> |
159 </data> | 187 <filter> output_files['output_selection'] and "file_embl" in output_files['output_selection'] </filter> |
160 <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.faa"> | 188 </data> |
161 <filter>workflow['skip_cds'] == False</filter> | 189 <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: Contig_sequences"> |
162 </data> | 190 <filter> output_files['output_selection'] and "file_fna" in output_files['output_selection'] </filter> |
163 <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: bakta_output.txt"/> | 191 </data> |
164 <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: bakta_output.json"/> | 192 <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: Nucleotide_sequences"> |
193 <filter> output_files['output_selection'] and "file_ffn" in output_files['output_selection'] </filter> | |
194 </data> | |
195 <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: Amino_acid_sequences"> | |
196 <filter> output_files['output_selection'] and "file_faa" in output_files['output_selection'] </filter> | |
197 </data> | |
198 <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: hypothetical_annotation_summary"> | |
199 <filter> output_files['output_selection'] and "hypo_tsv" in output_files['output_selection'] </filter> | |
200 </data> | |
201 <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: hypothetical_amino_acid_sequences"> | |
202 <filter> output_files['output_selection'] and "hypo_fa" in output_files['output_selection'] </filter> | |
203 </data> | |
204 <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: Analysis_summary"> | |
205 <filter> output_files['output_selection'] and "sum_txt" in output_files['output_selection'] </filter> | |
206 </data> | |
207 <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: annotation_machine_readable"> | |
208 <filter> output_files['output_selection'] and "file_json" in output_files['output_selection'] </filter> | |
209 </data> | |
210 <data name="annotation_plot" format="svg" from_work_dir="bakta_output.svg" label="${tool.name} on ${on_string}: Plot of the annotation"> | |
211 <filter> output_files['output_selection'] and "file_plot" in output_files['output_selection'] </filter> | |
212 </data> | |
213 <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"> | |
214 <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter> | |
215 </data> | |
165 </outputs> | 216 </outputs> |
166 | |
167 <tests> | 217 <tests> |
168 <test expect_num_outputs="12"> <!-- TEST_1 database + input --> | 218 <test expect_num_outputs="13"> <!-- TEST_1 database + input --> |
169 <section name="input_option" > | 219 <section name="input_option" > |
170 <param name="db_select" value="test-db-bakta"/> | 220 <param name="bakta_db_select" value="V0.1_2022-08-29"/> |
171 <param name="input_file" value="NC_002127.1.fna"/> | 221 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> |
172 </section> | 222 <param name="input_file" value="NC_002127.1.fna"/> |
173 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="4"> | 223 <param name="min_contig_length" value="250"/> |
174 <assert_contents> | 224 </section> |
175 <has_text_matching n="1" expression="Genome size: 3,306 bp"/> | 225 <section name="output_files"> |
176 <has_n_lines n="90" delta="1"/> | 226 <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/> |
177 </assert_contents> | 227 </section> |
178 </output> | 228 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2"/> |
179 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2"> | 229 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"/> |
180 <assert_contents> | 230 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="8"/> |
181 <has_text_matching n="3" expression="contig_1"/> | 231 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="6"/> |
182 <has_n_lines n="6" delta="1"/> | 232 <output name="annotation_fna" value="TEST_1/TEST_1.fna"/> |
183 </assert_contents> | 233 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/> |
184 </output> | 234 <output name="annotation_faa" value="TEST_1/TEST_1.faa"/> |
185 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"> | 235 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv" lines_diff="4"/> |
186 <assert_contents> | 236 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/> |
187 <has_text_matching expression="AGCTATTCCTGGTTTCATATGAAACAAACCATGCCTGTTCTCATGCCAGTAAGTGTAGCA"/> | 237 <output name="summary_txt" value="TEST_1/TEST_1.txt" lines_diff="4"/> |
188 <has_n_lines n="70" delta="1"/> | 238 <output name="annotation_plot"> |
189 </assert_contents> | 239 <assert_contents> |
190 </output> | 240 <has_size value="418991" delta="1000"/> |
191 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="4"> | 241 </assert_contents> |
192 <assert_contents> | 242 </output> |
193 <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCL"/> | 243 |
194 <has_n_lines n="133" delta="1"/> | 244 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="6"/> |
195 </assert_contents> | 245 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="6"/> |
196 </output> | |
197 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="2"> | |
198 <assert_contents> | |
199 <has_text_matching expression="FIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTFQSDNTVTAKKQQCFVGNSNLQTGQ"/> | |
200 <has_n_lines n="137" delta="2"/> | |
201 </assert_contents> | |
202 </output> | |
203 <output name="annotation_fna" value="TEST_1/TEST_1.fna"> | |
204 <assert_contents> | |
205 <has_text_matching expression="TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC"/> | |
206 <has_n_lines n="57"/> | |
207 </assert_contents> | |
208 </output> | |
209 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"> | |
210 <assert_contents> | |
211 <has_text_matching expression="TCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGC"/> | |
212 <has_n_lines n="6"/> | |
213 </assert_contents> | |
214 </output> | |
215 <output name="annotation_faa" value="TEST_1/TEST_1.faa"> | |
216 <assert_contents> | |
217 <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI"/> | |
218 <has_n_lines n="6"/> | |
219 </assert_contents> | |
220 </output> | |
221 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv"> | |
222 <assert_contents> | |
223 <has_text_matching expression="DOGAIA_00010"/> | |
224 <has_n_lines n="6"/> | |
225 </assert_contents> | |
226 </output> | |
227 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"> | |
228 <assert_contents> | |
229 <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCLFIFLFSPFCLSSASCDYIAHHFSTVLPPVFCRRTF"/> | |
230 <has_n_lines n="6"/> | |
231 </assert_contents> | |
232 </output> | |
233 <output name="summary_txt" value="TEST_1/TEST_1.txt"> | |
234 <assert_contents> | |
235 <has_text_matching expression="N50: 3306"/> | |
236 <has_n_lines n="29"/> | |
237 </assert_contents> | |
238 </output> | |
239 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="4"> | |
240 <assert_contents> | |
241 <has_text_matching expression="0.6524500907441017"/> | |
242 <has_n_lines n="112" delta="1"/> | |
243 </assert_contents> | |
244 </output> | |
245 </test> | |
246 <test expect_num_outputs="12"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps --> | |
247 <section name="input_option" > | |
248 <param name="db_select" value="test-db-bakta"/> | |
249 <param name="input_file" value="NC_002127.1.fna"/> | |
250 <param name="min_contig_length" value="250"/> | |
251 </section> | |
252 <section name="organism"> | |
253 <param name="genus" value="Escherichia"/> | |
254 <param name="species" value="coli O157:H7"/> | |
255 <param name="strain" value="Sakai"/> | |
256 <param name="plasmid" value="pOSAK1"/> | |
257 </section> | |
258 <section name="annotation"> | |
259 <param name="--gram" value="-"/> | |
260 <param name="keep_contig_headers" value="true"/> | |
261 </section> | |
262 <section name="workflow"> | |
263 <param name="skip_crispr" value="true"/> | |
264 <param name="skip_gap" value="true"/> | |
265 </section> | |
266 <output name="logfile" value="TEST_2/TEST_2.log" lines_diff="4"> | |
267 <assert_contents> | |
268 <has_text_matching expression="Genome size: 3,306 bp"/> | |
269 </assert_contents> | |
270 </output> | |
271 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="2"> | |
272 <assert_contents> | |
273 <has_text_matching expression="DOGAIA_00005"/> | |
274 </assert_contents> | |
275 </output> | |
276 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="2"> | |
277 <assert_contents> | |
278 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/> | |
279 </assert_contents> | |
280 </output> | |
281 <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="5"> | |
282 <assert_contents> | |
283 <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSV"/> | |
284 </assert_contents> | |
285 </output> | |
286 <output name="annotation_embl" value="TEST_2/TEST_2.embl" lines_diff="4"> | |
287 <assert_contents> | |
288 <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKK"/> | |
289 </assert_contents> | |
290 </output> | |
291 <output name="annotation_fna" value="TEST_2/TEST_2.fna"/> | |
292 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/> | |
293 <output name="annotation_faa" value="TEST_2/TEST_2.faa"/> | |
294 <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv"/> | |
295 <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa"/> | |
296 <output name="summary_txt" value="TEST_2/TEST_2.txt"> | |
297 <assert_contents> | |
298 <has_text_matching expression="N50: 3306"/> | |
299 </assert_contents> | |
300 </output> | |
301 <output name="annotation_json" value="TEST_2/TEST_2.json" lines_diff="4"> | |
302 <assert_contents> | |
303 <has_text_matching expression="0.6524500907441017"/> | |
304 </assert_contents> | |
305 </output> | |
306 </test> | |
307 <test expect_num_outputs="10"> <!-- TEST_3 test all skip steps --> | |
308 <section name="input_option" > | |
309 <param name="db_select" value="test-db-bakta"/> | |
310 <param name="input_file" value="NC_002127.1.fna"/> | |
311 <param name="min_contig_length" value="250"/> | |
312 </section> | |
313 <section name="workflow"> | |
314 <param name="skip_trna" value="true"/> | |
315 <param name="skip_tmrna" value="true"/> | |
316 <param name="skip_rrna" value="true"/> | |
317 <param name="skip_ncrna" value="true"/> | |
318 <param name="skip_ncrna_region" value="true"/> | |
319 <param name="skip_crispr" value="true"/> | |
320 <param name="skip_cds" value="true"/> | |
321 <param name="skip_sorf" value="true"/> | |
322 <param name="skip_gap" value="true"/> | |
323 <param name="skip_ori" value="true"/> | |
324 </section> | |
325 <output name="logfile" value="TEST_3/TEST_3.log" lines_diff="4"> | |
326 <assert_contents> | |
327 <has_text_matching expression="Genome size: 3,306 bp"/> | |
328 </assert_contents> | |
329 </output> | |
330 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="1"> | |
331 <assert_contents> | |
332 <has_n_lines n="3" delta="1"/> | |
333 </assert_contents> | |
334 </output> | |
335 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="2"> | |
336 <assert_contents> | |
337 <has_n_lines n="67" delta="1"/> | |
338 </assert_contents> | |
339 </output> | |
340 <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="10"/> | |
341 <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4"/> | |
342 <output name="annotation_fna" value="TEST_3/TEST_3.fna"/> | |
343 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/> | |
344 <output name="annotation_faa" value="TEST_3/TEST_3.faa"/> | |
345 <output name="summary_txt" value="TEST_3/TEST_3.txt"> | |
346 <assert_contents> | |
347 <has_text_matching expression="GC: 43.4"/> | |
348 </assert_contents> | |
349 </output> | |
350 <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4"/> | |
351 </test> | 246 </test> |
352 <test expect_num_outputs="12"> <!-- TEST_4 annotations --> | 247 <test expect_num_outputs="4"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps --> |
353 <section name="input_option" > | 248 <section name="input_option" > |
354 <param name="db_select" value="test-db-bakta"/> | 249 <param name="bakta_db_select" value="V0.1_2022-08-29"/> |
250 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> | |
355 <param name="input_file" value="NC_002127.1.fna"/> | 251 <param name="input_file" value="NC_002127.1.fna"/> |
252 <param name="min_contig_length" value="250"/> | |
253 </section> | |
254 <section name="organism"> | |
255 <param name="genus" value="Escherichia"/> | |
256 <param name="species" value="coli O157:H7"/> | |
257 <param name="strain" value="Sakai"/> | |
258 <param name="plasmid" value="pOSAK1"/> | |
356 </section> | 259 </section> |
357 <section name="annotation"> | 260 <section name="annotation"> |
358 <param name="complete" value="true"/> | 261 <param name="--gram" value="-"/> |
359 <param name="translation_table" value="4"/> | 262 <param name="keep_contig_headers" value="true"/> |
360 <param name="prodigal" value="prodigal.tf"/> | 263 </section> |
361 <param name="replicons" value="replicons.tsv"/> | 264 <section name="workflow"> |
362 <param name="compliant" value="true"/> | 265 <param name="skip_analysis" value="--skip-trna,--skip-tmrna"/> |
363 <param name="proteins" value="user-proteins.faa"/> | 266 </section> |
364 </section> | 267 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="4"> |
365 <output name="logfile" value="TEST_4/TEST_4.log" lines_diff="4"> | 268 <assert_contents> |
366 <assert_contents> | 269 <has_text_matching expression="IHHALP_00005"/> |
367 <has_text_matching expression="Genome size: 3,306 bp"/> | 270 </assert_contents> |
368 </assert_contents> | 271 </output> |
369 </output> | 272 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="4"> |
370 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="2"> | 273 <assert_contents> |
371 <assert_contents> | 274 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/> |
372 <has_text_matching expression="mock1"/> | 275 </assert_contents> |
373 </assert_contents> | 276 </output> |
374 </output> | 277 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/> |
375 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="2"> | 278 <output name="annotation_plot"> |
376 <assert_contents> | 279 <assert_contents> |
377 <has_text_matching expression="ID=DOGAIA_00005_gene;locus_tag=DOGAIA_00005"/> | 280 <has_size value="418991" delta="1000"/> |
378 </assert_contents> | |
379 </output> | |
380 <output name="annotation_gbff" value="TEST_4/TEST_4.gbff" lines_diff="4"> | |
381 <assert_contents> | |
382 <has_text_matching expression="SSASSCSFSHMVACSSASSASSFSSSVRLWLFMNPAMLSAVCCCL"/> | |
383 </assert_contents> | |
384 </output> | |
385 <output name="annotation_embl" value="TEST_4/TEST_4.embl" lines_diff="4"> | |
386 <assert_contents> | |
387 <has_text_matching expression="MKKDKKYQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKK"/> | |
388 </assert_contents> | |
389 </output> | |
390 <output name="annotation_fna" value="TEST_4/TEST_4.fna"/> | |
391 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/> | |
392 <output name="annotation_faa" value="TEST_4/TEST_4.faa"/> | |
393 <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv"/> | |
394 <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa"/> | |
395 <output name="summary_txt" value="TEST_4/TEST_4.txt"> | |
396 <assert_contents> | |
397 <has_text_matching expression="CDSs: 3"/> | |
398 </assert_contents> | |
399 </output> | |
400 <output name="annotation_json" value="TEST_4/TEST_4.json" lines_diff="4"> | |
401 <assert_contents> | |
402 <has_text_matching expression="0.4340592861464005"/> | |
403 </assert_contents> | 281 </assert_contents> |
404 </output> | 282 </output> |
405 </test> | 283 </test> |
284 <test expect_num_outputs="4"> <!-- TEST_3 test all skip steps --> | |
285 <section name="input_option" > | |
286 <param name="bakta_db_select" value="V0.1_2022-08-29"/> | |
287 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> | |
288 <param name="input_file" value="NC_002127.1.fna"/> | |
289 <param name="min_contig_length" value="350"/> | |
290 </section> | |
291 <section name="workflow"> | |
292 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/> | |
293 </section> | |
294 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="4"/> | |
295 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="4"/> | |
296 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/> | |
297 <output name="annotation_plot"> | |
298 <assert_contents> | |
299 <has_size value="418399" delta="1000"/> | |
300 </assert_contents> | |
301 </output> | |
302 </test> | |
303 <test expect_num_outputs="4"> <!-- TEST_4 annotations --> | |
304 <section name="input_option" > | |
305 <param name="bakta_db_select" value="V0.1_2022-08-29"/> | |
306 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> | |
307 <param name="input_file" value="NC_002127.1.fna"/> | |
308 </section> | |
309 <section name="annotation"> | |
310 <param name="complete" value="true"/> | |
311 <param name="prodigal" value="prodigal.tf"/> | |
312 <param name="translation_table" value="4"/> | |
313 <param name="replicons" value="replicons.tsv"/> | |
314 <param name="compliant" value="true"/> | |
315 <param name="proteins" value="user-proteins.faa"/> | |
316 </section> | |
317 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="4"/> | |
318 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="4"/> | |
319 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/> | |
320 <output name="annotation_plot"> | |
321 <assert_contents> | |
322 <has_size value="418399" delta="1000"/> | |
323 </assert_contents> | |
324 </output> | |
325 </test> | |
326 <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary --> | |
327 <section name="input_option" > | |
328 <param name="bakta_db_select" value="V0.1_2022-08-29"/> | |
329 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> | |
330 <param name="input_file" value="NC_002127.1.fna"/> | |
331 </section> | |
332 <section name="annotation"> | |
333 <param name="complete" value="true"/> | |
334 <param name="translation_table" value="4"/> | |
335 </section> | |
336 <section name="workflow"> | |
337 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/> | |
338 </section> | |
339 <section name="output_files"> | |
340 <param name="output_selection" value="log_txt,sum_txt"/> | |
341 </section> | |
342 <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="6"/> | |
343 <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/> | |
344 </test> | |
406 </tests> | 345 </tests> |
407 | 346 <help><![CDATA[**What it does** |
408 <help><![CDATA[ | 347 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs. |
409 usage: bakta [--db DB] [--min-contig-length MIN_CONTIG_LENGTH] | 348 |
410 [--prefix PREFIX] [--output OUTPUT] [--genus GENUS] | 349 *Comprehensive & taxonomy-independent database* |
411 [--species SPECIES] [--strain STRAIN] [--plasmid PLASMID] | 350 Bakta provides a large and taxonomy-independent database using UniProt's entire UniRef protein sequence cluster universe. |
412 [--complete] [--prodigal-tf PRODIGAL_TF] | 351 |
413 [--translation-table {11,4}] [--gram {+,-,?}] [--locus LOCUS] | 352 *Protein sequence identification* |
414 [--locus-tag LOCUS_TAG] [--keep-contig-headers] | 353 Bakta exactly identifies known identical protein sequences (IPS) from RefSeq and UniProt |
415 [--replicons REPLICONS] [--compliant] [--proteins PROTEINS] | 354 allowing the fine-grained annotation of gene alleles (AMR) or closely related but distinct protein families. |
416 [--skip-trna] [--skip-tmrna] [--skip-rrna] [--skip-ncrna] | 355 This is achieved via an alignment-free sequence identification (AFSI) approach |
417 [--skip-ncrna-region] [--skip-crispr] [--skip-cds] [--skip-sorf] | 356 using full-length MD5 protein sequence hash digests. |
418 [--skip-gap] [--skip-ori] [--help] [--verbose] | 357 *Small proteins/short open reading frames* |
419 [--threads THREADS] [--tmp-dir TMP_DIR] [--version] | 358 Bakta detects and annotates small proteins/short open reading frames (sORF). |
420 <genome> | 359 |
421 | 360 *Expert annotation systems* |
422 Rapid & standardized annotation of bacterial genomes, MAGs & plasmids | 361 To provide high quality annotations for certain proteins of higher interest, e.g. AMR & VF genes, |
423 | 362 Bakta includes & merges different expert annotation systems. |
424 positional arguments: | 363 Currently, Bakta uses NCBI's AMRFinderPlus for AMR gene annotations |
425 <genome> Genome sequences in (zipped) fasta format | 364 as well as a generalized protein sequence expert system with distinct |
426 | 365 coverage, identity and priority values for each sequence, currently comprising the VFDB as well as NCBI's BlastRules. |
427 Input / Output: | 366 |
428 --db DB, -d DB Database path (default = <bakta_path>/db). Can also be | 367 *Comprehensive workflow* |
429 provided as BAKTA_DB environment variable. | 368 Bakta annotates ncRNA cis-regulatory regions, oriC/oriV/oriT |
430 --min-contig-length MIN_CONTIG_LENGTH, -m MIN_CONTIG_LENGTH | 369 and assembly gaps as well as standard feature types: tRNA, tmRNA, rRNA, ncRNA genes, CRISPR, CDS. |
431 Minimum contig size (default = 1; 200 in compliant | 370 |
432 mode) | 371 *GFF3 & INSDC conform annotations* |
433 --prefix PREFIX, -p PREFIX | 372 Bakta writes GFF3 and INSDC-compliant (Genbank & EMBL) annotation files ready for submission |
434 Prefix for output files | 373 (checked via GenomeTools GFF3Validator, table2asn_GFF and ENA Webin-CLI for GFF3 and EMBL file formats, |
435 --output OUTPUT, -o OUTPUT | 374 respectively for representative genomes of all ESKAPE species). |
436 Output directory (default = current working directory) | 375 |
437 | 376 *Bacteria & plasmids* |
438 Organism: | 377 Bakta was designed to annotate bacteria (isolates & MAGs) and plasmids, only. |
439 --genus GENUS Genus name | 378 |
440 --species SPECIES Species name | 379 **Input options** |
441 --strain STRAIN Strain name | 380 1. Choose a genome or assembly in fasta format to use bakta annotations |
442 --plasmid PLASMID Plasmid name | 381 2. Choose a version of the Bakta database |
443 | 382 |
444 Annotation: | 383 **Organism options** |
445 --complete All sequences are complete replicons (chromosome/plasmid[s]) | 384 You can specify information about the analysed fasta as text input for: |
446 --prodigal-tf PRODIGAL_TF Path to existing Prodigal training file to use for CDS prediction | 385 - genus |
447 | 386 - species |
448 --translation-table {11,4} Translation table: 11/4 (default = 11) | 387 - strain |
449 --gram {+,-,?} Gram type for signal peptide predictions: +/-/? (default = ?) | 388 - plasmid |
450 --locus LOCUS Locus prefix (default = 'contig') | 389 |
451 --locus-tag LOCUS_TAG Locus tag prefix (default = autogenerated) | 390 **Annotation options** |
452 --keep-contig-headers Keep original contig headers | 391 1. You can specify if all sequences (chromosome or plasmids) are complete or not |
453 --replicons REPLICONS Replicon information table (tsv/csv) | 392 2. You can add your own prodigal training file for CDS prediction |
454 --compliant Force Genbank/ENA/DDJB compliance | 393 3. The translation table could be modified, default is the 11th for bacteria |
455 --proteins PROTEINS Fasta file of trusted protein sequences for CDS annotation | 394 4. You can specify if bacteria is gram -/+ or unknown (default value: unknown) |
456 | 395 5. You can keep the name of contig present in the input file |
457 | 396 6. You can specify your own replicon table as a TSV/CSV file |
458 Workflow: | 397 7. The compliance option produces ready-to-submit annotation files for public databases |
459 --skip-trna Skip tRNA detection & annotation | 398 such as ENA, Genbank, EMBL |
460 --skip-tmrna Skip tmRNA detection & annotation | 399 8. You can specify a protein sequence file for annotation in GenBank or fasta formats |
461 --skip-rrna Skip rRNA detection & annotation | 400 Using the Fasta format, each reference sequence can be provided in a short or long format: |
462 --skip-ncrna Skip ncRNA detection & annotation | 401 |
463 --skip-ncrna-region Skip ncRNA region detection & annotation | 402 # short: |
464 --skip-crispr Skip CRISPR array detection & annotation | 403 >id gene~~~product~~~dbxrefs |
465 --skip-cds Skip CDS detection & annotation | 404 MAQ... |
466 --skip-sorf Skip sORF detection & annotation | 405 |
467 --skip-gap Skip gap detection & annotation | 406 # long: |
468 --skip-ori Skip oriC/oriT detection & annotation | 407 >id min_identity~~~min_query_cov~~~min_subject_cov~~~gene~~~product~~~dbxrefs |
469 | 408 MAQ... |
470 General: | 409 |
471 --help, -h Show this help message and exit | 410 **Skip steps** |
472 --verbose, -v Print verbose information | 411 Some steps can be skipped: |
473 --threads THREADS, -t THREADS | 412 - skip-trna Skip tRNA detection & annotation |
474 Number of threads to use (default = number of | 413 - skip-tmrna Skip tmRNA detection & annotation |
475 available CPUs) | 414 - skip-rrna Skip rRNA detection & annotation |
476 --tmp-dir TMP_DIR Location for temporary files (default = system | 415 - skip-ncrna Skip ncRNA detection & annotation |
477 dependent auto detection) | 416 - skip-ncrna-region Skip ncRNA region detection & annotation |
478 --version show program's version number and exit | 417 - skip-crispr Skip CRISPR array detection & annotation |
479 | 418 - skip-cds Skip CDS detection & annotation |
480 | 419 - skip-pseudo Skip pseudogene detection & annotation |
420 - skip-sorf Skip sORF detection & annotation | |
421 - skip-gap Skip gap detection & annotation | |
422 - skip-ori Skip oriC/oriT detection & annotation | |
423 | |
424 **Output options** | |
425 Bakta produces a number of output files; you can select which types of file you want: | |
426 - Summary of the annotation | |
427 - Annotated files | |
428 - Sequence files for nucleotide and/or amino acid | |
481 ]]></help> | 429 ]]></help> |
482 <expand macro="citations"/> | 430 <expand macro="citations"/> |
483 </tool> | 431 </tool> |