Mercurial > repos > iuc > checkm_qa
comparison qa.xml @ 0:159422a38a42 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/checkm commit 2a3b068a98bf0e913dc03e0d5c2182cfd102cf27
| author | iuc |
|---|---|
| date | Fri, 29 Jul 2022 20:24:07 +0000 |
| parents | |
| children | cb4a5b624518 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:159422a38a42 |
|---|---|
| 1 <tool id="checkm_qa" name="CheckM qa" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description> | |
| 3 Assess bins for contamination and completeness | |
| 4 </description> | |
| 5 <macros> | |
| 6 <import>macros.xml</import> | |
| 7 </macros> | |
| 8 <expand macro="biotools"/> | |
| 9 <expand macro="requirements"/> | |
| 10 <expand macro="version"/> | |
| 11 <command detect_errors="exit_code"><![CDATA[ | |
| 12 #import re | |
| 13 mkdir -p 'output/storage/' && | |
| 14 cp '$checkm_hmm_info' 'output/storage/checkm_hmm_info.pkl.gz' && | |
| 15 cp '$bin_stats_analyze' 'output/storage/bin_stats.analyze.tsv' && | |
| 16 #for $i in $hmmer_analyze | |
| 17 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($i.element_identifier)) | |
| 18 mkdir -p 'output/bins/${identifier}' && | |
| 19 cp '$i' 'output/bins/${identifier}/hmmer.analyze.txt' && | |
| 20 #end for | |
| 21 #if $output.out_format == '9' | |
| 22 #for $i in $output.genes_faa | |
| 23 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($i.element_identifier)) | |
| 24 mkdir -p 'output/bins/${identifier}' && | |
| 25 cp '$i' 'output/bins/${identifier}/genes.faa' && | |
| 26 #end for | |
| 27 #end if | |
| 28 | |
| 29 checkm qa | |
| 30 '$marker_file' | |
| 31 'output' | |
| 32 --out_format $output.out_format | |
| 33 --tab_table | |
| 34 --file 'output_file' | |
| 35 #if $exclude_markers | |
| 36 --exclude_markers '$exclude_markers' | |
| 37 #end if | |
| 38 $individual_markers | |
| 39 $skip_adj_correction | |
| 40 $skip_pseudogene_correction | |
| 41 --aai_strain $aai_strain | |
| 42 $ignore_thresholds | |
| 43 --e_value $e_value | |
| 44 --length $length | |
| 45 #if $coverage | |
| 46 --coverage_file '$coverage' | |
| 47 #end if | |
| 48 --threads \${GALAXY_SLOTS:-1} | |
| 49 ]]></command> | |
| 50 <inputs> | |
| 51 <expand macro="marker_file" /> | |
| 52 <param name="checkm_hmm_info" type="data" format="zip" label="Marker gene HMM info for each bin" help="Output of the CheckM analyze tool" /> | |
| 53 <param name="bin_stats_analyze" type="data" format="tabular" label="Marker gene bin stats" help="Output of the CheckM analyze tool" /> | |
| 54 <param name="hmmer_analyze" type="data_collection" collection_type="list" format="txt" label="Marker gene HMM hits to each bin" help="Output of the CheckM analyze tool" /> | |
| 55 <conditional name="output"> | |
| 56 <param argument="--out_format" type="select" label="Desired output"> | |
| 57 <option value="1">Summary of bin completeness and contamination</option> | |
| 58 <option value="2">Extended summary of bin statistics (includes GC, genome size, ...)</option> | |
| 59 <option value="3">Summary of bin quality for increasingly basal lineage-specific marker sets</option> | |
| 60 <option value="4">List of marker genes and their counts</option> | |
| 61 <option value="5">List of bin id, marker gene id, gene id</option> | |
| 62 <option value="6">List of marker genes present multiple times in a bin</option> | |
| 63 <option value="7">List of marker genes present multiple times on the same scaffold</option> | |
| 64 <option value="8">List indicating position of each marker gene within a bin</option> | |
| 65 <option value="9">Marker genes identified in each bin and their sequence</option> | |
| 66 </param> | |
| 67 <when value="1"/> | |
| 68 <when value="2"/> | |
| 69 <when value="3"/> | |
| 70 <when value="4"/> | |
| 71 <when value="5"/> | |
| 72 <when value="6"/> | |
| 73 <when value="7"/> | |
| 74 <when value="8"/> | |
| 75 <when value="9"> | |
| 76 <param name="genes_faa" type="data_collection" collection_type="list" label="Protein (amino acid) gene sequences for each bin"/> | |
| 77 </when> | |
| 78 </conditional> | |
| 79 <param argument="exclude_markers" type="data" format="txt" optional="true" label="Markers to exclude from marker sets" /> | |
| 80 <expand macro="qa_params" /> | |
| 81 <param argument="coverage" type="data" format="txt" optional="true" label="Coverage of each sequence" help="Generated by the coverage command" /> | |
| 82 <param name="extra_outputs" type="select" multiple="true" optional="true" label="Extra outputs"> | |
| 83 <expand macro="qa_extra_output_options" /> | |
| 84 </param> | |
| 85 </inputs> | |
| 86 <outputs> | |
| 87 <data name="output_f1" format="tabular" from_work_dir="output_file" label="${tool.name} on ${on_string}: Summary of bin completeness and contamination"> | |
| 88 <filter>output['out_format']=="1"</filter> | |
| 89 </data> | |
| 90 <data name="output_f2" format="tabular" from_work_dir="output_file" label="${tool.name} on ${on_string}: Extended summary of bin statistics"> | |
| 91 <filter>output['out_format']=="2"</filter> | |
| 92 </data> | |
| 93 <data name="output_f3" format="tabular" from_work_dir="output_file" label="${tool.name} on ${on_string}: Summary of bin quality for increasingly basal lineage-specific marker sets"> | |
| 94 <filter>output['out_format']=="3"</filter> | |
| 95 </data> | |
| 96 <data name="output_f4" format="tabular" from_work_dir="output_file" label="${tool.name} on ${on_string}: Marker genes and their counts"> | |
| 97 <filter>output['out_format']=="4"</filter> | |
| 98 </data> | |
| 99 <data name="output_f5" format="tabular" from_work_dir="output_file" label="${tool.name} on ${on_string}: Bin id, marker gene id, gene id"> | |
| 100 <filter>output['out_format']=="5"</filter> | |
| 101 </data> | |
| 102 <data name="output_f6" format="tabular" from_work_dir="output_file" label="${tool.name} on ${on_string}: Marker genes present multiple times in a bin"> | |
| 103 <filter>output['out_format']=="6"</filter> | |
| 104 </data> | |
| 105 <data name="output_f7" format="tabular" from_work_dir="output_file" label="${tool.name} on ${on_string}: Marker genes present multiple times on the same scaffold"> | |
| 106 <filter>output['out_format']=="7"</filter> | |
| 107 </data> | |
| 108 <data name="output_f8" format="tabular" from_work_dir="output_file" label="${tool.name} on ${on_string}: Indicating position of each marker gene within a bin"> | |
| 109 <filter>output['out_format']=="8"</filter> | |
| 110 </data> | |
| 111 <data name="output_f9" format="tabular" from_work_dir="output_file" label="${tool.name} on ${on_string}: Marker genes identified in each bin and their sequence"> | |
| 112 <filter>output['out_format']=="9"</filter> | |
| 113 </data> | |
| 114 <data name="bin_stats_ext" format="tabular" from_work_dir="output/storage/bin_stats_ext.tsv" label="${tool.name} on ${on_string}: Marker gene bin extensive stats"/> | |
| 115 <expand macro="qa_extra_outputs" /> | |
| 116 </outputs> | |
| 117 <tests> | |
| 118 <test expect_num_outputs="3"> | |
| 119 <param name="marker_file" ftype="tabular" value="lineage_marker_set"/> | |
| 120 <param name="hmmer_analyze"> | |
| 121 <collection type="list"> | |
| 122 <element name="637000110" ftype="txt" value="hmmer.analyze.txt"/> | |
| 123 </collection> | |
| 124 </param> | |
| 125 <param name="bin_stats_analyze" ftype="tabular" value="bin_stats.analyze.tsv"/> | |
| 126 <param name="checkm_hmm_info" ftype="zip" value="checkm_hmm_info.pkl.gz"/> | |
| 127 <conditional name="output"> | |
| 128 <param name="out_format" value="1"/> | |
| 129 </conditional> | |
| 130 <param name="individual_markers" value="false"/> | |
| 131 <param name="skip_adj_correction" value="false"/> | |
| 132 <param name="skip_pseudogene_correction" value="false"/> | |
| 133 <param name="aai_strain" value="0.9"/> | |
| 134 <param name="ignore_thresholds" value="false"/> | |
| 135 <param name="e_value" value="1e-10"/> | |
| 136 <param name="length" value="0.7"/> | |
| 137 <param name="extra_outputs" value="marker_gene_stats"/> | |
| 138 <output name="output_f1" ftype="tabular"> | |
| 139 <assert_contents> | |
| 140 <has_text text="Marker lineage"/> | |
| 141 <has_text text="637000110"/> | |
| 142 <has_text text="f__Enterobacteriaceae"/> | |
| 143 </assert_contents> | |
| 144 </output> | |
| 145 <output name="bin_stats_ext" ftype="tabular"> | |
| 146 <assert_contents> | |
| 147 <has_text text="637000110"/> | |
| 148 <has_text text="marker lineage"/> | |
| 149 <has_text text="GCN0"/> | |
| 150 <has_text text="Longest contig"/> | |
| 151 </assert_contents> | |
| 152 </output> | |
| 153 <!--<output name="alignment_file" ftype="tabular"> | |
| 154 <assert_contents> | |
| 155 <has_text text="637000110"/> | |
| 156 <has_text text="Lineage Marker File"/> | |
| 157 <has_text text="UID5139"/> | |
| 158 </assert_contents> | |
| 159 </output>--> | |
| 160 <output name="marker_gene_stats" ftype="tabular"> | |
| 161 <assert_contents> | |
| 162 <has_text text="637000110"/> | |
| 163 <has_text text="AC_000091_751"/> | |
| 164 <has_text text="TIGR02432"/> | |
| 165 </assert_contents> | |
| 166 </output> | |
| 167 </test> | |
| 168 <test expect_num_outputs="2"> | |
| 169 <param name="marker_file" ftype="tabular" value="lineage_marker_set"/> | |
| 170 <param name="hmmer_analyze"> | |
| 171 <collection type="list"> | |
| 172 <element name="637000110" ftype="txt" value="hmmer.analyze.txt"/> | |
| 173 </collection> | |
| 174 </param> | |
| 175 <param name="bin_stats_analyze" ftype="tabular" value="bin_stats.analyze.tsv"/> | |
| 176 <param name="checkm_hmm_info" ftype="zip" value="checkm_hmm_info.pkl.gz"/> | |
| 177 <conditional name="output"> | |
| 178 <param name="out_format" value="2"/> | |
| 179 </conditional> | |
| 180 <param name="individual_markers" value="false"/> | |
| 181 <param name="skip_adj_correction" value="false"/> | |
| 182 <param name="skip_pseudogene_correction" value="false"/> | |
| 183 <param name="aai_strain" value="0.9"/> | |
| 184 <param name="ignore_thresholds" value="false"/> | |
| 185 <param name="e_value" value="1e-10"/> | |
| 186 <param name="length" value="0.7"/> | |
| 187 <param name="extra_outputs" value=""/> | |
| 188 <output name="output_f2" ftype="tabular"> | |
| 189 <assert_contents> | |
| 190 <has_text text="Marker lineage"/> | |
| 191 <has_text text="Mean scaffold length"/> | |
| 192 <has_text text="f__Enterobacteriaceae"/> | |
| 193 </assert_contents> | |
| 194 </output> | |
| 195 <output name="bin_stats_ext" ftype="tabular"> | |
| 196 <assert_contents> | |
| 197 <has_text text="637000110"/> | |
| 198 <has_text text="marker lineage"/> | |
| 199 <has_text text="GCN0"/> | |
| 200 <has_text text="Longest contig"/> | |
| 201 </assert_contents> | |
| 202 </output> | |
| 203 </test> | |
| 204 <test expect_num_outputs="2"> | |
| 205 <param name="marker_file" ftype="tabular" value="lineage_marker_set"/> | |
| 206 <param name="hmmer_analyze"> | |
| 207 <collection type="list"> | |
| 208 <element name="637000110" ftype="txt" value="hmmer.analyze.txt"/> | |
| 209 </collection> | |
| 210 </param> | |
| 211 <param name="bin_stats_analyze" ftype="tabular" value="bin_stats.analyze.tsv"/> | |
| 212 <param name="checkm_hmm_info" ftype="zip" value="checkm_hmm_info.pkl.gz"/> | |
| 213 <conditional name="output"> | |
| 214 <param name="out_format" value="3"/> | |
| 215 </conditional> | |
| 216 <param name="individual_markers" value="false"/> | |
| 217 <param name="skip_adj_correction" value="false"/> | |
| 218 <param name="skip_pseudogene_correction" value="false"/> | |
| 219 <param name="aai_strain" value="0.9"/> | |
| 220 <param name="ignore_thresholds" value="false"/> | |
| 221 <param name="e_value" value="1e-10"/> | |
| 222 <param name="length" value="0.7"/> | |
| 223 <param name="extra_outputs" value=""/> | |
| 224 <output name="output_f3" ftype="tabular"> | |
| 225 <assert_contents> | |
| 226 <has_text text="637000110"/> | |
| 227 <has_text text="Strain heterogeneity"/> | |
| 228 <has_text text="UID5139"/> | |
| 229 <has_text text="p__Proteobacteria"/> | |
| 230 </assert_contents> | |
| 231 </output> | |
| 232 <output name="bin_stats_ext" ftype="tabular"> | |
| 233 <assert_contents> | |
| 234 <has_text text="637000110"/> | |
| 235 <has_text text="marker lineage"/> | |
| 236 <has_text text="GCN0"/> | |
| 237 <has_text text="Longest contig"/> | |
| 238 </assert_contents> | |
| 239 </output> | |
| 240 </test> | |
| 241 <test expect_num_outputs="2"> | |
| 242 <param name="marker_file" ftype="tabular" value="lineage_marker_set"/> | |
| 243 <param name="hmmer_analyze"> | |
| 244 <collection type="list"> | |
| 245 <element name="637000110" ftype="txt" value="hmmer.analyze.txt"/> | |
| 246 </collection> | |
| 247 </param> | |
| 248 <param name="bin_stats_analyze" ftype="tabular" value="bin_stats.analyze.tsv"/> | |
| 249 <param name="checkm_hmm_info" ftype="zip" value="checkm_hmm_info.pkl.gz"/> | |
| 250 <conditional name="output"> | |
| 251 <param name="out_format" value="4"/> | |
| 252 </conditional> | |
| 253 <param name="individual_markers" value="false"/> | |
| 254 <param name="skip_adj_correction" value="false"/> | |
| 255 <param name="skip_pseudogene_correction" value="false"/> | |
| 256 <param name="aai_strain" value="0.9"/> | |
| 257 <param name="ignore_thresholds" value="false"/> | |
| 258 <param name="e_value" value="1e-10"/> | |
| 259 <param name="length" value="0.7"/> | |
| 260 <param name="extra_outputs" value=""/> | |
| 261 <output name="output_f4" ftype="tabular"> | |
| 262 <assert_contents> | |
| 263 <has_text text="637000110"/> | |
| 264 <has_text text="Node Id: UID5103; Marker lineage: f__Enterobacteriaceae"/> | |
| 265 <has_text text="PF02542.1"/> | |
| 266 </assert_contents> | |
| 267 </output> | |
| 268 <output name="bin_stats_ext" ftype="tabular"> | |
| 269 <assert_contents> | |
| 270 <has_text text="637000110"/> | |
| 271 <has_text text="marker lineage"/> | |
| 272 <has_text text="GCN0"/> | |
| 273 <has_text text="Longest contig"/> | |
| 274 </assert_contents> | |
| 275 </output> | |
| 276 </test> | |
| 277 <test expect_num_outputs="2"> | |
| 278 <param name="marker_file" ftype="tabular" value="lineage_marker_set"/> | |
| 279 <param name="hmmer_analyze"> | |
| 280 <collection type="list"> | |
| 281 <element name="637000110" ftype="txt" value="hmmer.analyze.txt"/> | |
| 282 </collection> | |
| 283 </param> | |
| 284 <param name="bin_stats_analyze" ftype="tabular" value="bin_stats.analyze.tsv"/> | |
| 285 <param name="checkm_hmm_info" ftype="zip" value="checkm_hmm_info.pkl.gz"/> | |
| 286 <conditional name="output"> | |
| 287 <param name="out_format" value="5"/> | |
| 288 </conditional> | |
| 289 <param name="individual_markers" value="false"/> | |
| 290 <param name="skip_adj_correction" value="false"/> | |
| 291 <param name="skip_pseudogene_correction" value="false"/> | |
| 292 <param name="aai_strain" value="0.9"/> | |
| 293 <param name="ignore_thresholds" value="false"/> | |
| 294 <param name="e_value" value="1e-10"/> | |
| 295 <param name="length" value="0.7"/> | |
| 296 <param name="extra_outputs" value=""/> | |
| 297 <output name="output_f5" ftype="tabular"> | |
| 298 <assert_contents> | |
| 299 <has_text text="637000110"/> | |
| 300 <has_text text="TIGR02432"/> | |
| 301 <has_text text="AC_000091_165"/> | |
| 302 </assert_contents> | |
| 303 </output> | |
| 304 <output name="bin_stats_ext" ftype="tabular"> | |
| 305 <assert_contents> | |
| 306 <has_text text="637000110"/> | |
| 307 <has_text text="marker lineage"/> | |
| 308 <has_text text="GCN0"/> | |
| 309 <has_text text="Longest contig"/> | |
| 310 </assert_contents> | |
| 311 </output> | |
| 312 </test> | |
| 313 <test expect_num_outputs="2"> | |
| 314 <param name="marker_file" ftype="tabular" value="lineage_marker_set"/> | |
| 315 <param name="hmmer_analyze"> | |
| 316 <collection type="list"> | |
| 317 <element name="637000110" ftype="txt" value="hmmer.analyze.txt"/> | |
| 318 </collection> | |
| 319 </param> | |
| 320 <param name="bin_stats_analyze" ftype="tabular" value="bin_stats.analyze.tsv"/> | |
| 321 <param name="checkm_hmm_info" ftype="zip" value="checkm_hmm_info.pkl.gz"/> | |
| 322 <conditional name="output"> | |
| 323 <param name="out_format" value="6"/> | |
| 324 </conditional> | |
| 325 <param name="individual_markers" value="false"/> | |
| 326 <param name="skip_adj_correction" value="false"/> | |
| 327 <param name="skip_pseudogene_correction" value="false"/> | |
| 328 <param name="aai_strain" value="0.9"/> | |
| 329 <param name="ignore_thresholds" value="false"/> | |
| 330 <param name="e_value" value="1e-10"/> | |
| 331 <param name="length" value="0.7"/> | |
| 332 <param name="extra_outputs" value=""/> | |
| 333 <output name="output_f6" ftype="tabular"> | |
| 334 <assert_contents> | |
| 335 <has_text text="Marker Id"/> | |
| 336 <has_text text="No marker genes satisfied"/> | |
| 337 </assert_contents> | |
| 338 </output> | |
| 339 <output name="bin_stats_ext" ftype="tabular"> | |
| 340 <assert_contents> | |
| 341 <has_text text="637000110"/> | |
| 342 <has_text text="marker lineage"/> | |
| 343 <has_text text="GCN0"/> | |
| 344 <has_text text="Longest contig"/> | |
| 345 </assert_contents> | |
| 346 </output> | |
| 347 </test> | |
| 348 <test expect_num_outputs="2"> | |
| 349 <param name="marker_file" ftype="tabular" value="lineage_marker_set"/> | |
| 350 <param name="hmmer_analyze"> | |
| 351 <collection type="list"> | |
| 352 <element name="637000110" ftype="txt" value="hmmer.analyze.txt"/> | |
| 353 </collection> | |
| 354 </param> | |
| 355 <param name="bin_stats_analyze" ftype="tabular" value="bin_stats.analyze.tsv"/> | |
| 356 <param name="checkm_hmm_info" ftype="zip" value="checkm_hmm_info.pkl.gz"/> | |
| 357 <conditional name="output"> | |
| 358 <param name="out_format" value="7"/> | |
| 359 </conditional> | |
| 360 <param name="individual_markers" value="false"/> | |
| 361 <param name="skip_adj_correction" value="false"/> | |
| 362 <param name="skip_pseudogene_correction" value="false"/> | |
| 363 <param name="aai_strain" value="0.9"/> | |
| 364 <param name="ignore_thresholds" value="false"/> | |
| 365 <param name="e_value" value="1e-10"/> | |
| 366 <param name="length" value="0.7"/> | |
| 367 <param name="extra_outputs" value=""/> | |
| 368 <output name="output_f7" ftype="tabular"> | |
| 369 <assert_contents> | |
| 370 <has_text text="Marker Id"/> | |
| 371 <has_text text="No marker genes satisfied"/> | |
| 372 </assert_contents> | |
| 373 </output> | |
| 374 <output name="bin_stats_ext" ftype="tabular"> | |
| 375 <assert_contents> | |
| 376 <has_text text="637000110"/> | |
| 377 <has_text text="marker lineage"/> | |
| 378 <has_text text="GCN0"/> | |
| 379 <has_text text="Longest contig"/> | |
| 380 </assert_contents> | |
| 381 </output> | |
| 382 </test> | |
| 383 <test expect_num_outputs="2"> | |
| 384 <param name="marker_file" ftype="tabular" value="lineage_marker_set"/> | |
| 385 <param name="hmmer_analyze"> | |
| 386 <collection type="list"> | |
| 387 <element name="637000110" ftype="txt" value="hmmer.analyze.txt"/> | |
| 388 </collection> | |
| 389 </param> | |
| 390 <param name="bin_stats_analyze" ftype="tabular" value="bin_stats.analyze.tsv"/> | |
| 391 <param name="checkm_hmm_info" ftype="zip" value="checkm_hmm_info.pkl.gz"/> | |
| 392 <conditional name="output"> | |
| 393 <param name="out_format" value="8"/> | |
| 394 </conditional> | |
| 395 <param name="individual_markers" value="false"/> | |
| 396 <param name="skip_adj_correction" value="false"/> | |
| 397 <param name="skip_pseudogene_correction" value="false"/> | |
| 398 <param name="aai_strain" value="0.9"/> | |
| 399 <param name="ignore_thresholds" value="false"/> | |
| 400 <param name="e_value" value="1e-10"/> | |
| 401 <param name="length" value="0.7"/> | |
| 402 <param name="extra_outputs" value=""/> | |
| 403 <output name="output_f8" ftype="tabular"> | |
| 404 <assert_contents> | |
| 405 <has_text text="637000110"/> | |
| 406 <has_text text="AC_000091_183"/> | |
| 407 <has_text text="TIGR02075,9,240"/> | |
| 408 </assert_contents> | |
| 409 </output> | |
| 410 <output name="bin_stats_ext" ftype="tabular"> | |
| 411 <assert_contents> | |
| 412 <has_text text="637000110"/> | |
| 413 <has_text text="marker lineage"/> | |
| 414 <has_text text="GCN0"/> | |
| 415 <has_text text="Longest contig"/> | |
| 416 </assert_contents> | |
| 417 </output> | |
| 418 </test> | |
| 419 <test expect_num_outputs="2"> | |
| 420 <param name="marker_file" ftype="tabular" value="lineage_marker_set"/> | |
| 421 <param name="hmmer_analyze"> | |
| 422 <collection type="list"> | |
| 423 <element name="637000110" ftype="txt" value="hmmer.analyze.txt"/> | |
| 424 </collection> | |
| 425 </param> | |
| 426 <param name="bin_stats_analyze" ftype="tabular" value="bin_stats.analyze.tsv"/> | |
| 427 <param name="checkm_hmm_info" ftype="zip" value="checkm_hmm_info.pkl.gz"/> | |
| 428 <conditional name="output"> | |
| 429 <param name="out_format" value="9"/> | |
| 430 <param name="genes_faa"> | |
| 431 <collection type="list"> | |
| 432 <element name="637000110" ftype="fasta" value="637000110.faa"/> | |
| 433 </collection> | |
| 434 </param> | |
| 435 </conditional> | |
| 436 <param name="exclude_markers" ftype="txt" value="markers_to_exclude" /> | |
| 437 <param name="individual_markers" value="false"/> | |
| 438 <param name="skip_adj_correction" value="false"/> | |
| 439 <param name="skip_pseudogene_correction" value="false"/> | |
| 440 <param name="aai_strain" value="0.9"/> | |
| 441 <param name="ignore_thresholds" value="false"/> | |
| 442 <param name="e_value" value="1e-10"/> | |
| 443 <param name="length" value="0.7"/> | |
| 444 <param name="extra_outputs" value=""/> | |
| 445 <output name="output_f9" ftype="tabular"> | |
| 446 <assert_contents> | |
| 447 <has_text text="637000110"/> | |
| 448 <has_text text="Sequence"/> | |
| 449 <has_text text="PF06574.7"/> | |
| 450 <has_text text="MKLIRGI"/> | |
| 451 </assert_contents> | |
| 452 </output> | |
| 453 <output name="bin_stats_ext" ftype="tabular"> | |
| 454 <assert_contents> | |
| 455 <has_text text="637000110"/> | |
| 456 <has_text text="marker lineage"/> | |
| 457 <has_text text="GCN0"/> | |
| 458 <has_text text="Longest contig"/> | |
| 459 </assert_contents> | |
| 460 </output> | |
| 461 </test> | |
| 462 </tests> | |
| 463 <help><![CDATA[ | |
| 464 @HELP_HEADER@ | |
| 465 | |
| 466 This command identifies marker genes in bins and calculates genome statistics | |
| 467 | |
| 468 Adjacent called genes matching the same marker gene may indicate a true duplication event, a gene calling error, or an assembly error. If adjacent genes hit distinct regions of the same marker gene HMM, CheckM assumes a gene calling error has occurred and concatenates the two genes. When this occurs, CheckM concatenates the gene ids of the two genes with a pair of ampersands (&&). | |
| 469 | |
| 470 Outputs | |
| 471 ======= | |
| 472 The output depends on the selected output format: | |
| 473 | |
| 474 1. Summary of bin completeness, contamination, and strain heterogeneity | |
| 475 Bin Id: bin identifier derived from input FASTA file | |
| 476 Marker lineage: indicates lineage used for inferring marker set (a precise indication of where a bin was placed in CheckM's reference tree can be obtained with the tree_qa command) | |
| 477 No. genomes: number of reference genomes used to infer marker set | |
| 478 No. markers: number of inferred marker genes | |
| 479 No. marker sets: number of inferred co-located marker sets | |
| 480 0-5+: number of times each marker gene is identified | |
| 481 Completeness: estimated completeness | |
| 482 Contamination: estimated contamination | |
| 483 Strain heterogeneity: estimated strain heterogeneity | |
| 484 2. Extended summary of bin quality (includes GC, genome size, coding density, ...) | |
| 485 3. Summary of bin quality for increasingly basal lineage-specific marker sets | |
| 486 Node Id: unique id of internal node in genome tree from which lineage-specific markers were inferred | |
| 487 4. List of marker genes for each bin along with the number of times each marker was identified | |
| 488 Node Id: unique id of internal node in genome tree from which lineage-specific markers were inferred | |
| 489 Marker lineage: indicates lineage used for inferring marker set | |
| 490 Useful for identifying lineage-specific gene loss or duplication | |
| 491 5. List of bin id, marker gene id, and called gene id for each identified marker gene | |
| 492 6. List of marker genes present multiple times in a bin | |
| 493 7. List of marker genes present multiple times on the same scaffold | |
| 494 Useful for identifying true gene duplication events, gene calling errors, or assembly errors. See the note on adjacent called genes above. | |
| 495 8. List indicating the position of each marker gene within a bin | |
| 496 9. Marker genes identified in each bin and their sequence | |
| 497 | |
| 498 ]]></help> | |
| 499 <expand macro="citations"/> | |
| 500 </tool> |
