Mercurial > repos > iuc > checkm_tree

<?xml version="1.0"?>
<macros>
    <!-- CheckM version; also substituted into the checkm-genome package requirement in the "requirements" macro. -->
    <token name="@TOOL_VERSION@">1.2.4</token>
    <!-- Wrapper revision suffix; presumably appended to the tool version by the tools importing this file (confirm against the tool XML). -->
    <token name="@VERSION_SUFFIX@">1</token>
    <!-- Presumably the Galaxy profile targeted by the tools importing this file (confirm against the tool XML). -->
    <token name="@PROFILE@">21.01</token>
    <!-- Cross-reference linking the tools to the "checkm" entry on bio.tools. -->
    <xml name="biotools">
        <xrefs>
            <xref type="bio.tools">checkm</xref>
        </xrefs>
    </xml>
    <!-- Package requirements: checkm-genome pinned to @TOOL_VERSION@.
         The yield element lets an expanding tool append its own requirement entries. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">checkm-genome</requirement>
            <yield/>
        </requirements>
    </xml>
    <!-- Bin selection: bins are supplied either as one list collection ("bins_coll")
         or as several individual FASTA datasets ("bins_ind"). The @BIN_INPUTS@ token
         reads the same parameter names. -->
    <xml name="bin_inputs">
        <conditional name="bins">
            <param name="select"
                   type="select"
                   label="Data structure for bins">
                <option value="collection">In collection</option>
                <option value="individual">In individual datasets</option>
            </param>
            <when value="collection">
                <param name="bins_coll"
                       type="data_collection"
                       collection_type="list"
                       format="fasta"
                       label="Bins"/>
            </when>
            <when value="individual">
                <param name="bins_ind"
                       type="data"
                       format="fasta"
                       multiple="true"
                       label="Bins"/>
            </when>
        </conditional>
    </xml>
    <!-- Command fragment: creates a "bins" directory and symlinks every selected bin into it
         as "<identifier>.fasta". Characters of the element identifier other than whitespace,
         word characters, "-" and "." are replaced by "_". The fragment ends with a trailing
         shell "and" operator, so the expanding command must continue after it. -->
    <token name="@BIN_INPUTS@"><![CDATA[
#import re
mkdir 'bins' &&
#if $bins.select == 'collection'
    #set $bin_files = $bins.bins_coll
#else
    #set $bin_files = $bins.bins_ind
#end if
#for $bin_file in $bin_files
    #set $safe_name = re.sub('[^\s\w\-\\.]', '_', str($bin_file.element_identifier))
ln -s '$bin_file.file_name' 'bins/${safe_name}.fasta' &&
#end for
]]></token>
    <!-- Boolean switch for the "ali" command-line flag (off by default). -->
    <xml name="ali">
        <param argument="--ali"
               type="boolean"
               truevalue="--ali"
               falsevalue=""
               checked="false"
               label="Generate HMMER alignment file for each bin?"/>
    </xml>
    <!-- Boolean switch for the "nt" command-line flag (off by default). -->
    <xml name="nt">
        <param argument="--nt"
               type="boolean"
               truevalue="--nt"
               falsevalue=""
               checked="false"
               label="Generate nucleotide gene sequences for each bin?"/>
    </xml>
    <!-- Boolean switch for the "genes" command-line flag (off by default). -->
    <xml name="genes">
        <param argument="--genes"
               type="boolean"
               truevalue="--genes"
               falsevalue=""
               checked="false"
               label="Bins contain genes as amino acids instead of nucleotide contigs?"/>
    </xml>
    <!-- Parameters of the tree step: the reduced-tree switch followed by the shared
         "ali", "nt" and "genes" switches. -->
    <xml name="tree_params">
        <param argument="--reduced_tree"
               type="boolean"
               truevalue="--reduced_tree"
               falsevalue=""
               checked="false"
               label="Use reduced tree for determining lineage of each bin?"
               help="This option requires less than 16GB of memory"/>
        <expand macro="ali"/>
        <expand macro="nt"/>
        <expand macro="genes"/>
    </xml>
    <!-- Select options for the optional outputs of the tree step. The option values are the
         strings tested against "extra_outputs" in the output filters of this file. -->
    <xml name="tree_extra_output_options">
        <option value="hmmer_tree_ali">Phylogenetic HMMER alignment file for each bin</option>
        <option value="concatenate_pplacer_json">Concatenated pplacer JSON</option>
        <option value="genes_fna">Nucleotide gene sequences for each bin (if generate nucleotide gene sequences)</option>
        <option value="genes_faa">Protein gene sequences for each bin</option>
        <option value="genes_gff">Gene feature files for each bin (if bins do not contain genes as amino acids instead of nucleotide contigs)</option>
    </xml>
    <!-- Parameters of the analyze step: the shared "ali", "nt" and "genes" switches. -->
    <xml name="analyze_params">
        <expand macro="ali" />
        <expand macro="nt" />
        <expand macro="genes" />
    </xml>
    <!-- Select option for the optional output of the analyze step. -->
    <xml name="analyze_extra_output_options">
        <option value="hmmer_analyze_ali">Marker HMMER alignment file for each bin</option>
    </xml>
    <!-- Inputs produced by the CheckM tree tool: the HMM model info, the bin statistics table
         and the per-bin HMM hits. The @HMM_MARKER_STATS_INPUTS@ token reads the same
         parameter names. -->
    <xml name="hmm_marker_stats_inputs">
        <param name="phylo_hmm_info"
               type="data"
               format="zip"
               label="Phylogenetic HMM model info for each bin"
               help="Output of the CheckM tree tool"/>
        <param name="bin_stats_tree"
               type="data"
               format="tabular"
               label="Phylogenetic bin stats"
               help="Output of the CheckM tree tool"/>
        <param name="hmmer_tree"
               type="data_collection"
               collection_type="list"
               format="txt"
               label="Phylogenetic HMM hits to each bin"
               help="Output of the CheckM tree tool"/>
    </xml>
    <!-- Command fragment: builds an "inputs" directory from the tree-step outputs.
         The model info and bin statistics are symlinked under inputs/storage/, and each
         element of the "hmmer_tree" collection is symlinked to
         inputs/bins/<identifier>/hmmer.tree.txt. Characters of the element identifier other
         than whitespace, word characters, "-" and "." are replaced by "_". The fragment ends
         with a trailing shell "and" operator, so the expanding command must continue after it. -->
    <token name="@HMM_MARKER_STATS_INPUTS@"><![CDATA[
#import re
mkdir -p 'inputs/storage/' &&
ln -s '$phylo_hmm_info' 'inputs/storage/phylo_hmm_info.pkl.gz' &&
ln -s '$bin_stats_tree' 'inputs/storage/bin_stats.tree.tsv' &&
#for $i in $hmmer_tree
    #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($i.element_identifier))
mkdir -p 'inputs/bins/${identifier}' &&
ln -s '$i' 'inputs/bins/${identifier}/hmmer.tree.txt' &&
#end for
]]></token>
    <!-- Parameters controlling the choice between lineage-specific and domain-level marker sets. -->
    <xml name="lineage_set_params">
        <param argument="--unique"
               type="integer"
               min="0"
               value="10"
               label="Minimum number of unique phylogenetic markers required to use lineage-specific marker set"/>
        <param argument="--multi"
               type="integer"
               min="0"
               value="10"
               label="Maximum number of multi-copy phylogenetic markers before defaulting to domain-level marker set "/>
        <param argument="--force_domain"
               type="boolean"
               truevalue="--force_domain"
               falsevalue=""
               checked="false"
               label="Use domain-level sets for all bins?"/>
        <param argument="--no_refinement"
               type="boolean"
               truevalue="--no_refinement"
               falsevalue=""
               checked="false"
               label="Do not perform lineage-specific marker set refinement?"/>
    </xml>
    <!-- Tabular marker file input, described in its help text as the output of the
         CheckM lineage_set or taxon_set tools. -->
    <xml name="marker_file">
        <param name="marker_file"
               type="data"
               format="tabular"
               label="Marker for assessing bins"
               help="Each marker to exclude should be listed on a separate line of the file. Output of the CheckM lineage_set or taxon_set tools"/>
    </xml>
    <!-- Tabular output dataset named "marker" holding the marker genes. -->
    <xml name="marker_output">
        <data name="marker" format="tabular" label="${tool.name} on ${on_string}: Marker genes"/>
    </xml>
    <!-- Parameters of the QA step: marker handling switches, the AAI threshold and the
         HMM hit cut-offs. The three float parameters are all restricted to the range 0 to 1. -->
    <xml name="qa_params">
        <param argument="--individual_markers"
               type="boolean"
               truevalue="--individual_markers"
               falsevalue=""
               checked="false"
               label="Treat marker as independent?"
               help="It will ignore co-located set structure"/>
        <param argument="--skip_adj_correction"
               type="boolean"
               truevalue="--skip_adj_correction"
               falsevalue=""
               checked="false"
               label="Do not exclude adjacent marker genes when estimating contamination?"/>
        <param argument="--skip_pseudogene_correction"
               type="boolean"
               truevalue="--skip_pseudogene_correction"
               falsevalue=""
               checked="false"
               label="Skip identification and filtering of pseudogenes?"/>
        <param argument="--aai_strain"
               type="float"
               min="0"
               max="1"
               value="0.9"
               label="AAI threshold used to identify strain heterogeneity"/>
        <param argument="--ignore_thresholds"
               type="boolean"
               truevalue="--ignore_thresholds"
               falsevalue=""
               checked="false"
               label="Ignore model-specific score thresholds?"/>
        <param argument="--e_value"
               type="float"
               min="0"
               max="1"
               value="1e-10"
               label="E-value cut off"/>
        <param argument="--length"
               type="float"
               min="0"
               max="1"
               value="0.7"
               label="Percent overlap between target and query"/>
    </xml>
    <!-- Select options for the optional outputs of the QA step. The alignment file option
         is currently disabled (commented out), matching the disabled output in "qa_extra_outputs". -->
    <xml name="qa_extra_output_options">
        <!--<option value="alignment_file">Alignment of multi-copy genes and their AAI identity</option>-->
        <option value="marker_gene_stats">Marker gene stats</option>
    </xml>
    <!-- Optional outputs of the QA step. The marker gene statistics table is collected from
         output/storage/marker_gene_stats.tsv and is only kept when "marker_gene_stats" was
         selected; the leading "extra_outputs and" guards against an empty selection. -->
    <xml name="qa_extra_outputs">
        <!--<data name="alignment_file" format="tabular" label="${tool.name} on ${on_string}: Alignment of multi-copy genes and their AAI identity" >
            <filter>'alignment_file' in extra_outputs</filter>
        </data>-->
        <data name="marker_gene_stats" format="tabular" from_work_dir="output/storage/marker_gene_stats.tsv"  label="${tool.name} on ${on_string}: Marker gene statistics">
            <filter>extra_outputs and 'marker_gene_stats' in extra_outputs</filter>
        </data>
    </xml>
    <!-- Taxonomic rank and taxon selection. The chosen rank decides which data table
         ("<rank>_taxons") populates the "taxon" select; every branch fails validation
         with "No taxon database is available" when its table yields no options.
         The "class" rank sits between phylum and order so that class-level taxa can be chosen.
         NOTE(review): the class branch assumes a "class_taxons" data table is configured
         alongside the tables of the other ranks; confirm against the data table configuration. -->
    <xml name="rank_taxon">
        <conditional name="rank_taxon">
            <param argument="--rank" type="select" label="Taxonomic rank">
                <option value="life">Life</option>
                <option value="domain">Domain</option>
                <option value="phylum">Phylum</option>
                <option value="class">Class</option>
                <option value="order">Order</option>
                <option value="family">Family</option>
                <option value="genus">Genus</option>
                <option value="species">Species</option>
            </param>
            <when value="life">
                <param name="taxon" type="select" label="Taxon of interest" >
                    <options from_data_table="life_taxons">
                        <validator message="No taxon database is available" type="no_options" />
                    </options>
                </param>
            </when>
            <when value="domain">
                <param name="taxon" type="select" label="Taxon of interest" >
                    <options from_data_table="domain_taxons">
                        <validator message="No taxon database is available" type="no_options" />
                    </options>
                </param>
            </when>
            <when value="phylum">
                <param name="taxon" type="select" label="Taxon of interest" >
                    <options from_data_table="phylum_taxons">
                        <validator message="No taxon database is available" type="no_options" />
                    </options>
                </param>
            </when>
            <when value="class">
                <param name="taxon" type="select" label="Taxon of interest" >
                    <options from_data_table="class_taxons">
                        <validator message="No taxon database is available" type="no_options" />
                    </options>
                </param>
            </when>
            <when value="order">
                <param name="taxon" type="select" label="Taxon of interest" >
                    <options from_data_table="order_taxons">
                        <validator message="No taxon database is available" type="no_options" />
                    </options>
                </param>
            </when>
            <when value="family">
                <param name="taxon" type="select" label="Taxon of interest" >
                    <options from_data_table="family_taxons">
                        <validator message="No taxon database is available" type="no_options" />
                    </options>
                </param>
            </when>
            <when value="genus">
                <param name="taxon" type="select" label="Taxon of interest" >
                    <options from_data_table="genus_taxons">
                        <validator message="No taxon database is available" type="no_options" />
                    </options>
                </param>
            </when>
            <when value="species">
                <param name="taxon" type="select" label="Taxon of interest" >
                    <options from_data_table="species_taxons">
                        <validator message="No taxon database is available" type="no_options" />
                    </options>
                </param>
            </when>
        </conditional>
    </xml>
    <!-- Version command: runs the bare "checkm" executable.
         NOTE(review): presumably the version is read from the banner it prints; confirm. -->
    <xml name="version">
        <version_command>checkm</version_command>
    </xml>
    <!-- Help text: general description of CheckM, opening with a "What it does" heading. -->
    <token name="@HELP_HEADER@"><![CDATA[
What it does
============

CheckM provides a set of tools for assessing the quality of genomes recovered from isolates, single cells, or metagenomes. It provides robust estimates of genome completeness and contamination by using collocated sets of genes that are ubiquitous and single-copy within a phylogenetic lineage. Assessment of genome quality can also be examined using plots depicting key genomic characteristics (e.g., GC, coding density) which highlight sequences outside the expected distributions of a typical genome. CheckM also provides tools for identifying genome bins that are likely candidates for merging based on marker set compatibility, similarity in genomic characteristics, and proximity within a reference genome tree.
]]></token>
    <!-- Help text: bullet entry for the bins input. -->
    <token name="@HELP_BINS@"><![CDATA[
- Bins
]]></token>
    <!-- Help text: bullet list of the outputs the tree step always produces. -->
    <token name="@HELP_TREE_DEFAULT_OUTPUTS@"><![CDATA[
- Phylogenetic HMM model info for each bin
- Phylogenetic bin stats
- Phylogenetic HMM hits to each bin
- Concatenated masked sequences
- Concatenated tree
]]></token>
    <!-- Help text: bullet list of the optional outputs of the tree step, in the same
         form as @HELP_TREE_DEFAULT_OUTPUTS@. The entries mirror the options of
         "tree_extra_output_options". -->
    <token name="@HELP_TREE_EXTRA_OUTPUTS@"><![CDATA[
- Phylogenetic HMMER alignment file for each bin
- Concatenated pplacer JSON
- Nucleotide gene sequences for each bin
- Protein gene sequences for each bin
- Gene feature files for each bin
]]></token>
    <!-- Optional outputs of the tree step. These definitions used to sit inside the help
         token above, where they were only inert text; as a macro they can be expanded
         inside an outputs section. Each output is kept only when its value was selected
         in "extra_outputs"; the leading "extra_outputs and" guards against an empty
         selection, as in "qa_extra_outputs". Per-bin files are discovered under
         output/bins/<bin>/ and named after the bin directory.
         genes.fna holds nucleotide sequences and genes.faa holds protein sequences,
         matching the option labels in "tree_extra_output_options". -->
    <xml name="tree_extra_outputs">
        <collection name="hmmer_tree_ali" type="list" label="${tool.name} on ${on_string}: Phylogenetic HMMER alignment file for each bin">
            <filter>ali and extra_outputs and 'hmmer_tree_ali' in extra_outputs</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)/hmmer\.tree\.ali\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/>
        </collection>
        <data name="concatenated_pplacer_json" format="json" from_work_dir="output/storage/tree/concatenated.pplacer.json" label="${tool.name} on ${on_string}: Concatenated pplacer JSON">
            <filter>extra_outputs and 'concatenate_pplacer_json' in extra_outputs</filter>
        </data>
        <collection name="genes_fna" type="list" label="${tool.name} on ${on_string}: Nucleotide gene sequences for each bin">
            <filter>not genes and nt and extra_outputs and 'genes_fna' in extra_outputs</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)/genes\.fna" format="fasta" directory="output/bins/" recurse="true" match_relative_path="true"/>
        </collection>
        <collection name="genes_faa" type="list" label="${tool.name} on ${on_string}: Protein gene sequences for each bin">
            <filter>extra_outputs and 'genes_faa' in extra_outputs</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)/genes\.faa" format="fasta" directory="output/bins/" recurse="true" match_relative_path="true"/>
        </collection>
        <collection name="genes_gff" type="list" label="${tool.name} on ${on_string}: Gene feature files for each bin">
            <filter>not genes and extra_outputs and 'genes_gff' in extra_outputs</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)/genes\.gff" format="gff" directory="output/bins/" recurse="true" match_relative_path="true"/>
        </collection>
    </xml>
    <!-- Citation for the tools, given as a single DOI. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1101/gr.186072.114</citation>
        </citations>
    </xml>
</macros>
author	iuc
date	Tue, 17 Jun 2025 12:55:09 +0000
parents	c0e0ddb22785
children