view cluster_butina.xml @ 1:b70d1bcb0680 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/im-pipelines commit d1d0ec4ebc97b2274b18a0aab99f41addd5357ae"
author bgruening
date Mon, 27 Jul 2020 15:26:33 +0000
parents 4025aba6e864
children
line wrap: on
line source

<tool id="ctb_im_cluster_butina" name="Butina Cluster" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
    <description>using RDKit</description>
    <macros>
        <!-- Shared definitions. The requirements, outputs and citations macros expanded in this
             tool, and the @TOOL_VERSION@ token used in the tool version, are not defined in
             this file; presumably they come from this import (TODO confirm in macros.xml). -->
        <import>macros.xml</import>
        <!-- Wrapper revision; substituted after "+galaxy" in the tool's version attribute. -->
        <token name="@GALAXY_VERSION@">0</token>
    </macros>
    <expand macro="requirements" />
    <!--
        Command template (Cheetah). Runs cluster_butina on the SDF input, appends its
        stdout and stderr to the log dataset, appends outp_metrics.txt to the same log,
        and finally decompresses outp.sdf.gz in the working directory.
        NOTE(review): $logfile is not declared in this file; it is presumably an output
        defined by the "outputs" macro. Confirm in macros.xml.
    -->
    <command detect_errors="exit_code"><![CDATA[
        cluster_butina
            -i '$infile'
            -if sdf
            -t '$threshold'
            -d '$descriptor_opts'
            -m '$metric_opts'
            ## Optional integer: compare its string form so that an unset value is
            ## detected without relying on wrapper-to-string comparison semantics.
            #if str($number) != '':
                -n '$number'
            #end if
            -e '$exclude'
            #if $fragment_opts:
                --fragment-method '$fragment_opts'
            #end if
            ## The field flag and its min/max selector are only passed when the
            ## user enabled "Use field" and actually typed a field name.
            #if $field_conditional.use_field == 't':
                #if $field_conditional.field:
                    -f '$field_conditional.field'
                    $field_conditional.field_opts
                #end if
            #end if
            $output_fragment
            --meta
            -o outp
            -of sdf >> '$logfile' 2>&1 &&
        cat outp_metrics.txt >> '$logfile' 2>&1 &&
        gzip -d outp.sdf.gz
    ]]></command>
    <inputs>
        <!-- Molecules to cluster. -->
        <param name="infile" type="data" format="sdf" label="Input file" help="Input file in SDF format"/>

        <!-- Clustering threshold, constrained to the range 0 to 1. -->
        <param name="threshold"
               type="float"
               value="0.7"
               min="0"
               max="1"
               label="Threshold"
               help="Similarity clustering threshold (1.0 means identical)"/>

        <!-- Fingerprint / descriptor used to compare molecules. -->
        <param name="descriptor_opts"
               type="select"
               label="Descriptor Options"
               help="Descriptor or fingerprint type (default rdkit)">
            <option value="rdkit" selected="true">rdkit</option>
            <option value="maccs">maccs</option>
            <option value="morgan2">morgan2</option>
            <option value="morgan3">morgan3</option>
        </param>

        <!-- Similarity metric applied to the chosen descriptor. -->
        <param name="metric_opts"
               type="select"
               label="Similarity metric"
               help="Default is the Tanimoto coefficient">
            <option value="tanimoto" selected="true">tanimoto</option>
            <option value="asymmetric">asymmetric</option>
            <option value="braunblanquet">braunblanquet</option>
            <option value="cosine">cosine</option>
            <option value="dice">dice</option>
            <option value="kulczynski">kulczynski</option>
            <option value="mcconnaughey">mcconnaughey</option>
            <option value="rogotgoldberg">rogotgoldberg</option>
            <option value="russel">russel</option>
            <option value="sokal">sokal</option>
        </param>

        <!-- Diverse subset selection: optional cap on picks, and the exclusion threshold. -->
        <param name="number"
               type="integer"
               optional="true"
               label="Maximum number for diverse subset selection"
               help="Maximum number to pick for diverse subset selection. Leave blank for no maximum."/>
        <param name="exclude"
               type="float"
               value="0.9"
               min="0"
               max="1"
               label="Exclude threshold"
               help="Threshold for excluding structures in diverse subset selection (1.0 means identical)"/>

        <!-- Fragment handling for inputs made of more than one fragment. -->
        <param name="fragment_opts"
               type="select"
               optional="true"
               label="Find single fragment"
               help="Find single fragment if more than one">
            <option value="hac" selected="true">Biggest by heavy atom count</option>
            <option value="mw">Biggest by molecular weight</option>
        </param>
        <!-- The true value of this checkbox is inserted verbatim into the command line. -->
        <param name="output_fragment"
               type="boolean"
               truevalue="--output-fragment"
               falsevalue=""
               label="Output Fragment"
               help="Output the biggest fragment rather than the original molecule"/>

        <!-- Optional field-driven selection; the field inputs only exist when the checkbox is on. -->
        <conditional name="field_conditional">
            <param name="use_field"
                   type="boolean"
                   truevalue="t"
                   falsevalue="f"
                   label="Use field"
                   help="Use a field to optimize diverse subset selection"/>
            <when value="t">
                <param name="field"
                       type="text"
                       label="Field"
                       help="Field to use to optimise diverse subset selection"/>
                <!-- The option values are the literal flags placed on the command line. -->
                <param name="field_opts"
                       type="select"
                       label="Field options"
                       help="pick lowest or highest value specified by the 'field' component">
                    <option value="--min" selected="true">min</option>
                    <option value="--max">max</option>
                </param>
            </when>
            <when value="f"/>
        </conditional>
    </inputs>

    <expand macro="outputs" />
    <tests>
        <!-- Runs the tool on Kinase_inhibs.sdf with rdkit descriptors, the tanimoto metric,
             a 0.6 threshold and the "hac" fragment method; compares the SDF output with the
             stored reference file and checks that the log reports 30 clusters. -->
        <test>
            <param name="infile" ftype="sdf" value="Kinase_inhibs.sdf"/>
            <param name="threshold" value="0.6"/>
            <param name="descriptor_opts" value="rdkit"/>
            <param name="metric_opts" value="tanimoto"/>
            <param name="fragment_opts" value="hac"/>
            <output name="outfile" file="cluster_butina_output1.sdf" ftype="sdf"/>
            <output name="logfile">
                <assert_contents>
                    <has_text text="Found 30 clusters"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**What this tool does**

This tool performs Butina clustering for a set of input molecules, using the chemistry toolkit RDKit, and returns results in SDF format.


-----

.. class:: infomark

**Input**

| - Molecules in `SDF format`_
| - A number of other parameters can be set, including the fingerprint type and the similarity metric to use.

.. _SDF format: http://en.wikipedia.org/wiki/Chemical_table_file


-----

.. class:: infomark

**Output**

SD file containing clusters.

    ]]></help>
    <expand macro="citations"/>
</tool>