view chemfp_ob2fps/ob2fps.xml @ 6:438bc12d591b

Uploaded
author bgruening
date Fri, 26 Apr 2013 08:02:45 -0400
parents a8ac5250d59c
children
line wrap: on
line source

<tool id="chemfp_mol2fps" name="Molecules to Fingerprints" version="0.2.0">
    <description>with different fingerprint types</description>
    <!-- Job splitting: the "infile" input is split (mode "to_size", size 10000) and the
         per-part "outfile" results are merged back into one dataset. -->
    <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile" />
    <!-- Packages that must be available when the command section runs. -->
    <!-- NOTE(review): the command also calls ob2fps for the Open Babel fingerprint types;
         confirm whether an Open Babel package requirement is needed here as well. -->
    <requirements>
        <requirement type="package" version="1.1p1">chemfp</requirement>
        <requirement type="package" version="2012_12_1">rdkit</requirement>
    </requirements>
    <command>
        ## The fingerprint type is the selector of the "fp_opts" conditional; there is no
        ## top-level "fptype" input, so bind it once here as a plain string.
        #set $fptype = str($fp_opts.fp_opts_selector)
        #if $fptype in ['--FP2', '--FP3', '--FP4', '--MACCS']:
            ## Open Babel fingerprints: the selector value is passed straight through as the type flag
            ob2fps $fptype --in "${infile.ext}" "${infile}" -o "${outfile}" --errors report 2>&#38;1
        #else:
            ## RDKit fingerprints: common arguments first, then the type flag and its
            ## type-specific options taken from the matching "when" branch of fp_opts
            rdkit2fps --in "${infile.ext}" "${infile}" -o "${outfile}"
            #if $fptype == "--RDK":
                --RDK
                --fpSize $fp_opts.fpSize
                --minPath $fp_opts.minPath
                --maxPath $fp_opts.maxPath
                --nBitsPerHash $fp_opts.nBitsPerHash
                ## boolean params expand to "--useHs 1" or "--useHs 0"
                $fp_opts.useHs
            #elif $fptype == "--torsions":
                --torsions
                --fpSize $fp_opts.fpSize
                --targetSize $fp_opts.targetSize
            #elif $fptype == "--morgan":
                --morgan
                --fpSize $fp_opts.fpSize
                --radius $fp_opts.radius
                ## each boolean expands to its own "--flag 0|1" pair
                $fp_opts.useFeatures
                $fp_opts.useChirality
                $fp_opts.useBondTypes
            #elif $fptype == "--pairs":
                --pairs
                --fpSize $fp_opts.fpSize
                --minLength $fp_opts.minLength
                --maxLength $fp_opts.maxLength
            #elif $fptype == "--maccs166":
                --maccs166
            #elif $fptype == "--substruct":
                --substruct
            #endif
            ## stderr is folded into stdout for both tools
            --errors report 2>&#38;1
        #endif
    </command>
    <inputs>
        <!-- Molecule file to fingerprint; its datatype extension is passed to the command as the input format. -->
        <param name="infile" type="data" format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>
        <!-- Fingerprint type selector. Each option value is the command-line flag of the
             corresponding fingerprint type; the "when" branches hold the type-specific options. -->
        <conditional name="fp_opts">
            <param name="fp_opts_selector" type="select" label="Type of fingerprint">
                <option value="--FP2" selected="True">Open Babel FP2 fingerprints</option>
                <option value="--FP3">Open Babel FP3 fingerprints</option>
                <option value="--FP4">Open Babel FP4 fingerprints</option>
                <option value="--MACCS">Open Babel MACCS fingerprints</option>
                <option value="--RDK">RDKit topological fingerprint</option>
                <option value="--torsions">RDKit topological Torsion fingerprints</option>
                <option value="--morgan">RDKit Morgan fingerprints</option>
                <option value="--pairs">RDKit Atom Pair fingerprints</option>
                <option value="--maccs166">RDKit MACCS fingerprints</option>
                <option value="--substruct">RDKit substructure fingerprints</option>
            </param>
            <!-- The Open Babel types take no additional options. -->
            <when value="--FP2" />
            <when value="--FP3" />
            <when value="--FP4" />
            <when value="--MACCS" />
            <!-- RDKit topological fingerprint options -->
            <when value="--RDK">
                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="useHs" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true" />
            </when>
            <!-- RDKit topological torsion options -->
            <when value="--torsions">
                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help="">
                    <validator type="in_range" min="1" />
                </param>
            </when>
            <!-- RDKit Morgan options; each boolean below has its own label describing its own flag -->
            <when value="--morgan">
                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="useFeatures" type="boolean" label="use chemical-feature invariants" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false" />
                <param name="useChirality" type="boolean" label="include chirality information" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false" />
                <param name="useBondTypes" type="boolean" label="include bond type information in the bond invariants" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true" />
            </when>
            <!-- RDKit atom pair options -->
            <when value="--pairs">
                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help="">
                    <validator type="in_range" min="1" />
                </param>
            </when>
            <!-- These RDKit types take no additional options. -->
            <when value="--maccs166" />
            <when value="--substruct" />
        </conditional>

    </inputs>
    <outputs>
        <!-- The single result dataset (datatype "fps"); the command writes it via its -o argument. -->
        <data format="fps" name="outfile" />
    </outputs>
    <tests>
        <!-- Each Open Babel fingerprint type is tested with an SDF and a SMILES input.
             The type is chosen through "fp_opts_selector", the select parameter declared
             inside the "fp_opts" conditional of the inputs section. -->
        <!-- FP2 -->
        <test>
            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
            <param name="fp_opts_selector" value="--FP2" />
            <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
        </test>
        <test>
            <param name="infile" value="CID_2244.smi" ftype="smi" />
            <param name="fp_opts_selector" value="--FP2" />
            <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
        </test>
        <!-- FP3 -->
        <test>
            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
            <param name="fp_opts_selector" value="--FP3" />
            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
        </test>
        <test>
            <param name="infile" value="CID_2244.smi" ftype="smi" />
            <param name="fp_opts_selector" value="--FP3" />
            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
        </test>
        <!-- FP4 -->
        <test>
            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
            <param name="fp_opts_selector" value="--FP4" />
            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
        </test>
        <test>
            <param name="infile" value="CID_2244.smi" ftype="smi" />
            <param name="fp_opts_selector" value="--FP4" />
            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
        </test>
        <!-- MACCS -->
        <test>
            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
            <param name="fp_opts_selector" value="--MACCS" />
            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" />
        </test>
        <test>
            <param name="infile" value="CID_2244.smi" ftype="smi" />
            <param name="fp_opts_selector" value="--MACCS" />
            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" />
        </test>
    </tests>
    <help>


**What it does**

Generate molecular fingerprints from a molecule file, using either Open Babel or RDKit depending on the selected fingerprint type.

-----

**Example**

* input::
	
	      - SDF File

		28434379
		  -OEChem-02031205132D

		 37 39  0     0  0  0  0  0  0999 V2000
		    8.1648   -1.8842    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
		    6.0812   -0.2134    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
		    6.0812   -1.8229    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
		    2.5369   -2.0182    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
		    6.3919    0.7371    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
		    7.3704    0.9433    0.0000 C   0  0  0  0 
		    ......
		  1 15  1  0  0  0  0
		  1 35  1  0  0  0  0
		  2  5  1  0  0  0  0
		  2 11  1  0  0  0  0
		  2 12  1  0  0  0  0
		  3 12  2  0  0  0  0
		  3 13  1  0  0  0  0
		  4 18  1  0  0  0  0
		  ......

			&gt; &lt;PUBCHEM_COMPOUND_CID&gt;
			28434379

			&gt; &lt;PUBCHEM_COMPOUND_CANONICALIZED&gt;
			1

			&gt; &lt;PUBCHEM_CACTVS_COMPLEXITY&gt;
			280

			&gt; &lt;PUBCHEM_CACTVS_HBOND_ACCEPTOR&gt;
			2

			&gt; &lt;PUBCHEM_CACTVS_HBOND_DONOR&gt;
			2

			&gt; &lt;PUBCHEM_CACTVS_ROTATABLE_BOND&gt;
			2

			&gt; &lt;PUBCHEM_CACTVS_SUBSKEYS&gt;
			AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==

			&gt;

		- type : FP2

* output::

	#FPS1
	#num_bits=1021
	#type=OpenBabel-FP2/1
	#software=OpenBabel/2.3.0
	#source=/tmp/dataset_409.dat.sdf
	#date=2012-02-03T11:13:39
	c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c
	0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300
	10000000000080000000c0000060000c0000060810000010000000800102000000	28434379


    </help>
</tool>