Mercurial > repos > bgruening > chemfp
view chemfp_ob2fps/ob2fps.xml @ 6:438bc12d591b
Uploaded
author | bgruening |
---|---|
date | Fri, 26 Apr 2013 08:02:45 -0400 |
parents | a8ac5250d59c |
children |
line wrap: on
line source
<tool id="chemfp_mol2fps" name="Molecules to Fingerprints" version="0.2.0">
    <!--
        Galaxy wrapper that converts a molecule file into an FPS fingerprint file.
        Open Babel fingerprint types are computed with ob2fps, RDKit fingerprint
        types with rdkit2fps; the choice is made in the command template below
        from the value of the fp_opts_selector parameter.
    -->
    <description>with different fingerprint types</description>
    <!-- The input is split (mode "to_size", size 10000) and the per-chunk outputs are merged. -->
    <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism>
    <requirements>
        <!--
            NOTE(review): ob2fps presumably needs Open Babel at runtime, but no
            requirement is declared for it here. Confirm that it is provided
            through the chemfp package dependency.
        -->
        <requirement type="package" version="1.1p1">chemfp</requirement>
        <requirement type="package" version="2012_12_1">rdkit</requirement>
    </requirements>
    <!-- CDATA keeps the shell redirection (2>&1) legal inside XML. -->
    <command><![CDATA[
        ## The fingerprint type is defined inside the fp_opts conditional, so it
        ## has to be addressed as $fp_opts.fp_opts_selector. str() makes the
        ## membership test independent of how the parameter wrapper compares.
        #if str($fp_opts.fp_opts_selector) in ['--FP2', '--FP3', '--FP4', '--MACCS']
            ## Open Babel fingerprints: the selector value is the ob2fps flag itself.
            ob2fps $fp_opts.fp_opts_selector --in "${infile.ext}" "${infile}" -o "${outfile}" --errors report 2>&1
        #else
            ## RDKit fingerprints: each type has its own set of options.
            rdkit2fps --in "${infile.ext}" "${infile}" -o "${outfile}"
            #if str($fp_opts.fp_opts_selector) == "--RDK"
                --RDK
                --fpSize $fp_opts.fpSize
                --minPath $fp_opts.minPath
                --maxPath $fp_opts.maxPath
                --nBitsPerHash $fp_opts.nBitsPerHash
                $fp_opts.useHs
            #elif str($fp_opts.fp_opts_selector) == "--torsions"
                --torsions
                --fpSize $fp_opts.fpSize
                --targetSize $fp_opts.targetSize
            #elif str($fp_opts.fp_opts_selector) == "--morgan"
                --morgan
                --fpSize $fp_opts.fpSize
                --radius $fp_opts.radius
                $fp_opts.useFeatures
                $fp_opts.useChirality
                $fp_opts.useBondTypes
            #elif str($fp_opts.fp_opts_selector) == "--pairs"
                --pairs
                --fpSize $fp_opts.fpSize
                --minLength $fp_opts.minLength
                --maxLength $fp_opts.maxLength
            #elif str($fp_opts.fp_opts_selector) == "--maccs166"
                --maccs166
            #elif str($fp_opts.fp_opts_selector) == "--substruct"
                --substruct
            #end if
            --errors report 2>&1
        #end if
    ]]></command>
    <inputs>
        <param name="infile" type="data" format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>
        <!-- Each option value is the exact command-line flag passed to ob2fps / rdkit2fps. -->
        <conditional name="fp_opts">
            <param name="fp_opts_selector" type="select" label="Type of fingerprint">
                <option value="--FP2" selected="true">Open Babel FP2 fingerprints</option>
                <option value="--FP3">Open Babel FP3 fingerprints</option>
                <option value="--FP4">Open Babel FP4 fingerprints</option>
                <option value="--MACCS">Open Babel MACCS fingerprints</option>
                <option value="--RDK">RDKit topological fingerprint</option>
                <option value="--torsions">RDKit topological Torsion fingerprints</option>
                <option value="--morgan">RDKit Morgan fingerprints</option>
                <option value="--pairs">RDKit Atom Pair fingerprints</option>
                <option value="--maccs166">RDKit MACCS fingerprints</option>
                <option value="--substruct">RDKit substructure fingerprints</option>
            </param>
            <!-- The Open Babel types take no extra options. -->
            <when value="--FP2" />
            <when value="--FP3" />
            <when value="--FP4" />
            <when value="--MACCS" />
            <when value="--RDK">
                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help="">
                    <validator type="in_range" min="1" />
                </param>
                <!-- Boolean parameters expand to the complete "flag value" pair used on the command line. -->
                <param name="useHs" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true" />
            </when>
            <when value="--torsions">
                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help="">
                    <validator type="in_range" min="1" />
                </param>
            </when>
            <when value="--morgan">
                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="useFeatures" type="boolean" label="use chemical-feature invariants" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false" />
                <param name="useChirality" type="boolean" label="include chirality information" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false" />
                <param name="useBondTypes" type="boolean" label="include bond type information" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true" />
            </when>
            <when value="--pairs">
                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help="">
                    <validator type="in_range" min="1" />
                </param>
            </when>
            <when value="--maccs166" />
            <when value="--substruct" />
        </conditional>
    </inputs>
    <outputs>
        <data name="outfile" format="fps" />
    </outputs>
    <!-- Tests set the fingerprint type through the selector's real parameter name, fp_opts_selector. -->
    <tests>
        <!-- FP2 -->
        <test>
            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
            <param name="fp_opts_selector" value="--FP2" />
            <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
        </test>
        <test>
            <param name="infile" value="CID_2244.smi" ftype="smi" />
            <param name="fp_opts_selector" value="--FP2" />
            <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
        </test>
        <!-- FP3 -->
        <test>
            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
            <param name="fp_opts_selector" value="--FP3" />
            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
        </test>
        <test>
            <param name="infile" value="CID_2244.smi" ftype="smi" />
            <param name="fp_opts_selector" value="--FP3" />
            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
        </test>
        <!-- FP4 -->
        <test>
            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
            <param name="fp_opts_selector" value="--FP4" />
            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
        </test>
        <test>
            <param name="infile" value="CID_2244.smi" ftype="smi" />
            <param name="fp_opts_selector" value="--FP4" />
            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
        </test>
        <!-- MACCS -->
        <test>
            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
            <param name="fp_opts_selector" value="--MACCS" />
            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" />
        </test>
        <test>
            <param name="infile" value="CID_2244.smi" ftype="smi" />
            <param name="fp_opts_selector" value="--MACCS" />
            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" />
        </test>
    </tests>
    <!-- CDATA keeps the SDF data-header lines (which contain angle brackets) legal inside XML. -->
    <help><![CDATA[
**What it does**

Generate fingerprints using Open Babel or RDKit

-----

**Example**

* input::

    - SDF File

    28434379
    -OEChem-02031205132D
    37 39 0 0 0 0 0 0 0999 V2000
    8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
    6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
    6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
    2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
    6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
    7.3704 0.9433 0.0000 C 0 0 0 0
    ......
    1 15 1 0 0 0 0
    1 35 1 0 0 0 0
    2 5 1 0 0 0 0
    2 11 1 0 0 0 0
    2 12 1 0 0 0 0
    3 12 2 0 0 0 0
    3 13 1 0 0 0 0
    4 18 1 0 0 0 0
    ......

    > <PUBCHEM_COMPOUND_CID>
    28434379

    > <PUBCHEM_COMPOUND_CANONICALIZED>
    1

    > <PUBCHEM_CACTVS_COMPLEXITY>
    280

    > <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
    2

    > <PUBCHEM_CACTVS_HBOND_DONOR>
    2

    > <PUBCHEM_CACTVS_ROTATABLE_BOND>
    2

    > <PUBCHEM_CACTVS_SUBSKEYS>
    AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==

    >

    - type : FP2

* output::

    #FPS1
    #num_bits=1021
    #type=OpenBabel-FP2/1
    #software=OpenBabel/2.3.0
    #source=/tmp/dataset_409.dat.sdf
    #date=2012-02-03T11:13:39
    c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c
    0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300
    10000000000080000000c0000060000c0000060810000010000000800102000000 28434379
    ]]></help>
</tool>