Mercurial > repos > bgruening > chemfp
changeset 9:18617b9fc70e
Uploaded
author | bgruening |
---|---|
date | Sat, 11 May 2013 17:12:34 -0400 |
parents | d241ff83e485 |
children | 4b5cb4328146 |
files | chemfp_mol2fps/mol2fps.xml chemfp_mol2fps/ob2fps.xml repository_dependencies.xml tool_dependencies.xml |
diffstat | 4 files changed, 244 insertions(+), 244 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_mol2fps/mol2fps.xml Sat May 11 17:12:34 2013 -0400 @@ -0,0 +1,239 @@ +<tool id="ctb_chemfp_mol2fps" name="Molecules to Fingerprints" version="0.2.0"> + <description>with different fingerprint types</description> + <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism> + <requirements> + <requirement type="package" version="1.1p1">chemfp</requirement> + <requirement type="package" version="2012_12_1">rdkit</requirement> + </requirements> + <command> + #if $fptype in ['--FP2', '--FP3', '--FP4', '--MACCS']: + ## Open Babel fingerprints + ob2fps $fptype --in "${infile.ext}" "${infile}" -o "${outfile}" --errors report 2>&1 + #else: + ## RDKit fingerprints + rdkit2fps --in "${infile.ext}" "${infile}" -o "${outfile}" + #if $fp_opts.fp_opts_selector=="--RDK": + --RDK + --fpSize $fp_opts.fpSize + --minPath $fp_opts.minPath + --maxPath $fp_opts.maxPath + --nBitsPerHash $fp_opts.nBitsPerHash + $fp_opts.useHs + #elif $fp_opts.fp_opts_selector=="--torsions": + --torsions + --fpSize $fp_opts.fpSize + --targetSize $fp_opts.targetSize + #elif $fp_opts.fp_opts_selector=="--morgan": + --morgan + --fpSize $fp_opts.fpSize + --radius $fp_opts.radius + $fp_opts.useFeatures + $fp_opts.useChirality + $fp_opts.useBondTypes + #elif $fp_opts.fp_opts_selector=="--pairs": + --paris + --fpSize $fp_opts.fpSize + --minLength $fp_opts.minLength + --maxLength $fp_opts.maxLength + #elif $fp_opts.fp_opts_selector=="--maccs166": + --maccs166 + #elif $fp_opts.fp_opts_selector=="--substruct": + --substruct + #endif + --errors report 2>&1 + #endif + </command> + <inputs> + <param name="infile" type='data' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/> + <conditional name="fp_opts"> + <param name="fp_opts_selector" type="select" label="Type of fingerprint"> + <option value='--FP2' selected="True">Open Babel FP2 fingerprints</option> + <option value='--FP3'>Open Babel FP3 fingerprints</option> + <option value='--FP4'>Open Babel FP4 fingerprints</option> + <option value='--MACCS'>Open Babel MACCS fingerprints</option> + <option value='--RDK'>RDKit topological fingerprint</option> + <option value='--torsions'>RDKit topological Torsion fingerprints</option> + <option value='--morgan'>RDKit Morgan fingerprints</option> + <option value='--pairs'>RDKit Atom Pair fingerprints</option> + <option value='--maccs166'>RDKit MACCS fingerprints</option> + <option value='--substruct'>RDKit substructure fingerprints</option> + </param> + <when value="--FP2" /> + <when value="--FP3" /> + <when value="--FP4" /> + <when value="--MACCS" /> + <when value="--RDK"> + <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="useHs" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true" /> + </when> + <when value="--torsions"> + <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help=""> + <validator type="in_range" min="1" /> + </param> + </when> + <when value="--morgan"> + <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="useFeatures" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false" /> + <param name="useChirality" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false" /> + <param name="useBondTypes" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true" /> + </when> + <when value="--pairs"> + <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help=""> + <validator type="in_range" min="1" /> + </param> + </when> + <when value="--maccs166" /> + <when value="--substruct" /> + </conditional> + + </inputs> + <outputs> + <data name="outfile" format="fps" /> + </outputs> + <tests> + <!-- FP2 --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fptype" value="--FP2" /> + <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" /> + </test> + <test> + <param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fptype" value="--FP2" /> + <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" /> + </test> + <!-- FP3 --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fptype" value="--FP3" /> + <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" /> + </test> + <test> + <param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fptype" value="--FP3" /> + <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" /> + </test> + <!-- FP4 --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fptype" value="--FP4" /> + <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" /> + </test> + <test> + <param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fptype" value="--FP4" /> + <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" /> + </test> + <!-- MACCS --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fptype" value="--MACCS" /> + <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" /> + </test> + <test> + <param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fptype" value="--MACCS" /> + <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" /> + </test> + </tests> + <help> + + +**What it does** + +Generate fingerprints using OpenBabel + +----- + +**Example** + +* input:: + + - SDF File + + 28434379 + -OEChem-02031205132D + + 37 39 0 0 0 0 0 0 0999 V2000 + 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7.3704 0.9433 0.0000 C 0 0 0 0 + ...... + 1 15 1 0 0 0 0 + 1 35 1 0 0 0 0 + 2 5 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 12 1 0 0 0 0 + 3 12 2 0 0 0 0 + 3 13 1 0 0 0 0 + 4 18 1 0 0 0 0 + ...... + + >PUBCHEM_COMPOUND_CID< + 28434379 + + > <PUBCHEM_COMPOUND_CANONICALIZED> + 1 + + > <PUBCHEM_CACTVS_COMPLEXITY> + 280 + + > <PUBCHEM_CACTVS_HBOND_ACCEPTOR> + 2 + + > <PUBCHEM_CACTVS_HBOND_DONOR> + 2 + + > <PUBCHEM_CACTVS_ROTATABLE_BOND> + 2 + + > <PUBCHEM_CACTVS_SUBSKEYS> + AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== + + > + + - type : FP2 + +* output:: + + #FPS1 + #num_bits=1021 + #type=OpenBabel-FP2/1 + #software=OpenBabel/2.3.0 + #source=/tmp/dataset_409.dat.sdf + #date=2012-02-03T11:13:39 + c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c + 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300 + 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379 + + + </help> +</tool>
--- a/chemfp_mol2fps/ob2fps.xml Fri Apr 26 11:07:08 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,239 +0,0 @@ -<tool id="chemfp_mol2fps" name="Molecules to Fingerprints" version="0.2.0"> - <description>with different fingerprint types</description> - <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism> - <requirements> - <requirement type="package" version="1.1p1">chemfp</requirement> - <requirement type="package" version="2012_12_1">rdkit</requirement> - </requirements> - <command> - #if $fptype in ['--FP2', '--FP3', '--FP4', '--MACCS']: - ## Open Babel fingerprints - ob2fps $fptype --in "${infile.ext}" "${infile}" -o "${outfile}" --errors report 2>&1 - #else: - ## RDKit fingerprints - rdkit2fps --in "${infile.ext}" "${infile}" -o "${outfile}" - #if $fp_opts.fp_opts_selector=="--RDK": - --RDK - --fpSize $fp_opts.fpSize - --minPath $fp_opts.minPath - --maxPath $fp_opts.maxPath - --nBitsPerHash $fp_opts.nBitsPerHash - $fp_opts.useHs - #elif $fp_opts.fp_opts_selector=="--torsions": - --torsions - --fpSize $fp_opts.fpSize - --targetSize $fp_opts.targetSize - #elif $fp_opts.fp_opts_selector=="--morgan": - --morgan - --fpSize $fp_opts.fpSize - --radius $fp_opts.radius - $fp_opts.useFeatures - $fp_opts.useChirality - $fp_opts.useBondTypes - #elif $fp_opts.fp_opts_selector=="--pairs": - --paris - --fpSize $fp_opts.fpSize - --minLength $fp_opts.minLength - --maxLength $fp_opts.maxLength - #elif $fp_opts.fp_opts_selector=="--maccs166": - --maccs166 - #elif $fp_opts.fp_opts_selector=="--substruct": - --substruct - #endif - --errors report 2>&1 - #endif - </command> - <inputs> - <param name="infile" type='data' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/> - <conditional name="fp_opts"> - <param name="fp_opts_selector" type="select" label="Type of fingerprint"> - <option value='--FP2' selected="True">Open Babel FP2 fingerprints</option> - <option value='--FP3'>Open Babel FP3 fingerprints</option> - <option value='--FP4'>Open Babel FP4 fingerprints</option> - <option value='--MACCS'>Open Babel MACCS fingerprints</option> - <option value='--RDK'>RDKit topological fingerprint</option> - <option value='--torsions'>RDKit topological Torsion fingerprints</option> - <option value='--morgan'>RDKit Morgan fingerprints</option> - <option value='--pairs'>RDKit Atom Pair fingerprints</option> - <option value='--maccs166'>RDKit MACCS fingerprints</option> - <option value='--substruct'>RDKit substructure fingerprints</option> - </param> - <when value="--FP2" /> - <when value="--FP3" /> - <when value="--FP4" /> - <when value="--MACCS" /> - <when value="--RDK"> - <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="useHs" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true" /> - </when> - <when value="--torsions"> - <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help=""> - <validator type="in_range" min="1" /> - </param> - </when> - <when value="--morgan"> - <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="useFeatures" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false" /> - <param name="useChirality" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false" /> - <param name="useBondTypes" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true" /> - </when> - <when value="--pairs"> - <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help=""> - <validator type="in_range" min="1" /> - </param> - </when> - <when value="--maccs166" /> - <when value="--substruct" /> - </conditional> - - </inputs> - <outputs> - <data name="outfile" format="fps" /> - </outputs> - <tests> - <!-- FP2 --> - <test> - <param name="infile" value="CID_2244.sdf" ftype="sdf" /> - <param name="fptype" value="--FP2" /> - <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" /> - </test> - <test> - <param name="infile" value="CID_2244.smi" ftype="smi" /> - <param name="fptype" value="--FP2" /> - <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" /> - </test> - <!-- FP3 --> - <test> - <param name="infile" value="CID_2244.sdf" ftype="sdf" /> - <param name="fptype" value="--FP3" /> - <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" /> - </test> - <test> - <param name="infile" value="CID_2244.smi" ftype="smi" /> - <param name="fptype" value="--FP3" /> - <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" /> - </test> - <!-- FP4 --> - <test> - <param name="infile" value="CID_2244.sdf" ftype="sdf" /> - <param name="fptype" value="--FP4" /> - <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" /> - </test> - <test> - <param name="infile" value="CID_2244.smi" ftype="smi" /> - <param name="fptype" value="--FP4" /> - <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" /> - </test> - <!-- MACCS --> - <test> - <param name="infile" value="CID_2244.sdf" ftype="sdf" /> - <param name="fptype" value="--MACCS" /> - <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" /> - </test> - <test> - <param name="infile" value="CID_2244.smi" ftype="smi" /> - <param name="fptype" value="--MACCS" /> - <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" /> - </test> - </tests> - <help> - - -**What it does** - -Generate fingerprints using OpenBabel - ------ - -**Example** - -* input:: - - - SDF File - - 28434379 - -OEChem-02031205132D - - 37 39 0 0 0 0 0 0 0999 V2000 - 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 - 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 - 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 7.3704 0.9433 0.0000 C 0 0 0 0 - ...... - 1 15 1 0 0 0 0 - 1 35 1 0 0 0 0 - 2 5 1 0 0 0 0 - 2 11 1 0 0 0 0 - 2 12 1 0 0 0 0 - 3 12 2 0 0 0 0 - 3 13 1 0 0 0 0 - 4 18 1 0 0 0 0 - ...... - - >PUBCHEM_COMPOUND_CID< - 28434379 - - > <PUBCHEM_COMPOUND_CANONICALIZED> - 1 - - > <PUBCHEM_CACTVS_COMPLEXITY> - 280 - - > <PUBCHEM_CACTVS_HBOND_ACCEPTOR> - 2 - - > <PUBCHEM_CACTVS_HBOND_DONOR> - 2 - - > <PUBCHEM_CACTVS_ROTATABLE_BOND> - 2 - - > <PUBCHEM_CACTVS_SUBSKEYS> - AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== - - > - - - type : FP2 - -* output:: - - #FPS1 - #num_bits=1021 - #type=OpenBabel-FP2/1 - #software=OpenBabel/2.3.0 - #source=/tmp/dataset_409.dat.sdf - #date=2012-02-03T11:13:39 - c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c - 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300 - 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379 - - - </help> -</tool>
--- a/repository_dependencies.xml Fri Apr 26 11:07:08 2013 -0400 +++ b/repository_dependencies.xml Sat May 11 17:12:34 2013 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0"?> <repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format) and the python numpy package."> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="molecule_datatypes" owner="bgruening" changeset_revision="e533de975501" /> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="bgruening" changeset_revision="c3041382815c" /> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="molecule_datatypes" owner="bgruening" changeset_revision="1a070566e9c6" /> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="bgruening" changeset_revision="ec80bba4bccb" /> </repositories>
--- a/tool_dependencies.xml Fri Apr 26 11:07:08 2013 -0400 +++ b/tool_dependencies.xml Sat May 11 17:12:34 2013 -0400 @@ -1,12 +1,12 @@ <tool_dependency> <package name="rdkit" version="2012_12_1"> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_rdkit_2012_12" owner="bgruening" changeset_revision="0adc830e7d71" /> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_rdkit_2012_12" owner="bgruening" changeset_revision="158956be0623" /> </package> <package name="numpy" version="1.7.1"> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="bgruening" changeset_revision="c3041382815c" /> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="bgruening" changeset_revision="ec80bba4bccb" /> </package> <package name="matplotlib" version="1.2.1"> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_matplotlib_1_2" owner="bgruening" changeset_revision="ae165d9e610a" /> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_matplotlib_1_2" owner="bgruening" changeset_revision="f2c19c75befd" /> </package> <package name="chemfp" version="1.1p1">