Mercurial > repos > bgruening > chemfp
changeset 7:0e5307e9a072
Uploaded
author | bgruening |
---|---|
date | Fri, 26 Apr 2013 10:01:47 -0400 |
parents | 438bc12d591b |
children | d241ff83e485 |
files | chemfp_mol2fps/ob2fps.xml chemfp_mol2fps/test-data/CID_2244.can chemfp_mol2fps/test-data/CID_2244.inchi chemfp_mol2fps/test-data/CID_2244.sdf chemfp_mol2fps/test-data/CID_2244.smi chemfp_mol2fps/test-data/CID_2244_FP2.fps chemfp_mol2fps/test-data/CID_2244_FP3.fps chemfp_mol2fps/test-data/CID_2244_FP4.fps chemfp_mol2fps/test-data/CID_2244_maccs.fps chemfp_ob2fps/ob2fps.xml chemfp_ob2fps/test-data/CID_2244.can chemfp_ob2fps/test-data/CID_2244.inchi chemfp_ob2fps/test-data/CID_2244.sdf chemfp_ob2fps/test-data/CID_2244.smi chemfp_ob2fps/test-data/CID_2244_FP2.fps chemfp_ob2fps/test-data/CID_2244_FP3.fps chemfp_ob2fps/test-data/CID_2244_FP4.fps chemfp_ob2fps/test-data/CID_2244_maccs.fps repository_dependencies.xml |
diffstat | 19 files changed, 427 insertions(+), 427 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_mol2fps/ob2fps.xml Fri Apr 26 10:01:47 2013 -0400
@@ -0,0 +1,246 @@
+<tool id="chemfp_mol2fps" name="Molecules to Fingerprints" version="0.2.0">
+    <description>with different fingerprint types</description>
+    <!-- Galaxy splits "infile" into chunks (split_size 10000), runs the tool once per chunk and merges the "outfile" results. -->
+    <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism>
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+        <requirement type="package" version="2012_12_1">rdkit</requirement>
+    </requirements>
+    <!--
+        The option picked in the "fp_opts" conditional selects the chemfp command-line tool:
+        ob2fps for the four Open Babel types, rdkit2fps for everything else. The option value
+        itself is passed on as the fingerprint-type flag. CDATA keeps the "2>&1" redirection
+        and the Cheetah directives free of entity escapes.
+    -->
+    <command><![CDATA[
+        #if str($fp_opts.fp_opts_selector) in ['--FP2', '--FP3', '--FP4', '--MACCS']:
+            ## Open Babel fingerprints
+            ob2fps $fp_opts.fp_opts_selector --in "${infile.ext}" "${infile}" -o "${outfile}" --errors report 2>&1
+        #else:
+            ## RDKit fingerprints
+            rdkit2fps --in "${infile.ext}" "${infile}" -o "${outfile}"
+            #if $fp_opts.fp_opts_selector=="--RDK":
+                --RDK
+                --fpSize $fp_opts.fpSize
+                --minPath $fp_opts.minPath
+                --maxPath $fp_opts.maxPath
+                --nBitsPerHash $fp_opts.nBitsPerHash
+                $fp_opts.useHs
+            #elif $fp_opts.fp_opts_selector=="--torsions":
+                --torsions
+                --fpSize $fp_opts.fpSize
+                --targetSize $fp_opts.targetSize
+            #elif $fp_opts.fp_opts_selector=="--morgan":
+                --morgan
+                --fpSize $fp_opts.fpSize
+                --radius $fp_opts.radius
+                $fp_opts.useFeatures
+                $fp_opts.useChirality
+                $fp_opts.useBondTypes
+            #elif $fp_opts.fp_opts_selector=="--pairs":
+                --pairs
+                --fpSize $fp_opts.fpSize
+                --minLength $fp_opts.minLength
+                --maxLength $fp_opts.maxLength
+            #elif $fp_opts.fp_opts_selector=="--maccs166":
+                --maccs166
+            #elif $fp_opts.fp_opts_selector=="--substruct":
+                --substruct
+            #endif
+            --errors report 2>&1
+        #endif
+    ]]></command>
+    <inputs>
+        <param name="infile" type="data" format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>
+        <conditional name="fp_opts">
+            <param name="fp_opts_selector" type="select" label="Type of fingerprint">
+                <option value="--FP2" selected="True">Open Babel FP2 fingerprints</option>
+                <option value="--FP3">Open Babel FP3 fingerprints</option>
+                <option value="--FP4">Open Babel FP4 fingerprints</option>
+                <option value="--MACCS">Open Babel MACCS fingerprints</option>
+                <option value="--RDK">RDKit topological fingerprint</option>
+                <option value="--torsions">RDKit topological Torsion fingerprints</option>
+                <option value="--morgan">RDKit Morgan fingerprints</option>
+                <option value="--pairs">RDKit Atom Pair fingerprints</option>
+                <option value="--maccs166">RDKit MACCS fingerprints</option>
+                <option value="--substruct">RDKit substructure fingerprints</option>
+            </param>
+            <!-- The Open Babel fingerprint types take no additional options. -->
+            <when value="--FP2" />
+            <when value="--FP3" />
+            <when value="--FP4" />
+            <when value="--MACCS" />
+            <when value="--RDK">
+                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="useHs" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true" />
+            </when>
+            <when value="--torsions">
+                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+            </when>
+            <when value="--morgan">
+                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="useFeatures" type="boolean" label="use chemical-feature invariants" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false" />
+                <param name="useChirality" type="boolean" label="include chirality information" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false" />
+                <param name="useBondTypes" type="boolean" label="include bond type information" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true" />
+            </when>
+            <when value="--pairs">
+                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+            </when>
+            <when value="--maccs166" />
+            <when value="--substruct" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fps" />
+    </outputs>
+    <!-- Each test picks the fingerprint type through "fp_opts_selector" and compares "outfile" with a file from test-data. -->
+    <tests>
+        <!-- FP2 -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fp_opts_selector" value="--FP2" />
+            <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fp_opts_selector" value="--FP2" />
+            <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
+        </test>
+        <!-- FP3 -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fp_opts_selector" value="--FP3" />
+            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fp_opts_selector" value="--FP3" />
+            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
+        </test>
+        <!-- FP4 -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fp_opts_selector" value="--FP4" />
+            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fp_opts_selector" value="--FP4" />
+            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
+        </test>
+        <!-- MACCS -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fp_opts_selector" value="--MACCS" />
+            <output name="outfile" file="CID_2244_maccs.fps" ftype="fps" />
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fp_opts_selector" value="--MACCS" />
+            <output name="outfile" file="CID_2244_maccs.fps" ftype="fps" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**What it does**
+
+Generate fingerprints with chemfp, using either Open Babel (ob2fps) or RDKit (rdkit2fps).
+
+-----
+
+**Example**
+
+* input::
+
+    - SDF File
+
+    28434379
+      -OEChem-02031205132D
+
+     37 39  0     0  0  0  0  0  0999 V2000
+        8.1648   -1.8842    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+        6.0812   -0.2134    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+        6.0812   -1.8229    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+        2.5369   -2.0182    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+        6.3919    0.7371    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+        7.3704    0.9433    0.0000 C   0  0  0  0
+        ......
+      1 15  1  0  0  0  0
+      1 35  1  0  0  0  0
+      2  5  1  0  0  0  0
+      2 11  1  0  0  0  0
+      2 12  1  0  0  0  0
+      3 12  2  0  0  0  0
+      3 13  1  0  0  0  0
+      4 18  1  0  0  0  0
+        ......
+
+    > <PUBCHEM_COMPOUND_CID>
+    28434379
+
+    > <PUBCHEM_COMPOUND_CANONICALIZED>
+    1
+
+    > <PUBCHEM_CACTVS_COMPLEXITY>
+    280
+
+    > <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
+    2
+
+    > <PUBCHEM_CACTVS_HBOND_DONOR>
+    2
+
+    > <PUBCHEM_CACTVS_ROTATABLE_BOND>
+    2
+
+    > <PUBCHEM_CACTVS_SUBSKEYS>
+    AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
+
+    >
+
+    - type : FP2
+
+* output::
+
+    #FPS1
+    #num_bits=1021
+    #type=OpenBabel-FP2/1
+    #software=OpenBabel/2.3.0
+    #source=/tmp/dataset_409.dat.sdf
+    #date=2012-02-03T11:13:39
+    c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c
+    0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300
+    10000000000080000000c0000060000c0000060810000010000000800102000000 28434379
+
+
+    ]]></help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_mol2fps/test-data/CID_2244.can Fri Apr 26 10:01:47 2013 -0400 @@ -0,0 +1,1 @@ +CC(=O)Oc1ccccc1C(=O)O 2244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_mol2fps/test-data/CID_2244.inchi Fri Apr 26 10:01:47 2013 -0400 @@ -0,0 +1,1 @@ +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_mol2fps/test-data/CID_2244.sdf Fri Apr 26 10:01:47 2013 -0400 @@ -0,0 +1,155 @@ +2244 + -OEChem-05151212332D + + 21 21 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +2244 + +> <PUBCHEM_COMPOUND_CANONICALIZED> +1 + +> <PUBCHEM_CACTVS_COMPLEXITY> +212 + +> <PUBCHEM_CACTVS_HBOND_ACCEPTOR> +4 + +> <PUBCHEM_CACTVS_HBOND_DONOR> +1 + +> <PUBCHEM_CACTVS_ROTATABLE_BOND> +3 + +> <PUBCHEM_CACTVS_SUBSKEYS> 
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> <PUBCHEM_IUPAC_OPENEYE_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_CAS_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_SYSTEMATIC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_TRADITIONAL_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_INCHI> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> <PUBCHEM_IUPAC_INCHIKEY> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> <PUBCHEM_XLOGP3> +1.2 + +> <PUBCHEM_EXACT_MASS> +180.042259 + +> <PUBCHEM_MOLECULAR_FORMULA> +C9H8O4 + +> <PUBCHEM_MOLECULAR_WEIGHT> +180.15742 + +> <PUBCHEM_OPENEYE_CAN_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_OPENEYE_ISO_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_CACTVS_TPSA> +63.6 + +> <PUBCHEM_MONOISOTOPIC_WEIGHT> +180.042259 + +> <PUBCHEM_TOTAL_CHARGE> +0 + +> <PUBCHEM_HEAVY_ATOM_COUNT> +13 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> <PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +1 + +> <PUBCHEM_COORDINATE_TYPE> +1 +5 +255 + +> <PUBCHEM_BONDANNOTATIONS> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_mol2fps/test-data/CID_2244.smi Fri Apr 26 10:01:47 2013 -0400 @@ -0,0 +1,1 @@ +O(c1c(cccc1)C(=O)O)C(=O)C 2244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_mol2fps/test-data/CID_2244_FP2.fps Fri Apr 26 10:01:47 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:40:38 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_mol2fps/test-data/CID_2244_FP3.fps Fri Apr 26 10:01:47 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=55 +#type=OpenBabel-FP3/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:59:15 +0400000c50b007 2244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_mol2fps/test-data/CID_2244_FP4.fps Fri Apr 26 10:01:47 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=307 +#type=OpenBabel-FP4/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:59:22 +010000000000000000009800000000004001000000000000000000000000000000000240402801 2244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_mol2fps/test-data/CID_2244_maccs.fps Fri Apr 26 10:01:47 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=166 +#type=OpenBabel-MACCS/2 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T17:00:39 +0000000000000000000000010000016480cca2d21e 2244
--- a/chemfp_ob2fps/ob2fps.xml Fri Apr 26 08:02:45 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,239 +0,0 @@ -<tool id="chemfp_mol2fps" name="Molecules to Fingerprints" version="0.2.0"> - <description>with different fingerprint types</description> - <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism> - <requirements> - <requirement type="package" version="1.1p1">chemfp</requirement> - <requirement type="package" version="2012_12_1">rdkit</requirement> - </requirements> - <command> - #if $fptype in ['--FP2', '--FP3', '--FP4', '--MACCS']: - ## Open Babel fingerprints - ob2fps $fptype --in "${infile.ext}" "${infile}" -o "${outfile}" --errors report 2>&1 - #else: - ## RDKit fingerprints - rdkit2fps --in "${infile.ext}" "${infile}" -o "${outfile}" - #if $fp_opts.fp_opts_selector=="--RDK": - --RDK - --fpSize $fp_opts.fpSize - --minPath $fp_opts.minPath - --maxPath $fp_opts.maxPath - --nBitsPerHash $fp_opts.nBitsPerHash - $fp_opts.useHs - #elif $fp_opts.fp_opts_selector=="--torsions": - --torsions - --fpSize $fp_opts.fpSize - --targetSize $fp_opts.targetSize - #elif $fp_opts.fp_opts_selector=="--morgan": - --morgan - --fpSize $fp_opts.fpSize - --radius $fp_opts.radius - $fp_opts.useFeatures - $fp_opts.useChirality - $fp_opts.useBondTypes - #elif $fp_opts.fp_opts_selector=="--pairs": - --paris - --fpSize $fp_opts.fpSize - --minLength $fp_opts.minLength - --maxLength $fp_opts.maxLength - #elif $fp_opts.fp_opts_selector=="--maccs166": - --maccs166 - #elif $fp_opts.fp_opts_selector=="--substruct": - --substruct - #endif - --errors report 2>&1 - #endif - </command> - <inputs> - <param name="infile" type='data' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/> - <conditional name="fp_opts"> - <param name="fp_opts_selector" type="select" label="Type of fingerprint"> - <option value='--FP2' selected="True">Open Babel FP2 fingerprints</option> - <option 
value='--FP3'>Open Babel FP3 fingerprints</option> - <option value='--FP4'>Open Babel FP4 fingerprints</option> - <option value='--MACCS'>Open Babel MACCS fingerprints</option> - <option value='--RDK'>RDKit topological fingerprint</option> - <option value='--torsions'>RDKit topological Torsion fingerprints</option> - <option value='--morgan'>RDKit Morgan fingerprints</option> - <option value='--pairs'>RDKit Atom Pair fingerprints</option> - <option value='--maccs166'>RDKit MACCS fingerprints</option> - <option value='--substruct'>RDKit substructure fingerprints</option> - </param> - <when value="--FP2" /> - <when value="--FP3" /> - <when value="--FP4" /> - <when value="--MACCS" /> - <when value="--RDK"> - <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="useHs" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true" /> - </when> - <when value="--torsions"> - <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help=""> - <validator type="in_range" min="1" /> - </param> - </when> - <when value="--morgan"> - <param name="fpSize" type="integer" value="2048" label="number of bits in the 
fingerprint" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="useFeatures" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false" /> - <param name="useChirality" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false" /> - <param name="useBondTypes" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true" /> - </when> - <when value="--pairs"> - <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help=""> - <validator type="in_range" min="1" /> - </param> - <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help=""> - <validator type="in_range" min="1" /> - </param> - </when> - <when value="--maccs166" /> - <when value="--substruct" /> - </conditional> - - </inputs> - <outputs> - <data name="outfile" format="fps" /> - </outputs> - <tests> - <!-- FP2 --> - <test> - <param name="infile" value="CID_2244.sdf" ftype="sdf" /> - <param name="fptype" value="--FP2" /> - <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" /> - </test> - <test> - <param name="infile" value="CID_2244.smi" ftype="smi" /> - <param name="fptype" value="--FP2" /> - <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" /> - </test> - <!-- FP3 --> - <test> - <param name="infile" value="CID_2244.sdf" ftype="sdf" /> - <param name="fptype" value="--FP3" /> - <output name="outfile" 
file="CID_2244_FP3.fps" ftype="fps" /> - </test> - <test> - <param name="infile" value="CID_2244.smi" ftype="smi" /> - <param name="fptype" value="--FP3" /> - <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" /> - </test> - <!-- FP4 --> - <test> - <param name="infile" value="CID_2244.sdf" ftype="sdf" /> - <param name="fptype" value="--FP4" /> - <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" /> - </test> - <test> - <param name="infile" value="CID_2244.smi" ftype="smi" /> - <param name="fptype" value="--FP4" /> - <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" /> - </test> - <!-- MACCS --> - <test> - <param name="infile" value="CID_2244.sdf" ftype="sdf" /> - <param name="fptype" value="--MACCS" /> - <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" /> - </test> - <test> - <param name="infile" value="CID_2244.smi" ftype="smi" /> - <param name="fptype" value="--MACCS" /> - <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" /> - </test> - </tests> - <help> - - -**What it does** - -Generate fingerprints using OpenBabel - ------ - -**Example** - -* input:: - - - SDF File - - 28434379 - -OEChem-02031205132D - - 37 39 0 0 0 0 0 0 0999 V2000 - 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 - 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 - 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 7.3704 0.9433 0.0000 C 0 0 0 0 - ...... - 1 15 1 0 0 0 0 - 1 35 1 0 0 0 0 - 2 5 1 0 0 0 0 - 2 11 1 0 0 0 0 - 2 12 1 0 0 0 0 - 3 12 2 0 0 0 0 - 3 13 1 0 0 0 0 - 4 18 1 0 0 0 0 - ...... 
- - >PUBCHEM_COMPOUND_CID< - 28434379 - - > <PUBCHEM_COMPOUND_CANONICALIZED> - 1 - - > <PUBCHEM_CACTVS_COMPLEXITY> - 280 - - > <PUBCHEM_CACTVS_HBOND_ACCEPTOR> - 2 - - > <PUBCHEM_CACTVS_HBOND_DONOR> - 2 - - > <PUBCHEM_CACTVS_ROTATABLE_BOND> - 2 - - > <PUBCHEM_CACTVS_SUBSKEYS> - AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== - - > - - - type : FP2 - -* output:: - - #FPS1 - #num_bits=1021 - #type=OpenBabel-FP2/1 - #software=OpenBabel/2.3.0 - #source=/tmp/dataset_409.dat.sdf - #date=2012-02-03T11:13:39 - c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c - 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300 - 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379 - - - </help> -</tool>
--- a/chemfp_ob2fps/test-data/CID_2244.can Fri Apr 26 08:02:45 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -CC(=O)Oc1ccccc1C(=O)O 2244
--- a/chemfp_ob2fps/test-data/CID_2244.inchi Fri Apr 26 08:02:45 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
--- a/chemfp_ob2fps/test-data/CID_2244.sdf Fri Apr 26 08:02:45 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,155 +0,0 @@ -2244 - -OEChem-05151212332D - - 21 21 0 0 0 0 0 0 0999 V2000 - 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1 5 1 0 0 0 0 - 1 12 1 0 0 0 0 - 2 11 1 0 0 0 0 - 2 21 1 0 0 0 0 - 3 11 2 0 0 0 0 - 4 12 2 0 0 0 0 - 5 6 1 0 0 0 0 - 5 7 2 0 0 0 0 - 6 8 2 0 0 0 0 - 6 11 1 0 0 0 0 - 7 9 1 0 0 0 0 - 7 14 1 0 0 0 0 - 8 10 1 0 0 0 0 - 8 15 1 0 0 0 0 - 9 10 2 0 0 0 0 - 9 16 1 0 0 0 0 - 10 17 1 0 0 0 0 - 12 13 1 0 0 0 0 - 13 18 1 0 0 0 0 - 13 19 1 0 0 0 0 - 13 20 1 0 0 0 0 -M END -> <PUBCHEM_COMPOUND_CID> -2244 - -> <PUBCHEM_COMPOUND_CANONICALIZED> -1 - -> <PUBCHEM_CACTVS_COMPLEXITY> -212 - -> <PUBCHEM_CACTVS_HBOND_ACCEPTOR> -4 - -> <PUBCHEM_CACTVS_HBOND_DONOR> -1 - -> <PUBCHEM_CACTVS_ROTATABLE_BOND> -3 - -> <PUBCHEM_CACTVS_SUBSKEYS> 
-AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== - -> <PUBCHEM_IUPAC_OPENEYE_NAME> -2-acetoxybenzoic acid - -> <PUBCHEM_IUPAC_CAS_NAME> -2-acetyloxybenzoic acid - -> <PUBCHEM_IUPAC_NAME> -2-acetyloxybenzoic acid - -> <PUBCHEM_IUPAC_SYSTEMATIC_NAME> -2-acetyloxybenzoic acid - -> <PUBCHEM_IUPAC_TRADITIONAL_NAME> -2-acetoxybenzoic acid - -> <PUBCHEM_IUPAC_INCHI> -InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) - -> <PUBCHEM_IUPAC_INCHIKEY> -BSYNRYMUTXBXSQ-UHFFFAOYSA-N - -> <PUBCHEM_XLOGP3> -1.2 - -> <PUBCHEM_EXACT_MASS> -180.042259 - -> <PUBCHEM_MOLECULAR_FORMULA> -C9H8O4 - -> <PUBCHEM_MOLECULAR_WEIGHT> -180.15742 - -> <PUBCHEM_OPENEYE_CAN_SMILES> -CC(=O)OC1=CC=CC=C1C(=O)O - -> <PUBCHEM_OPENEYE_ISO_SMILES> -CC(=O)OC1=CC=CC=C1C(=O)O - -> <PUBCHEM_CACTVS_TPSA> -63.6 - -> <PUBCHEM_MONOISOTOPIC_WEIGHT> -180.042259 - -> <PUBCHEM_TOTAL_CHARGE> -0 - -> <PUBCHEM_HEAVY_ATOM_COUNT> -13 - -> <PUBCHEM_ATOM_DEF_STEREO_COUNT> -0 - -> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> -0 - -> <PUBCHEM_BOND_DEF_STEREO_COUNT> -0 - -> <PUBCHEM_BOND_UDEF_STEREO_COUNT> -0 - -> <PUBCHEM_ISOTOPIC_ATOM_COUNT> -0 - -> <PUBCHEM_COMPONENT_COUNT> -1 - -> <PUBCHEM_CACTVS_TAUTO_COUNT> -1 - -> <PUBCHEM_COORDINATE_TYPE> -1 -5 -255 - -> <PUBCHEM_BONDANNOTATIONS> -5 6 8 -5 7 8 -6 8 8 -7 9 8 -8 10 8 -9 10 8 - -$$$$ -
--- a/chemfp_ob2fps/test-data/CID_2244.smi Fri Apr 26 08:02:45 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -O(c1c(cccc1)C(=O)O)C(=O)C 2244
--- a/chemfp_ob2fps/test-data/CID_2244_FP2.fps Fri Apr 26 08:02:45 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.3.1 -#source=CID_2244.sdf -#date=2012-05-15T16:40:38 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
--- a/chemfp_ob2fps/test-data/CID_2244_FP3.fps Fri Apr 26 08:02:45 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#FPS1 -#num_bits=55 -#type=OpenBabel-FP3/1 -#software=OpenBabel/2.3.1 -#source=CID_2244.sdf -#date=2012-05-15T16:59:15 -0400000c50b007 2244
--- a/chemfp_ob2fps/test-data/CID_2244_FP4.fps Fri Apr 26 08:02:45 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#FPS1 -#num_bits=307 -#type=OpenBabel-FP4/1 -#software=OpenBabel/2.3.1 -#source=CID_2244.sdf -#date=2012-05-15T16:59:22 -010000000000000000009800000000004001000000000000000000000000000000000240402801 2244
--- a/chemfp_ob2fps/test-data/CID_2244_maccs.fps Fri Apr 26 08:02:45 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#FPS1 -#num_bits=166 -#type=OpenBabel-MACCS/2 -#software=OpenBabel/2.3.1 -#source=CID_2244.sdf -#date=2012-05-15T17:00:39 -0000000000000000000000010000016480cca2d21e 2244
--- a/repository_dependencies.xml Fri Apr 26 08:02:45 2013 -0400 +++ b/repository_dependencies.xml Fri Apr 26 10:01:47 2013 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0"?> <repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format) and the python numpy package."> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="molecule_datatypes" owner="bgruening" changeset_revision="25698453d7d7" /> - <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="bgruening" changeset_revision="c3041382815c" /> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="molecule_datatypes" owner="bgruening" changeset_revision="25698453d7d7" /> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="bgruening" changeset_revision="c3041382815c" /> </repositories>