# HG changeset patch # User Bjoern Gruening # Date 1374244085 -7200 # Node ID 1868005213a1d0f8b64a5d7140b8bb56825c1b8e # Parent 6c496b524b41446494f54fe04f6dd9f43753d8af ChemicalToolBoX update. diff -r 6c496b524b41 -r 1868005213a1 chemfp_clustering/butina_clustering.xml --- a/chemfp_clustering/butina_clustering.xml Sun Jun 02 19:53:56 2013 +0200 +++ b/chemfp_clustering/butina_clustering.xml Fri Jul 19 16:28:05 2013 +0200 @@ -27,18 +27,25 @@ -**Note**. You need molecular fingerprints in FPS format. Open Babel Fastsearch index is not supported. + +.. class:: infomark -**What it does** -Clustering of molecule libraries using the Taylor-Butina algorithm. This tool is based on the chemfp_ project. +**What this tool does** + +Unsupervised non-hierarchical clustering method based on the Taylor-Butina algorithm, which guarantees that every cluster contains molecules which are within a distance cutoff of the central molecule. This tool is based on the chemfp_ project. .. _chemfp: http://chemfp.com/ ----- -**Example** +.. class:: infomark + +**Input** -* input:: +| Molecular fingerprints in FPS format. +| Open Babel Fastsearch index is not supported. + +* Example:: - fingerprints in FPS format @@ -56,7 +63,13 @@ - Tanimoto threshold : 0.8 (between 0 and 1) -* output:: +----- + +.. class:: infomark + +**Output** + +* Example:: 0 true singletons => @@ -68,10 +81,13 @@ 55091849 has 12 other members => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823 +----- -**References** +.. class:: infomark -Please reference the chemfp_ project. +**Cite** + +The chemfp_ project from Andrew Dalke! .. 
_chemfp: http://chemfp.com/ diff -r 6c496b524b41 -r 1868005213a1 chemfp_clustering/nxn_clustering.py --- a/chemfp_clustering/nxn_clustering.py Sun Jun 02 19:53:56 2013 +0200 +++ b/chemfp_clustering/nxn_clustering.py Fri Jul 19 16:28:05 2013 +0200 @@ -47,14 +47,17 @@ required=True, help="Path to the input file.") - parser.add_argument("-o", "--output", dest="output_path", - help="Path to the output file.") + parser.add_argument("-c", "--cluster", dest="cluster_image", + help="Path to the output cluster image.") + + parser.add_argument("-s", "--smatrix", dest="similarity_matrix", + help="Path to the similarity matrix output file.") parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", type=float, default=0.0, help="Tanimoto threshold [0.0]") - parser.add_argument("--oformat", default='png', help="Output format (png, svg).") + parser.add_argument("--oformat", default='png', help="Output format (png, svg)") parser.add_argument('-p', '--processors', type=int, default=4) @@ -64,9 +67,14 @@ targets = chemfp.open( args.input_path, format='fps' ) arena = chemfp.load_fingerprints( targets ) distances = distance_matrix( arena, args.tanimoto_threshold ) - linkage = hcluster.linkage( distances, method="single", metric="euclidean" ) + + if args.similarity_matrix: + distances.tofile( args.similarity_matrix ) - hcluster.dendrogram(linkage, labels=arena.ids) + if args.cluster_image: + linkage = hcluster.linkage( distances, method="single", metric="euclidean" ) - pylab.savefig( args.output_path, format=args.oformat ) + hcluster.dendrogram(linkage, labels=arena.ids) + pylab.savefig( args.cluster_image, format=args.oformat ) + diff -r 6c496b524b41 -r 1868005213a1 chemfp_clustering/nxn_clustering.xml --- a/chemfp_clustering/nxn_clustering.xml Sun Jun 02 19:53:56 2013 +0200 +++ b/chemfp_clustering/nxn_clustering.xml Fri Jul 19 16:28:05 2013 +0200 @@ -1,4 +1,4 @@ - + of molecular fingerprints numpy @@ -11,40 +11,53 @@ nxn_clustering.py -i $infile -t $threshold - -o $outfile 
+ #if str($output_files) in ['both', 'image']: + --cluster $image + #end if + #if str($output_files) in ['both', 'matrix']: + --smatrix $smilarity_matrix + #end if --oformat $oformat - + + + + + + - + + output_files == "both" or output_files == "image" + + output_files == "both" or output_files == "matrix" + - + + -**Note**. You need molecular fingerprints in FPS format. Open Babel Fastsearch index is not supported. +.. class:: infomark -**Note**. Currently, that tool can only be used with a small dataset. +**What this tool does** - -**What it does** Generating hierarchical clusters and visualizing clusters with dendrograms. For the clustering and the fingerprint handling the chemfp_ project is used. @@ -52,9 +65,21 @@ ----- -**Example** +.. class:: warningmark + +**Hint** + +The plotting of the cluster image is sensible only with a small dataset. + +----- -* input:: +.. class:: infomark + +**Input** + +Molecular fingerprints in FPS format. Open Babel Fastsearch index is not supported. + +* Example:: - fingerprints in FPS format @@ -72,11 +97,25 @@ - Tanimoto threshold : 0.8 (between 0 and 1) -* output:: +----- + +.. class:: infomark + +**Output** + +* Example:: + + .. image:: $PATH_TO_IMAGES/NxN_clustering.png - clustring plot +----- + +.. class:: infomark -.. image:: $PATH_TO_IMAGES/NxN_clustering.png +**Cite** + +The chemfp_ project from Andrew Dalke! + +.. _chemfp: http://chemfp.com/ diff -r 6c496b524b41 -r 1868005213a1 chemfp_mol2fps/mol2fps.xml --- a/chemfp_mol2fps/mol2fps.xml Sun Jun 02 19:53:56 2013 +0200 +++ b/chemfp_mol2fps/mol2fps.xml Fri Jul 19 16:28:05 2013 +0200 @@ -166,21 +166,26 @@ +.. class:: infomark -**What it does** +**What this tool does** -Generates different types of fingerprints from the `Open Babel`_ and RDkit_ project. -This tool is using chemfp_. For more information please have a look at: +This tool uses chemfp_ to calculate 10 different fingerprints of common file formats. Chemfp uses `Open Babel`_, OpenEye_ and RDKit_. 
- - http://code.google.com/p/rdkit/wiki/FingerprintsInTheRDKit - - http://openbabel.org/wiki/Tutorial:Fingerprints +For more information check the websites listed below:: + - http://code.google.com/p/rdkit/wiki/FingerprintsInTheRDKit + - http://openbabel.org/wiki/Tutorial:Fingerprints ----- -**Example** +.. class:: infomark + +**Input** -* input:: +FPS fingerprint file format + +* Example:: - SDF File @@ -230,7 +235,13 @@ - type : FP2 -* output:: +----- + +.. class:: infomark + +**Output** + +* Example:: #FPS1 #num_bits=1021 @@ -242,16 +253,20 @@ 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379 +----- -**References** +.. class:: infomark -Please reference the `Open Babel`_ or RDKit_ project and the chemfp_ project. +**Cite** -N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and G R Hutchison. "Open Babel: An open chemical toolbox." J. Cheminf. (2011), 3, 33. `DOI:10.1186/1758-2946-3-33`_ -The Open Babel Package http://openbabel.sourceforge.net/ +| `Open Babel`_ +| RDKit_ project +| chemfp_ project. +| +| N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch and G R Hutchison. `Open Babel: An open chemical toolbox.`_ - -.. _DOI:10.1186/1758-2946-3-33: http://www.jcheminf.com/content/3/1/33 +.. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33 +.. _OpenEye: http://www.eyesopen.com/ .. _chemfp: http://chemfp.com/ .. _RDKit: http://www.rdkit.org/ .. _`Open Babel`: http://openbabel.org/ diff -r 6c496b524b41 -r 1868005213a1 chemfp_sdf2fps/sdf2fps.xml --- a/chemfp_sdf2fps/sdf2fps.xml Sun Jun 02 19:53:56 2013 +0200 +++ b/chemfp_sdf2fps/sdf2fps.xml Fri Jul 19 16:28:05 2013 +0200 @@ -18,18 +18,24 @@ +.. class:: infomark -**What it does** +**What this tool does** -Read a PubChem_ SD file and extract the fingerprints, to stores them in a FPS-file. 
+Read an input SD file, extract the fingerprints and store them in an FPS-file. ----- -**Example** - * input:: +.. class:: infomark + +**Input** + +`SD-Format`_ + +.. _`SD-Format`: http://en.wikipedia.org/wiki/Chemical_table_file + +* Example:: - SDF File - 28434379 -OEChem-02031205132D @@ -74,7 +80,13 @@ > -* output:: +----- + +.. class:: infomark + +**Output** + +* Example:: #FPS1 #num_bits=881 @@ -88,13 +100,15 @@ 8b2924101609401b13e4080000000000010020000004008000 0010000002000000000000 28434379 +----- -**References** +.. class:: infomark -Please reference the chemfp_ project. +**Cite** + +chemfp_ project .. _chemfp: http://chemfp.com/ -.. _PubChem: http://pubchem.ncbi.nlm.nih.gov/ diff -r 6c496b524b41 -r 1868005213a1 force_pre-commit_hook_temp_file