Mercurial > repos > bgruening > chemfp
comparison chemfp_clustering/nxn_clustering.py @ 0:a8ac5250d59c
Uploaded
| author | bgruening |
|---|---|
| date | Tue, 26 Mar 2013 13:05:41 -0400 |
| parents | |
| children | 438bc12d591b |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a8ac5250d59c |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Modified version of code examples from the chemfp project. | |
| 4 http://code.google.com/p/chem-fingerprints/ | |
| 5 Thanks to Andrew Dalke of Andrew Dalke Scientific! | |
| 6 """ | |
| 7 import matplotlib | |
| 8 matplotlib.use('Agg') | |
| 9 import sys | |
| 10 import os | |
| 11 import chemfp | |
| 12 import scipy.cluster.hierarchy as hcluster | |
| 13 import pylab | |
| 14 import numpy | |
| 15 | |
| 16 | |
def distance_matrix(arena, t):
    """Build a dense N x N Tanimoto distance matrix for ``arena``.

    The arena is searched against itself, one sub-arena from
    ``arena.iter_arenas()`` at a time, and every reported hit is stored
    as ``1.0 - score``.

    Parameters:
        arena: fingerprint arena; must support ``len()``, ``iter_arenas()``
            and ``threshold_tanimoto_search_arena()``.
        t: threshold passed to the search. Pairs the search does not
            report keep the default distance of 1.0.

    Returns:
        A ``numpy.float64`` array of shape ``(len(arena), len(arena))``
        whose diagonal is always 0.0.
    """
    n = len(arena)
    # The search result is sparse (only hits are reported), so start from
    # 1.0 everywhere; that is the distance left for unreported pairs.
    distances = numpy.ones((n, n), numpy.float64)

    # Row offset of the current query sub-arena within the full arena.
    query_row = 0

    for query_arena in arena.iter_arenas():
        results = arena.threshold_tanimoto_search_arena(query_arena, threshold=t)
        for q_i, hits in enumerate(results.iter_indices_and_scores()):
            row = distances[query_row + q_i]
            for target_idx, score in hits:
                row[target_idx] = 1.0 - score
        query_row += len(query_arena)

    # A record's distance to itself is 0 by definition. Do not rely on the
    # search reporting a self-hit: a fingerprint whose self-similarity is
    # below the threshold would otherwise keep the default 1.0.
    numpy.fill_diagonal(distances, 0.0)

    return distances
| 36 | |
# Command line: <fingerprint file> <Tanimoto threshold> <output SVG path>
from scipy.spatial.distance import squareform

dataset = chemfp.load_fingerprints(sys.argv[1])
distances = distance_matrix(dataset, float(sys.argv[2]))

# linkage() interprets a 2-D array as observation vectors and would cluster
# on the Euclidean distance between matrix rows. Hand it the condensed
# (upper-triangle) form so the Tanimoto distances themselves are clustered.
# checks=False skips the symmetry/zero-diagonal validation; the diagonal is
# not part of the condensed form.
condensed = squareform(distances, checks=False)
linkage = hcluster.linkage(condensed, method="single")

# Plot using matplotlib, which you must have installed
hcluster.dendrogram(linkage, labels=dataset.ids)

pylab.savefig(sys.argv[3], format='svg')
| 45 | |
| 46 | |
| 47 | |
| 48 | |
| 49 | |
| 50 | |
| 51 |
