Mercurial > repos > bgruening > chemfp
comparison chemfp_clustering/nxn_clustering.py @ 22:6c496b524b41
ChemicalToolBoX update.
author | Bjoern Gruening <bjoern.gruening@gmail.com> |
---|---|
date | Sun, 02 Jun 2013 19:53:56 +0200 |
parents | 7c84cfa515e0 |
children | 1868005213a1 |
comparison
equal
deleted
inserted
replaced
21:7c84cfa515e0 | 22:6c496b524b41 |
---|---|
10 import os | 10 import os |
11 import chemfp | 11 import chemfp |
12 import scipy.cluster.hierarchy as hcluster | 12 import scipy.cluster.hierarchy as hcluster |
13 import pylab | 13 import pylab |
14 import numpy | 14 import numpy |
15 import tempfile | |
16 | |
17 | 15 |
18 def distance_matrix(arena, tanimoto_threshold = 0.0): | 16 def distance_matrix(arena, tanimoto_threshold = 0.0): |
19 n = len(arena) | 17 n = len(arena) |
20 # Start off a similarity matrix with 1.0s along the diagonal | 18 # Start off a similarity matrix with 1.0s along the diagonal |
21 try: | 19 try: |
35 for target_index, target_score in row: | 33 for target_index, target_score in row: |
36 similarities[row_index, target_index] = target_score | 34 similarities[row_index, target_index] = target_score |
37 | 35 |
38 # Return the distance matrix using the similarity matrix | 36 # Return the distance matrix using the similarity matrix |
39 return 1.0 - similarities | 37 return 1.0 - similarities |
40 | |
41 | 38 |
42 | 39 |
43 if __name__ == "__main__": | 40 if __name__ == "__main__": |
44 parser = argparse.ArgumentParser(description="""NxN clustering for fps files. | 41 parser = argparse.ArgumentParser(description="""NxN clustering for fps files. |
45 For more details please see the chemfp documentation: | 42 For more details please see the chemfp documentation: |
62 parser.add_argument('-p', '--processors', type=int, | 59 parser.add_argument('-p', '--processors', type=int, |
63 default=4) | 60 default=4) |
64 | 61 |
65 args = parser.parse_args() | 62 args = parser.parse_args() |
66 | 63 |
67 # make sure that the file ending is fps | 64 targets = chemfp.open( args.input_path, format='fps' ) |
68 temp_file = tempfile.NamedTemporaryFile() | 65 arena = chemfp.load_fingerprints( targets ) |
69 temp_link = "%s.%s" % (temp_file.name, 'fps') | |
70 temp_file.close() | |
71 os.symlink(args.input_path, temp_link) | |
72 | |
73 arena = chemfp.load_fingerprints( temp_link ) | |
74 distances = distance_matrix( arena, args.tanimoto_threshold ) | 66 distances = distance_matrix( arena, args.tanimoto_threshold ) |
75 linkage = hcluster.linkage( distances, method="single", metric="euclidean" ) | 67 linkage = hcluster.linkage( distances, method="single", metric="euclidean" ) |
76 | 68 |
77 hcluster.dendrogram(linkage, labels=arena.ids) | 69 hcluster.dendrogram(linkage, labels=arena.ids) |
78 | 70 |
79 pylab.savefig( args.output_path, format=args.oformat ) | 71 pylab.savefig( args.output_path, format=args.oformat ) |
80 | 72 |
81 | |
82 |