comparison chemfp_clustering/nxn_clustering.py @ 22:6c496b524b41

ChemicalToolBoX update.
author Bjoern Gruening <bjoern.gruening@gmail.com>
date Sun, 02 Jun 2013 19:53:56 +0200
parents 7c84cfa515e0
children 1868005213a1
comparison
legend: equal | deleted | inserted | replaced
left column: 21:7c84cfa515e0 (parent) | right column: 22:6c496b524b41 (this changeset)
10 import os 10 import os
11 import chemfp 11 import chemfp
12 import scipy.cluster.hierarchy as hcluster 12 import scipy.cluster.hierarchy as hcluster
13 import pylab 13 import pylab
14 import numpy 14 import numpy
15 import tempfile
16
17 15
18 def distance_matrix(arena, tanimoto_threshold = 0.0): 16 def distance_matrix(arena, tanimoto_threshold = 0.0):
19 n = len(arena) 17 n = len(arena)
20 # Start off a similarity matrix with 1.0s along the diagonal 18 # Start off a similarity matrix with 1.0s along the diagonal
21 try: 19 try:
35 for target_index, target_score in row: 33 for target_index, target_score in row:
36 similarities[row_index, target_index] = target_score 34 similarities[row_index, target_index] = target_score
37 35
38 # Return the distance matrix using the similarity matrix 36 # Return the distance matrix using the similarity matrix
39 return 1.0 - similarities 37 return 1.0 - similarities
40
41 38
42 39
43 if __name__ == "__main__": 40 if __name__ == "__main__":
44 parser = argparse.ArgumentParser(description="""NxN clustering for fps files. 41 parser = argparse.ArgumentParser(description="""NxN clustering for fps files.
45 For more details please see the chemfp documentation: 42 For more details please see the chemfp documentation:
62 parser.add_argument('-p', '--processors', type=int, 59 parser.add_argument('-p', '--processors', type=int,
63 default=4) 60 default=4)
64 61
65 args = parser.parse_args() 62 args = parser.parse_args()
66 63
67 # make sure that the file ending is fps 64 targets = chemfp.open( args.input_path, format='fps' )
68 temp_file = tempfile.NamedTemporaryFile() 65 arena = chemfp.load_fingerprints( targets )
69 temp_link = "%s.%s" % (temp_file.name, 'fps')
70 temp_file.close()
71 os.symlink(args.input_path, temp_link)
72
73 arena = chemfp.load_fingerprints( temp_link )
74 distances = distance_matrix( arena, args.tanimoto_threshold ) 66 distances = distance_matrix( arena, args.tanimoto_threshold )
75 linkage = hcluster.linkage( distances, method="single", metric="euclidean" ) 67 linkage = hcluster.linkage( distances, method="single", metric="euclidean" )
76 68
77 hcluster.dendrogram(linkage, labels=arena.ids) 69 hcluster.dendrogram(linkage, labels=arena.ids)
78 70
79 pylab.savefig( args.output_path, format=args.oformat ) 71 pylab.savefig( args.output_path, format=args.oformat )
80 72
81
82