Mercurial > repos > chemteam > biomd_rmsd_clustering
comparison get_clusters.py @ 0:f45b010339f6 draft default tip
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
| author | chemteam |
|---|---|
| date | Mon, 24 Aug 2020 10:06:27 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f45b010339f6 |
|---|---|
| 1 import argparse | |
| 2 import collections | |
| 3 import json | |
| 4 | |
| 5 import numpy as np | |
| 6 | |
| 7 from scipy.cluster.hierarchy import fcluster | |
| 8 | |
| 9 | |
def separate_clusters(Z_fpath, threshold, min_members, output):
    """Cut a hierarchical clustering at a distance and save it as JSON.

    The linkage matrix is read from a text file, flat clusters are formed
    with ``fcluster`` using the ``'distance'`` criterion, clusters with
    fewer than ``min_members`` members are dropped, and the remainder is
    written to ``output`` as a JSON object mapping cluster id to the list
    of zero-based observation indices assigned to that cluster.

    Args:
        Z_fpath: Path to a text file, readable by ``numpy.loadtxt``, that
            holds the cluster linkage array.
        threshold: Distance cutoff passed to ``fcluster``.
        min_members: Minimum number of members a cluster needs in order
            to be written out.
        output: Path of the JSON file to write.
    """
    # ndmin=2 keeps a single-merge linkage (two observations) as a 1x4
    # matrix; without it loadtxt returns a 1-D array that fcluster rejects.
    Z = np.loadtxt(Z_fpath, ndmin=2)
    branch_assignments = fcluster(Z, threshold, criterion='distance')

    cluster_dict = collections.defaultdict(list)
    for n, cluster_id in enumerate(branch_assignments):
        # Cast the numpy integer label to a built-in int so that it is a
        # valid JSON object key.
        cluster_dict[int(cluster_id)].append(n)

    cluster_dict = {k: v for k, v in cluster_dict.items()
                    if len(v) >= min_members}
    with open(output, 'w') as f:
        json.dump(cluster_dict, f, indent=4, sort_keys=True)
| 20 | |
| 21 | |
def main():
    """Parse the command-line options and run the cluster separation.

    Options (all required):
        --Z            File for cluster linkage array.
        --threshold    Distance cutoff.
        --min-members  Minimum number of members of the cluster.
        --output       Output file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--Z', required=True,
                        help='File for cluster linkage array.')
    # The cutoff is a distance, so parse it as a float: type=int would
    # refuse fractional cutoffs such as 2.5. Integer input still parses.
    parser.add_argument('--threshold', type=float, required=True,
                        help='Distance cutoff.')
    parser.add_argument('--min-members', type=int, required=True,
                        help='Minimum number of members of the cluster.')
    parser.add_argument('--output', required=True,
                        help='Output file.')
    args = parser.parse_args()

    separate_clusters(args.Z, args.threshold,
                      args.min_members, args.output)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
