# HG changeset patch # User bgruening # Date 1495299369 14400 # Node ID bcb3c078b2b4168d74206e63303e8b7264a20af8 # Parent a9fd0995b5bfc6f78440e3f814809a28f9428dfc planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit d786052cd04f8b25eb4aff80b1b9724f62031b61 diff -r a9fd0995b5bf -r bcb3c078b2b4 butina_clustering.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/butina_clustering.py Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,134 @@ +#!/usr/bin/env python +""" + Modified version of code examples from the chemfp project. + http://code.google.com/p/chem-fingerprints/ + Thanks to Andrew Dalke of Andrew Dalke Scientific! +""" + +import chemfp +import sys +import os +import tempfile +import argparse +import subprocess +from chemfp import search + +def unix_sort(results): + temp_unsorted = tempfile.NamedTemporaryFile(delete=False) + for (i,indices) in enumerate( results.iter_indices() ): + temp_unsorted.write('%s %s\n' % (len(indices), i)) + temp_unsorted.close() + temp_sorted = tempfile.NamedTemporaryFile(delete=False) + temp_sorted.close() + p = subprocess.Popen(['sort', '-n', '-r', '-k', '1,1'], stdin=open(temp_unsorted.name), stdout=open(temp_sorted.name, 'w+')) + stdout, stderr = p.communicate() + return_code = p.returncode + + if return_code: + sys.stdout.write(stdout) + sys.stderr.write(stderr) + sys.stderr.write("Return error code %i from command:\n" % return_code) + temp_sorted.close() + os.remove(temp_unsorted.name) + + for line in open(temp_sorted.name): + size, fp_idx = line.strip().split() + yield (int(size), int(fp_idx)) + + os.remove(temp_sorted.name) + +def butina( args ): + """ + Taylor-Butina clustering from the chemfp help. + """ + out = args.output_path + targets = chemfp.open( args.input_path, format='fps' ) + arena = chemfp.load_fingerprints( targets ) + + chemfp.set_num_threads( args.processors ) + results = search.threshold_tanimoto_search_symmetric(arena, threshold = args.tanimoto_threshold) + results.reorder_all("move-closest-first") + + sorted_ids = unix_sort(results) + + # Determine the true/false singletons and the clusters + true_singletons = [] + false_singletons = [] + clusters = [] + + seen = set() + #for (size, fp_idx, members) in results: + for (size, fp_idx) in sorted_ids: + members = results[fp_idx].get_indices() + #print arena.ids[ fp_idx ], [arena.ids[ m ] for m in members] + if fp_idx in seen: + # Can't use a centroid which is already assigned + continue + seen.add(fp_idx) + + if size == 0: + # The only fingerprint in the exclusion sphere is itself + true_singletons.append( fp_idx ) + continue + + # Figure out which ones haven't yet been assigned + unassigned = set(members) - seen + + if not unassigned: + false_singletons.append(fp_idx) + continue + + # this is a new cluster + clusters.append( (fp_idx, unassigned) ) + seen.update(unassigned) + + len_cluster = len(clusters) + #out.write( "#%s true singletons: %s\n" % ( len(true_singletons), " ".join(sorted(arena.ids[idx] for idx in true_singletons)) ) ) + #out.write( "#%s false singletons: %s\n" % ( len(false_singletons), " ".join(sorted(arena.ids[idx] for idx in false_singletons)) ) ) + + out.write( "#%s true singletons\n" % len(true_singletons) ) + out.write( "#%s false singletons\n" % len(false_singletons) ) + out.write( "#clusters: %s\n" % len_cluster ) + + # Sort so the cluster with the most compounds comes first, + # then by alphabetically smallest id + def cluster_sort_key(cluster): + centroid_idx, members = cluster + return -len(members), arena.ids[centroid_idx] + + clusters.sort(key=cluster_sort_key) + + for centroid_idx, members in clusters: + centroid_name = arena.ids[centroid_idx] + out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(arena.ids[idx] for idx in members))) + #ToDo: len(members) need to be some biggest top 90% or something ... + + for idx in true_singletons: + out.write("%s\t%s\n" % (arena.ids[idx], 0)) + + out.close() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="""Taylor-Butina clustering for fps files. +For more details please see the original publication or the chemfp documentation: +http://www.chemomine.co.uk/dbclus-paper.pdf +https://chemfp.readthedocs.org +""") + + parser.add_argument("-i", "--input", dest="input_path", + required=True, + help="Path to the input file.") + + parser.add_argument("-o", "--output", dest="output_path", type=argparse.FileType('w'), + default=sys.stdout, + help="Path to the output file.") + + parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", type=float, + default=0.8, + help="Tanimoto threshold [0.8]") + + parser.add_argument('-p', '--processors', type=int, default=4) + + options = parser.parse_args() + butina( options ) diff -r a9fd0995b5bf -r bcb3c078b2b4 butina_clustering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/butina_clustering.xml Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,93 @@ + + of molecular fingerprints + + chemfp + openbabel + + + + + + + + + + + + + + + + + + + + + + 0 false singletons + => + + 1 clusters + 55091849 has 12 other members + => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823 + + +]]> + + + 10.1186/1758-2946-5-S1-P36 + + diff -r a9fd0995b5bf -r bcb3c078b2b4 mol2fps.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mol2fps.xml Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,276 @@ + + with different fingerprint types + + + chemfp + rdkit + openbabel + + +&1 +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +PUBCHEM_COMPOUND_CID< + 28434379 + + > + 1 + + > + 280 + + > + 2 + + > + 2 + + > + 2 + + > + AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== + + > + + - type : FP2 + +----- + +.. class:: infomark + +**Output** + +* Example:: + + #FPS1 + #num_bits=1021 + #type=OpenBabel-FP2/1 + #software=OpenBabel/2.3.0 + #source=/tmp/dataset_409.dat.sdf + #date=2012-02-03T11:13:39 + c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c + 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300 + 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379 + + +]]> + + + 10.1186/1758-2946-3-33 + 10.1186/1758-2946-5-S1-P36 + + @electronic{rdkit, + title = {RDKit: Open-source cheminformatics}, + url ={http://www.rdkit.org} + } + + + diff -r a9fd0995b5bf -r bcb3c078b2b4 nxn_clustering.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nxn_clustering.py Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,80 @@ +#!/usr/bin/env python +""" + Modified version of code examples from the chemfp project. + http://code.google.com/p/chem-fingerprints/ + Thanks to Andrew Dalke of Andrew Dalke Scientific! +""" +import matplotlib +matplotlib.use('Agg') +import argparse +import os +import chemfp +import scipy.cluster.hierarchy as hcluster +import pylab +import numpy + +def distance_matrix(arena, tanimoto_threshold = 0.0): + n = len(arena) + # Start off a similarity matrix with 1.0s along the diagonal + try: + similarities = numpy.identity(n, "d") + except: + raise Exception('Input dataset is to large!') + chemfp.set_num_threads( args.processors ) + + ## Compute the full similarity matrix. + # The implementation computes the upper-triangle then copies + # the upper-triangle into lower-triangle. It does not include + # terms for the diagonal. + results = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold) + + # Copy the results into the NumPy array. + for row_index, row in enumerate(results.iter_indices_and_scores()): + for target_index, target_score in row: + similarities[row_index, target_index] = target_score + + # Return the distance matrix using the similarity matrix + return 1.0 - similarities + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="""NxN clustering for fps files. +For more details please see the chemfp documentation: +https://chemfp.readthedocs.org +""") + + parser.add_argument("-i", "--input", dest="input_path", + required=True, + help="Path to the input file.") + + parser.add_argument("-c", "--cluster", dest="cluster_image", + help="Path to the output cluster image.") + + parser.add_argument("-s", "--smatrix", dest="similarity_matrix", + help="Path to the similarity matrix output file.") + + parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", + type=float, default=0.0, + help="Tanimoto threshold [0.0]") + + parser.add_argument("--oformat", default='png', help="Output format (png, svg)") + + parser.add_argument('-p', '--processors', type=int, + default=4) + + args = parser.parse_args() + + targets = chemfp.open( args.input_path, format='fps' ) + arena = chemfp.load_fingerprints( targets ) + distances = distance_matrix( arena, args.tanimoto_threshold ) + + if args.similarity_matrix: + distances.tofile( args.similarity_matrix ) + + if args.cluster_image: + linkage = hcluster.linkage( distances, method="single", metric="euclidean" ) + + hcluster.dendrogram(linkage, labels=arena.ids) + + pylab.savefig( args.cluster_image, format=args.oformat ) + diff -r a9fd0995b5bf -r bcb3c078b2b4 nxn_clustering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nxn_clustering.xml Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,120 @@ + + of molecular fingerprints + + chemfp + python + matplotlib + scipy + openbabel + + + + + + + + + + + + + + + + + + + + + output_files == "both" or output_files == "image" + + + + + + output_files == "both" or output_files == "matrix" + + + + + + + + + + + + + + + + 10.1186/1758-2946-5-S1-P36 + + diff -r a9fd0995b5bf -r bcb3c078b2b4 sdf2fps.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sdf2fps.xml Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,116 @@ + + extract fingerprints from sdf files metadata + + + chemfp + openbabel + + + '${outfile}' +]]> + + + + + + + + + + + + + + +PUBCHEM_COMPOUND_CID< + 28434379 + + > + 1 + + > + 280 + + > + 2 + + > + 2 + + > + 2 + + > + AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== + + > + +----- + +.. class:: infomark + +**Output** + +* Example:: + + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat + #date=2012-02-03T10:44:12 + 07ce04000000000000000000000000000080060000000c0600 + 00000000001a800f0000780008100000101487e9608c0bed32 + 48000580644626204101b4844805901b041c2e19511e45039b + 8b2924101609401b13e4080000000000010020000004008000 + 0010000002000000000000 28434379 + + +]]> + + + 10.1186/1758-2946-5-S1-P36 + + diff -r a9fd0995b5bf -r bcb3c078b2b4 simsearch.xml --- a/simsearch.xml Sat May 20 12:44:31 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,121 +0,0 @@ - - of fingerprint data sets - - chemfp - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 10.1186/1758-2946-3-33 - 10.1186/1758-2946-5-S1-P36 - - diff -r a9fd0995b5bf -r bcb3c078b2b4 static/images/NxN_clustering.png Binary file static/images/NxN_clustering.png has changed diff -r a9fd0995b5bf -r bcb3c078b2b4 static/images/NxN_clustering.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/static/images/NxN_clustering.svg Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,2275 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/CID_2244.can --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244.can Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,1 @@ +CC(=O)Oc1ccccc1C(=O)O 2244 diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/CID_2244.inchi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244.inchi Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,1 @@ +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/CID_2244.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244.sdf Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,154 @@ +2244 + -OEChem-05151212332D + + 21 21 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> +2244 + +> +1 + +> +212 + +> +4 + +> +1 + +> +3 + +> +AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> +2-acetoxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetoxybenzoic acid + +> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> +1.2 + +> +180.042259 + +> +C9H8O4 + +> +180.15742 + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +63.6 + +> +180.042259 + +> +0 + +> +13 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +1 +5 +255 + +> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/CID_2244.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244.smi Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,1 @@ +O(c1c(cccc1)C(=O)O)C(=O)C 2244 diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/CID_2244_FP2.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244_FP2.fps Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.4.1 +#source=/tmp/tmptaAke4/files/000/dataset_3.dat +#date=2017-05-19T13:52:59 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/CID_2244_FP3.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244_FP3.fps Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.4.1 +#source=/tmp/tmptaAke4/files/000/dataset_7.dat +#date=2017-05-19T13:53:45 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/CID_2244_FP4.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244_FP4.fps Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.4.1 +#source=/tmp/tmptaAke4/files/000/dataset_11.dat +#date=2017-05-19T13:54:39 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/CID_2244_MACCS.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244_MACCS.fps Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.4.1 +#source=/tmp/tmptaAke4/files/000/dataset_15.dat +#date=2017-05-19T13:55:30 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/CID_2244_maccs.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244_maccs.fps Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=166 +#type=OpenBabel-MACCS/2 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T17:00:39 +0000000000000000000000010000016480cca2d21e 2244 diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/NxN_Clustering_on_q.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/NxN_Clustering_on_q.svg Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,707 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/Taylor-Butina_Clustering_on_data_q.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Taylor-Butina_Clustering_on_data_q.txt Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,4 @@ +#0 true singletons +#0 false singletons +#clusters: 1 +55091752 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091467 55168823 55091849 diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/sdf2fps_result1.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sdf2fps_result1.fps Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=881 +#type=CACTVS-E_SCREEN/1.0 extended=2 +#software=CACTVS/unknown +#source=/tmp/tmpN2w37z/files/000/dataset_1.dat +#date=2017-05-19T14:27:41 +030e1c000000000000000000000000000000000000000c00000000000000008000000058000010000030200119004c70010060001140044b100040100024040010118060101330e46c21ac5841de06a50788109de11113047100000000001000000000000000080000000000000000 2244 diff -r a9fd0995b5bf -r bcb3c078b2b4 test-data/simsearch_on_tragets_and_q.tabular --- a/test-data/simsearch_on_tragets_and_q.tabular Sat May 20 12:44:31 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -#Simsearch/1 -#num_bits=881 -#type=Tanimoto k=all threshold=0.7 -#software=chemfp/1.1p1 -#queries=./query.fps -#targets=./targets.fps -#query_sources=CID_28434379.sdf -#target_sources=Desktop/3579363516810334491.sdf -13 28434379 6499094 0.9615 6485578 0.9679 55079807 0.9313 3153534 0.9557 55102353 0.9682 55091466 0.9682 55091416 0.9682 6485577 0.9497 55169009 0.9560 55091752 0.9684 55091467 0.9623 55168823 0.9563 55091849 0.9563