# HG changeset patch # User bgruening # Date 1495283797 14400 # Node ID 72e0a9fd6abc160bb3abbc6dedcdc4584ea83acc # Parent 73b8c87779aeba91c4e4bb8d032ada2291a33fbc planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/simsearch commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502 diff -r 73b8c87779ae -r 72e0a9fd6abc butina_clustering.py --- a/butina_clustering.py Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,134 +0,0 @@ -#!/usr/bin/env python -""" - Modified version of code examples from the chemfp project. - http://code.google.com/p/chem-fingerprints/ - Thanks to Andrew Dalke of Andrew Dalke Scientific! -""" - -import chemfp -import sys -import os -import tempfile -import argparse -import subprocess -from chemfp import search - -def unix_sort(results): - temp_unsorted = tempfile.NamedTemporaryFile(delete=False) - for (i,indices) in enumerate( results.iter_indices() ): - temp_unsorted.write('%s %s\n' % (len(indices), i)) - temp_unsorted.close() - temp_sorted = tempfile.NamedTemporaryFile(delete=False) - temp_sorted.close() - p = subprocess.Popen(['sort', '-n', '-r', '-k', '1,1'], stdin=open(temp_unsorted.name), stdout=open(temp_sorted.name, 'w+')) - stdout, stderr = p.communicate() - return_code = p.returncode - - if return_code: - sys.stdout.write(stdout) - sys.stderr.write(stderr) - sys.stderr.write("Return error code %i from command:\n" % return_code) - temp_sorted.close() - os.remove(temp_unsorted.name) - - for line in open(temp_sorted.name): - size, fp_idx = line.strip().split() - yield (int(size), int(fp_idx)) - - os.remove(temp_sorted.name) - -def butina( args ): - """ - Taylor-Butina clustering from the chemfp help. - """ - out = args.output_path - targets = chemfp.open( args.input_path, format='fps' ) - arena = chemfp.load_fingerprints( targets ) - - chemfp.set_num_threads( args.processors ) - results = search.threshold_tanimoto_search_symmetric(arena, threshold = args.tanimoto_threshold) - results.reorder_all("move-closest-first") - - sorted_ids = unix_sort(results) - - # Determine the true/false singletons and the clusters - true_singletons = [] - false_singletons = [] - clusters = [] - - seen = set() - #for (size, fp_idx, members) in results: - for (size, fp_idx) in sorted_ids: - members = results[fp_idx].get_indices() - #print arena.ids[ fp_idx ], [arena.ids[ m ] for m in members] - if fp_idx in seen: - # Can't use a centroid which is already assigned - continue - seen.add(fp_idx) - - if size == 0: - # The only fingerprint in the exclusion sphere is itself - true_singletons.append( fp_idx ) - continue - - # Figure out which ones haven't yet been assigned - unassigned = set(members) - seen - - if not unassigned: - false_singletons.append(fp_idx) - continue - - # this is a new cluster - clusters.append( (fp_idx, unassigned) ) - seen.update(unassigned) - - len_cluster = len(clusters) - #out.write( "#%s true singletons: %s\n" % ( len(true_singletons), " ".join(sorted(arena.ids[idx] for idx in true_singletons)) ) ) - #out.write( "#%s false singletons: %s\n" % ( len(false_singletons), " ".join(sorted(arena.ids[idx] for idx in false_singletons)) ) ) - - out.write( "#%s true singletons\n" % len(true_singletons) ) - out.write( "#%s false singletons\n" % len(false_singletons) ) - out.write( "#clusters: %s\n" % len_cluster ) - - # Sort so the cluster with the most compounds comes first, - # then by alphabetically smallest id - def cluster_sort_key(cluster): - centroid_idx, members = cluster - return -len(members), arena.ids[centroid_idx] - - clusters.sort(key=cluster_sort_key) - - for centroid_idx, members in clusters: - centroid_name = arena.ids[centroid_idx] - out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(arena.ids[idx] for idx in members))) - #ToDo: len(members) need to be some biggest top 90% or something ... - - for idx in true_singletons: - out.write("%s\t%s\n" % (arena.ids[idx], 0)) - - out.close() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="""Taylor-Butina clustering for fps files. -For more details please see the original publication or the chemfp documentation: -http://www.chemomine.co.uk/dbclus-paper.pdf -https://chemfp.readthedocs.org -""") - - parser.add_argument("-i", "--input", dest="input_path", - required=True, - help="Path to the input file.") - - parser.add_argument("-o", "--output", dest="output_path", type=argparse.FileType('w'), - default=sys.stdout, - help="Path to the output file.") - - parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", type=float, - default=0.8, - help="Tanimoto threshold [0.8]") - - parser.add_argument('-p', '--processors', type=int, default=4) - - options = parser.parse_args() - butina( options ) diff -r 73b8c87779ae -r 72e0a9fd6abc butina_clustering.xml --- a/butina_clustering.xml Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,93 +0,0 @@ - - of molecular fingerprints - - chemfp - openbabel - - - - - - - - - - - - - - - - - - - - - - 0 false singletons - => - - 1 clusters - 55091849 has 12 other members - => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823 - - -]]> - - - 10.1186/1758-2946-5-S1-P36 - - diff -r 73b8c87779ae -r 72e0a9fd6abc mol2fps.xml --- a/mol2fps.xml Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,276 +0,0 @@ - - with different fingerprint types - - - chemfp - rdkit - openbabel - - -&1 -]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -PUBCHEM_COMPOUND_CID< - 28434379 - - > - 1 - - > - 280 - - > - 2 - - > - 2 - - > - 2 - - > - AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== - - > - - - type : FP2 - ------ - -.. class:: infomark - -**Output** - -* Example:: - - #FPS1 - #num_bits=1021 - #type=OpenBabel-FP2/1 - #software=OpenBabel/2.3.0 - #source=/tmp/dataset_409.dat.sdf - #date=2012-02-03T11:13:39 - c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c - 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300 - 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379 - - -]]> - - - 10.1186/1758-2946-3-33 - 10.1186/1758-2946-5-S1-P36 - - @electronic{rdkit, - title = {RDKit: Open-source cheminformatics}, - url ={http://www.rdkit.org} - } - - - diff -r 73b8c87779ae -r 72e0a9fd6abc nxn_clustering.py --- a/nxn_clustering.py Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -#!/usr/bin/env python -""" - Modified version of code examples from the chemfp project. - http://code.google.com/p/chem-fingerprints/ - Thanks to Andrew Dalke of Andrew Dalke Scientific! -""" -import matplotlib -matplotlib.use('Agg') -import argparse -import os -import chemfp -import scipy.cluster.hierarchy as hcluster -import pylab -import numpy - -def distance_matrix(arena, tanimoto_threshold = 0.0): - n = len(arena) - # Start off a similarity matrix with 1.0s along the diagonal - try: - similarities = numpy.identity(n, "d") - except: - raise Exception('Input dataset is to large!') - chemfp.set_num_threads( args.processors ) - - ## Compute the full similarity matrix. - # The implementation computes the upper-triangle then copies - # the upper-triangle into lower-triangle. It does not include - # terms for the diagonal. - results = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold) - - # Copy the results into the NumPy array. - for row_index, row in enumerate(results.iter_indices_and_scores()): - for target_index, target_score in row: - similarities[row_index, target_index] = target_score - - # Return the distance matrix using the similarity matrix - return 1.0 - similarities - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="""NxN clustering for fps files. -For more details please see the chemfp documentation: -https://chemfp.readthedocs.org -""") - - parser.add_argument("-i", "--input", dest="input_path", - required=True, - help="Path to the input file.") - - parser.add_argument("-c", "--cluster", dest="cluster_image", - help="Path to the output cluster image.") - - parser.add_argument("-s", "--smatrix", dest="similarity_matrix", - help="Path to the similarity matrix output file.") - - parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", - type=float, default=0.0, - help="Tanimoto threshold [0.0]") - - parser.add_argument("--oformat", default='png', help="Output format (png, svg)") - - parser.add_argument('-p', '--processors', type=int, - default=4) - - args = parser.parse_args() - - targets = chemfp.open( args.input_path, format='fps' ) - arena = chemfp.load_fingerprints( targets ) - distances = distance_matrix( arena, args.tanimoto_threshold ) - - if args.similarity_matrix: - distances.tofile( args.similarity_matrix ) - - if args.cluster_image: - linkage = hcluster.linkage( distances, method="single", metric="euclidean" ) - - hcluster.dendrogram(linkage, labels=arena.ids) - - pylab.savefig( args.cluster_image, format=args.oformat ) - diff -r 73b8c87779ae -r 72e0a9fd6abc nxn_clustering.xml --- a/nxn_clustering.xml Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,120 +0,0 @@ - - of molecular fingerprints - - chemfp - python - matplotlib - scipy - openbabel - - - - - - - - - - - - - - - - - - - - - output_files == "both" or output_files == "image" - - - - - - output_files == "both" or output_files == "matrix" - - - - - - - - - - - - - - - - 10.1186/1758-2946-5-S1-P36 - - diff -r 73b8c87779ae -r 72e0a9fd6abc sdf2fps.xml --- a/sdf2fps.xml Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,116 +0,0 @@ - - extract fingerprints from sdf files metadata - - - chemfp - openbabel - - - '${outfile}' -]]> - - - - - - - - - - - - - - -PUBCHEM_COMPOUND_CID< - 28434379 - - > - 1 - - > - 280 - - > - 2 - - > - 2 - - > - 2 - - > - AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== - - > - ------ - -.. class:: infomark - -**Output** - -* Example:: - - #FPS1 - #num_bits=881 - #type=CACTVS-E_SCREEN/1.0 extended=2 - #software=CACTVS/unknown - #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat - #date=2012-02-03T10:44:12 - 07ce04000000000000000000000000000080060000000c0600 - 00000000001a800f0000780008100000101487e9608c0bed32 - 48000580644626204101b4844805901b041c2e19511e45039b - 8b2924101609401b13e4080000000000010020000004008000 - 0010000002000000000000 28434379 - - -]]> - - - 10.1186/1758-2946-5-S1-P36 - - diff -r 73b8c87779ae -r 72e0a9fd6abc simsearch.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/simsearch.xml Sat May 20 08:36:37 2017 -0400 @@ -0,0 +1,131 @@ + + of fingerprint data sets + + chemfp + + +&1; + rm $temp_link + #end if +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1186/1758-2946-3-33 + + diff -r 73b8c87779ae -r 72e0a9fd6abc static/images/NxN_clustering.png Binary file static/images/NxN_clustering.png has changed diff -r 73b8c87779ae -r 72e0a9fd6abc static/images/NxN_clustering.svg --- a/static/images/NxN_clustering.svg Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2275 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/CID_2244.can --- a/test-data/CID_2244.can Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -CC(=O)Oc1ccccc1C(=O)O 2244 diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/CID_2244.inchi --- a/test-data/CID_2244.inchi Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/CID_2244.sdf --- a/test-data/CID_2244.sdf Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,154 +0,0 @@ -2244 - -OEChem-05151212332D - - 21 21 0 0 0 0 0 0 0999 V2000 - 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1 5 1 0 0 0 0 - 1 12 1 0 0 0 0 - 2 11 1 0 0 0 0 - 2 21 1 0 0 0 0 - 3 11 2 0 0 0 0 - 4 12 2 0 0 0 0 - 5 6 1 0 0 0 0 - 5 7 2 0 0 0 0 - 6 8 2 0 0 0 0 - 6 11 1 0 0 0 0 - 7 9 1 0 0 0 0 - 7 14 1 0 0 0 0 - 8 10 1 0 0 0 0 - 8 15 1 0 0 0 0 - 9 10 2 0 0 0 0 - 9 16 1 0 0 0 0 - 10 17 1 0 0 0 0 - 12 13 1 0 0 0 0 - 13 18 1 0 0 0 0 - 13 19 1 0 0 0 0 - 13 20 1 0 0 0 0 -M END -> -2244 - -> -1 - -> -212 - -> -4 - -> -1 - -> -3 - -> -AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== - -> -2-acetoxybenzoic acid - -> -2-acetyloxybenzoic acid - -> -2-acetyloxybenzoic acid - -> -2-acetyloxybenzoic acid - -> -2-acetoxybenzoic acid - -> -InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) - -> -BSYNRYMUTXBXSQ-UHFFFAOYSA-N - -> -1.2 - -> -180.042259 - -> -C9H8O4 - -> -180.15742 - -> -CC(=O)OC1=CC=CC=C1C(=O)O - -> -CC(=O)OC1=CC=CC=C1C(=O)O - -> -63.6 - -> -180.042259 - -> -0 - -> -13 - -> -0 - -> -0 - -> -0 - -> -0 - -> -0 - -> -1 - -> -1 - -> -1 -5 -255 - -> -5 6 8 -5 7 8 -6 8 8 -7 9 8 -8 10 8 -9 10 8 - -$$$$ diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/CID_2244.smi --- a/test-data/CID_2244.smi Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -O(c1c(cccc1)C(=O)O)C(=O)C 2244 diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/CID_2244_FP2.fps --- a/test-data/CID_2244_FP2.fps Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_3.dat -#date=2017-05-19T13:52:59 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/CID_2244_FP3.fps --- a/test-data/CID_2244_FP3.fps Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_7.dat -#date=2017-05-19T13:53:45 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/CID_2244_FP4.fps --- a/test-data/CID_2244_FP4.fps Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_11.dat -#date=2017-05-19T13:54:39 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/CID_2244_MACCS.fps --- a/test-data/CID_2244_MACCS.fps Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_15.dat -#date=2017-05-19T13:55:30 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/CID_2244_maccs.fps --- a/test-data/CID_2244_maccs.fps Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#FPS1 -#num_bits=166 -#type=OpenBabel-MACCS/2 -#software=OpenBabel/2.3.1 -#source=CID_2244.sdf -#date=2012-05-15T17:00:39 -0000000000000000000000010000016480cca2d21e 2244 diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/NxN_Clustering_on_q.svg --- a/test-data/NxN_Clustering_on_q.svg Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,707 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/Taylor-Butina_Clustering_on_data_q.txt --- a/test-data/Taylor-Butina_Clustering_on_data_q.txt Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -#0 true singletons -#0 false singletons -#clusters: 1 -55091752 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091467 55168823 55091849 diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/sdf2fps_result1.fps --- a/test-data/sdf2fps_result1.fps Sat May 20 08:31:15 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#FPS1 -#num_bits=881 -#type=CACTVS-E_SCREEN/1.0 extended=2 -#software=CACTVS/unknown -#source=/tmp/tmpN2w37z/files/000/dataset_1.dat -#date=2017-05-19T14:27:41 -030e1c000000000000000000000000000000000000000c00000000000000008000000058000010000030200119004c70010060001140044b100040100024040010118060101330e46c21ac5841de06a50788109de11113047100000000001000000000000000080000000000000000 2244 diff -r 73b8c87779ae -r 72e0a9fd6abc test-data/simsearch_on_tragets_and_q.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/simsearch_on_tragets_and_q.tabular Sat May 20 08:36:37 2017 -0400 @@ -0,0 +1,9 @@ +#Simsearch/1 +#num_bits=881 +#type=Tanimoto k=all threshold=0.7 +#software=chemfp/1.1p1 +#queries=./query.fps +#targets=./targets.fps +#query_sources=CID_28434379.sdf +#target_sources=Desktop/3579363516810334491.sdf +13 28434379 6499094 0.9615 6485578 0.9679 55079807 0.9313 3153534 0.9557 55102353 0.9682 55091466 0.9682 55091416 0.9682 6485577 0.9497 55169009 0.9560 55091752 0.9684 55091467 0.9623 55168823 0.9563 55091849 0.9563