Mercurial > repos > bgruening > chemfp
changeset 0:a8ac5250d59c
Uploaded
line wrap: on
line diff
#!/usr/bin/env python
"""
    Modified version of code examples from the chemfp project.
    http://code.google.com/p/chem-fingerprints/
    Thanks to Andrew Dalke of Andrew Dalke Scientific!

    Taylor-Butina clustering of the fingerprints stored in an FPS file.

    Usage:
        butina_clustering.py FPS_FILE TANIMOTO_THRESHOLD OUTFILE NUM_THREADS
"""

import chemfp
import sys
import os


def get_hit_indicies(hits):
    """Return the target indices from an iterable of (index, score) hits."""
    # Named 'idx' rather than 'id' so the builtin id() is not shadowed.
    return [idx for (idx, score) in hits]


def assign_clusters(results):
    """Greedily assign fingerprints to clusters.

    results -- iterable of (size, fp_idx, hits) tuples, already ordered so
               that the preferred centroids come first.  ``hits`` is an
               iterable of (target_idx, score) pairs and ``size`` is the
               number of hits.

    Returns a (true_singletons, false_singletons, clusters) tuple.  The two
    singleton values are lists of fingerprint indices; ``clusters`` is a
    list of (centroid_idx, member_indices) tuples.
    """
    true_singletons = []
    false_singletons = []
    clusters = []

    seen = set()

    for (size, fp_idx, hits) in results:
        if fp_idx in seen:
            # Can't use a centroid which is already assigned
            continue
        seen.add(fp_idx)

        if size == 1:
            # The only fingerprint in the exclusion sphere is itself
            true_singletons.append(fp_idx)
            continue

        members = get_hit_indicies(hits)
        # Figure out which ones haven't yet been assigned
        unassigned = [target_idx for target_idx in members
                      if target_idx not in seen]

        if not unassigned:
            # Every neighbour was already claimed by an earlier centroid
            false_singletons.append(fp_idx)
            continue

        # this is a new cluster
        clusters.append((fp_idx, unassigned))
        seen.update(unassigned)

    return true_singletons, false_singletons, clusters


def main(argv):
    """Cluster the fingerprints named on the command line and write a report.

    argv -- [script, fps_file, tanimoto_threshold, outfile, num_threads]
    """
    chemfp_fingerprint_file = argv[1]
    tanimoto_threshold = float(argv[2])
    outfile = argv[3]
    processors = int(argv[4])

    dataset = chemfp.load_fingerprints(chemfp_fingerprint_file)

    chemfp.set_num_threads(processors)
    search = dataset.threshold_tanimoto_search_arena(
        dataset, threshold=tanimoto_threshold)

    # Reorder so the centroid with the most hits comes first.
    # (That's why I do a reverse search.)
    # Ignore the arbitrariness of breaking ties by fingerprint index
    results = sorted(
        ((len(hits), i, hits)
         for (i, hits) in enumerate(search.iter_indices_and_scores())),
        reverse=True)

    # Determine the true/false singletons and the clusters
    true_singletons, false_singletons, clusters = assign_clusters(results)

    # Sort so the cluster with the most compounds comes first,
    # then by alphabetically smallest id
    def cluster_sort_key(cluster):
        centroid_idx, members = cluster
        return -len(members), dataset.ids[centroid_idx]

    clusters.sort(key=cluster_sort_key)

    # The context manager closes the report even if a write fails.
    with open(outfile, 'w') as out:
        out.write("#%s true singletons\n" % len(true_singletons))
        out.write("#%s false singletons\n" % len(false_singletons))
        out.write("#clusters: %s\n" % len(clusters))

        for centroid_idx, members in clusters:
            centroid_name = dataset.ids[centroid_idx]
            out.write("%s\t%s\t%s\n" % (
                centroid_name,
                len(members),
                " ".join(dataset.ids[idx] for idx in members)))
            #ToDo: len(members) need to be some biggest top 90% or something ...

        # True singletons are reported as centroids with zero members;
        # false singletons only appear in the header count.
        for idx in true_singletons:
            out.write("%s\t%s\n" % (dataset.ids[idx], 0))


if __name__ == "__main__":
    main(sys.argv)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_clustering/butina_clustering.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,65 @@ +<tool id="chemfp_butina_clustering" name="Taylor-Butina Clustering" version="0.1"> + <description>of molecular libraries</description> + <requirements> + <requirement type="package" version="1.1p1">chemfp</requirement> + </requirements> + <command interpreter='python'> + butina_clustering.py $infile $threshold $outfile 4 + </command> + <inputs> + <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/> + <param name='threshold' type='float' value='0.8' ></param> + </inputs> + <outputs> + <data format="txt" name="outfile" /> + </outputs> + <tests> + <test> + <param name="infile" ftype="fps" value="q.fps"/> + <param name='threshold' value='0.8' ></param> + <output name="outfile" ftype="txt" file='Taylor-Butina_Clustering_on_data_q.txt'/> + </test> + </tests> +<help> + + +**What it does** +Molecule library clustering using the Taylor-Butina algorithm. + +----- + +**Example** + +* input:: + + - fingerprints in FPS format + + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat + #date=2012-02-09T13:20:37 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e + 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e + 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807 + ........ 
+ + - Tanimoto threshold : 0.8 (between 0 and 1) + +* output:: + + 0 true singletons + => + + 0 false singletons + => + + 1 clusters + 55091849 has 12 other members + => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823 + + </help> + +</tool>
#!/usr/bin/env python
"""
    Modified version of code examples from the chemfp project.
    http://code.google.com/p/chem-fingerprints/
    Thanks to Andrew Dalke of Andrew Dalke Scientific!

    Hierarchical (single-linkage) clustering of the fingerprints in an FPS
    file, drawn as a dendrogram and saved as SVG.

    Usage:
        nxn_clustering.py FPS_FILE TANIMOTO_THRESHOLD OUTFILE
"""
import matplotlib
matplotlib.use('Agg')
import sys
import os
import chemfp
import scipy.cluster.hierarchy as hcluster
from scipy.spatial.distance import squareform
import pylab
import numpy


def distance_matrix(arena, t):
    """Return an n x n array of (1 - Tanimoto score) distances for *arena*.

    arena -- the fingerprint arena, searched against itself
    t     -- Tanimoto threshold passed to the search; pairs the search does
             not report keep the initial distance of 1.0
    """
    n = len(arena)
    # The Tanimoto search computes all of the scores when threshold=0.0.
    # The SearchResult contains sparse data, so I set all values
    # now to 1.0 so you can experiment with higher thresholds.
    distances = numpy.ones((n, n), numpy.float64)

    # Keep track of where the query subarena is in the query
    query_row = 0

    for query_arena in arena.iter_arenas():
        results = arena.threshold_tanimoto_search_arena(query_arena, threshold=t)
        for q_i, hits in enumerate(results.iter_indices_and_scores()):
            query_idx = query_row + q_i
            for target_idx, score in hits:
                distances[query_idx, target_idx] = 1.0 - score
        query_row += len(query_arena)

    return distances


def main(argv):
    """Build the dendrogram for the FPS file named in *argv* and save it.

    argv -- [script, fps_file, tanimoto_threshold, svg_outfile]
    """
    dataset = chemfp.load_fingerprints(argv[1])
    distances = distance_matrix(dataset, float(argv[2]))

    # linkage() interprets a 2-D array as a set of observation vectors and
    # would cluster on Euclidean distances between matrix rows.  Hand it
    # the condensed (upper-triangle) form so the Tanimoto distances
    # themselves are used.  checks=False: the diagonal is ignored, so a
    # non-zero self-distance cannot abort the conversion.
    condensed = squareform(distances, checks=False)
    linkage = hcluster.linkage(condensed, method="single")

    # Plot using matplotlib, which you must have installed
    hcluster.dendrogram(linkage, labels=dataset.ids)

    pylab.savefig(argv[3], format='svg')


if __name__ == "__main__":
    main(sys.argv)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_clustering/nxn_clustering.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,61 @@ +<tool id="chemfp_nxn_clustering" name="NxN Clustering" version="0.1"> + <description>of molecular libraries</description> + <requirements> + <requirement type="package" version="1.7.0">numpy</requirement> + <requirement type="package" version="1.1p1">chemfp</requirement> + </requirements> + <command interpreter='python'> + nxn_clustering.py $infile $threshold $outfile + </command> + <inputs> + <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/> + <param name='threshold' type='float' value='0.75' ></param> + </inputs> + <outputs> + <data type="data" format="svg" name="outfile" /> + </outputs> + <tests> + <test> + <param name="infile" ftype="fps" value="q.fps" /> + <param value='0.75' /> + <output ftype="svg" name="outfile" file='NxN_Clustering_on_q.svg' /> + </test> + </tests> + <help> + + +**What it does** +Generating hierarchical clusters and visualizing clusters with dendrograms. + +----- + +**Example** + +* input:: + + - fingerprints in FPS format + + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat + #date=2012-02-09T13:20:37 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e + 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e + 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807 + ........ + + - Tanimoto threshold : 0.8 (between 0 and 1) + +* output:: + + plot for the clustering + +.. 
image:: ./static/images/chemfpclustoutput.svg + + + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_clustering/test-data/NxN_Clustering_on_q.svg Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,793 @@ +<?xml version="1.0" encoding="utf-8" standalone="no"?> +<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" + "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> +<!-- Created with matplotlib (http://matplotlib.sourceforge.net/) --> +<svg height="432pt" version="1.1" viewBox="0 0 576 432" width="576pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> + <defs> + <style type="text/css"> +*{stroke-linecap:square;stroke-linejoin:round;} + </style> + </defs> + <g id="figure_1"> + <g id="patch_1"> + <path d=" +M0 432 +L576 432 +L576 0 +L0 0 +z +" style="fill:#ffffff;"/> + </g> + <g id="axes_1"> + <g id="patch_2"> + <path d=" +M72 388.8 +L518.4 388.8 +L518.4 43.2 +L72 43.2 +z +" style="fill:#ffffff;"/> + </g> + <g id="LineCollection_1"> + <defs> + <path d=" +M123.508 -43.2 +L123.508 -234.55 +L157.846 -234.55 +L157.846 -43.2" id="C0_0_36e0ca0abb"/> + </defs> + <g clip-path="url(#p7ff5b81e1d)"> + <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_0_36e0ca0abb" y="432.0"/> + </g> + </g> + <g id="LineCollection_2"> + <defs> + <path d=" +M260.862 -43.2 +L260.862 -43.2 +L295.2 -43.2 +L295.2 -43.2" id="C1_0_d55749c544"/> + <path d=" +M226.523 -43.2 +L226.523 -43.2 +L278.031 -43.2 +L278.031 -43.2" id="C1_1_f284ff091a"/> + <path d=" +M329.538 -43.2 +L329.538 -151.689 +L363.877 -151.689 +L363.877 -43.2" id="C1_2_ad9c2700c6"/> + <path d=" +M252.277 -43.2 +L252.277 -180.048 +L346.708 -180.048 +L346.708 -151.689" id="C1_3_59bcda1988"/> + </defs> + <g clip-path="url(#p7ff5b81e1d)"> + <use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_0_d55749c544" y="432.0"/> + </g> + <g clip-path="url(#p7ff5b81e1d)"> + <use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_1_f284ff091a" y="432.0"/> + </g> + <g clip-path="url(#p7ff5b81e1d)"> + 
<use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_2_ad9c2700c6" y="432.0"/> + </g> + <g clip-path="url(#p7ff5b81e1d)"> + <use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_3_59bcda1988" y="432.0"/> + </g> + </g> + <g id="LineCollection_3"> + <defs> + <path d=" +M398.215 -43.2 +L398.215 -147.208 +L432.554 -147.208 +L432.554 -43.2" id="C2_0_63eb41fae6"/> + <path d=" +M466.892 -43.2 +L466.892 -149.207 +L501.231 -149.207 +L501.231 -43.2" id="C2_1_2114d8afff"/> + <path d=" +M415.385 -147.208 +L415.385 -210.283 +L484.062 -210.283 +L484.062 -149.207" id="C2_2_580dfac2d3"/> + </defs> + <g clip-path="url(#p7ff5b81e1d)"> + <use style="fill:none;stroke:#00bfbf;stroke-linecap:butt;" x="0" xlink:href="#C2_0_63eb41fae6" y="432.0"/> + </g> + <g clip-path="url(#p7ff5b81e1d)"> + <use style="fill:none;stroke:#00bfbf;stroke-linecap:butt;" x="0" xlink:href="#C2_1_2114d8afff" y="432.0"/> + </g> + <g clip-path="url(#p7ff5b81e1d)"> + <use style="fill:none;stroke:#00bfbf;stroke-linecap:butt;" x="0" xlink:href="#C2_2_580dfac2d3" y="432.0"/> + </g> + </g> + <g id="LineCollection_4"> + <defs> + <path d=" +M299.492 -180.048 +L299.492 -278.97 +L449.723 -278.97 +L449.723 -210.283" id="C3_0_351ea019a2"/> + <path d=" +M192.185 -43.2 +L192.185 -315.042 +L374.608 -315.042 +L374.608 -278.97" id="C3_1_f2cbe41b26"/> + <path d=" +M140.677 -234.55 +L140.677 -322.212 +L283.396 -322.212 +L283.396 -315.042" id="C3_2_0ff010f580"/> + <path d=" +M89.1692 -43.2 +L89.1692 -372.343 +L212.037 -372.343 +L212.037 -322.212" id="C3_3_64df8a0051"/> + </defs> + <g clip-path="url(#p7ff5b81e1d)"> + <use style="fill:none;stroke:#0000ff;stroke-linecap:butt;" x="0" xlink:href="#C3_0_351ea019a2" y="432.0"/> + </g> + <g clip-path="url(#p7ff5b81e1d)"> + <use style="fill:none;stroke:#0000ff;stroke-linecap:butt;" x="0" xlink:href="#C3_1_f2cbe41b26" y="432.0"/> + </g> + <g clip-path="url(#p7ff5b81e1d)"> + <use style="fill:none;stroke:#0000ff;stroke-linecap:butt;" x="0" 
xlink:href="#C3_2_0ff010f580" y="432.0"/> + </g> + <g clip-path="url(#p7ff5b81e1d)"> + <use style="fill:none;stroke:#0000ff;stroke-linecap:butt;" x="0" xlink:href="#C3_3_64df8a0051" y="432.0"/> + </g> + </g> + <g id="matplotlib.axis_1"> + <g id="xtick_1"> + <g id="text_1"> + <!-- 55079807 --> + <defs> + <path d=" +M10.9844 1.51562 +L10.9844 10.5 +Q14.7031 8.73438 18.5 7.8125 +Q22.3125 6.89062 25.9844 6.89062 +Q35.75 6.89062 40.8906 13.4531 +Q46.0469 20.0156 46.7812 33.4062 +Q43.9531 29.2031 39.5938 26.9531 +Q35.25 24.7031 29.9844 24.7031 +Q19.0469 24.7031 12.6719 31.3125 +Q6.29688 37.9375 6.29688 49.4219 +Q6.29688 60.6406 12.9375 67.4219 +Q19.5781 74.2188 30.6094 74.2188 +Q43.2656 74.2188 49.9219 64.5156 +Q56.5938 54.8281 56.5938 36.375 +Q56.5938 19.1406 48.4062 8.85938 +Q40.2344 -1.42188 26.4219 -1.42188 +Q22.7031 -1.42188 18.8906 -0.6875 +Q15.0938 0.046875 10.9844 1.51562 +M30.6094 32.4219 +Q37.25 32.4219 41.125 36.9531 +Q45.0156 41.5 45.0156 49.4219 +Q45.0156 57.2812 41.125 61.8438 +Q37.25 66.4062 30.6094 66.4062 +Q23.9688 66.4062 20.0938 61.8438 +Q16.2188 57.2812 16.2188 49.4219 +Q16.2188 41.5 20.0938 36.9531 +Q23.9688 32.4219 30.6094 32.4219" id="DejaVuSans-39"/> + <path d=" +M31.7812 66.4062 +Q24.1719 66.4062 20.3281 58.9062 +Q16.5 51.4219 16.5 36.375 +Q16.5 21.3906 20.3281 13.8906 +Q24.1719 6.39062 31.7812 6.39062 +Q39.4531 6.39062 43.2812 13.8906 +Q47.125 21.3906 47.125 36.375 +Q47.125 51.4219 43.2812 58.9062 +Q39.4531 66.4062 31.7812 66.4062 +M31.7812 74.2188 +Q44.0469 74.2188 50.5156 64.5156 +Q56.9844 54.8281 56.9844 36.375 +Q56.9844 17.9688 50.5156 8.26562 +Q44.0469 -1.42188 31.7812 -1.42188 +Q19.5312 -1.42188 13.0625 8.26562 +Q6.59375 17.9688 6.59375 36.375 +Q6.59375 54.8281 13.0625 64.5156 +Q19.5312 74.2188 31.7812 74.2188" id="DejaVuSans-30"/> + <path d=" +M10.7969 72.9062 +L49.5156 72.9062 +L49.5156 64.5938 +L19.8281 64.5938 +L19.8281 46.7344 +Q21.9688 47.4688 24.1094 47.8281 +Q26.2656 48.1875 28.4219 48.1875 +Q40.625 48.1875 47.75 41.5 +Q54.8906 
34.8125 54.8906 23.3906 +Q54.8906 11.625 47.5625 5.09375 +Q40.2344 -1.42188 26.9062 -1.42188 +Q22.3125 -1.42188 17.5469 -0.640625 +Q12.7969 0.140625 7.71875 1.70312 +L7.71875 11.625 +Q12.1094 9.23438 16.7969 8.0625 +Q21.4844 6.89062 26.7031 6.89062 +Q35.1562 6.89062 40.0781 11.3281 +Q45.0156 15.7656 45.0156 23.3906 +Q45.0156 31 40.0781 35.4375 +Q35.1562 39.8906 26.7031 39.8906 +Q22.75 39.8906 18.8125 39.0156 +Q14.8906 38.1406 10.7969 36.2812 +z +" id="DejaVuSans-35"/> + <path d=" +M8.20312 72.9062 +L55.0781 72.9062 +L55.0781 68.7031 +L28.6094 0 +L18.3125 0 +L43.2188 64.5938 +L8.20312 64.5938 +z +" id="DejaVuSans-37"/> + <path d=" +M31.7812 34.625 +Q24.75 34.625 20.7188 30.8594 +Q16.7031 27.0938 16.7031 20.5156 +Q16.7031 13.9219 20.7188 10.1562 +Q24.75 6.39062 31.7812 6.39062 +Q38.8125 6.39062 42.8594 10.1719 +Q46.9219 13.9688 46.9219 20.5156 +Q46.9219 27.0938 42.8906 30.8594 +Q38.875 34.625 31.7812 34.625 +M21.9219 38.8125 +Q15.5781 40.375 12.0312 44.7188 +Q8.5 49.0781 8.5 55.3281 +Q8.5 64.0625 14.7188 69.1406 +Q20.9531 74.2188 31.7812 74.2188 +Q42.6719 74.2188 48.875 69.1406 +Q55.0781 64.0625 55.0781 55.3281 +Q55.0781 49.0781 51.5312 44.7188 +Q48 40.375 41.7031 38.8125 +Q48.8281 37.1562 52.7969 32.3125 +Q56.7812 27.4844 56.7812 20.5156 +Q56.7812 9.90625 50.3125 4.23438 +Q43.8438 -1.42188 31.7812 -1.42188 +Q19.7344 -1.42188 13.25 4.23438 +Q6.78125 9.90625 6.78125 20.5156 +Q6.78125 27.4844 10.7812 32.3125 +Q14.7969 37.1562 21.9219 38.8125 +M18.3125 54.3906 +Q18.3125 48.7344 21.8438 45.5625 +Q25.3906 42.3906 31.7812 42.3906 +Q38.1406 42.3906 41.7188 45.5625 +Q45.3125 48.7344 45.3125 54.3906 +Q45.3125 60.0625 41.7188 63.2344 +Q38.1406 66.4062 31.7812 66.4062 +Q25.3906 66.4062 21.8438 63.2344 +Q18.3125 60.0625 18.3125 54.3906" id="DejaVuSans-38"/> + </defs> + <g transform="translate(59.6051682692 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-35"/> + <use x="63.623046875" xlink:href="#DejaVuSans-35"/> + <use x="127.24609375" xlink:href="#DejaVuSans-30"/> 
+ <use x="190.869140625" xlink:href="#DejaVuSans-37"/> + <use x="254.4921875" xlink:href="#DejaVuSans-39"/> + <use x="318.115234375" xlink:href="#DejaVuSans-38"/> + <use x="381.73828125" xlink:href="#DejaVuSans-30"/> + <use x="445.361328125" xlink:href="#DejaVuSans-37"/> + </g> + </g> + </g> + <g id="xtick_2"> + <g id="text_2"> + <!-- 55091752 --> + <defs> + <path d=" +M12.4062 8.29688 +L28.5156 8.29688 +L28.5156 63.9219 +L10.9844 60.4062 +L10.9844 69.3906 +L28.4219 72.9062 +L38.2812 72.9062 +L38.2812 8.29688 +L54.3906 8.29688 +L54.3906 0 +L12.4062 0 +z +" id="DejaVuSans-31"/> + <path d=" +M19.1875 8.29688 +L53.6094 8.29688 +L53.6094 0 +L7.32812 0 +L7.32812 8.29688 +Q12.9375 14.1094 22.625 23.8906 +Q32.3281 33.6875 34.8125 36.5312 +Q39.5469 41.8438 41.4219 45.5312 +Q43.3125 49.2188 43.3125 52.7812 +Q43.3125 58.5938 39.2344 62.25 +Q35.1562 65.9219 28.6094 65.9219 +Q23.9688 65.9219 18.8125 64.3125 +Q13.6719 62.7031 7.8125 59.4219 +L7.8125 69.3906 +Q13.7656 71.7812 18.9375 73 +Q24.125 74.2188 28.4219 74.2188 +Q39.75 74.2188 46.4844 68.5469 +Q53.2188 62.8906 53.2188 53.4219 +Q53.2188 48.9219 51.5312 44.8906 +Q49.8594 40.875 45.4062 35.4062 +Q44.1875 33.9844 37.6406 27.2188 +Q31.1094 20.4531 19.1875 8.29688" id="DejaVuSans-32"/> + </defs> + <g transform="translate(94.0317548077 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-35"/> + <use x="63.623046875" xlink:href="#DejaVuSans-35"/> + <use x="127.24609375" xlink:href="#DejaVuSans-30"/> + <use x="190.869140625" xlink:href="#DejaVuSans-39"/> + <use x="254.4921875" xlink:href="#DejaVuSans-31"/> + <use x="318.115234375" xlink:href="#DejaVuSans-37"/> + <use x="381.73828125" xlink:href="#DejaVuSans-35"/> + <use x="445.361328125" xlink:href="#DejaVuSans-32"/> + </g> + </g> + </g> + <g id="xtick_3"> + <g id="text_3"> + <!-- 55168823 --> + <defs> + <path d=" +M40.5781 39.3125 +Q47.6562 37.7969 51.625 33 +Q55.6094 28.2188 55.6094 21.1875 +Q55.6094 10.4062 48.1875 4.48438 +Q40.7656 -1.42188 27.0938 -1.42188 +Q22.5156 
-1.42188 17.6562 -0.515625 +Q12.7969 0.390625 7.625 2.20312 +L7.625 11.7188 +Q11.7188 9.32812 16.5938 8.10938 +Q21.4844 6.89062 26.8125 6.89062 +Q36.0781 6.89062 40.9375 10.5469 +Q45.7969 14.2031 45.7969 21.1875 +Q45.7969 27.6406 41.2812 31.2656 +Q36.7656 34.9062 28.7188 34.9062 +L20.2188 34.9062 +L20.2188 43.0156 +L29.1094 43.0156 +Q36.375 43.0156 40.2344 45.9219 +Q44.0938 48.8281 44.0938 54.2969 +Q44.0938 59.9062 40.1094 62.9062 +Q36.1406 65.9219 28.7188 65.9219 +Q24.6562 65.9219 20.0156 65.0312 +Q15.375 64.1562 9.8125 62.3125 +L9.8125 71.0938 +Q15.4375 72.6562 20.3438 73.4375 +Q25.25 74.2188 29.5938 74.2188 +Q40.8281 74.2188 47.3594 69.1094 +Q53.9062 64.0156 53.9062 55.3281 +Q53.9062 49.2656 50.4375 45.0938 +Q46.9688 40.9219 40.5781 39.3125" id="DejaVuSans-33"/> + <path d=" +M33.0156 40.375 +Q26.375 40.375 22.4844 35.8281 +Q18.6094 31.2969 18.6094 23.3906 +Q18.6094 15.5312 22.4844 10.9531 +Q26.375 6.39062 33.0156 6.39062 +Q39.6562 6.39062 43.5312 10.9531 +Q47.4062 15.5312 47.4062 23.3906 +Q47.4062 31.2969 43.5312 35.8281 +Q39.6562 40.375 33.0156 40.375 +M52.5938 71.2969 +L52.5938 62.3125 +Q48.875 64.0625 45.0938 64.9844 +Q41.3125 65.9219 37.5938 65.9219 +Q27.8281 65.9219 22.6719 59.3281 +Q17.5312 52.7344 16.7969 39.4062 +Q19.6719 43.6562 24.0156 45.9219 +Q28.375 48.1875 33.5938 48.1875 +Q44.5781 48.1875 50.9531 41.5156 +Q57.3281 34.8594 57.3281 23.3906 +Q57.3281 12.1562 50.6875 5.35938 +Q44.0469 -1.42188 33.0156 -1.42188 +Q20.3594 -1.42188 13.6719 8.26562 +Q6.98438 17.9688 6.98438 36.375 +Q6.98438 53.6562 15.1875 63.9375 +Q23.3906 74.2188 37.2031 74.2188 +Q40.9219 74.2188 44.7031 73.4844 +Q48.4844 72.75 52.5938 71.2969" id="DejaVuSans-36"/> + </defs> + <g transform="translate(128.250216346 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-35"/> + <use x="63.623046875" xlink:href="#DejaVuSans-35"/> + <use x="127.24609375" xlink:href="#DejaVuSans-31"/> + <use x="190.869140625" xlink:href="#DejaVuSans-36"/> + <use x="254.4921875" 
xlink:href="#DejaVuSans-38"/> + <use x="318.115234375" xlink:href="#DejaVuSans-38"/> + <use x="381.73828125" xlink:href="#DejaVuSans-32"/> + <use x="445.361328125" xlink:href="#DejaVuSans-33"/> + </g> + </g> + </g> + <g id="xtick_4"> + <g id="text_4"> + <!-- 55169009 --> + <g transform="translate(162.529615385 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-35"/> + <use x="63.623046875" xlink:href="#DejaVuSans-35"/> + <use x="127.24609375" xlink:href="#DejaVuSans-31"/> + <use x="190.869140625" xlink:href="#DejaVuSans-36"/> + <use x="254.4921875" xlink:href="#DejaVuSans-39"/> + <use x="318.115234375" xlink:href="#DejaVuSans-30"/> + <use x="381.73828125" xlink:href="#DejaVuSans-30"/> + <use x="445.361328125" xlink:href="#DejaVuSans-39"/> + </g> + </g> + </g> + <g id="xtick_5"> + <g id="text_5"> + <!-- 55102353 --> + <g transform="translate(196.927139423 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-35"/> + <use x="63.623046875" xlink:href="#DejaVuSans-35"/> + <use x="127.24609375" xlink:href="#DejaVuSans-31"/> + <use x="190.869140625" xlink:href="#DejaVuSans-30"/> + <use x="254.4921875" xlink:href="#DejaVuSans-32"/> + <use x="318.115234375" xlink:href="#DejaVuSans-33"/> + <use x="381.73828125" xlink:href="#DejaVuSans-35"/> + <use x="445.361328125" xlink:href="#DejaVuSans-33"/> + </g> + </g> + </g> + <g id="xtick_6"> + <g id="text_6"> + <!-- 55091466 --> + <defs> + <path d=" +M37.7969 64.3125 +L12.8906 25.3906 +L37.7969 25.3906 +z + +M35.2031 72.9062 +L47.6094 72.9062 +L47.6094 25.3906 +L58.0156 25.3906 +L58.0156 17.1875 +L47.6094 17.1875 +L47.6094 0 +L37.7969 0 +L37.7969 17.1875 +L4.89062 17.1875 +L4.89062 26.7031 +z +" id="DejaVuSans-34"/> + </defs> + <g transform="translate(231.162475962 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-35"/> + <use x="63.623046875" xlink:href="#DejaVuSans-35"/> + <use x="127.24609375" xlink:href="#DejaVuSans-30"/> + <use x="190.869140625" xlink:href="#DejaVuSans-39"/> + <use x="254.4921875" 
xlink:href="#DejaVuSans-31"/> + <use x="318.115234375" xlink:href="#DejaVuSans-34"/> + <use x="381.73828125" xlink:href="#DejaVuSans-36"/> + <use x="445.361328125" xlink:href="#DejaVuSans-36"/> + </g> + </g> + </g> + <g id="xtick_7"> + <g id="text_7"> + <!-- 55091416 --> + <g transform="translate(265.5009375 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-35"/> + <use x="63.623046875" xlink:href="#DejaVuSans-35"/> + <use x="127.24609375" xlink:href="#DejaVuSans-30"/> + <use x="190.869140625" xlink:href="#DejaVuSans-39"/> + <use x="254.4921875" xlink:href="#DejaVuSans-31"/> + <use x="318.115234375" xlink:href="#DejaVuSans-34"/> + <use x="381.73828125" xlink:href="#DejaVuSans-31"/> + <use x="445.361328125" xlink:href="#DejaVuSans-36"/> + </g> + </g> + </g> + <g id="xtick_8"> + <g id="text_8"> + <!-- 6499094 --> + <g transform="translate(303.571586538 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-36"/> + <use x="63.623046875" xlink:href="#DejaVuSans-34"/> + <use x="127.24609375" xlink:href="#DejaVuSans-39"/> + <use x="190.869140625" xlink:href="#DejaVuSans-39"/> + <use x="254.4921875" xlink:href="#DejaVuSans-30"/> + <use x="318.115234375" xlink:href="#DejaVuSans-39"/> + <use x="381.73828125" xlink:href="#DejaVuSans-34"/> + </g> + </g> + </g> + <g id="xtick_9"> + <g id="text_9"> + <!-- 6485578 --> + <g transform="translate(337.984110577 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-36"/> + <use x="63.623046875" xlink:href="#DejaVuSans-34"/> + <use x="127.24609375" xlink:href="#DejaVuSans-38"/> + <use x="190.869140625" xlink:href="#DejaVuSans-35"/> + <use x="254.4921875" xlink:href="#DejaVuSans-35"/> + <use x="318.115234375" xlink:href="#DejaVuSans-37"/> + <use x="381.73828125" xlink:href="#DejaVuSans-38"/> + </g> + </g> + </g> + <g id="xtick_10"> + <g id="text_10"> + <!-- 55091467 --> + <g transform="translate(368.651322115 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-35"/> + <use x="63.623046875" 
xlink:href="#DejaVuSans-35"/> + <use x="127.24609375" xlink:href="#DejaVuSans-30"/> + <use x="190.869140625" xlink:href="#DejaVuSans-39"/> + <use x="254.4921875" xlink:href="#DejaVuSans-31"/> + <use x="318.115234375" xlink:href="#DejaVuSans-34"/> + <use x="381.73828125" xlink:href="#DejaVuSans-36"/> + <use x="445.361328125" xlink:href="#DejaVuSans-37"/> + </g> + </g> + </g> + <g id="xtick_11"> + <g id="text_11"> + <!-- 55091849 --> + <g transform="translate(402.898846154 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-35"/> + <use x="63.623046875" xlink:href="#DejaVuSans-35"/> + <use x="127.24609375" xlink:href="#DejaVuSans-30"/> + <use x="190.869140625" xlink:href="#DejaVuSans-39"/> + <use x="254.4921875" xlink:href="#DejaVuSans-31"/> + <use x="318.115234375" xlink:href="#DejaVuSans-38"/> + <use x="381.73828125" xlink:href="#DejaVuSans-34"/> + <use x="445.361328125" xlink:href="#DejaVuSans-39"/> + </g> + </g> + </g> + <g id="xtick_12"> + <g id="text_12"> + <!-- 3153534 --> + <g transform="translate(440.963870192 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-33"/> + <use x="63.623046875" xlink:href="#DejaVuSans-31"/> + <use x="127.24609375" xlink:href="#DejaVuSans-35"/> + <use x="190.869140625" xlink:href="#DejaVuSans-33"/> + <use x="254.4921875" xlink:href="#DejaVuSans-35"/> + <use x="318.115234375" xlink:href="#DejaVuSans-33"/> + <use x="381.73828125" xlink:href="#DejaVuSans-34"/> + </g> + </g> + </g> + <g id="xtick_13"> + <g id="text_13"> + <!-- 6485577 --> + <g transform="translate(475.440144231 401.70625)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-36"/> + <use x="63.623046875" xlink:href="#DejaVuSans-34"/> + <use x="127.24609375" xlink:href="#DejaVuSans-38"/> + <use x="190.869140625" xlink:href="#DejaVuSans-35"/> + <use x="254.4921875" xlink:href="#DejaVuSans-35"/> + <use x="318.115234375" xlink:href="#DejaVuSans-37"/> + <use x="381.73828125" xlink:href="#DejaVuSans-37"/> + </g> + </g> + </g> + </g> + <g 
id="matplotlib.axis_2"> + <g id="ytick_1"> + <g id="line2d_1"> + <defs> + <path d=" +M0 0 +L4 0" id="me8a85f7bf6" style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;"/> + </defs> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="388.8"/> + </g> + </g> + <g id="line2d_2"> + <defs> + <path d=" +M0 0 +L-4 0" id="m1a32005dea" style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;"/> + </defs> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="388.8"/> + </g> + </g> + <g id="text_14"> + <!-- 0.00 --> + <defs> + <path d=" +M10.6875 12.4062 +L21 12.4062 +L21 0 +L10.6875 0 +z +" id="DejaVuSans-2e"/> + </defs> + <g transform="translate(42.869375 393.1678125)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-30"/> + <use x="63.623046875" xlink:href="#DejaVuSans-2e"/> + <use x="95.41015625" xlink:href="#DejaVuSans-30"/> + <use x="159.033203125" xlink:href="#DejaVuSans-30"/> + </g> + </g> + </g> + <g id="ytick_2"> + <g id="line2d_3"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="341.625438456"/> + </g> + </g> + <g id="line2d_4"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="341.625438456"/> + </g> + </g> + <g id="text_15"> + <!-- 0.01 --> + <g transform="translate(43.180625 345.993250956)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-30"/> + <use x="63.623046875" xlink:href="#DejaVuSans-2e"/> + <use x="95.41015625" xlink:href="#DejaVuSans-30"/> + <use x="159.033203125" xlink:href="#DejaVuSans-31"/> + </g> + </g> + </g> + <g id="ytick_3"> + <g id="line2d_5"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="294.450876912"/> + </g> + </g> + <g id="line2d_6"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" 
xlink:href="#m1a32005dea" y="294.450876912"/> + </g> + </g> + <g id="text_16"> + <!-- 0.02 --> + <g transform="translate(43.274375 298.818689412)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-30"/> + <use x="63.623046875" xlink:href="#DejaVuSans-2e"/> + <use x="95.41015625" xlink:href="#DejaVuSans-30"/> + <use x="159.033203125" xlink:href="#DejaVuSans-32"/> + </g> + </g> + </g> + <g id="ytick_4"> + <g id="line2d_7"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="247.276315367"/> + </g> + </g> + <g id="line2d_8"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="247.276315367"/> + </g> + </g> + <g id="text_17"> + <!-- 0.03 --> + <g transform="translate(43.034375 251.644127867)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-30"/> + <use x="63.623046875" xlink:href="#DejaVuSans-2e"/> + <use x="95.41015625" xlink:href="#DejaVuSans-30"/> + <use x="159.033203125" xlink:href="#DejaVuSans-33"/> + </g> + </g> + </g> + <g id="ytick_5"> + <g id="line2d_9"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="200.101753823"/> + </g> + </g> + <g id="line2d_10"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="200.101753823"/> + </g> + </g> + <g id="text_18"> + <!-- 0.04 --> + <g transform="translate(42.745625 204.469566323)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-30"/> + <use x="63.623046875" xlink:href="#DejaVuSans-2e"/> + <use x="95.41015625" xlink:href="#DejaVuSans-30"/> + <use x="159.033203125" xlink:href="#DejaVuSans-34"/> + </g> + </g> + </g> + <g id="ytick_6"> + <g id="line2d_11"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="152.927192279"/> + </g> + </g> + <g id="line2d_12"> + <g> + <use 
style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="152.927192279"/> + </g> + </g> + <g id="text_19"> + <!-- 0.05 --> + <g transform="translate(43.120625 157.295004779)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-30"/> + <use x="63.623046875" xlink:href="#DejaVuSans-2e"/> + <use x="95.41015625" xlink:href="#DejaVuSans-30"/> + <use x="159.033203125" xlink:href="#DejaVuSans-35"/> + </g> + </g> + </g> + <g id="ytick_7"> + <g id="line2d_13"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="105.752630735"/> + </g> + </g> + <g id="line2d_14"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="105.752630735"/> + </g> + </g> + <g id="text_20"> + <!-- 0.06 --> + <g transform="translate(42.828125 110.120443235)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-30"/> + <use x="63.623046875" xlink:href="#DejaVuSans-2e"/> + <use x="95.41015625" xlink:href="#DejaVuSans-30"/> + <use x="159.033203125" xlink:href="#DejaVuSans-36"/> + </g> + </g> + </g> + <g id="ytick_8"> + <g id="line2d_15"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="58.5780691907"/> + </g> + </g> + <g id="line2d_16"> + <g> + <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="58.5780691907"/> + </g> + </g> + <g id="text_21"> + <!-- 0.07 --> + <g transform="translate(43.098125 62.9458816907)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-30"/> + <use x="63.623046875" xlink:href="#DejaVuSans-2e"/> + <use x="95.41015625" xlink:href="#DejaVuSans-30"/> + <use x="159.033203125" xlink:href="#DejaVuSans-37"/> + </g> + </g> + </g> + </g> + <g id="patch_3"> + <path d=" +M72 43.2 +L518.4 43.2" style="fill:none;stroke:#000000;"/> + </g> + <g id="patch_4"> + <path d=" +M518.4 388.8 +L518.4 43.2" 
style="fill:none;stroke:#000000;"/> + </g> + <g id="patch_5"> + <path d=" +M72 388.8 +L518.4 388.8" style="fill:none;stroke:#000000;"/> + </g> + <g id="patch_6"> + <path d=" +M72 388.8 +L72 43.2" style="fill:none;stroke:#000000;"/> + </g> + </g> + </g> + <defs> + <clipPath id="p7ff5b81e1d"> + <rect height="345.6" width="446.4" x="72.0" y="43.2"/> + </clipPath> + </defs> +</svg>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_clustering/test-data/Taylor-Butina_Clustering_on_data_q.txt Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,4 @@ +#0 true singletons +#0 false singletons +#clusters: 1 +55091849 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/ob2fps.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,143 @@ +<tool id="chemfp_ob2fps" name="Molecules to Fingerprints" version="0.1.2"> + <description>with different fingerprint types</description> + <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism> + <requirements> + <requirement type="package" version="1.1p1">chemfp</requirement> + </requirements> + <command> + ob2fps $fptype --in "${infile.ext}" "${infile}" -o "${outfile}" --errors report 2>&1 + </command> + <inputs> + <param name="infile" type='data' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/> + <param name='fptype' type='select' format='text'> + <option value='--FP2'>FP2</option> + <option value='--FP3'>FP3</option> + <option value='--FP4'>FP4</option> + <option value='--MACCS'>MACCS</option> + </param> + </inputs> + <outputs> + <data name="outfile" format="fps" /> + </outputs> + <tests> + <!-- FP2 --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fptype" value="--FP2" /> + <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" /> + </test> + <test> + <param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fptype" value="--FP2" /> + <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" /> + </test> + <!-- FP3 --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fptype" value="--FP3" /> + <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" /> + </test> + <test> + <param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fptype" value="--FP3" /> + <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" /> + </test> + <!-- FP4 --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fptype" value="--FP4" /> + <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" /> + </test> + <test> + 
<param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fptype" value="--FP4" /> + <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" /> + </test> + <!-- MACCS --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fptype" value="--MACCS" /> + <output name="outfile" file="CID_2244_maccs.fps" ftype="fps" /> + </test> + <test> + <param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fptype" value="--MACCS" /> + <output name="outfile" file="CID_2244_maccs.fps" ftype="fps" /> + </test> + </tests> + <help> + + +**What it does** + +Generate fingerprints using OpenBabel + +----- + +**Example** + +* input:: + + - SDF File + + 28434379 + -OEChem-02031205132D + + 37 39 0 0 0 0 0 0 0999 V2000 + 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7.3704 0.9433 0.0000 C 0 0 0 0 + ...... + 1 15 1 0 0 0 0 + 1 35 1 0 0 0 0 + 2 5 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 12 1 0 0 0 0 + 3 12 2 0 0 0 0 + 3 13 1 0 0 0 0 + 4 18 1 0 0 0 0 + ...... 
+ + > <PUBCHEM_COMPOUND_CID> + 28434379 + + > <PUBCHEM_COMPOUND_CANONICALIZED> + 1 + + > <PUBCHEM_CACTVS_COMPLEXITY> + 280 + + > <PUBCHEM_CACTVS_HBOND_ACCEPTOR> + 2 + + > <PUBCHEM_CACTVS_HBOND_DONOR> + 2 + + > <PUBCHEM_CACTVS_ROTATABLE_BOND> + 2 + + > <PUBCHEM_CACTVS_SUBSKEYS> + AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== + + > + + - type : FP2 + +* output:: + + #FPS1 + #num_bits=1021 + #type=OpenBabel-FP2/1 + #software=OpenBabel/2.3.0 + #source=/tmp/dataset_409.dat.sdf + #date=2012-02-03T11:13:39 + c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c + 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300 + 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379 + + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244.can Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,1 @@ +CC(=O)Oc1ccccc1C(=O)O 2244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244.inchi Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,1 @@ +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244.sdf Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,155 @@ +2244 + -OEChem-05151212332D + + 21 21 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +2244 + +> <PUBCHEM_COMPOUND_CANONICALIZED> +1 + +> <PUBCHEM_CACTVS_COMPLEXITY> +212 + +> <PUBCHEM_CACTVS_HBOND_ACCEPTOR> +4 + +> <PUBCHEM_CACTVS_HBOND_DONOR> +1 + +> <PUBCHEM_CACTVS_ROTATABLE_BOND> +3 + +> <PUBCHEM_CACTVS_SUBSKEYS> 
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> <PUBCHEM_IUPAC_OPENEYE_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_CAS_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_SYSTEMATIC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_TRADITIONAL_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_INCHI> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> <PUBCHEM_IUPAC_INCHIKEY> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> <PUBCHEM_XLOGP3> +1.2 + +> <PUBCHEM_EXACT_MASS> +180.042259 + +> <PUBCHEM_MOLECULAR_FORMULA> +C9H8O4 + +> <PUBCHEM_MOLECULAR_WEIGHT> +180.15742 + +> <PUBCHEM_OPENEYE_CAN_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_OPENEYE_ISO_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_CACTVS_TPSA> +63.6 + +> <PUBCHEM_MONOISOTOPIC_WEIGHT> +180.042259 + +> <PUBCHEM_TOTAL_CHARGE> +0 + +> <PUBCHEM_HEAVY_ATOM_COUNT> +13 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> <PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +1 + +> <PUBCHEM_COORDINATE_TYPE> +1 +5 +255 + +> <PUBCHEM_BONDANNOTATIONS> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244.smi Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,1 @@ +O(c1c(cccc1)C(=O)O)C(=O)C 2244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244_FP2.fps Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:40:38 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244_FP3.fps Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=55 +#type=OpenBabel-FP3/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:59:15 +0400000c50b007 2244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244_FP4.fps Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=307 +#type=OpenBabel-FP4/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:59:22 +010000000000000000009800000000004001000000000000000000000000000000000240402801 2244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244_maccs.fps Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=166 +#type=OpenBabel-MACCS/2 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T17:00:39 +0000000000000000000000010000016480cca2d21e 2244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_sdf2fps/sdf2fps.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,93 @@ +<tool id="sdf2fps" name="SDF to Fingerprint" version="0.1.1"> + <description>extract fingerprints from sdf files metadata</description> + <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism> + <requirements> + <requirement type="package" version="1.1p1">chemfp</requirement> + </requirements> + <command> + sdf2fps --pubchem "${infile}" > "${outfile}" + </command> + <inputs> + <param name="infile" type='data' format="sdf" label="SDF file with fingerprints as metadata"/> + </inputs> + <outputs> + <data name="outfile" format="fps"/> + </outputs> + <tests> + </tests> + <help> + + +**What it does** + +Read an SDF file, extract the fingerprints stored in its metadata, and write them to an FPS file. +TODO: currently it only works for PubChem + +----- + +**Example** + * input:: + + SDF File + + 28434379 + -OEChem-02031205132D + + 37 39 0 0 0 0 0 0 0999 V2000 + 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7.3704 0.9433 0.0000 C 0 0 0 0 + ...... + 1 15 1 0 0 0 0 + 1 35 1 0 0 0 0 + 2 5 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 12 1 0 0 0 0 + 3 12 2 0 0 0 0 + 3 13 1 0 0 0 0 + 4 18 1 0 0 0 0 + ...... 
+ + > <PUBCHEM_COMPOUND_CID> + 28434379 + + > <PUBCHEM_COMPOUND_CANONICALIZED> + 1 + + > <PUBCHEM_CACTVS_COMPLEXITY> + 280 + + > <PUBCHEM_CACTVS_HBOND_ACCEPTOR> + 2 + + > <PUBCHEM_CACTVS_HBOND_DONOR> + 2 + + > <PUBCHEM_CACTVS_ROTATABLE_BOND> + 2 + + > <PUBCHEM_CACTVS_SUBSKEYS> + AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== + + > + +* output:: + + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat + #date=2012-02-03T10:44:12 + 07ce04000000000000000000000000000080060000000c0600 + 00000000001a800f0000780008100000101487e9608c0bed32 + 48000580644626204101b4844805901b041c2e19511e45039b + 8b2924101609401b13e4080000000000010020000004008000 + 0010000002000000000000 28434379 + + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,5 @@ +<?xml version="1.0"?> +<repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format)."> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="chemical_datatypes" owner="bgruening" changeset_revision="dbf93116a809" /> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="bgruening" changeset_revision="3bc566b84b93" /> +</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,20 @@ +<tool_dependency> + <package name="chemfp" version="1.1p1"> + <install version="1.0"> + <actions> + <action type="download_by_url">http://chem-fingerprints.googlecode.com/files/chemfp-1.1p1.tar.gz</action> + <action type="make_directory">$INSTALL_DIR/lib/python</action> + <action type="shell_command">export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python && python setup.py install --home $INSTALL_DIR --install-scripts $INSTALL_DIR/bin</action> + <action type="set_environment"> + <environment_variable name="PYTHONPATH" action="append_to">$INSTALL_DIR/lib/python</environment_variable> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + <readme> + The core chemfp functionality does not depend on a third-party library but you will need a chemistry toolkit in order to generate new fingerprints + from structure files. chemfp supports the free Open Babel and RDKit toolkits and the proprietary OEChem toolkit. + Currently the Galaxy-wrappers are using openbabel as underlying toolkit. + Compiling chemfp requires gcc and a python2.5+ version.</readme> + </package> +</tool_dependency>