changeset 0:a8ac5250d59c

Uploaded
author bgruening
date Tue, 26 Mar 2013 13:05:41 -0400
parents
children 8df9e27d8671
files chemfp_clustering/butina_clustering.py chemfp_clustering/butina_clustering.xml chemfp_clustering/nxn_clustering.py chemfp_clustering/nxn_clustering.xml chemfp_clustering/test-data/NxN_Clustering_on_q.svg chemfp_clustering/test-data/Taylor-Butina_Clustering_on_data_q.txt chemfp_ob2fps/ob2fps.xml chemfp_ob2fps/test-data/CID_2244.can chemfp_ob2fps/test-data/CID_2244.inchi chemfp_ob2fps/test-data/CID_2244.sdf chemfp_ob2fps/test-data/CID_2244.smi chemfp_ob2fps/test-data/CID_2244_FP2.fps chemfp_ob2fps/test-data/CID_2244_FP3.fps chemfp_ob2fps/test-data/CID_2244_FP4.fps chemfp_ob2fps/test-data/CID_2244_maccs.fps chemfp_sdf2fps/sdf2fps.xml repository_dependencies.xml tool_dependencies.xml
diffstat 18 files changed, 1512 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/butina_clustering.py	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+"""
+    Modified version of code examples from the chemfp project.
+    http://code.google.com/p/chem-fingerprints/
+    Thanks to Andrew Dalke of Andrew Dalke Scientific!
+"""
+
+import chemfp
+import sys
+import os
+
+chemfp_fingerprint_file = sys.argv[1]  # fingerprint file handed to chemfp.load_fingerprints
+tanimoto_threshold = float(sys.argv[2])  # similarity cutoff used by the threshold search
+outfile = sys.argv[3]  # path of the text report this script writes
+processors = int(sys.argv[4])  # thread count passed to chemfp.set_num_threads
+
+
+def get_hit_indicies(hits):
+    return [idx for (idx, _score) in hits]  # keep only the index of each (index, score) pair
+
+out = open(outfile, 'w')  # closed explicitly by out.close() at the end of the script
+dataset = chemfp.load_fingerprints( chemfp_fingerprint_file )
+# Set the thread count, then search the dataset against itself
+chemfp.set_num_threads( processors )
+search = dataset.threshold_tanimoto_search_arena(dataset, threshold = tanimoto_threshold)
+
+# Build (hit count, fingerprint index, hits) tuples and sort them in
+# descending order, so the candidate centroid with the most hits comes first.
+# Ties on hit count fall back to the fingerprint index, which is arbitrary.
+results = sorted( (  (len(hits), i, hits) for (i, hits) in enumerate(search.iter_indices_and_scores())  ),reverse=True)
+
+
+# Walk the candidates from most to fewest hits and sort each into a bucket
+true_singletons = []  # indices whose hit list has exactly one entry
+false_singletons = []  # indices whose hits were all assigned earlier
+clusters = []  # (centroid index, newly assigned member indices) pairs
+
+seen = set()  # indices already used as a centroid or placed in a cluster
+
+for (size, fp_idx, hits) in results:
+    if fp_idx in seen:
+        # Already assigned, so it cannot become a centroid
+        continue
+    seen.add(fp_idx)
+
+    if size == 1:
+        # The only fingerprint in the exclusion sphere is itself
+        true_singletons.append(fp_idx)
+        continue
+
+    members = get_hit_indicies(hits)
+    # Keep only the hits not assigned yet (fp_idx itself is already in seen)
+    unassigned = [target_idx for target_idx in members if target_idx not in seen]
+
+    if not unassigned:
+        false_singletons.append(fp_idx)
+        continue
+
+    # New cluster: record it and mark its members as assigned
+    clusters.append( (fp_idx, unassigned) )
+    seen.update(unassigned)
+
+len_cluster = len(clusters)
+#out.write( "#%s true singletons: %s\n" % ( len(true_singletons), " ".join(sorted(dataset.ids[idx] for idx in true_singletons)) ) )
+#out.write( "#%s false singletons: %s\n" % ( len(false_singletons), " ".join(sorted(dataset.ids[idx] for idx in false_singletons)) ) )
+# Header: three '#'-prefixed lines carrying counts only (the id-listing variants above are disabled)
+out.write( "#%s true singletons\n" % len(true_singletons) )
+out.write( "#%s false singletons\n" % len(false_singletons) )
+out.write( "#clusters: %s\n" % len_cluster )
+
+
+# Order clusters by descending member count; equal counts fall back to
+# the ascending id of the centroid
+def cluster_sort_key(cluster):
+    centroid, assigned = cluster
+    return (-len(assigned), dataset.ids[centroid])
+
+clusters.sort(key=cluster_sort_key)
+
+# One tab-separated line per cluster: centroid id, member count, space-separated member ids
+for centroid_idx, members in clusters:
+    centroid_name = dataset.ids[centroid_idx]
+    out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(dataset.ids[idx] for idx in members)))
+    #ToDo: len(members) need to be some biggest top 90% or something ...
+# True singletons get a count of 0 and no member column; false singletons appear only as a count in the header
+for idx in true_singletons:
+    out.write("%s\t%s\n" % (dataset.ids[idx], 0))
+
+out.close()
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/butina_clustering.xml	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,65 @@
+<tool id="chemfp_butina_clustering" name="Taylor-Butina Clustering" version="0.1">
+    <description>of molecular libraries</description>
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+    </requirements>
+    <command interpreter='python'>
+        butina_clustering.py $infile $threshold $outfile 4
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/>
+        <param name='threshold' type='float' value='0.8' ></param>
+    </inputs>
+    <outputs>
+        <data format="txt" name="outfile" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" ftype="fps" value="q.fps"/>
+            <param name='threshold' value='0.8' ></param>
+            <output name="outfile" ftype="txt"  file='Taylor-Butina_Clustering_on_data_q.txt'/>
+        </test>
+    </tests>
+<help>
+
+
+**What it does**
+Molecule library clustering using the Taylor-Butina algorithm.
+
+-----
+
+**Example**
+
+* input::
+
+	-  fingerprints in FPS format
+
+		#FPS1
+		#num_bits=881
+		#type=CACTVS-E_SCREEN/1.0 extended=2
+		#software=CACTVS/unknown
+		#source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat
+		#date=2012-02-09T13:20:37
+		07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e
+		19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000	55169009
+		07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e
+		19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000	55079807
+		........
+
+	- Tanimoto threshold : 0.8 (between 0 and 1)
+
+* output::
+
+	0 true singletons
+	=> 
+
+	0 false singletons
+	=> 
+
+	1 clusters
+	55091849 has 12 other members
+	=> 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823
+
+ </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/nxn_clustering.py	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+"""
+    Modified version of code examples from the chemfp project.
+    http://code.google.com/p/chem-fingerprints/
+    Thanks to Andrew Dalke of Andrew Dalke Scientific!
+"""
+import matplotlib
+matplotlib.use('Agg')
+import sys
+import os
+import chemfp
+import scipy.cluster.hierarchy as hcluster
+import pylab
+import numpy
+
+
+def distance_matrix(arena,t):
+    """Build the n x n matrix of Tanimoto distances (1.0 - score) for `arena`.
+
+    Pairs the threshold-`t` search does not report keep the initial distance 1.0.
+    """
+    n = len(arena)
+    distances = numpy.ones((n, n), numpy.float64)
+    # Row offset of the current query subarena within the whole arena
+    query_row = 0
+
+    for query_arena in arena.iter_arenas():
+        results = arena.threshold_tanimoto_search_arena(query_arena, threshold=t)
+        for q_i, hits in enumerate(results.iter_indices_and_scores()):
+            query_idx = query_row + q_i
+            for target_idx, score in hits:
+                distances[query_idx, target_idx] = 1.0 - score
+        query_row += len(query_arena)
+
+    return distances
+
+dataset = chemfp.load_fingerprints( sys.argv[1] )  # argv[1]: fingerprint file
+distances  = distance_matrix( dataset,float( sys.argv[2] ) )  # argv[2]: Tanimoto threshold
+linkage = hcluster.linkage( distances, method="single", metric="euclidean" )
+# NOTE(review): the square matrix is passed as-is, so scipy presumably treats each row as an observation vector rather than as precomputed distances - confirm this is intended
+# Draw the dendrogram, labelling the leaves with the fingerprint ids
+hcluster.dendrogram(linkage, labels=dataset.ids)
+# argv[3]: output path; the figure is always written as SVG
+pylab.savefig( sys.argv[3], format='svg' )
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/nxn_clustering.xml	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,61 @@
+<tool id="chemfp_nxn_clustering" name="NxN Clustering" version="0.1">
+    <description>of molecular libraries</description>
+    <requirements>
+        <requirement type="package" version="1.7.0">numpy</requirement>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+    </requirements>
+    <command interpreter='python'>
+        nxn_clustering.py $infile $threshold $outfile
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/>
+        <param name='threshold' type='float' value='0.75' ></param>
+    </inputs>
+    <outputs>
+        <data type="data" format="svg" name="outfile" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" ftype="fps" value="q.fps" />
+            <param name='threshold' value='0.75' />
+            <output ftype="svg" name="outfile" file='NxN_Clustering_on_q.svg' />
+        </test>
+    </tests>
+    <help>
+
+
+**What it does**
+Generating hierarchical clusters and visualizing clusters with dendrograms.
+
+-----
+
+**Example**
+
+* input::
+
+	-  fingerprints in FPS format
+
+		#FPS1
+		#num_bits=881
+		#type=CACTVS-E_SCREEN/1.0 extended=2
+		#software=CACTVS/unknown
+		#source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat
+		#date=2012-02-09T13:20:37
+		07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e
+		19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000	55169009
+		07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e
+		19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000	55079807
+		........
+
+	- Tanimoto threshold : 0.8 (between 0 and 1)
+
+* output::
+
+	plot for the clustering
+
+.. image:: ./static/images/chemfpclustoutput.svg
+
+ 
+    </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/test-data/NxN_Clustering_on_q.svg	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,793 @@
+<?xml version="1.0" encoding="utf-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Created with matplotlib (http://matplotlib.sourceforge.net/) -->
+<svg height="432pt" version="1.1" viewBox="0 0 576 432" width="576pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <defs>
+  <style type="text/css">
+*{stroke-linecap:square;stroke-linejoin:round;}
+  </style>
+ </defs>
+ <g id="figure_1">
+  <g id="patch_1">
+   <path d="
+M0 432
+L576 432
+L576 0
+L0 0
+z
+" style="fill:#ffffff;"/>
+  </g>
+  <g id="axes_1">
+   <g id="patch_2">
+    <path d="
+M72 388.8
+L518.4 388.8
+L518.4 43.2
+L72 43.2
+z
+" style="fill:#ffffff;"/>
+   </g>
+   <g id="LineCollection_1">
+    <defs>
+     <path d="
+M123.508 -43.2
+L123.508 -234.55
+L157.846 -234.55
+L157.846 -43.2" id="C0_0_36e0ca0abb"/>
+    </defs>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#008000;stroke-linecap:butt;" x="0" xlink:href="#C0_0_36e0ca0abb" y="432.0"/>
+    </g>
+   </g>
+   <g id="LineCollection_2">
+    <defs>
+     <path d="
+M260.862 -43.2
+L260.862 -43.2
+L295.2 -43.2
+L295.2 -43.2" id="C1_0_d55749c544"/>
+     <path d="
+M226.523 -43.2
+L226.523 -43.2
+L278.031 -43.2
+L278.031 -43.2" id="C1_1_f284ff091a"/>
+     <path d="
+M329.538 -43.2
+L329.538 -151.689
+L363.877 -151.689
+L363.877 -43.2" id="C1_2_ad9c2700c6"/>
+     <path d="
+M252.277 -43.2
+L252.277 -180.048
+L346.708 -180.048
+L346.708 -151.689" id="C1_3_59bcda1988"/>
+    </defs>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_0_d55749c544" y="432.0"/>
+    </g>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_1_f284ff091a" y="432.0"/>
+    </g>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_2_ad9c2700c6" y="432.0"/>
+    </g>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#ff0000;stroke-linecap:butt;" x="0" xlink:href="#C1_3_59bcda1988" y="432.0"/>
+    </g>
+   </g>
+   <g id="LineCollection_3">
+    <defs>
+     <path d="
+M398.215 -43.2
+L398.215 -147.208
+L432.554 -147.208
+L432.554 -43.2" id="C2_0_63eb41fae6"/>
+     <path d="
+M466.892 -43.2
+L466.892 -149.207
+L501.231 -149.207
+L501.231 -43.2" id="C2_1_2114d8afff"/>
+     <path d="
+M415.385 -147.208
+L415.385 -210.283
+L484.062 -210.283
+L484.062 -149.207" id="C2_2_580dfac2d3"/>
+    </defs>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#00bfbf;stroke-linecap:butt;" x="0" xlink:href="#C2_0_63eb41fae6" y="432.0"/>
+    </g>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#00bfbf;stroke-linecap:butt;" x="0" xlink:href="#C2_1_2114d8afff" y="432.0"/>
+    </g>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#00bfbf;stroke-linecap:butt;" x="0" xlink:href="#C2_2_580dfac2d3" y="432.0"/>
+    </g>
+   </g>
+   <g id="LineCollection_4">
+    <defs>
+     <path d="
+M299.492 -180.048
+L299.492 -278.97
+L449.723 -278.97
+L449.723 -210.283" id="C3_0_351ea019a2"/>
+     <path d="
+M192.185 -43.2
+L192.185 -315.042
+L374.608 -315.042
+L374.608 -278.97" id="C3_1_f2cbe41b26"/>
+     <path d="
+M140.677 -234.55
+L140.677 -322.212
+L283.396 -322.212
+L283.396 -315.042" id="C3_2_0ff010f580"/>
+     <path d="
+M89.1692 -43.2
+L89.1692 -372.343
+L212.037 -372.343
+L212.037 -322.212" id="C3_3_64df8a0051"/>
+    </defs>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#0000ff;stroke-linecap:butt;" x="0" xlink:href="#C3_0_351ea019a2" y="432.0"/>
+    </g>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#0000ff;stroke-linecap:butt;" x="0" xlink:href="#C3_1_f2cbe41b26" y="432.0"/>
+    </g>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#0000ff;stroke-linecap:butt;" x="0" xlink:href="#C3_2_0ff010f580" y="432.0"/>
+    </g>
+    <g clip-path="url(#p7ff5b81e1d)">
+     <use style="fill:none;stroke:#0000ff;stroke-linecap:butt;" x="0" xlink:href="#C3_3_64df8a0051" y="432.0"/>
+    </g>
+   </g>
+   <g id="matplotlib.axis_1">
+    <g id="xtick_1">
+     <g id="text_1">
+      <!-- 55079807 -->
+      <defs>
+       <path d="
+M10.9844 1.51562
+L10.9844 10.5
+Q14.7031 8.73438 18.5 7.8125
+Q22.3125 6.89062 25.9844 6.89062
+Q35.75 6.89062 40.8906 13.4531
+Q46.0469 20.0156 46.7812 33.4062
+Q43.9531 29.2031 39.5938 26.9531
+Q35.25 24.7031 29.9844 24.7031
+Q19.0469 24.7031 12.6719 31.3125
+Q6.29688 37.9375 6.29688 49.4219
+Q6.29688 60.6406 12.9375 67.4219
+Q19.5781 74.2188 30.6094 74.2188
+Q43.2656 74.2188 49.9219 64.5156
+Q56.5938 54.8281 56.5938 36.375
+Q56.5938 19.1406 48.4062 8.85938
+Q40.2344 -1.42188 26.4219 -1.42188
+Q22.7031 -1.42188 18.8906 -0.6875
+Q15.0938 0.046875 10.9844 1.51562
+M30.6094 32.4219
+Q37.25 32.4219 41.125 36.9531
+Q45.0156 41.5 45.0156 49.4219
+Q45.0156 57.2812 41.125 61.8438
+Q37.25 66.4062 30.6094 66.4062
+Q23.9688 66.4062 20.0938 61.8438
+Q16.2188 57.2812 16.2188 49.4219
+Q16.2188 41.5 20.0938 36.9531
+Q23.9688 32.4219 30.6094 32.4219" id="DejaVuSans-39"/>
+       <path d="
+M31.7812 66.4062
+Q24.1719 66.4062 20.3281 58.9062
+Q16.5 51.4219 16.5 36.375
+Q16.5 21.3906 20.3281 13.8906
+Q24.1719 6.39062 31.7812 6.39062
+Q39.4531 6.39062 43.2812 13.8906
+Q47.125 21.3906 47.125 36.375
+Q47.125 51.4219 43.2812 58.9062
+Q39.4531 66.4062 31.7812 66.4062
+M31.7812 74.2188
+Q44.0469 74.2188 50.5156 64.5156
+Q56.9844 54.8281 56.9844 36.375
+Q56.9844 17.9688 50.5156 8.26562
+Q44.0469 -1.42188 31.7812 -1.42188
+Q19.5312 -1.42188 13.0625 8.26562
+Q6.59375 17.9688 6.59375 36.375
+Q6.59375 54.8281 13.0625 64.5156
+Q19.5312 74.2188 31.7812 74.2188" id="DejaVuSans-30"/>
+       <path d="
+M10.7969 72.9062
+L49.5156 72.9062
+L49.5156 64.5938
+L19.8281 64.5938
+L19.8281 46.7344
+Q21.9688 47.4688 24.1094 47.8281
+Q26.2656 48.1875 28.4219 48.1875
+Q40.625 48.1875 47.75 41.5
+Q54.8906 34.8125 54.8906 23.3906
+Q54.8906 11.625 47.5625 5.09375
+Q40.2344 -1.42188 26.9062 -1.42188
+Q22.3125 -1.42188 17.5469 -0.640625
+Q12.7969 0.140625 7.71875 1.70312
+L7.71875 11.625
+Q12.1094 9.23438 16.7969 8.0625
+Q21.4844 6.89062 26.7031 6.89062
+Q35.1562 6.89062 40.0781 11.3281
+Q45.0156 15.7656 45.0156 23.3906
+Q45.0156 31 40.0781 35.4375
+Q35.1562 39.8906 26.7031 39.8906
+Q22.75 39.8906 18.8125 39.0156
+Q14.8906 38.1406 10.7969 36.2812
+z
+" id="DejaVuSans-35"/>
+       <path d="
+M8.20312 72.9062
+L55.0781 72.9062
+L55.0781 68.7031
+L28.6094 0
+L18.3125 0
+L43.2188 64.5938
+L8.20312 64.5938
+z
+" id="DejaVuSans-37"/>
+       <path d="
+M31.7812 34.625
+Q24.75 34.625 20.7188 30.8594
+Q16.7031 27.0938 16.7031 20.5156
+Q16.7031 13.9219 20.7188 10.1562
+Q24.75 6.39062 31.7812 6.39062
+Q38.8125 6.39062 42.8594 10.1719
+Q46.9219 13.9688 46.9219 20.5156
+Q46.9219 27.0938 42.8906 30.8594
+Q38.875 34.625 31.7812 34.625
+M21.9219 38.8125
+Q15.5781 40.375 12.0312 44.7188
+Q8.5 49.0781 8.5 55.3281
+Q8.5 64.0625 14.7188 69.1406
+Q20.9531 74.2188 31.7812 74.2188
+Q42.6719 74.2188 48.875 69.1406
+Q55.0781 64.0625 55.0781 55.3281
+Q55.0781 49.0781 51.5312 44.7188
+Q48 40.375 41.7031 38.8125
+Q48.8281 37.1562 52.7969 32.3125
+Q56.7812 27.4844 56.7812 20.5156
+Q56.7812 9.90625 50.3125 4.23438
+Q43.8438 -1.42188 31.7812 -1.42188
+Q19.7344 -1.42188 13.25 4.23438
+Q6.78125 9.90625 6.78125 20.5156
+Q6.78125 27.4844 10.7812 32.3125
+Q14.7969 37.1562 21.9219 38.8125
+M18.3125 54.3906
+Q18.3125 48.7344 21.8438 45.5625
+Q25.3906 42.3906 31.7812 42.3906
+Q38.1406 42.3906 41.7188 45.5625
+Q45.3125 48.7344 45.3125 54.3906
+Q45.3125 60.0625 41.7188 63.2344
+Q38.1406 66.4062 31.7812 66.4062
+Q25.3906 66.4062 21.8438 63.2344
+Q18.3125 60.0625 18.3125 54.3906" id="DejaVuSans-38"/>
+      </defs>
+      <g transform="translate(59.6051682692 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-35"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-35"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-30"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-37"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-39"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-38"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-30"/>
+       <use x="445.361328125" xlink:href="#DejaVuSans-37"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_2">
+     <g id="text_2">
+      <!-- 55091752 -->
+      <defs>
+       <path d="
+M12.4062 8.29688
+L28.5156 8.29688
+L28.5156 63.9219
+L10.9844 60.4062
+L10.9844 69.3906
+L28.4219 72.9062
+L38.2812 72.9062
+L38.2812 8.29688
+L54.3906 8.29688
+L54.3906 0
+L12.4062 0
+z
+" id="DejaVuSans-31"/>
+       <path d="
+M19.1875 8.29688
+L53.6094 8.29688
+L53.6094 0
+L7.32812 0
+L7.32812 8.29688
+Q12.9375 14.1094 22.625 23.8906
+Q32.3281 33.6875 34.8125 36.5312
+Q39.5469 41.8438 41.4219 45.5312
+Q43.3125 49.2188 43.3125 52.7812
+Q43.3125 58.5938 39.2344 62.25
+Q35.1562 65.9219 28.6094 65.9219
+Q23.9688 65.9219 18.8125 64.3125
+Q13.6719 62.7031 7.8125 59.4219
+L7.8125 69.3906
+Q13.7656 71.7812 18.9375 73
+Q24.125 74.2188 28.4219 74.2188
+Q39.75 74.2188 46.4844 68.5469
+Q53.2188 62.8906 53.2188 53.4219
+Q53.2188 48.9219 51.5312 44.8906
+Q49.8594 40.875 45.4062 35.4062
+Q44.1875 33.9844 37.6406 27.2188
+Q31.1094 20.4531 19.1875 8.29688" id="DejaVuSans-32"/>
+      </defs>
+      <g transform="translate(94.0317548077 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-35"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-35"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-30"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-39"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-31"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-37"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-35"/>
+       <use x="445.361328125" xlink:href="#DejaVuSans-32"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_3">
+     <g id="text_3">
+      <!-- 55168823 -->
+      <defs>
+       <path d="
+M40.5781 39.3125
+Q47.6562 37.7969 51.625 33
+Q55.6094 28.2188 55.6094 21.1875
+Q55.6094 10.4062 48.1875 4.48438
+Q40.7656 -1.42188 27.0938 -1.42188
+Q22.5156 -1.42188 17.6562 -0.515625
+Q12.7969 0.390625 7.625 2.20312
+L7.625 11.7188
+Q11.7188 9.32812 16.5938 8.10938
+Q21.4844 6.89062 26.8125 6.89062
+Q36.0781 6.89062 40.9375 10.5469
+Q45.7969 14.2031 45.7969 21.1875
+Q45.7969 27.6406 41.2812 31.2656
+Q36.7656 34.9062 28.7188 34.9062
+L20.2188 34.9062
+L20.2188 43.0156
+L29.1094 43.0156
+Q36.375 43.0156 40.2344 45.9219
+Q44.0938 48.8281 44.0938 54.2969
+Q44.0938 59.9062 40.1094 62.9062
+Q36.1406 65.9219 28.7188 65.9219
+Q24.6562 65.9219 20.0156 65.0312
+Q15.375 64.1562 9.8125 62.3125
+L9.8125 71.0938
+Q15.4375 72.6562 20.3438 73.4375
+Q25.25 74.2188 29.5938 74.2188
+Q40.8281 74.2188 47.3594 69.1094
+Q53.9062 64.0156 53.9062 55.3281
+Q53.9062 49.2656 50.4375 45.0938
+Q46.9688 40.9219 40.5781 39.3125" id="DejaVuSans-33"/>
+       <path d="
+M33.0156 40.375
+Q26.375 40.375 22.4844 35.8281
+Q18.6094 31.2969 18.6094 23.3906
+Q18.6094 15.5312 22.4844 10.9531
+Q26.375 6.39062 33.0156 6.39062
+Q39.6562 6.39062 43.5312 10.9531
+Q47.4062 15.5312 47.4062 23.3906
+Q47.4062 31.2969 43.5312 35.8281
+Q39.6562 40.375 33.0156 40.375
+M52.5938 71.2969
+L52.5938 62.3125
+Q48.875 64.0625 45.0938 64.9844
+Q41.3125 65.9219 37.5938 65.9219
+Q27.8281 65.9219 22.6719 59.3281
+Q17.5312 52.7344 16.7969 39.4062
+Q19.6719 43.6562 24.0156 45.9219
+Q28.375 48.1875 33.5938 48.1875
+Q44.5781 48.1875 50.9531 41.5156
+Q57.3281 34.8594 57.3281 23.3906
+Q57.3281 12.1562 50.6875 5.35938
+Q44.0469 -1.42188 33.0156 -1.42188
+Q20.3594 -1.42188 13.6719 8.26562
+Q6.98438 17.9688 6.98438 36.375
+Q6.98438 53.6562 15.1875 63.9375
+Q23.3906 74.2188 37.2031 74.2188
+Q40.9219 74.2188 44.7031 73.4844
+Q48.4844 72.75 52.5938 71.2969" id="DejaVuSans-36"/>
+      </defs>
+      <g transform="translate(128.250216346 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-35"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-35"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-31"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-36"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-38"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-38"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-32"/>
+       <use x="445.361328125" xlink:href="#DejaVuSans-33"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_4">
+     <g id="text_4">
+      <!-- 55169009 -->
+      <g transform="translate(162.529615385 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-35"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-35"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-31"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-36"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-39"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-30"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-30"/>
+       <use x="445.361328125" xlink:href="#DejaVuSans-39"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_5">
+     <g id="text_5">
+      <!-- 55102353 -->
+      <g transform="translate(196.927139423 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-35"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-35"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-31"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-30"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-32"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-33"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-35"/>
+       <use x="445.361328125" xlink:href="#DejaVuSans-33"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_6">
+     <g id="text_6">
+      <!-- 55091466 -->
+      <defs>
+       <path d="
+M37.7969 64.3125
+L12.8906 25.3906
+L37.7969 25.3906
+z
+
+M35.2031 72.9062
+L47.6094 72.9062
+L47.6094 25.3906
+L58.0156 25.3906
+L58.0156 17.1875
+L47.6094 17.1875
+L47.6094 0
+L37.7969 0
+L37.7969 17.1875
+L4.89062 17.1875
+L4.89062 26.7031
+z
+" id="DejaVuSans-34"/>
+      </defs>
+      <g transform="translate(231.162475962 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-35"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-35"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-30"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-39"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-31"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-34"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-36"/>
+       <use x="445.361328125" xlink:href="#DejaVuSans-36"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_7">
+     <g id="text_7">
+      <!-- 55091416 -->
+      <g transform="translate(265.5009375 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-35"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-35"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-30"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-39"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-31"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-34"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-31"/>
+       <use x="445.361328125" xlink:href="#DejaVuSans-36"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_8">
+     <g id="text_8">
+      <!-- 6499094 -->
+      <g transform="translate(303.571586538 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-36"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-34"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-39"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-39"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-30"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-39"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-34"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_9">
+     <g id="text_9">
+      <!-- 6485578 -->
+      <g transform="translate(337.984110577 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-36"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-34"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-38"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-35"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-35"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-37"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-38"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_10">
+     <g id="text_10">
+      <!-- 55091467 -->
+      <g transform="translate(368.651322115 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-35"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-35"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-30"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-39"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-31"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-34"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-36"/>
+       <use x="445.361328125" xlink:href="#DejaVuSans-37"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_11">
+     <g id="text_11">
+      <!-- 55091849 -->
+      <g transform="translate(402.898846154 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-35"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-35"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-30"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-39"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-31"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-38"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-34"/>
+       <use x="445.361328125" xlink:href="#DejaVuSans-39"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_12">
+     <g id="text_12">
+      <!-- 3153534 -->
+      <g transform="translate(440.963870192 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-33"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-31"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-35"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-33"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-35"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-33"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-34"/>
+      </g>
+     </g>
+    </g>
+    <g id="xtick_13">
+     <g id="text_13">
+      <!-- 6485577 -->
+      <g transform="translate(475.440144231 401.70625)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-36"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-34"/>
+       <use x="127.24609375" xlink:href="#DejaVuSans-38"/>
+       <use x="190.869140625" xlink:href="#DejaVuSans-35"/>
+       <use x="254.4921875" xlink:href="#DejaVuSans-35"/>
+       <use x="318.115234375" xlink:href="#DejaVuSans-37"/>
+       <use x="381.73828125" xlink:href="#DejaVuSans-37"/>
+      </g>
+     </g>
+    </g>
+   </g>
+   <g id="matplotlib.axis_2">
+    <g id="ytick_1">
+     <g id="line2d_1">
+      <defs>
+       <path d="
+M0 0
+L4 0" id="me8a85f7bf6" style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;"/>
+      </defs>
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="388.8"/>
+      </g>
+     </g>
+     <g id="line2d_2">
+      <defs>
+       <path d="
+M0 0
+L-4 0" id="m1a32005dea" style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;"/>
+      </defs>
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="388.8"/>
+      </g>
+     </g>
+     <g id="text_14">
+      <!-- 0.00 -->
+      <defs>
+       <path d="
+M10.6875 12.4062
+L21 12.4062
+L21 0
+L10.6875 0
+z
+" id="DejaVuSans-2e"/>
+      </defs>
+      <g transform="translate(42.869375 393.1678125)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-30"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>
+       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>
+       <use x="159.033203125" xlink:href="#DejaVuSans-30"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_2">
+     <g id="line2d_3">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="341.625438456"/>
+      </g>
+     </g>
+     <g id="line2d_4">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="341.625438456"/>
+      </g>
+     </g>
+     <g id="text_15">
+      <!-- 0.01 -->
+      <g transform="translate(43.180625 345.993250956)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-30"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>
+       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>
+       <use x="159.033203125" xlink:href="#DejaVuSans-31"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_3">
+     <g id="line2d_5">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="294.450876912"/>
+      </g>
+     </g>
+     <g id="line2d_6">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="294.450876912"/>
+      </g>
+     </g>
+     <g id="text_16">
+      <!-- 0.02 -->
+      <g transform="translate(43.274375 298.818689412)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-30"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>
+       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>
+       <use x="159.033203125" xlink:href="#DejaVuSans-32"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_4">
+     <g id="line2d_7">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="247.276315367"/>
+      </g>
+     </g>
+     <g id="line2d_8">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="247.276315367"/>
+      </g>
+     </g>
+     <g id="text_17">
+      <!-- 0.03 -->
+      <g transform="translate(43.034375 251.644127867)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-30"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>
+       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>
+       <use x="159.033203125" xlink:href="#DejaVuSans-33"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_5">
+     <g id="line2d_9">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="200.101753823"/>
+      </g>
+     </g>
+     <g id="line2d_10">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="200.101753823"/>
+      </g>
+     </g>
+     <g id="text_18">
+      <!-- 0.04 -->
+      <g transform="translate(42.745625 204.469566323)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-30"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>
+       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>
+       <use x="159.033203125" xlink:href="#DejaVuSans-34"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_6">
+     <g id="line2d_11">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="152.927192279"/>
+      </g>
+     </g>
+     <g id="line2d_12">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="152.927192279"/>
+      </g>
+     </g>
+     <g id="text_19">
+      <!-- 0.05 -->
+      <g transform="translate(43.120625 157.295004779)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-30"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>
+       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>
+       <use x="159.033203125" xlink:href="#DejaVuSans-35"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_7">
+     <g id="line2d_13">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="105.752630735"/>
+      </g>
+     </g>
+     <g id="line2d_14">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="105.752630735"/>
+      </g>
+     </g>
+     <g id="text_20">
+      <!-- 0.06 -->
+      <g transform="translate(42.828125 110.120443235)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-30"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>
+       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>
+       <use x="159.033203125" xlink:href="#DejaVuSans-36"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_8">
+     <g id="line2d_15">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="72.0" xlink:href="#me8a85f7bf6" y="58.5780691907"/>
+      </g>
+     </g>
+     <g id="line2d_16">
+      <g>
+       <use style="stroke:#000000;stroke-linecap:butt;stroke-width:0.5;" x="518.4" xlink:href="#m1a32005dea" y="58.5780691907"/>
+      </g>
+     </g>
+     <g id="text_21">
+      <!-- 0.07 -->
+      <g transform="translate(43.098125 62.9458816907)scale(0.12 -0.12)">
+       <use xlink:href="#DejaVuSans-30"/>
+       <use x="63.623046875" xlink:href="#DejaVuSans-2e"/>
+       <use x="95.41015625" xlink:href="#DejaVuSans-30"/>
+       <use x="159.033203125" xlink:href="#DejaVuSans-37"/>
+      </g>
+     </g>
+    </g>
+   </g>
+   <g id="patch_3">
+    <path d="
+M72 43.2
+L518.4 43.2" style="fill:none;stroke:#000000;"/>
+   </g>
+   <g id="patch_4">
+    <path d="
+M518.4 388.8
+L518.4 43.2" style="fill:none;stroke:#000000;"/>
+   </g>
+   <g id="patch_5">
+    <path d="
+M72 388.8
+L518.4 388.8" style="fill:none;stroke:#000000;"/>
+   </g>
+   <g id="patch_6">
+    <path d="
+M72 388.8
+L72 43.2" style="fill:none;stroke:#000000;"/>
+   </g>
+  </g>
+ </g>
+ <defs>
+  <clipPath id="p7ff5b81e1d">
+   <rect height="345.6" width="446.4" x="72.0" y="43.2"/>
+  </clipPath>
+ </defs>
+</svg>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/test-data/Taylor-Butina_Clustering_on_data_q.txt	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,4 @@
+#0 true singletons
+#0 false singletons
+#clusters: 1
+55091849	12	6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/ob2fps.xml	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,143 @@
+<tool id="chemfp_ob2fps" name="Molecules to Fingerprints" version="0.1.2">
+    <description>with different fingerprint types</description>
+    <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism>
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+    </requirements>
+    <command>
+        ob2fps $fptype --in "${infile.ext}" "${infile}" -o "${outfile}" --errors report 2>&#38;1
+    </command>
+    <inputs>
+        <param name="infile" type='data' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>
+        <param name='fptype' type='select' format='text'>
+            <option value='--FP2'>FP2</option>
+            <option value='--FP3'>FP3</option>
+            <option value='--FP4'>FP4</option>
+            <option value='--MACCS'>MACCS</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fps" />
+    </outputs>
+    <tests>
+        <!-- FP2 -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fptype" value="--FP2" />
+            <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fptype" value="--FP2" />
+            <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
+        </test>
+        <!-- FP3 -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fptype" value="--FP3" />
+            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fptype" value="--FP3" />
+            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
+        </test>
+        <!-- FP4 -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fptype" value="--FP4" />
+            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fptype" value="--FP4" />
+            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
+        </test>
+        <!-- MACCS -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fptype" value="--MACCS" />
+            <output name="outfile" file="CID_2244_maccs.fps" ftype="fps" />
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fptype" value="--MACCS" />
+            <output name="outfile" file="CID_2244_maccs.fps" ftype="fps" />
+        </test>
+    </tests>
+    <help>
+
+
+**What it does**
+
+Generates molecular fingerprints (FP2, FP3, FP4 or MACCS) from a molecule file using Open Babel.
+
+-----
+
+**Example**
+
+* input::
+	
+	      - SDF File
+
+		28434379
+		  -OEChem-02031205132D
+
+		 37 39  0     0  0  0  0  0  0999 V2000
+		    8.1648   -1.8842    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+		    6.0812   -0.2134    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+		    6.0812   -1.8229    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+		    2.5369   -2.0182    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+		    6.3919    0.7371    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+		    7.3704    0.9433    0.0000 C   0  0  0  0 
+		    ......
+		  1 15  1  0  0  0  0
+		  1 35  1  0  0  0  0
+		  2  5  1  0  0  0  0
+		  2 11  1  0  0  0  0
+		  2 12  1  0  0  0  0
+		  3 12  2  0  0  0  0
+		  3 13  1  0  0  0  0
+		  4 18  1  0  0  0  0
+		  ......
+
+			&gt; &lt;PUBCHEM_COMPOUND_CID&gt;
+			28434379
+
+			&gt; &lt;PUBCHEM_COMPOUND_CANONICALIZED&gt;
+			1
+
+			&gt; &lt;PUBCHEM_CACTVS_COMPLEXITY&gt;
+			280
+
+			&gt; &lt;PUBCHEM_CACTVS_HBOND_ACCEPTOR&gt;
+			2
+
+			&gt; &lt;PUBCHEM_CACTVS_HBOND_DONOR&gt;
+			2
+
+			&gt; &lt;PUBCHEM_CACTVS_ROTATABLE_BOND&gt;
+			2
+
+			&gt; &lt;PUBCHEM_CACTVS_SUBSKEYS&gt;
+			AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
+
+			&gt;
+
+		- type : FP2
+
+* output::
+
+	#FPS1
+	#num_bits=1021
+	#type=OpenBabel-FP2/1
+	#software=OpenBabel/2.3.0
+	#source=/tmp/dataset_409.dat.sdf
+	#date=2012-02-03T11:13:39
+	c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c
+	0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300
+	10000000000080000000c0000060000c0000060810000010000000800102000000	28434379
+
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244.can	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,1 @@
+CC(=O)Oc1ccccc1C(=O)O	2244
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244.inchi	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,1 @@
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244.sdf	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,155 @@
+2244
+  -OEChem-05151212332D
+
+ 21 21  0     0  0  0  0  0  0999 V2000
+    3.7320   -0.0600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    2.8660   -1.5600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -2.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641    0.9400    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.8660   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.0000   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.0611   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.8671   -0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -2.6800    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.8671   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    2.3100    0.4769    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.4631    0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.6900   -0.5969    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301    2.0600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+  1  5  1  0  0  0  0
+  1 12  1  0  0  0  0
+  2 11  1  0  0  0  0
+  2 21  1  0  0  0  0
+  3 11  2  0  0  0  0
+  4 12  2  0  0  0  0
+  5  6  1  0  0  0  0
+  5  7  2  0  0  0  0
+  6  8  2  0  0  0  0
+  6 11  1  0  0  0  0
+  7  9  1  0  0  0  0
+  7 14  1  0  0  0  0
+  8 10  1  0  0  0  0
+  8 15  1  0  0  0  0
+  9 10  2  0  0  0  0
+  9 16  1  0  0  0  0
+ 10 17  1  0  0  0  0
+ 12 13  1  0  0  0  0
+ 13 18  1  0  0  0  0
+ 13 19  1  0  0  0  0
+ 13 20  1  0  0  0  0
+M  END
+> <PUBCHEM_COMPOUND_CID>
+2244
+
+> <PUBCHEM_COMPOUND_CANONICALIZED>
+1
+
+> <PUBCHEM_CACTVS_COMPLEXITY>
+212
+
+> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
+4
+
+> <PUBCHEM_CACTVS_HBOND_DONOR>
+1
+
+> <PUBCHEM_CACTVS_ROTATABLE_BOND>
+3
+
+> <PUBCHEM_CACTVS_SUBSKEYS>
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==
+
+> <PUBCHEM_IUPAC_OPENEYE_NAME>
+2-acetoxybenzoic acid
+
+> <PUBCHEM_IUPAC_CAS_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_SYSTEMATIC_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_TRADITIONAL_NAME>
+2-acetoxybenzoic acid
+
+> <PUBCHEM_IUPAC_INCHI>
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
+
+> <PUBCHEM_IUPAC_INCHIKEY>
+BSYNRYMUTXBXSQ-UHFFFAOYSA-N
+
+> <PUBCHEM_XLOGP3>
+1.2
+
+> <PUBCHEM_EXACT_MASS>
+180.042259
+
+> <PUBCHEM_MOLECULAR_FORMULA>
+C9H8O4
+
+> <PUBCHEM_MOLECULAR_WEIGHT>
+180.15742
+
+> <PUBCHEM_OPENEYE_CAN_SMILES>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+> <PUBCHEM_OPENEYE_ISO_SMILES>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+> <PUBCHEM_CACTVS_TPSA>
+63.6
+
+> <PUBCHEM_MONOISOTOPIC_WEIGHT>
+180.042259
+
+> <PUBCHEM_TOTAL_CHARGE>
+0
+
+> <PUBCHEM_HEAVY_ATOM_COUNT>
+13
+
+> <PUBCHEM_ATOM_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ISOTOPIC_ATOM_COUNT>
+0
+
+> <PUBCHEM_COMPONENT_COUNT>
+1
+
+> <PUBCHEM_CACTVS_TAUTO_COUNT>
+1
+
+> <PUBCHEM_COORDINATE_TYPE>
+1
+5
+255
+
+> <PUBCHEM_BONDANNOTATIONS>
+5  6  8
+5  7  8
+6  8  8
+7  9  8
+8  10  8
+9  10  8
+
+$$$$
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244.smi	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,1 @@
+O(c1c(cccc1)C(=O)O)C(=O)C	2244
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244_FP2.fps	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.3.1
+#source=CID_2244.sdf
+#date=2012-05-15T16:40:38
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004	2244
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244_FP3.fps	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=55
+#type=OpenBabel-FP3/1
+#software=OpenBabel/2.3.1
+#source=CID_2244.sdf
+#date=2012-05-15T16:59:15
+0400000c50b007	2244
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244_FP4.fps	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=307
+#type=OpenBabel-FP4/1
+#software=OpenBabel/2.3.1
+#source=CID_2244.sdf
+#date=2012-05-15T16:59:22
+010000000000000000009800000000004001000000000000000000000000000000000240402801	2244
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244_maccs.fps	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=166
+#type=OpenBabel-MACCS/2
+#software=OpenBabel/2.3.1
+#source=CID_2244.sdf
+#date=2012-05-15T17:00:39
+0000000000000000000000010000016480cca2d21e	2244
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_sdf2fps/sdf2fps.xml	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,93 @@
+<tool id="sdf2fps" name="SDF to Fingerprint" version="0.1.1">
+    <description>extract fingerprints from sdf files metadata</description>
+    <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism>
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+    </requirements>
+    <command>
+        sdf2fps --pubchem "${infile}" > "${outfile}"
+    </command>
+    <inputs>
+        <param name="infile" type='data' format="sdf" label="SDF file with fingerprints as metadata"/>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fps"/>
+    </outputs>
+    <tests>
+    </tests>
+    <help>
+
+
+**What it does**
+
+Reads an SDF file, extracts the fingerprints stored in its metadata, and writes them to an FPS file.
+TODO: currently this only works for PubChem SDF files.
+
+-----
+
+**Example**
+	* input::
+	
+		SDF File
+
+		28434379
+		  -OEChem-02031205132D
+
+		 37 39  0     0  0  0  0  0  0999 V2000
+		    8.1648   -1.8842    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+		    6.0812   -0.2134    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+		    6.0812   -1.8229    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+		    2.5369   -2.0182    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+		    6.3919    0.7371    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+		    7.3704    0.9433    0.0000 C   0  0  0  0 
+		    ......
+		  1 15  1  0  0  0  0
+		  1 35  1  0  0  0  0
+		  2  5  1  0  0  0  0
+		  2 11  1  0  0  0  0
+		  2 12  1  0  0  0  0
+		  3 12  2  0  0  0  0
+		  3 13  1  0  0  0  0
+		  4 18  1  0  0  0  0
+		  ......
+
+			&gt; &lt;PUBCHEM_COMPOUND_CID&gt;
+			28434379
+
+			&gt; &lt;PUBCHEM_COMPOUND_CANONICALIZED&gt;
+			1
+
+			&gt; &lt;PUBCHEM_CACTVS_COMPLEXITY&gt;
+			280
+
+			&gt; &lt;PUBCHEM_CACTVS_HBOND_ACCEPTOR&gt;
+			2
+
+			&gt; &lt;PUBCHEM_CACTVS_HBOND_DONOR&gt;
+			2
+
+			&gt; &lt;PUBCHEM_CACTVS_ROTATABLE_BOND&gt;
+			2
+
+			&gt; &lt;PUBCHEM_CACTVS_SUBSKEYS&gt;
+			AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
+
+			&gt;
+
+* output::
+
+	#FPS1
+	#num_bits=881
+	#type=CACTVS-E_SCREEN/1.0 extended=2
+	#software=CACTVS/unknown
+	#source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat
+	#date=2012-02-03T10:44:12
+	07ce04000000000000000000000000000080060000000c0600
+	00000000001a800f0000780008100000101487e9608c0bed32
+	48000580644626204101b4844805901b041c2e19511e45039b
+	8b2924101609401b13e4080000000000010020000004008000
+	0010000002000000000000	28434379
+
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+<repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format).">
+    <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="chemical_datatypes" owner="bgruening" changeset_revision="dbf93116a809" />
+    <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="bgruening" changeset_revision="3bc566b84b93" />
+</repositories>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,20 @@
+<tool_dependency>
+    <package name="chemfp" version="1.1p1">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://chem-fingerprints.googlecode.com/files/chemfp-1.1p1.tar.gz</action>
+                <action type="make_directory">$INSTALL_DIR/lib/python</action>
+                <action type="shell_command">export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python &amp;&amp; python setup.py install --home $INSTALL_DIR --install-scripts $INSTALL_DIR/bin</action>
+                <action type="set_environment">
+                    <environment_variable name="PYTHONPATH" action="append_to">$INSTALL_DIR/lib/python</environment_variable>
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+        The core chemfp functionality does not depend on a third-party library but you will need a chemistry toolkit in order to generate new fingerprints 
+        from structure files. chemfp supports the free Open Babel and RDKit toolkits and the proprietary OEChem toolkit.
+        Currently the Galaxy wrappers use Open Babel as the underlying toolkit.
+        Compiling chemfp requires gcc and Python 2.5 or later.</readme>
+    </package>
+</tool_dependency>