# HG changeset patch
# User bgruening
# Date 1364317541 14400
# Node ID a8ac5250d59c78567908b44b3dda736816fffb83
Uploaded
diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/butina_clustering.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/butina_clustering.py Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+"""
+    Taylor-Butina clustering of the fingerprints in a chemfp FPS file.
+
+    Usage: butina_clustering.py <fps_file> <tanimoto_threshold> <outfile> <threads>
+
+    Modified version of code examples from the chemfp project.
+    http://code.google.com/p/chem-fingerprints/
+    Thanks to Andrew Dalke of Andrew Dalke Scientific!
+"""
+
+import chemfp
+import sys
+import os
+
+
+def get_hit_indicies(hits):
+    """Return only the target indices from a list of (index, score) hits."""
+    return [target_idx for (target_idx, score) in hits]
+
+
+def butina_cluster(hit_lists):
+    """Split fingerprints into true singletons, false singletons and clusters.
+
+    hit_lists yields, for fingerprint 0, 1, 2, ..., the list of
+    (target index, score) pairs found within the threshold.
+    Returns (true_singletons, false_singletons, clusters) where clusters
+    is a list of (centroid index, member indices); a member list never
+    contains its own centroid.
+    """
+    # Reorder so the centroid with the most hits comes first.
+    # Ties are broken, arbitrarily, by fingerprint index.
+    results = sorted(((len(hits), i, hits) for (i, hits) in enumerate(hit_lists)), reverse=True)
+
+    true_singletons = []
+    false_singletons = []
+    clusters = []
+    seen = set()
+
+    for (size, fp_idx, hits) in results:
+        if fp_idx in seen:
+            # Can't use a centroid which is already assigned
+            continue
+        seen.add(fp_idx)
+
+        if size <= 1:
+            # At most the fingerprint itself is in the exclusion sphere.
+            # "<=" so that a fingerprint without even a self-hit is not
+            # misreported as a false singleton.
+            true_singletons.append(fp_idx)
+            continue
+
+        # Figure out which hits haven't yet been assigned. The centroid is
+        # already in "seen", so it is never listed among its own members.
+        unassigned = [idx for idx in get_hit_indicies(hits) if idx not in seen]
+
+        if not unassigned:
+            # All neighbours were claimed by earlier clusters
+            false_singletons.append(fp_idx)
+            continue
+
+        # this is a new cluster
+        clusters.append((fp_idx, unassigned))
+        seen.update(unassigned)
+
+    return true_singletons, false_singletons, clusters
+
+
+def main(argv):
+    """Cluster the FPS file named in argv and write the report file."""
+    if len(argv) < 5:
+        sys.exit("Usage: %s <fps_file> <tanimoto_threshold> <outfile> <threads>" % argv[0])
+
+    chemfp_fingerprint_file = argv[1]
+    tanimoto_threshold = float(argv[2])
+    outfile = argv[3]
+    processors = int(argv[4])
+
+    dataset = chemfp.load_fingerprints(chemfp_fingerprint_file)
+
+    chemfp.set_num_threads(processors)
+    search = dataset.threshold_tanimoto_search_arena(dataset, threshold=tanimoto_threshold)
+
+    true_singletons, false_singletons, clusters = butina_cluster(search.iter_indices_and_scores())
+
+    # Sort so the cluster with the most compounds comes first,
+    # then by alphabetically smallest id
+    def cluster_sort_key(cluster):
+        centroid_idx, members = cluster
+        return -len(members), dataset.ids[centroid_idx]
+
+    clusters.sort(key=cluster_sort_key)
+
+    # Open the report only once there is something to write, and close it
+    # even when writing fails (try/finally keeps Python 2.5 compatibility).
+    out = open(outfile, 'w')
+    try:
+        out.write("#%s true singletons\n" % len(true_singletons))
+        out.write("#%s false singletons\n" % len(false_singletons))
+        out.write("#clusters: %s\n" % len(clusters))
+
+        for centroid_idx, members in clusters:
+            centroid_name = dataset.ids[centroid_idx]
+            out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(dataset.ids[idx] for idx in members)))
+            #ToDo: len(members) need to be some biggest top 90% or something ...
+
+        for idx in true_singletons:
+            out.write("%s\t%s\n" % (dataset.ids[idx], 0))
+    finally:
+        out.close()
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/butina_clustering.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/butina_clustering.xml Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,65 @@
+
+ of molecular libraries
+
+ chemfp
+
+
+ butina_clustering.py $infile $threshold $outfile 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+Molecule library clustering using the Taylor-Butina algorithm.
+
+-----
+
+**Example**
+
+* input::
+
+ - fingerprints in FPS format
+
+ #FPS1
+ #num_bits=881
+ #type=CACTVS-E_SCREEN/1.0 extended=2
+ #software=CACTVS/unknown
+ #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat
+ #date=2012-02-09T13:20:37
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807
+ ........
+
+ - Tanimoto threshold : 0.8 (between 0 and 1)
+
+* output::
+
+ 0 true singletons
+ =>
+
+ 0 false singletons
+ =>
+
+ 1 clusters
+ 55091849 has 12 other members
+ => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823
+
+
+
+
diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/nxn_clustering.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/nxn_clustering.py Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+"""
+    Hierarchical clustering of a chemfp FPS file, drawn as an SVG dendrogram.
+
+    Usage: nxn_clustering.py <fps_file> <tanimoto_threshold> <outfile>
+
+    Modified version of code examples from the chemfp project.
+    http://code.google.com/p/chem-fingerprints/
+    Thanks to Andrew Dalke of Andrew Dalke Scientific!
+"""
+import matplotlib
+matplotlib.use('Agg')
+import sys
+import os
+import chemfp
+import scipy.cluster.hierarchy as hcluster
+import scipy.spatial.distance
+import pylab
+import numpy
+
+
+def distance_matrix(arena, t):
+    """Return the square matrix of (1 - Tanimoto) distances within arena.
+
+    Only pairs scoring at least t are reported by the search; every
+    other pair keeps the maximum distance of 1.0.
+    """
+    n = len(arena)
+    # The Tanimoto search computes all of the scores when threshold=0.0.
+    # The SearchResult contains sparse data, so I set all values
+    # now to 1.0 so you can experiment with higher thresholds.
+    distances = numpy.ones((n, n), numpy.float64)
+
+    # Keep track of where the query subarena is in the query
+    query_row = 0
+
+    for query_arena in arena.iter_arenas():
+        results = arena.threshold_tanimoto_search_arena(query_arena, threshold=t)
+        for q_i, hits in enumerate(results.iter_indices_and_scores()):
+            query_idx = query_row + q_i
+            for target_idx, score in hits:
+                distances[query_idx, target_idx] = 1.0 - score
+        query_row += len(query_arena)
+
+    return distances
+
+
+def main(argv):
+    """Cluster the FPS file named in argv and save the dendrogram as SVG."""
+    if len(argv) < 4:
+        sys.exit("Usage: %s <fps_file> <tanimoto_threshold> <outfile>" % argv[0])
+
+    dataset = chemfp.load_fingerprints(argv[1])
+    distances = distance_matrix(dataset, float(argv[2]))
+
+    # linkage() reads a square 2-D array as one observation vector per row
+    # and would cluster on Euclidean distances between the rows. Hand it
+    # the condensed form so it clusters on the Tanimoto distances instead.
+    # The diagonal is zeroed first because a condensed matrix has no
+    # self-distances (a fingerprint without a self-hit would keep 1.0).
+    numpy.fill_diagonal(distances, 0.0)
+    condensed = scipy.spatial.distance.squareform(distances, checks=False)
+    linkage = hcluster.linkage(condensed, method="single")
+
+    # Plot using matplotlib, which you must have installed
+    hcluster.dendrogram(linkage, labels=dataset.ids)
+
+    pylab.savefig(argv[3], format='svg')
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/nxn_clustering.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/nxn_clustering.xml Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,61 @@
+
+ of molecular libraries
+
+ numpy
+ chemfp
+
+
+ nxn_clustering.py $infile $threshold $outfile
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+Generating hierarchical clusters and visualizing clusters with dendrograms.
+
+-----
+
+**Example**
+
+* input::
+
+ - fingerprints in FPS format
+
+ #FPS1
+ #num_bits=881
+ #type=CACTVS-E_SCREEN/1.0 extended=2
+ #software=CACTVS/unknown
+ #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat
+ #date=2012-02-09T13:20:37
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009
+ 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e
+ 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807
+ ........
+
+ - Tanimoto threshold : 0.8 (between 0 and 1)
+
+* output::
+
+ plot for the clustering
+
+.. image:: ./static/images/chemfpclustoutput.svg
+
+
+
+
+
diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/test-data/NxN_Clustering_on_q.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/test-data/NxN_Clustering_on_q.svg Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,793 @@
+
+
+
+
diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/test-data/Taylor-Butina_Clustering_on_data_q.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_clustering/test-data/Taylor-Butina_Clustering_on_data_q.txt Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,4 @@
+#0 true singletons
+#0 false singletons
+#clusters: 1
+55091849 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823
diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/ob2fps.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/ob2fps.xml Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,143 @@
+
+ with different fingerprint types
+
+
+ chemfp
+
+
+ ob2fps $fptype --in "${infile.ext}" "${infile}" -o "${outfile}" --errors report 2>&1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Generate fingerprints using OpenBabel
+
+-----
+
+**Example**
+
+* input::
+
+ - SDF File
+
+ 28434379
+ -OEChem-02031205132D
+
+ 37 39 0 0 0 0 0 0 0999 V2000
+ 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 7.3704 0.9433 0.0000 C 0 0 0 0
+ ......
+ 1 15 1 0 0 0 0
+ 1 35 1 0 0 0 0
+ 2 5 1 0 0 0 0
+ 2 11 1 0 0 0 0
+ 2 12 1 0 0 0 0
+ 3 12 2 0 0 0 0
+ 3 13 1 0 0 0 0
+ 4 18 1 0 0 0 0
+ ......
+
+ >PUBCHEM_COMPOUND_CID<
+ 28434379
+
+ > <PUBCHEM_COMPOUND_CANONICALIZED>
+ 1
+
+ > <PUBCHEM_CACTVS_COMPLEXITY>
+ 280
+
+ > <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
+ 2
+
+ > <PUBCHEM_CACTVS_HBOND_DONOR>
+ 2
+
+ > <PUBCHEM_CACTVS_ROTATABLE_BOND>
+ 2
+
+ > <PUBCHEM_CACTVS_SUBSKEYS>
+ AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
+
+ >
+
+ - type : FP2
+
+* output::
+
+ #FPS1
+ #num_bits=1021
+ #type=OpenBabel-FP2/1
+ #software=OpenBabel/2.3.0
+ #source=/tmp/dataset_409.dat.sdf
+ #date=2012-02-03T11:13:39
+ c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c
+ 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300
+ 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379
+
+
+
+
diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244.can
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244.can Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,1 @@
+CC(=O)Oc1ccccc1C(=O)O 2244
diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244.inchi Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,1 @@
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244.sdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244.sdf Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,155 @@
+2244
+ -OEChem-05151212332D
+
+ 21 21 0 0 0 0 0 0 0999 V2000
+ 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1 5 1 0 0 0 0
+ 1 12 1 0 0 0 0
+ 2 11 1 0 0 0 0
+ 2 21 1 0 0 0 0
+ 3 11 2 0 0 0 0
+ 4 12 2 0 0 0 0
+ 5 6 1 0 0 0 0
+ 5 7 2 0 0 0 0
+ 6 8 2 0 0 0 0
+ 6 11 1 0 0 0 0
+ 7 9 1 0 0 0 0
+ 7 14 1 0 0 0 0
+ 8 10 1 0 0 0 0
+ 8 15 1 0 0 0 0
+ 9 10 2 0 0 0 0
+ 9 16 1 0 0 0 0
+ 10 17 1 0 0 0 0
+ 12 13 1 0 0 0 0
+ 13 18 1 0 0 0 0
+ 13 19 1 0 0 0 0
+ 13 20 1 0 0 0 0
+M END
+>
+2244
+
+>
+1
+
+>
+212
+
+>
+4
+
+>
+1
+
+>
+3
+
+>
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==
+
+>
+2-acetoxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetoxybenzoic acid
+
+>
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
+
+>
+BSYNRYMUTXBXSQ-UHFFFAOYSA-N
+
+>
+1.2
+
+>
+180.042259
+
+>
+C9H8O4
+
+>
+180.15742
+
+>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+>
+63.6
+
+>
+180.042259
+
+>
+0
+
+>
+13
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+1
+
+>
+1
+
+>
+1
+5
+255
+
+>
+5 6 8
+5 7 8
+6 8 8
+7 9 8
+8 10 8
+9 10 8
+
+$$$$
+
diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244.smi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244.smi Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,1 @@
+O(c1c(cccc1)C(=O)O)C(=O)C 2244
diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244_FP2.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244_FP2.fps Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=1021
+#type=OpenBabel-FP2/1
+#software=OpenBabel/2.3.1
+#source=CID_2244.sdf
+#date=2012-05-15T16:40:38
+00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244
diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244_FP3.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244_FP3.fps Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=55
+#type=OpenBabel-FP3/1
+#software=OpenBabel/2.3.1
+#source=CID_2244.sdf
+#date=2012-05-15T16:59:15
+0400000c50b007 2244
diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244_FP4.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244_FP4.fps Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=307
+#type=OpenBabel-FP4/1
+#software=OpenBabel/2.3.1
+#source=CID_2244.sdf
+#date=2012-05-15T16:59:22
+010000000000000000009800000000004001000000000000000000000000000000000240402801 2244
diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244_maccs.fps
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_ob2fps/test-data/CID_2244_maccs.fps Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=166
+#type=OpenBabel-MACCS/2
+#software=OpenBabel/2.3.1
+#source=CID_2244.sdf
+#date=2012-05-15T17:00:39
+0000000000000000000000010000016480cca2d21e 2244
diff -r 000000000000 -r a8ac5250d59c chemfp_sdf2fps/sdf2fps.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chemfp_sdf2fps/sdf2fps.xml Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,93 @@
+
+ extract fingerprints from SDF file metadata
+
+
+ chemfp
+
+
+ sdf2fps --pubchem "${infile}" > "${outfile}"
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Read an SDF file, extract the fingerprints, and store them in an FPS file.
+TODO: currently it only works for PubChem
+
+-----
+
+**Example**
+ * input::
+
+ SDF File
+
+ 28434379
+ -OEChem-02031205132D
+
+ 37 39 0 0 0 0 0 0 0999 V2000
+ 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 7.3704 0.9433 0.0000 C 0 0 0 0
+ ......
+ 1 15 1 0 0 0 0
+ 1 35 1 0 0 0 0
+ 2 5 1 0 0 0 0
+ 2 11 1 0 0 0 0
+ 2 12 1 0 0 0 0
+ 3 12 2 0 0 0 0
+ 3 13 1 0 0 0 0
+ 4 18 1 0 0 0 0
+ ......
+
+ >PUBCHEM_COMPOUND_CID<
+ 28434379
+
+ > <PUBCHEM_COMPOUND_CANONICALIZED>
+ 1
+
+ > <PUBCHEM_CACTVS_COMPLEXITY>
+ 280
+
+ > <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
+ 2
+
+ > <PUBCHEM_CACTVS_HBOND_DONOR>
+ 2
+
+ > <PUBCHEM_CACTVS_ROTATABLE_BOND>
+ 2
+
+ > <PUBCHEM_CACTVS_SUBSKEYS>
+ AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
+
+ >
+
+* output::
+
+ #FPS1
+ #num_bits=881
+ #type=CACTVS-E_SCREEN/1.0 extended=2
+ #software=CACTVS/unknown
+ #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat
+ #date=2012-02-03T10:44:12
+ 07ce04000000000000000000000000000080060000000c0600
+ 00000000001a800f0000780008100000101487e9608c0bed32
+ 48000580644626204101b4844805901b041c2e19511e45039b
+ 8b2924101609401b13e4080000000000010020000004008000
+ 0010000002000000000000 28434379
+
+
+
+
diff -r 000000000000 -r a8ac5250d59c repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,5 @@
+
+
+
+
+
diff -r 000000000000 -r a8ac5250d59c tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Mar 26 13:05:41 2013 -0400
@@ -0,0 +1,20 @@
+
+
+
+
+ http://chem-fingerprints.googlecode.com/files/chemfp-1.1p1.tar.gz
+ $INSTALL_DIR/lib/python
+ export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python && python setup.py install --home $INSTALL_DIR --install-scripts $INSTALL_DIR/bin
+
+ $INSTALL_DIR/lib/python
+ $INSTALL_DIR/bin
+
+
+
+
+ The core chemfp functionality does not depend on a third-party library but you will need a chemistry toolkit in order to generate new fingerprints
+ from structure files. chemfp supports the free Open Babel and RDKit toolkits and the proprietary OEChem toolkit.
+ Currently the Galaxy wrappers use Open Babel as the underlying toolkit.
+ Compiling chemfp requires gcc and Python 2.5 or newer.
+
+