# HG changeset patch # User bgruening # Date 1364317541 14400 # Node ID a8ac5250d59c78567908b44b3dda736816fffb83 Uploaded diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/butina_clustering.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_clustering/butina_clustering.py Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,91 @@ +#!/usr/bin/env python +""" + Modified version of code examples from the chemfp project. + http://code.google.com/p/chem-fingerprints/ + Thanks to Andrew Dalke of Andrew Dalke Scientific! +""" + +import chemfp +import sys +import os + +chemfp_fingerprint_file = sys.argv[1] +tanimoto_threshold = float(sys.argv[2]) +outfile = sys.argv[3] +processors = int(sys.argv[4]) + + +def get_hit_indicies(hits): + return [id for (id, score) in hits] + +out = open(outfile, 'w') +dataset = chemfp.load_fingerprints( chemfp_fingerprint_file ) + +chemfp.set_num_threads( processors ) +search = dataset.threshold_tanimoto_search_arena(dataset, threshold = tanimoto_threshold) + +# Reorder so the centroid with the most hits comes first. +# (That's why I do a reverse search.) +# Ignore the arbitrariness of breaking ties by fingerprint index +results = sorted( ( (len(hits), i, hits) for (i, hits) in enumerate(search.iter_indices_and_scores()) ),reverse=True) + + +# Determine the true/false singletons and the clusters +true_singletons = [] +false_singletons = [] +clusters = [] + +seen = set() + +for (size, fp_idx, hits) in results: + if fp_idx in seen: + # Can't use a centroid which is already assigned + continue + seen.add(fp_idx) + + if size == 1: + # The only fingerprint in the exclusion sphere is itself + true_singletons.append(fp_idx) + continue + + members = get_hit_indicies(hits) + # Figure out which ones haven't yet been assigned + unassigned = [target_idx for target_idx in members if target_idx not in seen] + + if not unassigned: + false_singletons.append(fp_idx) + continue + + # this is a new cluster + clusters.append( (fp_idx, unassigned) ) + seen.update(unassigned) + +len_cluster = len(clusters) +#out.write( "#%s true singletons: %s\n" % ( len(true_singletons), " ".join(sorted(dataset.ids[idx] for idx in true_singletons)) ) ) +#out.write( "#%s false singletons: %s\n" % ( len(false_singletons), " ".join(sorted(dataset.ids[idx] for idx in false_singletons)) ) ) + +out.write( "#%s true singletons\n" % len(true_singletons) ) +out.write( "#%s false singletons\n" % len(false_singletons) ) +out.write( "#clusters: %s\n" % len_cluster ) + + +# Sort so the cluster with the most compounds comes first, +# then by alphabetically smallest id +def cluster_sort_key(cluster): + centroid_idx, members = cluster + return -len(members), dataset.ids[centroid_idx] + +clusters.sort(key=cluster_sort_key) + + +for centroid_idx, members in clusters: + centroid_name = dataset.ids[centroid_idx] + out.write("%s\t%s\t%s\n" % (centroid_name, len(members), " ".join(dataset.ids[idx] for idx in members))) + #ToDo: len(members) need to be some biggest top 90% or something ... + +for idx in true_singletons: + out.write("%s\t%s\n" % (dataset.ids[idx], 0)) + +out.close() + + diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/butina_clustering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_clustering/butina_clustering.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,65 @@ + + of molecular libraries + + chemfp + + + butina_clustering.py $infile $threshold $outfile 4 + + + + + + + + + + + + + + + + + + +**What it does** +Molecule library clustering using the Taylor-Butina algorithm. + +----- + +**Example** + +* input:: + + - fingerprints in FPS format + + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat + #date=2012-02-09T13:20:37 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e + 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e + 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807 + ........ + + - Tanimoto threshold : 0.8 (between 0 and 1) + +* output:: + + 0 true singletons + => + + 0 false singletons + => + + 1 clusters + 55091849 has 12 other members + => 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823 + + + + diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/nxn_clustering.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_clustering/nxn_clustering.py Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,51 @@ +#!/usr/bin/env python +""" + Modified version of code examples from the chemfp project. + http://code.google.com/p/chem-fingerprints/ + Thanks to Andrew Dalke of Andrew Dalke Scientific! +""" +import matplotlib +matplotlib.use('Agg') +import sys +import os +import chemfp +import scipy.cluster.hierarchy as hcluster +import pylab +import numpy + + +def distance_matrix(arena,t): + n = len(arena) + # The Tanimoto search computes all of the scores when threshold=0.0. + # The SearchResult contains sparse data, so I set all values + # now to 1.0 so you can experiment with higher thresholds. + distances = numpy.ones((n, n), numpy.float64) + + # Keep track of where the query subarena is in the query + query_row = 0 + + for query_arena in arena.iter_arenas(): + results = arena.threshold_tanimoto_search_arena(query_arena, threshold=t) + for q_i, hits in enumerate(results.iter_indices_and_scores()): + query_idx = query_row + q_i + for target_idx, score in hits: + distances[query_idx, target_idx] = 1.0 - score + query_row += len(query_arena) + + return distances + +dataset = chemfp.load_fingerprints( sys.argv[1] ) +distances = distance_matrix( dataset,float( sys.argv[2] ) ) +linkage = hcluster.linkage( distances, method="single", metric="euclidean" ) + +# Plot using matplotlib, which you must have installed +hcluster.dendrogram(linkage, labels=dataset.ids) + +pylab.savefig( sys.argv[3], format='svg' ) + + + + + + + diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/nxn_clustering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_clustering/nxn_clustering.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,61 @@ + + of molecular libraries + + numpy + chemfp + + + nxn_clustering.py $infile $threshold $outfile + + + + + + + + + + + + + + + + + + +**What it does** +Generating hierarchical clusters and visualizing clusters with dendrograms. + +----- + +**Example** + +* input:: + + - fingerprints in FPS format + + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=/home/mohammed/galaxy-central/database/files/000/dataset_423.dat + #date=2012-02-09T13:20:37 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e + 19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e + 19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807 + ........ + + - Tanimoto threshold : 0.8 (between 0 and 1) + +* output:: + + plot for the clustring + +.. image:: ./static/images/chemfpclustoutput.svg + + + + + diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/test-data/NxN_Clustering_on_q.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_clustering/test-data/NxN_Clustering_on_q.svg Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,793 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r a8ac5250d59c chemfp_clustering/test-data/Taylor-Butina_Clustering_on_data_q.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_clustering/test-data/Taylor-Butina_Clustering_on_data_q.txt Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,4 @@ +#0 true singletons +#0 false singletons +#clusters: 1 +55091849 12 6499094 6485578 55079807 3153534 55102353 55091466 55091416 6485577 55169009 55091752 55091467 55168823 diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/ob2fps.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/ob2fps.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,143 @@ + + with different fingerprint types + + + chemfp + + + ob2fps $fptype --in "${infile.ext}" "${infile}" -o "${outfile}" --errors report 2>&1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Generate fingerprints using OpenBabel + +----- + +**Example** + +* input:: + + - SDF File + + 28434379 + -OEChem-02031205132D + + 37 39 0 0 0 0 0 0 0999 V2000 + 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7.3704 0.9433 0.0000 C 0 0 0 0 + ...... + 1 15 1 0 0 0 0 + 1 35 1 0 0 0 0 + 2 5 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 12 1 0 0 0 0 + 3 12 2 0 0 0 0 + 3 13 1 0 0 0 0 + 4 18 1 0 0 0 0 + ...... + + >PUBCHEM_COMPOUND_CID< + 28434379 + + > <PUBCHEM_COMPOUND_CANONICALIZED> + 1 + + > <PUBCHEM_CACTVS_COMPLEXITY> + 280 + + > <PUBCHEM_CACTVS_HBOND_ACCEPTOR> + 2 + + > <PUBCHEM_CACTVS_HBOND_DONOR> + 2 + + > <PUBCHEM_CACTVS_ROTATABLE_BOND> + 2 + + > <PUBCHEM_CACTVS_SUBSKEYS> + AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== + + > + + - type : FP2 + +* output:: + + #FPS1 + #num_bits=1021 + #type=OpenBabel-FP2/1 + #software=OpenBabel/2.3.0 + #source=/tmp/dataset_409.dat.sdf + #date=2012-02-03T11:13:39 + c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c + 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300 + 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379 + + + + diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244.can --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244.can Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,1 @@ +CC(=O)Oc1ccccc1C(=O)O 2244 diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244.inchi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244.inchi Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,1 @@ +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244.sdf Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,155 @@ +2244 + -OEChem-05151212332D + + 21 21 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> +2244 + +> +1 + +> +212 + +> +4 + +> +1 + +> +3 + +> +AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> +2-acetoxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetoxybenzoic acid + +> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> +1.2 + +> +180.042259 + +> +C9H8O4 + +> +180.15742 + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +63.6 + +> +180.042259 + +> +0 + +> +13 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +1 +5 +255 + +> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ + diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244.smi Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,1 @@ +O(c1c(cccc1)C(=O)O)C(=O)C 2244 diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244_FP2.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244_FP2.fps Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:40:38 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244_FP3.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244_FP3.fps Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=55 +#type=OpenBabel-FP3/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:59:15 +0400000c50b007 2244 diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244_FP4.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244_FP4.fps Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=307 +#type=OpenBabel-FP4/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:59:22 +010000000000000000009800000000004001000000000000000000000000000000000240402801 2244 diff -r 000000000000 -r a8ac5250d59c chemfp_ob2fps/test-data/CID_2244_maccs.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_ob2fps/test-data/CID_2244_maccs.fps Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=166 +#type=OpenBabel-MACCS/2 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T17:00:39 +0000000000000000000000010000016480cca2d21e 2244 diff -r 000000000000 -r a8ac5250d59c chemfp_sdf2fps/sdf2fps.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chemfp_sdf2fps/sdf2fps.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,93 @@ + + extract fingerprints from sdf files metadata + + + chemfp + + + sdf2fps --pubchem "${infile}" > "${outfile}" + + + + + + + + + + + + +**What it does** + +Read a SDF file and extract the fingerprints, to stores them in a fps-file. +TODO: currently it only works for PubChem + +----- + +**Example** + * input:: + + SDF File + + 28434379 + -OEChem-02031205132D + + 37 39 0 0 0 0 0 0 0999 V2000 + 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7.3704 0.9433 0.0000 C 0 0 0 0 + ...... + 1 15 1 0 0 0 0 + 1 35 1 0 0 0 0 + 2 5 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 12 1 0 0 0 0 + 3 12 2 0 0 0 0 + 3 13 1 0 0 0 0 + 4 18 1 0 0 0 0 + ...... + + >PUBCHEM_COMPOUND_CID< + 28434379 + + > <PUBCHEM_COMPOUND_CANONICALIZED> + 1 + + > <PUBCHEM_CACTVS_COMPLEXITY> + 280 + + > <PUBCHEM_CACTVS_HBOND_ACCEPTOR> + 2 + + > <PUBCHEM_CACTVS_HBOND_DONOR> + 2 + + > <PUBCHEM_CACTVS_ROTATABLE_BOND> + 2 + + > <PUBCHEM_CACTVS_SUBSKEYS> + AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== + + > + +* output:: + + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=/home/mohammed/galaxy-central/database/files/000/dataset_409.dat + #date=2012-02-03T10:44:12 + 07ce04000000000000000000000000000080060000000c0600 + 00000000001a800f0000780008100000101487e9608c0bed32 + 48000580644626204101b4844805901b041c2e19511e45039b + 8b2924101609401b13e4080000000000010020000004008000 + 0010000002000000000000 28434379 + + + + diff -r 000000000000 -r a8ac5250d59c repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,5 @@ + + + + + diff -r 000000000000 -r a8ac5250d59c tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Mar 26 13:05:41 2013 -0400 @@ -0,0 +1,20 @@ + + + + + http://chem-fingerprints.googlecode.com/files/chemfp-1.1p1.tar.gz + $INSTALL_DIR/lib/python + export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python && python setup.py install --home $INSTALL_DIR --install-scripts $INSTALL_DIR/bin + + $INSTALL_DIR/lib/python + $INSTALL_DIR/bin + + + + + The core chemfp functionality does not depend on a third-party library but you will need a chemistry toolkit in order to generate new fingerprints + from structure files. chemfp supports the free Open Babel and RDKit toolkits and the proprietary OEChem toolkit. + Currently the Galaxy-wrappers are using openbabel as underlying toolkit. + Compiling chemfp requires gcc and a python2.5+ version. + +