changeset 38:02e03ac072cf draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit ed9b6859de648aa5f7cde483732f5df20aaff90e
author bgruening
date Tue, 07 May 2019 13:19:01 -0400
parents ea7557bd4635
children 28c487eb8399
files butina_clustering.xml mol2fps.xml nxn_clustering.py nxn_clustering.xml sdf2fps.xml test-data/CID_2244_FP2.fps test-data/CID_2244_FP3.fps test-data/CID_2244_FP4.fps test-data/CID_2244_MACCS.fps test-data/CID_2244_torsions.fps test-data/NxN_Clustering_on_q.svg test-data/NxN_Clustering_on_q.txt
diffstat 12 files changed, 171 insertions(+), 126 deletions(-) [+]
line wrap: on
line diff
--- a/butina_clustering.xml	Fri Mar 23 03:26:12 2018 -0400
+++ b/butina_clustering.xml	Tue May 07 13:19:01 2019 -0400
@@ -1,7 +1,7 @@
-<tool id="ctb_chemfp_butina_clustering" name="Taylor-Butina Clustering" version="0.2">
+<tool id="ctb_chemfp_butina_clustering" name="Taylor-Butina clustering" version="1.5">
     <description>of molecular fingerprints</description>
     <requirements>
-        <requirement type="package" version="1.1p1">chemfp</requirement>
+        <requirement type="package" version="1.5">chemfp</requirement>
         <requirement type="package" version="2.4.1">openbabel</requirement>
     </requirements>
     <command detect_errors="exit_code">
@@ -14,8 +14,8 @@
 ]]>
     </command>
     <inputs>
-        <param name="infile" type="data" format="fps" label="Finperprint dataset" help="Dataset missing? See TIP below"/>
-        <param name='threshold' type='float' value='0.8'/>
+        <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/>
+        <param name='threshold' type='float' label='Threshold' value='0.8'/>
     </inputs>
     <outputs>
         <data format="tabular" name="outfile"/>
@@ -35,7 +35,7 @@
 
 **What this tool does**
 
-Unsupervised non-hierarchical clustering method based on the Taylor-Butina algorithm, which guarantees that every cluster contains molecules which are within a distance cutoff of the central molecule. This tool is based on the chemfp_ project.
+Unsupervised non-hierarchical clustering of molecular fingerprints, based on the Taylor-Butina algorithm, which guarantees that every cluster contains molecules which are within a distance cutoff of the central molecule. This tool is based on the chemfp_ project.
 
 .. _chemfp: http://chemfp.com/
 
@@ -89,5 +89,6 @@
  </help>
     <citations>
         <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>
+        <citation type="doi">10.1021/ci9803381</citation>
     </citations>
 </tool>
--- a/mol2fps.xml	Fri Mar 23 03:26:12 2018 -0400
+++ b/mol2fps.xml	Tue May 07 13:19:01 2019 -0400
@@ -1,21 +1,20 @@
-<tool id="ctb_chemfp_mol2fps" name="Molecules to Fingerprints" version="0.3.0">
-    <description>with different fingerprint types</description>
+<tool id="ctb_chemfp_mol2fps" name="Molecule to fingerprint" version="1.5">
+    <description>conversion to several different fingerprint formats</description>
     <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" merge_outputs="outfile"></parallelism-->
     <requirements>
-        <requirement type="package" version="1.1p1">chemfp</requirement>
-        <requirement type="package" version="2016.03.3">rdkit</requirement>
+        <requirement type="package" version="1.5">chemfp</requirement>
+        <requirement type="package" version="2018.09.3">rdkit</requirement>
         <requirement type="package" version="2.4.1">openbabel</requirement>
     </requirements>
     <command>
 <![CDATA[
-        #set $fptype = $fp_opts.fp_opts_selector
 
-        #if $fptype in ['--FP2', '--FP3', '--FP4', '--MACCS']:
+        #if $fp_opts.fp_opts_selector in ['--FP2', '--FP3', '--FP4', '--MACCS']:
             ## Open Babel fingerprints
-            ob2fps $fptype --in '${infile.ext}' '${infile}' -o '${outfile}'
+            ob2fps $fp_opts.fp_opts_selector --in '${infile.ext}' '${infile}' -o '${outfile}' --out 'fps'
         #else:
             ## RDKit fingerprints
-            rdkit2fps --in '${infile.ext}' '${infile}' -o '${outfile}'
+            rdkit2fps --in '${infile.ext}' '${infile}' -o '${outfile}' --out 'fps'
             #if $fp_opts.fp_opts_selector == "--RDK":
                 --RDK
                 --fpSize $fp_opts.fpSize
@@ -35,7 +34,7 @@
                 $fp_opts.useChirality
                 $fp_opts.useBondTypes
             #elif $fp_opts.fp_opts_selector == "--pairs":
-                --paris
+                --pairs
                 --fpSize $fp_opts.fpSize
                 --minLength $fp_opts.minLength
                 --maxLength $fp_opts.maxLength
@@ -49,7 +48,7 @@
 ]]>
     </command>
     <inputs>
-        <param name="infile" type='data' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>
+        <param name="infile" type='data' format="sdf,smi,mol,mol2,cml,inchi" label="Molecule file"/>
         <conditional name="fp_opts">
             <param name="fp_opts_selector" type="select" label="Type of fingerprint">
                 <option value='--FP2' selected="True">Open Babel FP2 fingerprints</option>
@@ -68,51 +67,51 @@
             <when value="--FP4" />
             <when value="--MACCS" />
             <when value="--RDK">
-                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                <param name="fpSize" type="integer" value="2048" label="Number of bits in the fingerprint" help="">
                     <validator type="in_range" min="1" />
                 </param>
-                <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">
+                <param name="minPath" type="integer" value="1" label="Minimum number of bonds to include in the subgraph" help="">
                     <validator type="in_range" min="1" />
                 </param>
-                <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help="">
+                <param name="maxPath" type="integer" value="7" label="Maximum number of bonds to include in the subgraph" help="">
                     <validator type="in_range" min="1" />
                 </param>
-                <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help="">
+                <param name="nBitsPerHash" type="integer" value="4" label="Number of bits to set per path" help="">
                     <validator type="in_range" min="1" />
                 </param>
                 <param name="useHs" type="boolean" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true" 
-                    label="include information about the number of hydrogens on each atom" />
+                    label="Include information about the number of hydrogens on each atom" />
             </when>
             <when value="--torsions">
-                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                <param name="fpSize" type="integer" value="2048" label="Number of bits in the fingerprint" help="">
                     <validator type="in_range" min="1" />
                 </param>
-                <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help="">
+                <param name="targetSize" type="integer" value="4" label="Number of target bits in the fingerprint" help="">
                     <validator type="in_range" min="1" />
                 </param>
             </when>
             <when value="--morgan">
-                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                <param name="fpSize" type="integer" value="2048" label="Number of bits in the fingerprint" help="">
                     <validator type="in_range" min="1" />
                 </param>
-                <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help="">
+                <param name="radius" type="integer" value="2" label="Radius for the Morgan algorithm" help="">
                     <validator type="in_range" min="1" />
                 </param>
                 <param name="useFeatures" type="boolean" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false"
-                    label="include information about the number of hydrogens on each atom" />
+                    label="Use chemical-feature invariants" />
                 <param name="useChirality" type="boolean" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false"
-                    label="include information about the number of hydrogens on each atom" />
+                    label="Include chirality information" />
                 <param name="useBondTypes" type="boolean" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true"
-                    label="include information about the number of hydrogens on each atom" />
+                    label="Include bond type information" />
             </when>
             <when value="--pairs">
-                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                <param name="fpSize" type="integer" value="2048" label="Number of bits in the fingerprint" help="">
                     <validator type="in_range" min="1" />
                 </param>
-                <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help="">
+                <param name="minLength" type="integer" value="1" label="Minimum bond count for a pair" help="">
                     <validator type="in_range" min="1" />
                 </param>
-                <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help="">
+                <param name="maxLength" type="integer" value="30" label="Maximum bond count for a pair" help="">
                     <validator type="in_range" min="1" />
                 </param>
             </when>
@@ -128,47 +127,73 @@
         <!-- FP2 -->
         <test>
             <param name="infile" value="CID_2244.sdf" ftype="sdf" />
-            <param name="fp_opts.fp_opts_selector" value="--FP2" />
+            <conditional name="fp_opts">
+                <param name="fp_opts_selector" value="--FP2" />
+            </conditional>
             <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" lines_diff="4"/>
         </test>
         <test>
             <param name="infile" value="CID_2244.smi" ftype="smi" />
-            <param name="fp_opts.fp_opts_selector" value="--FP2" />
+            <conditional name="fp_opts">
+                <param name="fp_opts_selector" value="--FP2" />
+            </conditional>
             <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" lines_diff="4"/>
         </test>
         <!-- FP3 -->
         <test>
             <param name="infile" value="CID_2244.sdf" ftype="sdf" />
-            <param name="fp_opts.fp_opts_selector" value="--FP3" />
+            <conditional name="fp_opts">
+                <param name="fp_opts_selector" value="--FP3" />
+            </conditional>
             <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" lines_diff="4"/>
         </test>
         <test>
             <param name="infile" value="CID_2244.smi" ftype="smi" />
-            <param name="fp_opts.fp_opts_selector" value="--FP3" />
+            <conditional name="fp_opts">
+                <param name="fp_opts_selector" value="--FP3" />
+            </conditional>
             <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" lines_diff="4"/>
         </test>
         <!-- FP4 -->
         <test>
             <param name="infile" value="CID_2244.sdf" ftype="sdf" />
-            <param name="fp_opts.fp_opts_selector" value="--FP4" />
+            <conditional name="fp_opts">
+                <param name="fp_opts_selector" value="--FP4" />
+            </conditional>
             <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>
         </test>
         <test>
             <param name="infile" value="CID_2244.smi" ftype="smi" />
-            <param name="fp_opts.fp_opts_selector" value="--FP4" />
+            <conditional name="fp_opts">
+                <param name="fp_opts_selector" value="--FP4" />
+            </conditional>
             <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>
         </test>
         <!-- MACCS -->
         <test>
             <param name="infile" value="CID_2244.sdf" ftype="sdf" />
-            <param name="fp_opts.fp_opts_selector" value="--MACCS" />
+            <conditional name="fp_opts">
+                <param name="fp_opts_selector" value="--MACCS" />
+            </conditional>
             <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>
         </test>
         <test>
             <param name="infile" value="CID_2244.smi" ftype="smi" />
-            <param name="fp_opts.fp_opts_selector" value="--MACCS" />
+            <conditional name="fp_opts">
+                <param name="fp_opts_selector" value="--MACCS" />
+            </conditional>
             <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>
         </test>
+        <!-- RDKit2fps -->
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <conditional name="fp_opts">
+                <param name="fp_opts_selector" value="--torsions" />
+            </conditional>
+            <param name="fp_opts.fpSize" value="2048" />
+            <param name="fp_opts.targetSize" value="4" />
+            <output name="outfile" file="CID_2244_torsions.fps" ftype="fps" lines_diff="4"/>
+        </test>
     </tests>
     <help>
 <![CDATA[
@@ -177,7 +202,7 @@
 
 **What this tool does**
 
-This tool uses chemfp_ to calculate 10 different fingerprints of common file formats. Chemfp uses `Open Babel`_, OpenEye_ and RDKit_.
+This tool uses chemfp to calculate molecular fingerprints, supporting a number of common file formats. Chemfp uses OpenBabel, OpenEye and RDKit.
 
 For more information check the websites listed below::
 
@@ -194,7 +219,7 @@
 
 * Example::
 
-	      - SDF File
+	      - SD-File
 
 		28434379
 		  -OEChem-02031205132D
--- a/nxn_clustering.py	Fri Mar 23 03:26:12 2018 -0400
+++ b/nxn_clustering.py	Tue May 07 13:19:01 2019 -0400
@@ -69,12 +69,10 @@
     distances  = distance_matrix( arena, args.tanimoto_threshold )
 
     if args.similarity_matrix:
-        distances.tofile( args.similarity_matrix )
+        numpy.savetxt(args.similarity_matrix, distances)
 
     if args.cluster_image:
-        linkage = hcluster.linkage( distances, method="single", metric="euclidean" )
+        linkage = hcluster.linkage(distances, method="single", metric="euclidean")
+        hcluster.dendrogram(linkage, labels=arena.ids, leaf_rotation=90.)
+        pylab.savefig(args.cluster_image, format=args.oformat)
 
-        hcluster.dendrogram(linkage, labels=arena.ids)
-
-        pylab.savefig( args.cluster_image, format=args.oformat )
-
--- a/nxn_clustering.xml	Fri Mar 23 03:26:12 2018 -0400
+++ b/nxn_clustering.xml	Tue May 07 13:19:01 2019 -0400
@@ -1,10 +1,9 @@
-<tool id="ctb_chemfp_nxn_clustering" name="NxN Clustering" version="0.4">
+<tool id="ctb_chemfp_nxn_clustering" name="NxN clustering" version="1.5">
     <description>of molecular fingerprints</description>
     <requirements>
-        <requirement type="package" version="1.1p1">chemfp</requirement>
-        <requirement type="package" version="2">python</requirement>
-        <requirement type="package" version="2.0.2">matplotlib</requirement>
-        <requirement type="package" version="0.19.0">scipy</requirement>
+        <requirement type="package" version="1.5">chemfp</requirement>
+        <requirement type="package" version="2.2.3">matplotlib</requirement>
+        <requirement type="package" version="1.2.1">scipy</requirement>
         <requirement type="package" version="2.4.1">openbabel</requirement>
     </requirements>
     <command detect_errors="exit_code">
@@ -16,43 +15,44 @@
                 --cluster '$image'
             #end if
             #if str($output_files) in ['both', 'matrix']:
-                --smatrix '$smilarity_matrix'
+                --smatrix '$similarity_matrix'
             #end if
             --oformat '$oformat'
 ]]>
     </command>
     <inputs>
-        <param name="infile" type="data" format="fps" label="Finperprint dataset" help="Dataset missing? See TIP below"/>
-        <param name='threshold' type='float' value='0.0' />
+        <param name="infile" type="data" format="fps" label="Fingerprint dataset" help="Dataset missing? See TIP below"/>
+        <param name='threshold' label='Threshold' type='float' value='0.0' />
         <param name='oformat' type='select' format='text' label="Format of the resulting picture">
             <option value='png'>PNG</option>
             <option value='svg'>SVG</option>
         </param>
         <param name='output_files' type='select' format='text' label="Output options">
-            <option value='both'>NxN matrix and Image</option>
+            <option value='both'>NxN matrix and image</option>
             <option value='image'>Image</option>
-            <option value='matrix'>NxN Matrix</option>
+            <option value='matrix'>NxN matrix</option>
         </param>
 
     </inputs>
     <outputs>
-        <data name="image" format="svg" label="${tool.name} on ${on_string} - Cluster Image">
+        <data name="image" format="svg" label="${tool.name} on ${on_string} - Dendrogram Image">
             <filter>output_files == "both" or output_files == "image"</filter>
             <change_format>
                 <when input="oformat" value="png" format="png"/>
             </change_format>
         </data>
-        <data name="smilarity_matrix" format="binary" label="${tool.name} on ${on_string} - Similarity Matrix">
+        <data name="similarity_matrix" format="txt" label="${tool.name} on ${on_string} - Similarity Matrix">
             <filter>output_files == "both" or output_files == "matrix"</filter>
         </data>
     </outputs>
     <tests>
         <test>
             <param name="infile" ftype="fps" value="targets.fps" />
-            <param name='treshold' value='0.75' />
+            <param name='threshold' value='0.75' />
             <param name='oformat' value='svg' />
-            <param name='output_files' value='image' />
-            <output name="image" file='NxN_Clustering_on_q.svg' ftype="svg" compare="sim_size"/>
+            <param name='output_files' value='both' />
+            <output name="image" file='NxN_Clustering_on_q.svg' ftype="svg"/>
+            <output name="similarity_matrix" file='NxN_Clustering_on_q.txt' ftype="txt"/>
         </test>
     </tests>
     <help>
@@ -62,8 +62,7 @@
 
 **What this tool does**
 
-Generating hierarchical clusters and visualizing clusters with dendrograms.
-For the clustering and the fingerprint handling the chemfp_ project is used.
+Based on a set of fingerprints, generates a square self-similarity (NxN) matrix, as well as a dendrogram visualizing the clusters derived from it using hierarchical clustering. For the clustering and the fingerprint handling the chemfp_ project is used.
 
 .. _chemfp: http://chemfp.com/
 
@@ -73,7 +72,7 @@
 
 **Hint**
 
-The plotting of the cluster image is sensible only with a small dataset.
+The plotting of the dendrogram is sensible only with a small dataset - if more than around 20 fingerprints are used the plot will be unreadable.
 
 -----
 
--- a/sdf2fps.xml	Fri Mar 23 03:26:12 2018 -0400
+++ b/sdf2fps.xml	Tue May 07 13:19:01 2019 -0400
@@ -1,8 +1,8 @@
-<tool id="ctb_sdf2fps" name="SDF to Fingerprint" version="0.2">
-    <description>extract fingerprints from sdf files metadata</description>
+<tool id="ctb_sdf2fps" name="SDF to Fingerprint" version="1.5">
+    <description>- extract fingerprints from sdf file metadata</description>
     <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism-->
     <requirements>
-        <requirement type="package" version="1.1p1">chemfp</requirement>
+        <requirement type="package" version="1.5">chemfp</requirement>
         <requirement type="package" version="2.4.1">openbabel</requirement>
     </requirements>
     <command>
@@ -11,7 +11,7 @@
 ]]>
     </command>
     <inputs>
-        <param name="infile" type='data' format="sdf" label="SDF file with fingerprints as metadata"/>
+        <param name="infile" type='data' format="sdf" label="SD-file with fingerprints as metadata"/>
     </inputs>
     <outputs>
         <data name="outfile" format="fps"/>
@@ -37,7 +37,7 @@
 
 **Input**
 
-`SD-Format`_
+`SD-Format`_, storing the atom types, together with the Cartesian coordinates.
 
 .. _`SD-Format`: http://en.wikipedia.org/wiki/Chemical_table_file
 
@@ -93,6 +93,8 @@
 
 **Output**
 
+After the first few lines, starting with a hash symbol, which contain generic information, the fingerprints are listed as hexadecimal strings.
+
 * Example::
 
 	#FPS1
--- a/test-data/CID_2244_FP2.fps	Fri Mar 23 03:26:12 2018 -0400
+++ b/test-data/CID_2244_FP2.fps	Tue May 07 13:19:01 2019 -0400
@@ -1,7 +1,7 @@
 #FPS1
 #num_bits=1021
 #type=OpenBabel-FP2/1
-#software=OpenBabel/2.4.1
-#source=/tmp/tmptaAke4/files/000/dataset_3.dat
-#date=2017-05-19T13:52:59
+#software=OpenBabel/2.4.1 chemfp/1.5
+#source=CID_2244.sdf
+#date=2019-05-03T12:39:13
 00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004	2244
--- a/test-data/CID_2244_FP3.fps	Fri Mar 23 03:26:12 2018 -0400
+++ b/test-data/CID_2244_FP3.fps	Tue May 07 13:19:01 2019 -0400
@@ -1,7 +1,7 @@
 #FPS1
-#num_bits=1021
-#type=OpenBabel-FP2/1
-#software=OpenBabel/2.4.1
-#source=/tmp/tmptaAke4/files/000/dataset_7.dat
-#date=2017-05-19T13:53:45
-00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004	2244
+#num_bits=55
+#type=OpenBabel-FP3/1
+#software=OpenBabel/2.4.1 chemfp/1.5
+#source=CID_2244.sdf
+#date=2019-05-03T12:39:21
+0400000c50b007	2244
--- a/test-data/CID_2244_FP4.fps	Fri Mar 23 03:26:12 2018 -0400
+++ b/test-data/CID_2244_FP4.fps	Tue May 07 13:19:01 2019 -0400
@@ -1,7 +1,7 @@
 #FPS1
-#num_bits=1021
-#type=OpenBabel-FP2/1
-#software=OpenBabel/2.4.1
-#source=/tmp/tmptaAke4/files/000/dataset_11.dat
-#date=2017-05-19T13:54:39
-00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004	2244
+#num_bits=307
+#type=OpenBabel-FP4/1
+#software=OpenBabel/2.4.1 chemfp/1.5
+#source=CID_2244.sdf
+#date=2019-05-03T12:39:27
+010000000000000000009800000000004001000000000000000000000000000000000240402801	2244
--- a/test-data/CID_2244_MACCS.fps	Fri Mar 23 03:26:12 2018 -0400
+++ b/test-data/CID_2244_MACCS.fps	Tue May 07 13:19:01 2019 -0400
@@ -1,7 +1,7 @@
 #FPS1
-#num_bits=1021
-#type=OpenBabel-FP2/1
-#software=OpenBabel/2.4.1
-#source=/tmp/tmptaAke4/files/000/dataset_15.dat
-#date=2017-05-19T13:55:30
-00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004	2244
+#num_bits=166
+#type=OpenBabel-MACCS/2
+#software=OpenBabel/2.4.1 chemfp/1.5
+#source=CID_2244.sdf
+#date=2019-05-03T12:39:49
+0000000000000000000000010000016480cca2d21e	2244
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CID_2244_torsions.fps	Tue May 07 13:19:01 2019 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=2048
+#type=RDKit-Torsion/2 fpSize=2048 targetSize=4
+#software=RDKit/2018.09.3 chemfp/1.5
+#source=test-data/CID_2244.smi
+#date=2019-04-25T15:11:02
+00100010000003000000000000000000000000000000000000000000000000000000000000000000000000000000000300000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000100000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000010000001000000000000110000000000000000000000000000000000001001000000000000000000001000000000000000000000	2244
--- a/test-data/NxN_Clustering_on_q.svg	Fri Mar 23 03:26:12 2018 -0400
+++ b/test-data/NxN_Clustering_on_q.svg	Tue May 07 13:19:01 2019 -0400
@@ -2,7 +2,7 @@
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
   "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 <!-- Created with matplotlib (http://matplotlib.org/) -->
-<svg height="345pt" version="1.1" viewBox="0 0 460 345" width="460pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg height="345.6pt" version="1.1" viewBox="0 0 460.8 345.6" width="460.8pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
  <defs>
   <style type="text/css">
 *{stroke-linecap:butt;stroke-linejoin:round;}
@@ -155,7 +155,7 @@
 z
 " id="DejaVuSans-38"/>
       </defs>
-      <g transform="translate(40.795385 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(74.646635 375.664)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-35"/>
        <use x="63.623047" xlink:href="#DejaVuSans-35"/>
        <use x="127.246094" xlink:href="#DejaVuSans-30"/>
@@ -209,7 +209,7 @@
 z
 " id="DejaVuSans-32"/>
       </defs>
-      <g transform="translate(68.266154 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(102.117404 375.664)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-35"/>
        <use x="63.623047" xlink:href="#DejaVuSans-35"/>
        <use x="127.246094" xlink:href="#DejaVuSans-30"/>
@@ -288,7 +288,7 @@
 z
 " id="DejaVuSans-33"/>
       </defs>
-      <g transform="translate(95.736923 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(129.588173 375.664)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-35"/>
        <use x="63.623047" xlink:href="#DejaVuSans-35"/>
        <use x="127.246094" xlink:href="#DejaVuSans-31"/>
@@ -303,7 +303,7 @@
     <g id="xtick_4">
      <g id="text_4">
       <!-- 55169009 -->
-      <g transform="translate(123.207692 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(157.058942 375.664)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-35"/>
        <use x="63.623047" xlink:href="#DejaVuSans-35"/>
        <use x="127.246094" xlink:href="#DejaVuSans-31"/>
@@ -337,7 +337,7 @@
 z
 " id="DejaVuSans-34"/>
       </defs>
-      <g transform="translate(150.678462 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(184.529712 375.664)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-35"/>
        <use x="63.623047" xlink:href="#DejaVuSans-35"/>
        <use x="127.246094" xlink:href="#DejaVuSans-30"/>
@@ -352,7 +352,7 @@
     <g id="xtick_6">
      <g id="text_6">
       <!-- 55102353 -->
-      <g transform="translate(178.149231 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(212.000481 375.664)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-35"/>
        <use x="63.623047" xlink:href="#DejaVuSans-35"/>
        <use x="127.246094" xlink:href="#DejaVuSans-31"/>
@@ -367,7 +367,7 @@
     <g id="xtick_7">
      <g id="text_7">
       <!-- 55091466 -->
-      <g transform="translate(205.62 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(239.47125 375.664)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-35"/>
        <use x="63.623047" xlink:href="#DejaVuSans-35"/>
        <use x="127.246094" xlink:href="#DejaVuSans-30"/>
@@ -382,7 +382,7 @@
     <g id="xtick_8">
      <g id="text_8">
       <!-- 6499094 -->
-      <g transform="translate(236.908269 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(266.942019 368.029)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-36"/>
        <use x="63.623047" xlink:href="#DejaVuSans-34"/>
        <use x="127.246094" xlink:href="#DejaVuSans-39"/>
@@ -396,7 +396,7 @@
     <g id="xtick_9">
      <g id="text_9">
       <!-- 6485578 -->
-      <g transform="translate(264.379038 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(294.412788 368.029)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-36"/>
        <use x="63.623047" xlink:href="#DejaVuSans-34"/>
        <use x="127.246094" xlink:href="#DejaVuSans-38"/>
@@ -410,7 +410,7 @@
     <g id="xtick_10">
      <g id="text_10">
       <!-- 55091467 -->
-      <g transform="translate(288.032308 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(321.883558 375.664)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-35"/>
        <use x="63.623047" xlink:href="#DejaVuSans-35"/>
        <use x="127.246094" xlink:href="#DejaVuSans-30"/>
@@ -425,7 +425,7 @@
     <g id="xtick_11">
      <g id="text_11">
       <!-- 55091849 -->
-      <g transform="translate(315.503077 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(349.354327 375.664)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-35"/>
        <use x="63.623047" xlink:href="#DejaVuSans-35"/>
        <use x="127.246094" xlink:href="#DejaVuSans-30"/>
@@ -440,7 +440,7 @@
     <g id="xtick_12">
      <g id="text_12">
       <!-- 3153534 -->
-      <g transform="translate(346.791346 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(376.825096 368.029)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-33"/>
        <use x="63.623047" xlink:href="#DejaVuSans-31"/>
        <use x="127.246094" xlink:href="#DejaVuSans-35"/>
@@ -454,7 +454,7 @@
     <g id="xtick_13">
      <g id="text_13">
       <!-- 6485577 -->
-      <g transform="translate(374.262115 323.702125)scale(0.12 -0.12)">
+      <g transform="translate(404.295865 368.029)rotate(-90)scale(0.12 -0.12)">
        <use xlink:href="#DejaVuSans-36"/>
        <use x="63.623047" xlink:href="#DejaVuSans-34"/>
        <use x="127.246094" xlink:href="#DejaVuSans-38"/>
@@ -472,10 +472,10 @@
       <defs>
        <path d="M 0 0 
 L -3.5 0 
-" id="me28c3a741e" style="stroke:#000000;stroke-width:0.8;"/>
+" id="mccd7e1f0e4" style="stroke:#000000;stroke-width:0.8;"/>
       </defs>
       <g>
-       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="307.584"/>
+       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mccd7e1f0e4" y="307.584"/>
       </g>
      </g>
      <g id="text_14">
@@ -499,7 +499,7 @@
     <g id="ytick_2">
      <g id="line2d_2">
       <g>
-       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="271.259588"/>
+       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mccd7e1f0e4" y="271.259588"/>
       </g>
      </g>
      <g id="text_15">
@@ -515,7 +515,7 @@
     <g id="ytick_3">
      <g id="line2d_3">
       <g>
-       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="234.935175"/>
+       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mccd7e1f0e4" y="234.935175"/>
       </g>
      </g>
      <g id="text_16">
@@ -531,7 +531,7 @@
     <g id="ytick_4">
      <g id="line2d_4">
       <g>
-       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="198.610763"/>
+       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mccd7e1f0e4" y="198.610763"/>
       </g>
      </g>
      <g id="text_17">
@@ -547,7 +547,7 @@
     <g id="ytick_5">
      <g id="line2d_5">
       <g>
-       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="162.28635"/>
+       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mccd7e1f0e4" y="162.28635"/>
       </g>
      </g>
      <g id="text_18">
@@ -563,7 +563,7 @@
     <g id="ytick_6">
      <g id="line2d_6">
       <g>
-       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="125.961938"/>
+       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mccd7e1f0e4" y="125.961938"/>
       </g>
      </g>
      <g id="text_19">
@@ -579,7 +579,7 @@
     <g id="ytick_7">
      <g id="line2d_7">
       <g>
-       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="89.637526"/>
+       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mccd7e1f0e4" y="89.637526"/>
       </g>
      </g>
      <g id="text_20">
@@ -595,7 +595,7 @@
     <g id="ytick_8">
      <g id="line2d_8">
       <g>
-       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#me28c3a741e" y="53.313113"/>
+       <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mccd7e1f0e4" y="53.313113"/>
       </g>
      </g>
      <g id="text_21">
@@ -610,68 +610,68 @@
     </g>
    </g>
    <g id="LineCollection_1">
-    <path clip-path="url(#p7a554818f3)" d="M 98.806154 307.584 
+    <path clip-path="url(#p31abe51ef7)" d="M 98.806154 307.584 
 L 98.806154 160.244138 
 L 126.276923 160.244138 
 L 126.276923 307.584 
 " style="fill:none;stroke:#008000;stroke-width:1.5;"/>
    </g>
    <g id="LineCollection_2">
-    <path clip-path="url(#p7a554818f3)" d="M 208.689231 307.584 
+    <path clip-path="url(#p31abe51ef7)" d="M 208.689231 307.584 
 L 208.689231 307.584 
 L 236.16 307.584 
 L 236.16 307.584 
 " style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>
-    <path clip-path="url(#p7a554818f3)" d="M 181.218462 307.584 
+    <path clip-path="url(#p31abe51ef7)" d="M 181.218462 307.584 
 L 181.218462 307.584 
 L 222.424615 307.584 
 L 222.424615 307.584 
 " style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>
-    <path clip-path="url(#p7a554818f3)" d="M 263.630769 307.584 
+    <path clip-path="url(#p31abe51ef7)" d="M 263.630769 307.584 
 L 263.630769 224.047744 
 L 291.101538 224.047744 
 L 291.101538 307.584 
 " style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>
-    <path clip-path="url(#p7a554818f3)" d="M 201.821538 307.584 
+    <path clip-path="url(#p31abe51ef7)" d="M 201.821538 307.584 
 L 201.821538 202.211048 
 L 277.366154 202.211048 
 L 277.366154 224.047744 
 " style="fill:none;stroke:#ff0000;stroke-width:1.5;"/>
    </g>
    <g id="LineCollection_3">
-    <path clip-path="url(#p7a554818f3)" d="M 318.572308 307.584 
+    <path clip-path="url(#p31abe51ef7)" d="M 318.572308 307.584 
 L 318.572308 227.498079 
 L 346.043077 227.498079 
 L 346.043077 307.584 
 " style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>
-    <path clip-path="url(#p7a554818f3)" d="M 373.513846 307.584 
+    <path clip-path="url(#p31abe51ef7)" d="M 373.513846 307.584 
 L 373.513846 225.958341 
 L 400.984615 225.958341 
 L 400.984615 307.584 
 " style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>
-    <path clip-path="url(#p7a554818f3)" d="M 332.307692 227.498079 
+    <path clip-path="url(#p31abe51ef7)" d="M 332.307692 227.498079 
 L 332.307692 178.92987 
 L 387.249231 178.92987 
 L 387.249231 225.958341 
 " style="fill:none;stroke:#00bfbf;stroke-width:1.5;"/>
    </g>
    <g id="LineCollection_4">
-    <path clip-path="url(#p7a554818f3)" d="M 239.593846 202.211048 
+    <path clip-path="url(#p31abe51ef7)" d="M 239.593846 202.211048 
 L 239.593846 126.040908 
 L 359.778462 126.040908 
 L 359.778462 178.92987 
 " style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>
-    <path clip-path="url(#p7a554818f3)" d="M 153.747692 307.584 
+    <path clip-path="url(#p31abe51ef7)" d="M 153.747692 307.584 
 L 153.747692 98.265487 
 L 299.686154 98.265487 
 L 299.686154 126.040908 
 " style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>
-    <path clip-path="url(#p7a554818f3)" d="M 112.541538 160.244138 
+    <path clip-path="url(#p31abe51ef7)" d="M 112.541538 160.244138 
 L 112.541538 92.745033 
 L 226.716923 92.745033 
 L 226.716923 98.265487 
 " style="fill:none;stroke:#0000ff;stroke-width:1.5;"/>
-    <path clip-path="url(#p7a554818f3)" d="M 71.335385 307.584 
+    <path clip-path="url(#p31abe51ef7)" d="M 71.335385 307.584 
 L 71.335385 54.144 
 L 169.629231 54.144 
 L 169.629231 92.745033 
@@ -700,7 +700,7 @@
   </g>
  </g>
  <defs>
-  <clipPath id="p7a554818f3">
+  <clipPath id="p31abe51ef7">
    <rect height="266.112" width="357.12" x="57.6" y="41.472"/>
   </clipPath>
  </defs>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NxN_Clustering_on_q.txt	Tue May 07 13:19:01 2019 -0400
@@ -0,0 +1,13 @@
+0.000000000000000000e+00 6.578947368421017572e-03 4.487179487179482340e-02 1.948051948051943150e-02 1.948051948051943150e-02 1.948051948051943150e-02 1.948051948051943150e-02 2.580645161290318068e-02 3.205128205128204844e-02 3.205128205128204844e-02 3.821656050955413164e-02 4.430379746835444443e-02 4.430379746835444443e-02
+6.578947368421017572e-03 0.000000000000000000e+00 5.095541401273884219e-02 2.580645161290318068e-02 1.298701298701299134e-02 1.298701298701299134e-02 1.298701298701299134e-02 3.205128205128204844e-02 3.821656050955413164e-02 2.564102564102566095e-02 3.184713375796177637e-02 3.797468354430377779e-02 3.797468354430377779e-02
+4.487179487179482340e-02 5.095541401273884219e-02 0.000000000000000000e+00 3.821656050955413164e-02 5.063291139240511107e-02 5.063291139240511107e-02 5.063291139240511107e-02 4.430379746835444443e-02 2.547770700636942109e-02 6.250000000000000000e-02 5.625000000000002220e-02 6.211180124223603105e-02 6.211180124223603105e-02
+1.948051948051943150e-02 2.580645161290318068e-02 3.821656050955413164e-02 0.000000000000000000e+00 2.564102564102566095e-02 2.564102564102566095e-02 2.564102564102566095e-02 6.451612903225822926e-03 2.547770700636942109e-02 3.797468354430377779e-02 1.910828025477706582e-02 5.000000000000004441e-02 2.531645569620255554e-02
+1.948051948051943150e-02 1.298701298701299134e-02 5.063291139240511107e-02 2.564102564102566095e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.184713375796177637e-02 3.797468354430377779e-02 2.547770700636942109e-02 3.164556962025311115e-02 3.773584905660376521e-02 3.773584905660376521e-02
+1.948051948051943150e-02 1.298701298701299134e-02 5.063291139240511107e-02 2.564102564102566095e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.184713375796177637e-02 3.797468354430377779e-02 2.547770700636942109e-02 3.164556962025311115e-02 3.773584905660376521e-02 3.773584905660376521e-02
+1.948051948051943150e-02 1.298701298701299134e-02 5.063291139240511107e-02 2.564102564102566095e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.184713375796177637e-02 3.797468354430377779e-02 2.547770700636942109e-02 3.164556962025311115e-02 3.773584905660376521e-02 3.773584905660376521e-02
+2.580645161290318068e-02 3.205128205128204844e-02 4.430379746835444443e-02 6.451612903225822926e-03 3.184713375796177637e-02 3.184713375796177637e-02 3.184713375796177637e-02 0.000000000000000000e+00 3.164556962025311115e-02 4.402515723270439274e-02 1.273885350318471055e-02 5.590062111801241684e-02 1.898734177215188890e-02
+3.205128205128204844e-02 3.821656050955413164e-02 2.547770700636942109e-02 2.547770700636942109e-02 3.797468354430377779e-02 3.797468354430377779e-02 3.797468354430377779e-02 3.164556962025311115e-02 0.000000000000000000e+00 3.773584905660376521e-02 3.144654088050313767e-02 3.749999999999997780e-02 3.749999999999997780e-02
+3.205128205128204844e-02 2.564102564102566095e-02 6.250000000000000000e-02 3.797468354430377779e-02 2.547770700636942109e-02 2.547770700636942109e-02 2.547770700636942109e-02 4.402515723270439274e-02 3.773584905660376521e-02 0.000000000000000000e+00 3.144654088050313767e-02 1.265822784810122226e-02 3.749999999999997780e-02
+3.821656050955413164e-02 3.184713375796177637e-02 5.625000000000002220e-02 1.910828025477706582e-02 3.164556962025311115e-02 3.164556962025311115e-02 3.164556962025311115e-02 1.273885350318471055e-02 3.144654088050313767e-02 3.144654088050313767e-02 0.000000000000000000e+00 4.347826086956518843e-02 6.329113924050666640e-03
+4.430379746835444443e-02 3.797468354430377779e-02 6.211180124223603105e-02 5.000000000000004441e-02 3.773584905660376521e-02 3.773584905660376521e-02 3.773584905660376521e-02 5.590062111801241684e-02 3.749999999999997780e-02 1.265822784810122226e-02 4.347826086956518843e-02 0.000000000000000000e+00 4.938271604938271331e-02
+4.430379746835444443e-02 3.797468354430377779e-02 6.211180124223603105e-02 2.531645569620255554e-02 3.773584905660376521e-02 3.773584905660376521e-02 3.773584905660376521e-02 1.898734177215188890e-02 3.749999999999997780e-02 3.749999999999997780e-02 6.329113924050666640e-03 4.938271604938271331e-02 0.000000000000000000e+00