changeset 11:7e3ad796236a draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 6c84abdd07f292048bf2194073e2e938e94158c4"
author bgruening
date Wed, 25 Mar 2020 20:35:16 +0000
parents 3697b3cad1ea
children 79b9cb9e56a1
files __pycache__/cheminfolib.cpython-36.pyc distance_finder.py test-data/ligands.sdf
diffstat 3 files changed, 457 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file __pycache__/cheminfolib.cpython-36.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/distance_finder.py	Wed Mar 25 20:35:16 2020 +0000
@@ -0,0 +1,109 @@
+# Reports distances of ligands to reference points. An example input for the points is:
+#
+# 5.655   1.497  18.223
+# 1.494  -8.367  18.574
+# 13.034   6.306  25.232
+#
+# Data can be space or tab separated but must contain 3 and only 3 numbers for the x, y and z coordinates
+#
+# That would encode 3 points.
+# Each record in the SDF input is read and, for each reference point, the distance to the closest heavy atom is
+# recorded as a property named distanceN, where N is the index (starting from 1) of the point (in that example
+# there would be properties distance1, distance2 and distance3).
+
+import argparse, os, sys, math
+from openbabel import pybel
+
+
+
+def log(*args, **kwargs):
+    """Print the given values to STDERR.
+
+    Positional and keyword arguments are forwarded unchanged to ``print``;
+    only the destination stream is fixed to ``sys.stderr``.
+    """
+    stream = sys.stderr
+    print(*args, file=stream, **kwargs)
+
+
+def _read_points(points_file):
+    """Parse the points file into a list of (x, y, z) float tuples.
+
+    Blank lines are ignored. Any other line that does not hold exactly three
+    numbers is reported on STDERR and skipped.
+    """
+    points = []
+    with open(points_file, 'r') as f:
+        for raw_line in f:
+            line = raw_line.strip()
+            if not line:
+                continue
+            fields = line.split()
+            try:
+                # Unpacking raises ValueError for a wrong number of fields,
+                # float() raises it for a non-numeric field.
+                x, y, z = map(float, fields)
+            except ValueError:
+                log("Failed to read line:", line)
+                continue
+            points.append((x, y, z))
+            log("Read points", fields)
+    return points
+
+
+def _min_distance(point, coords):
+    """Return the smallest Euclidean distance from point to any entry of coords.
+
+    Raises ValueError when coords is empty.
+    """
+    return min(
+        math.sqrt((point[0] - c[0])**2 + (point[1] - c[1])**2 + (point[2] - c[2])**2)
+        for c in coords
+    )
+
+
+def execute(ligands_sdf, points_file, outfile):
+    """Annotate each molecule with its minimum distance to every reference point.
+
+    For the N-th point (counting from 1) the distance to the closest atom of
+    the hydrogen-stripped molecule is stored as the property ``distanceN``.
+    Molecules that cannot be processed are reported on STDERR and left out of
+    the output.
+
+    :param ligands_sdf: A SDF with the 3D molecules to test
+    :param points_file: A file with the points to consider.
+    :param outfile: The name of the file for the SDF output
+    :return: None
+    """
+    points = _read_points(points_file)
+    log('Found', len(points), 'atom points')
+
+    sdf_writer = pybel.Outputfile("sdf", outfile, overwrite=True)
+
+    count = 0
+    written = 0
+    try:
+        for mol in pybel.readfile("sdf", ligands_sdf):
+            count += 1
+            if count % 50000 == 0:
+                log('Processed', count)
+
+            try:
+                # Measure on a separate Molecule with hydrogens removed, so
+                # the record that is written out is not modified by removeh().
+                clone = pybel.Molecule(mol)
+                clone.removeh()
+                coords = [atom.coords for atom in clone.atoms]
+
+                for p, point in enumerate(points, start=1):
+                    mol.data['distance' + str(p)] = _min_distance(point, coords)
+
+                sdf_writer.write(mol)
+                written += 1
+            except Exception as e:
+                # Deliberate best effort: one bad record must not stop the run.
+                log('Failed to handle molecule: '+ str(e))
+                continue
+    finally:
+        # Close the output file even if reading the input fails part-way.
+        sdf_writer.close()
+
+    # Report the molecules actually written, not merely read.
+    log('Wrote', written, 'molecules')
+
+
+def main():
+    """Parse the command-line arguments and run the distance calculation.
+
+    ``--input`` and ``--points`` are mandatory; argparse exits with a usage
+    message when either is missing. ``--outfile`` defaults to ``output.sdf``.
+    """
+    parser = argparse.ArgumentParser(description='XChem distances - measure distances to particular points')
+
+    parser.add_argument('-i', '--input', required=True, help="SDF containing the 3D molecules to score")
+    parser.add_argument('-p', '--points', required=True,
+                        help="Text file with one reference point per line as space or tab separated x y z coordinates")
+    parser.add_argument('-o', '--outfile', default='output.sdf', help="File name for results")
+
+    args = parser.parse_args()
+    log("XChem distances args: ", args)
+
+    execute(args.input, args.points, args.outfile)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ligands.sdf	Wed Mar 25 20:35:16 2020 +0000
@@ -0,0 +1,348 @@
+C[C@H](NS(C)(=O)=O)c1ccccn1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+ 13 13  0  0  0  0  0  0  0  0999 V2000
+   13.1106    0.7550   23.2143 C   0  0  0  0  0  0
+   12.0087    1.7547   22.8361 C   0  0  0  0  0  0
+   11.4071    1.4261   21.5334 N   0  0  0  0  0  0
+    9.9388    0.8337   21.6338 S   0  0  0  0  0  0
+    9.0411    2.3499   21.9038 C   0  0  0  0  0  0
+    9.3678    0.3273   20.3892 O   0  0  0  0  0  0
+    9.6367    0.0497   22.8298 O   0  0  0  0  0  0
+   12.5588    3.1798   22.7273 C   0  0  0  0  0  0
+   12.8091    3.9103   23.8920 C   0  0  0  0  0  0
+   13.2507    5.2279   23.7909 C   0  0  0  0  0  0
+   13.4398    5.7854   22.5337 C   0  0  0  0  0  0
+   13.1881    4.9943   21.4282 C   0  0  0  0  0  0
+   12.7627    3.7170   21.4945 N   0  0  0  0  0  0
+  2  8  1  0  0  0
+  2  3  1  0  0  0
+  2  1  1  0  0  0
+  3  4  1  0  0  0
+  4  5  1  0  0  0
+  4  6  2  0  0  0
+  4  7  2  0  0  0
+  8  9  2  0  0  0
+  8 13  1  0  0  0
+  9 10  1  0  0  0
+ 10 11  2  0  0  0
+ 11 12  1  0  0  0
+ 12 13  2  0  0  0
+M  END
+$$$$
+C[C@@H](NS(C)(=O)=O)c1ccccn1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+ 13 13  0  0  0  0  0  0  0  0999 V2000
+    6.8559    1.7506   21.3142 C   0  0  0  0  0  0
+    7.4019    0.6872   20.3547 C   0  0  0  0  0  0
+    7.4689    1.2291   18.9518 N   0  0  0  0  0  0
+    6.4062    0.5634   17.8113 S   0  0  0  0  0  0
+    4.8453    0.8765   18.6009 C   0  0  0  0  0  0
+    6.4632    1.4005   16.6345 O   0  0  0  0  0  0
+    6.5887   -0.8668   17.7602 O   0  0  0  0  0  0
+    8.6586   -0.0013   20.8698 C   0  0  0  0  0  0
+    9.8290    0.7331   21.0631 C   0  0  0  0  0  0
+   10.9643    0.0817   21.5388 C   0  0  0  0  0  0
+   10.9072   -1.2804   21.8047 C   0  0  0  0  0  0
+    9.7156   -1.9413   21.5709 C   0  0  0  0  0  0
+    8.6053   -1.3390   21.0982 N   0  0  0  0  0  0
+  2  1  1  0  0  0
+  2  3  1  0  0  0
+  2  8  1  0  0  0
+  3  4  1  0  0  0
+  4  5  1  0  0  0
+  4  6  2  0  0  0
+  4  7  2  0  0  0
+  8  9  2  0  0  0
+  8 13  1  0  0  0
+  9 10  1  0  0  0
+ 10 11  2  0  0  0
+ 11 12  1  0  0  0
+ 12 13  2  0  0  0
+M  END
+$$$$
+FC(F)(F)c1ccccc1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+ 10 10  0  0  0  0  0  0  0  0999 V2000
+    9.6749    1.3998   21.0525 F   0  0  0  0  0  0
+   10.4122    1.5718   22.1856 C   0  0  0  0  0  0
+   10.7385    2.8940   22.2189 F   0  0  0  0  0  0
+    9.5494    1.3747   23.2216 F   0  0  0  0  0  0
+   11.6206    0.6719   22.2638 C   0  0  0  0  0  0
+   12.7782    1.0724   22.9480 C   0  0  0  0  0  0
+   13.8994    0.2427   22.9901 C   0  0  0  0  0  0
+   13.8750   -0.9970   22.3527 C   0  0  0  0  0  0
+   12.7278   -1.4119   21.6796 C   0  0  0  0  0  0
+   11.6049   -0.5838   21.6354 C   0  0  0  0  0  0
+  1  2  1  0  0  0
+  2  3  1  0  0  0
+  2  4  1  0  0  0
+  2  5  1  0  0  0
+  5  6  2  0  0  0
+  5 10  1  0  0  0
+  6  7  1  0  0  0
+  7  8  2  0  0  0
+  8  9  1  0  0  0
+  9 10  2  0  0  0
+M  END
+$$$$
+O=[SH](=O)c1ccccc1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+  9  9  0  0  0  0  0  0  0  0999 V2000
+   13.5834    1.0389   23.2971 O   0  0  0  0  0  0
+   12.4089    1.7539   22.8402 S   0  0  0  0  0  0
+   11.1134    1.5953   23.4682 O   0  0  0  0  0  0
+   12.7957    3.4745   22.8588 C   0  0  0  0  0  0
+   13.0347    4.1328   21.6517 C   0  0  0  0  0  0
+   13.3512    5.4908   21.6741 C   0  0  0  0  0  0
+   13.4284    6.1723   22.8914 C   0  0  0  0  0  0
+   13.1905    5.5006   24.0928 C   0  0  0  0  0  0
+   12.8736    4.1434   24.0815 C   0  0  0  0  0  0
+  1  2  2  0  0  0
+  2  3  2  0  0  0
+  2  4  1  0  0  0
+  4  5  2  0  0  0
+  4  9  1  0  0  0
+  5  6  1  0  0  0
+  6  7  2  0  0  0
+  7  8  1  0  0  0
+  8  9  2  0  0  0
+M  END
+$$$$
+CSCCNC(=O)c1ccccc1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+ 13 13  0  0  0  0  0  0  0  0999 V2000
+    5.5458   -1.4150   18.8612 C   0  0  0  0  0  0
+    5.7667    0.2529   18.1959 S   0  0  0  0  0  0
+    7.4715    0.5847   18.7876 C   0  0  0  0  0  0
+    7.5534    0.7884   20.3036 C   0  0  0  0  0  0
+    8.9454    1.0042   20.7146 N   0  0  0  0  0  0
+    9.6402   -0.1322   20.8514 C   0  0  0  0  0  0
+    9.3233   -1.2490   20.4452 O   0  0  0  0  0  0
+   10.9406   -0.0002   21.5911 C   0  0  0  0  0  0
+   11.2868    1.2512   22.1206 C   0  0  0  0  0  0
+   12.4999    1.4280   22.7900 C   0  0  0  0  0  0
+   13.3773    0.3543   22.9381 C   0  0  0  0  0  0
+   13.0426   -0.8964   22.4196 C   0  0  0  0  0  0
+   11.8294   -1.0724   21.7499 C   0  0  0  0  0  0
+  1  2  1  0  0  0
+  2  3  1  0  0  0
+  3  4  1  0  0  0
+  4  5  1  0  0  0
+  5  6  1  0  0  0
+  6  7  2  0  0  0
+  6  8  1  0  0  0
+  8  9  2  0  0  0
+  8 13  1  0  0  0
+  9 10  1  0  0  0
+ 10 11  2  0  0  0
+ 11 12  1  0  0  0
+ 12 13  2  0  0  0
+M  END
+$$$$
+CCNS(=O)(=O)Cc1cccnc1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+ 13 13  0  0  0  0  0  0  0  0999 V2000
+   13.3598   -1.5609   21.7877 C   0  0  0  0  0  0
+   13.7113   -0.3498   22.6305 C   0  0  0  0  0  0
+   12.7714    0.7692   22.3361 N   0  0  0  0  0  0
+   11.8062    1.1577   23.6906 S   0  0  0  0  0  0
+   10.8599    0.0665   23.8065 O   0  0  0  0  0  0
+   12.7113    1.5035   24.7620 O   0  0  0  0  0  0
+   10.9309    2.6284   23.1295 C   0  0  0  0  0  0
+   11.9375    3.7368   22.9987 C   0  0  0  0  0  0
+   12.7555    3.8614   21.8705 C   0  0  0  0  0  0
+   13.6216    4.9464   21.7613 C   0  0  0  0  0  0
+   13.6489    5.8696   22.7918 C   0  0  0  0  0  0
+   12.8858    5.7762   23.9008 N   0  0  0  0  0  0
+   12.0407    4.7242   23.9742 C   0  0  0  0  0  0
+  1  2  1  0  0  0
+  2  3  1  0  0  0
+  3  4  1  0  0  0
+  4  5  2  0  0  0
+  4  6  2  0  0  0
+  4  7  1  0  0  0
+  7  8  1  0  0  0
+  8  9  2  0  0  0
+  8 13  1  0  0  0
+  9 10  1  0  0  0
+ 10 11  2  0  0  0
+ 11 12  1  0  0  0
+ 12 13  2  0  0  0
+M  END
+$$$$
+CCc1ccccc1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+  8  8  0  0  0  0  0  0  0  0999 V2000
+    9.3165   -1.3968   21.4474 C   0  0  0  0  0  0
+   10.1701   -1.9537   22.5759 C   0  0  0  0  0  0
+   11.4727   -1.2108   22.7284 C   0  0  0  0  0  0
+   11.5710   -0.1198   23.6025 C   0  0  0  0  0  0
+   12.7795    0.5633   23.7497 C   0  0  0  0  0  0
+   13.8989    0.1675   23.0199 C   0  0  0  0  0  0
+   13.8083   -0.9074   22.1374 C   0  0  0  0  0  0
+   12.6006   -1.5917   21.9895 C   0  0  0  0  0  0
+  1  2  1  0  0  0
+  2  3  1  0  0  0
+  3  4  2  0  0  0
+  3  8  1  0  0  0
+  4  5  1  0  0  0
+  5  6  2  0  0  0
+  6  7  1  0  0  0
+  7  8  2  0  0  0
+M  END
+$$$$
+CS(=O)(=O)NCCN1CCCC1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+ 12 12  0  0  0  0  0  0  0  0999 V2000
+    6.8490    0.5139   18.2625 C   0  0  0  0  0  0
+    7.3129    0.1623   19.9428 S   0  0  0  0  0  0
+    7.4343    1.3973   20.6804 O   0  0  0  0  0  0
+    6.5548   -0.9612   20.4458 O   0  0  0  0  0  0
+    8.9263   -0.3881   19.7641 N   0  0  0  0  0  0
+    9.5060   -0.7657   21.0831 C   0  0  0  0  0  0
+   10.8140   -0.0093   21.3638 C   0  0  0  0  0  0
+   11.5742   -0.6154   22.5359 N   0  0  0  0  0  0
+   11.4738    0.2167   23.7970 C   0  0  0  0  0  0
+   12.6714    1.1335   23.7058 C   0  0  0  0  0  0
+   13.7645    0.2387   23.1560 C   0  0  0  0  0  0
+   13.0648   -0.8014   22.2945 C   0  0  0  0  0  0
+  1  2  1  0  0  0
+  2  3  2  0  0  0
+  2  4  2  0  0  0
+  2  5  1  0  0  0
+  5  6  1  0  0  0
+  6  7  1  0  0  0
+  7  8  1  0  0  0
+  8  9  1  0  0  0
+  8 12  1  0  0  0
+  9 10  1  0  0  0
+ 10 11  1  0  0  0
+ 11 12  1  0  0  0
+M  END
+$$$$
+C=Cc1ccccc1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+  8  8  0  0  0  0  0  0  0  0999 V2000
+    3.0012    2.5994   19.2746 C   0  0  0  0  0  0
+    3.4784    1.3846   19.5760 C   0  0  0  0  0  0
+    4.7459    0.8376   19.0724 C   0  0  0  0  0  0
+    5.0977   -0.4790   19.4008 C   0  0  0  0  0  0
+    6.2949   -1.0353   18.9390 C   0  0  0  0  0  0
+    7.1469   -0.2825   18.1324 C   0  0  0  0  0  0
+    6.7985    1.0178   17.7793 C   0  0  0  0  0  0
+    5.6004    1.5702   18.2385 C   0  0  0  0  0  0
+  1  2  2  0  0  0
+  2  3  1  0  0  0
+  3  4  2  0  0  0
+  3  8  1  0  0  0
+  4  5  1  0  0  0
+  5  6  2  0  0  0
+  6  7  1  0  0  0
+  7  8  2  0  0  0
+M  END
+$$$$
+CC(C)(C)c1ccccc1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+ 10 10  0  0  0  0  0  0  0  0999 V2000
+   13.4848    4.7599   24.0453 C   0  0  0  0  0  0
+   12.5126    4.3820   22.9047 C   0  0  0  0  0  0
+   11.1564    5.0474   23.2199 C   0  0  0  0  0  0
+   13.0811    4.9891   21.6031 C   0  0  0  0  0  0
+   12.3310    2.8640   22.7263 C   0  0  0  0  0  0
+   13.0824    1.9208   23.4483 C   0  0  0  0  0  0
+   12.9272    0.5461   23.2348 C   0  0  0  0  0  0
+   12.0121    0.0832   22.2967 C   0  0  0  0  0  0
+   11.2426    0.9922   21.5790 C   0  0  0  0  0  0
+   11.3986    2.3657   21.7948 C   0  0  0  0  0  0
+  1  2  1  0  0  0
+  2  3  1  0  0  0
+  2  4  1  0  0  0
+  2  5  1  0  0  0
+  5  6  2  0  0  0
+  5 10  1  0  0  0
+  6  7  1  0  0  0
+  7  8  2  0  0  0
+  8  9  1  0  0  0
+  9 10  2  0  0  0
+M  END
+$$$$
+CCN(C)C(=O)c1ccccc1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+ 12 12  0  0  0  0  0  0  0  0999 V2000
+   10.6306   -0.4318   24.2468 C   0  0  0  0  0  0
+   11.2738   -0.2591   22.8777 C   0  0  0  0  0  0
+   11.5027    1.1477   22.5523 N   0  0  0  0  0  0
+   10.6408    1.7231   21.5282 C   0  0  0  0  0  0
+   12.6021    1.7847   23.1411 C   0  0  0  0  0  0
+   13.4087    1.1762   23.8483 O   0  0  0  0  0  0
+   12.8518    3.2389   22.9240 C   0  0  0  0  0  0
+   12.7404    3.8481   21.6696 C   0  0  0  0  0  0
+   12.9990    5.2139   21.5196 C   0  0  0  0  0  0
+   13.3123    5.9914   22.6330 C   0  0  0  0  0  0
+   13.3606    5.4074   23.8976 C   0  0  0  0  0  0
+   13.1331    4.0370   24.0436 C   0  0  0  0  0  0
+  1  2  1  0  0  0
+  2  3  1  0  0  0
+  3  4  1  0  0  0
+  3  5  1  0  0  0
+  5  6  2  0  0  0
+  5  7  1  0  0  0
+  7  8  2  0  0  0
+  7 12  1  0  0  0
+  8  9  1  0  0  0
+  9 10  2  0  0  0
+ 10 11  1  0  0  0
+ 11 12  2  0  0  0
+M  END
+$$$$
+CNC(=O)c1cccc(CCNS(C)(=O)=O)c1
+  rDOCK(R)          3D
+libRbt.so/2013.1/901 2013/11/27
+ 17 17  0  0  0  0  0  0  0  0999 V2000
+   10.0019    1.6672   21.8593 C   0  0  0  0  0  0
+    8.7487    1.5971   21.1244 N   0  0  0  0  0  0
+    8.6290    0.5212   20.3389 C   0  0  0  0  0  0
+    9.4567   -0.3639   20.1329 O   0  0  0  0  0  0
+    7.3235    0.4378   19.6088 C   0  0  0  0  0  0
+    7.1514   -0.5242   18.6092 C   0  0  0  0  0  0
+    5.9627   -0.5677   17.8790 C   0  0  0  0  0  0
+    4.9453    0.3559   18.1379 C   0  0  0  0  0  0
+    5.0962    1.3237   19.1419 C   0  0  0  0  0  0
+    3.9963    2.3284   19.4060 C   0  0  0  0  0  0
+    3.7241    2.6438   20.8872 C   0  0  0  0  0  0
+    2.6087    3.6063   21.0810 N   0  0  0  0  0  0
+    1.3263    3.1043   22.0447 S   0  0  0  0  0  0
+    0.5403    1.9194   20.9788 C   0  0  0  0  0  0
+    1.8577    2.3935   23.1858 O   0  0  0  0  0  0
+    0.4357    4.2308   22.2210 O   0  0  0  0  0  0
+    6.2789    1.3264   19.8924 C   0  0  0  0  0  0
+  1  2  1  0  0  0
+  2  3  1  0  0  0
+  3  4  2  0  0  0
+  3  5  1  0  0  0
+  5  6  2  0  0  0
+  5 17  1  0  0  0
+  6  7  1  0  0  0
+  7  8  2  0  0  0
+  8  9  1  0  0  0
+  9 10  1  0  0  0
+  9 17  2  0  0  0
+ 10 11  1  0  0  0
+ 11 12  1  0  0  0
+ 12 13  1  0  0  0
+ 13 14  1  0  0  0
+ 13 15  2  0  0  0
+ 13 16  2  0  0  0
+M  END
+$$$$