annotate qed/qed.py @ 5:987524196f3e

Uploaded
author bgruening
date Sat, 11 May 2013 17:14:14 -0400
parents 80efb29755f3
children 6e6b05e75a3f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
987524196f3e Uploaded
bgruening
parents: 0
diff changeset
1 #!/usr/bin/env python
0
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
2 __all__ = ['weights_max', 'weights_mean', 'weights_none', 'default']
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
3
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
4 # Silicos-it
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
5 from errors import WrongArgument
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
6
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
7 # RDKit
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
8 from rdkit.Chem import Descriptors
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
9 from rdkit import Chem
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
10
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
11 # General
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
12 from copy import deepcopy
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
13 from math import exp, log
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
14 import sys, os, re
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
15 import argparse
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
16
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
def check_filetype(filepath):
    """
    Guess the chemical file format of *filepath* by scanning its lines.

    The file is read line by line and the first marker found decides:
    '$$$$' -> 'sdf', '@<TRIPOS>MOLECULE' -> 'mol2', 'ligand id' -> 'drf',
    a line starting with 'InChI=' -> 'inchi'. If none of these occurs but
    a line starting with 'M  END' was seen, 'mol' is returned; otherwise
    the file is assumed to be SMILES and 'smi' is returned.
    """
    mol = False
    # The context manager closes the handle on every exit path, including
    # the early returns inside the loop.
    with open(filepath) as handle:
        for line in handle:
            if '$$$$' in line:
                return 'sdf'
            elif '@<TRIPOS>MOLECULE' in line:
                return 'mol2'
            elif 'ligand id' in line:
                return 'drf'
            elif line.startswith('InChI='):
                return 'inchi'
            elif re.match(r'M\s+END', line):
                # 'M  END' can occur before '$$$$', so an SDF file would be
                # recognised as mol if we returned here. Only remember the
                # hit and decide once the whole file has been scanned.
                mol = True

    if mol:
        return 'mol'
    return 'smi'
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
36
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
# SMARTS query handed to Chem.DeleteSubstructs() in properties() before the
# remaining rings are counted.
AliphaticRings = Chem.MolFromSmarts('[$([A;R][!a])]')

# SMARTS patterns whose matches are counted as hydrogen-bond acceptors (HBA).
AcceptorSmarts = [
    '[oH0;X2]',
    '[OH1;X2;v2]',
    '[OH0;X2;v2]',
    '[OH0;X1;v2]',
    '[O-;X1]',
    '[SH0;X2;v2]',
    '[SH0;X1;v2]',
    '[S-;X1]',
    '[nH0;X2]',
    '[NH0;X1;v3]',
    '[$([N;+0;X3;v3]);!$(N[C,S]=O)]',
]
# Compile every pattern once at import time.
Acceptors = [Chem.MolFromSmarts(pattern) for pattern in AcceptorSmarts]
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
55
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
# SMARTS patterns of the structural alerts; properties() counts how many of
# them match a molecule (ALERTS).
StructuralAlertSmarts = [
    '*1[O,S,N]*1',
    '[S,C](=[O,S])[F,Br,Cl,I]',
    '[CX4][Cl,Br,I]',
    '[C,c]S(=O)(=O)O[C,c]',
    '[$([CH]),$(CC)]#CC(=O)[C,c]',
    '[$([CH]),$(CC)]#CC(=O)O[C,c]',
    'n[OH]',
    '[$([CH]),$(CC)]#CS(=O)(=O)[C,c]',
    'C=C(C=O)C=O',
    'n1c([F,Cl,Br,I])cccc1',
    '[CH1](=O)',
    '[O,o][O,o]',
    '[C;!R]=[N;!R]',
    '[N!R]=[N!R]',
    '[#6](=O)[#6](=O)',
    '[S,s][S,s]',
    '[N,n][NH2]',
    'C(=O)N[NH2]',
    '[C,c]=S',
    '[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]',
    'C1(=[O,N])C=CC(=[O,N])C=C1',
    'C1(=[O,N])C(=[O,N])C=CC=C1',
    'a21aa3a(aa1aaaa2)aaaa3',
    'a31a(a2a(aa1)aaaa2)aaaa3',
    'a1aa2a3a(a1)A=AA=A3=AA=A2',
    'c1cc([NH2])ccc1',
    '[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]',
    'I',
    'OS(=O)(=O)[O-]',
    '[N+](=O)[O-]',
    'C(=O)N[OH]',
    'C1NC(=O)NC(=O)1',
    '[SH]',
    '[S-]',
    'c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]',
    'c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]',
    '[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1',
    '[CR1]1[CR1][CR1]cc[CR1][CR1]1',
    '[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1',
    '[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1',
    '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1',
    '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1',
    'C#C',
    '[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]',
    '[$([N+R]),$([n+R]),$([N+]=C)][O-]',
    '[C,c]=N[OH]',
    '[C,c]=NOC=O',
    '[C,c](=O)[CX4,CR0X3,O][C,c](=O)',
    'c1ccc2c(c1)ccc(=O)o2',
    '[O+,o+,S+,s+]',
    'N=C=O',
    '[NX3,NX4][F,Cl,Br,I]',
    'c1ccccc1OC(=O)[#6]',
    '[CR0]=[CR0][CR0]=[CR0]',
    '[C+,c+,C-,c-]',
    'N=[N+]=[N-]',
    'C12C(NC(N1)=O)CSC2',
    'c1c([OH])c([OH,NH2,NH])ccc1',
    'P',
    '[N,O,S]C#N',
    'C=C=O',
    '[Si][F,Cl,Br,I]',
    '[SX2]O',
    '[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)',
    'O1CCCCC1OC2CCC3CCCCC3C2',
    'N=[CR0][N,n,O,S]',
    '[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2',
    'C=[C!r]C#N',
    '[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1',
    '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1',
    '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])',
    '[OH]c1ccc([OH,NH2,NH])cc1',
    'c1ccccc1OC(=O)O',
    '[SX2H0][N]',
    'c12ccccc1(SC(S)=N2)',
    'c12ccccc1(SC(=S)N2)',
    'c1nnnn1C=O',
    's1c(S)nnc1NC=O',
    'S1C=CSC1=S',
    'C(=O)Onnn',
    'OS(=O)(=O)C(F)(F)F',
    'N#CC[OH]',
    'N#CC(=O)',
    'S(=O)(=O)C#N',
    'N[CH2]C#N',
    'C1(=O)NCC1',
    'S(=O)(=O)[O-,OH]',
    'NC[F,Cl,Br,I]',
    'C=[C!r]O',
    '[NX2+0]=[O+0]',
    '[OR0,NR0][OR0,NR0]',
    'C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]',
    '[CX2R0][NX3R0]',
    'c1ccccc1[C;!R]=[C;!R]c2ccccc2',
    '[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]',
    '[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]',
    '[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]',
    '[*]=[N+]=[*]',
    '[SX3](=O)[O-,OH]',
    'N#N',
    'F.F.F.F',
    '[R0;D2][R0;D2][R0;D2][R0;D2]',
    '[cR,CR]~C(=O)NC(=O)~[cR,CR]',
    'C=!@CC=[O,S]',
    '[#6,#8,#16][C,c](=O)O[C,c]',
    'c[C;R0](=[O,S])[C,c]',
    'c[SX2][C;!R]',
    'C=C=C',
    'c1nc([F,Cl,Br,I,S])ncc1',
    'c1ncnc([F,Cl,Br,I,S])c1',
    'c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])',
    '[C,c]S(=O)(=O)c1ccc(cc1)F',
    '[15N]',
    '[13C]',
    '[18O]',
    '[34S]',
]

# Compile every alert pattern once at import time.
StructuralAlerts = [Chem.MolFromSmarts(pattern) for pattern in StructuralAlertSmarts]
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
178
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
179
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
# ADS parameter tables, indexed as table[row][column].
#   rows    (8): MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS
#   columns (7): A, B, C, D, E, F, DMAX
# The two tables differ only in the ALOGP row.

# Variant with the ALOGP parameters from Gregory Gerebtzoff (2012, Roche).
pads1 = [
    [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561],  # MW
    [0.486849448, 186.2293718, 2.066177165, 3.902720615, 1.027025453, 0.913012565, 145.4314800],  # ALOGP
    [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046],  # HBA
    [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616],  # HBD
    [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167],  # PSA
    [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403],  # ROTB
    [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610],  # AROM
    [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140],  # ALERTS
]

# Variant with the ALOGP parameters from the original publication.
pads2 = [
    [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561],  # MW
    [3.172690585, 137.8624751, 2.534937431, 4.581497897, 0.822739154, 0.576295591, 131.3186604],  # ALOGP
    [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046],  # HBA
    [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616],  # HBD
    [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167],  # PSA
    [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403],  # ROTB
    [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610],  # AROM
    [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140],  # ALERTS
]
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
201
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
def ads(x, a, b, c, d, e, f, dmax):
    """
    Evaluate the ADS desirability function for a single property value.

    Computes
        (a + b / (1 + exp(-(x - c + d/2) / e))
               * (1 - 1 / (1 + exp(-(x - c - d/2) / f)))) / dmax

    x                -- property value
    a, b, c, d, e, f -- curve parameters (one row of pads1 / pads2)
    dmax             -- normalisation constant (last column of that row)

    Returns the normalised desirability as a float.
    """
    def _exp(value):
        # math.exp raises OverflowError for arguments above ~709. For the
        # sigmoid terms below that case has the same limit as +infinity,
        # so saturate instead of aborting the whole calculation.
        try:
            return exp(value)
        except OverflowError:
            return float('inf')

    rising = b / (1 + _exp(-1 * (x - c + d / 2) / e))
    falling = 1 - 1 / (1 + _exp(-1 * (x - c - d / 2) / f))
    return (a + rising * falling) / dmax
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
204
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
def properties(mol):
    """
    Calculates the properties that are required to calculate the QED descriptor.

    mol -- RDKit molecule; must not be None.

    Returns a list of nine values:
        [0] MW     Descriptors.MolWt
        [1] ALOGP  Descriptors.MolLogP
        [2] HBA    total number of matches of the Acceptors patterns
        [3] HBD    Descriptors.NumHDonors
        [4] PSA    Descriptors.TPSA
        [5] ROTB   Descriptors.NumRotatableBonds
        [6] AROM   ring count after deleting the atoms matching AliphaticRings
        [7] ALERTS number of StructuralAlerts patterns that match
        [8] LRo5   number of violated rule-of-five style limits

    Raises WrongArgument if mol is None.
    """
    if mol is None:
        raise WrongArgument("properties(mol)", "mol argument is \'None\'")
    x = [0] * 9
    x[0] = Descriptors.MolWt(mol)                                               # MW
    x[1] = Descriptors.MolLogP(mol)                                             # ALOGP
    # GetSubstructMatches returns an empty tuple when nothing matches, so no
    # separate HasSubstructMatch pre-check is needed.
    x[2] = sum(len(mol.GetSubstructMatches(hba)) for hba in Acceptors)          # HBA
    x[3] = Descriptors.NumHDonors(mol)                                          # HBD
    x[4] = Descriptors.TPSA(mol)                                                # PSA
    x[5] = Descriptors.NumRotatableBonds(mol)                                   # ROTB
    rings = Chem.GetSSSR(Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings))
    # Older RDKit releases return the ring count from GetSSSR, newer ones
    # return the rings themselves; normalise to a count so that ads() and
    # the '%d' output formatting keep working with both.
    x[6] = rings if isinstance(rings, int) else len(rings)                      # AROM
    x[7] = sum(1 for alert in StructuralAlerts if mol.HasSubstructMatch(alert)) # ALERTS
    ro5_failed = 0
    if x[3] > 5:        # HBD
        ro5_failed += 1
    if x[2] > 10:       # HBA
        ro5_failed += 1
    if x[0] >= 500:     # MW
        ro5_failed += 1
    if x[1] > 5:        # ALOGP
        ro5_failed += 1
    x[8] = ro5_failed
    return x
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
236
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
237
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
def qed(w, p, gerebtzoff):
    """
    Combine the first eight property values into the weighted QED score.

    w          -- sequence of weights; the first eight are applied to the
                  desirabilities, all of them enter the normalising sum
    p          -- sequence of property values in pads row order
                  (MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS)
    gerebtzoff -- truthy selects the pads1 table, falsy selects pads2

    Returns exp(sum(w[i] * log(d[i])) / sum(w)), where d[i] is the ads()
    desirability of p[i].
    """
    table = pads1 if gerebtzoff else pads2
    # Each table row holds exactly the seven ads() parameters after x.
    desirabilities = [ads(p[k], *table[k]) for k in range(8)]
    weighted_log_sum = 0.0
    for k, desirability in enumerate(desirabilities):
        weighted_log_sum += w[k] * log(desirability)
    return exp(weighted_log_sum / sum(w))
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
250
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
251
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
def weights_max(mol, gerebtzoff = True, props = False):
    """
    QED descriptor computed with the maximal descriptor weights.

    When a truthy props list is passed it is used as is and the property
    calculation for mol is skipped; otherwise properties(mol) is called.
    gerebtzoff is forwarded to qed() to select the parameter table.
    """
    max_weights = [0.50, 0.25, 0.00, 0.50, 0.00, 0.50, 0.25, 1.00]
    descriptor_values = props if props else properties(mol)
    return qed(max_weights, descriptor_values, gerebtzoff)
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
260
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
261
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
def weights_mean(mol, gerebtzoff = True, props = False):
    """
    QED descriptor computed with the average descriptor weights.

    When a truthy props list is passed it is used as is and the property
    calculation for mol is skipped; otherwise properties(mol) is called.
    gerebtzoff is forwarded to qed() to select the parameter table.
    """
    mean_weights = [0.66, 0.46, 0.05, 0.61, 0.06, 0.65, 0.48, 0.95]
    descriptor_values = props if props else properties(mol)
    return qed(mean_weights, descriptor_values, gerebtzoff)
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
270
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
271
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
def weights_none(mol, gerebtzoff = True, props = False):
    """
    QED descriptor computed with unit weights (every property counts equally).

    When a truthy props list is passed it is used as is and the property
    calculation for mol is skipped; otherwise properties(mol) is called.
    gerebtzoff is forwarded to qed() to select the parameter table.
    """
    unit_weights = [1.00] * 8
    descriptor_values = props if props else properties(mol)
    return qed(unit_weights, descriptor_values, gerebtzoff)
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
280
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
281
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
def default(mol, gerebtzoff = True):
    """
    QED descriptor with the average descriptor weights; by default the
    Gregory Gerebtzoff parameters are used.

    Thin wrapper around weights_mean(); the properties are always
    calculated from mol.
    """
    qed_value = weights_mean(mol, gerebtzoff)
    return qed_value
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
287
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
288
80efb29755f3 Uploaded
bgruening
parents:
diff changeset
def _qed_for_method(method, mol, props):
    """Return the QED value of mol for the command line --method choice."""
    if method == 'max':
        return weights_max(mol, True, props)
    if method == 'unweighted':
        return weights_none(mol, True, props)
    return weights_mean(mol, True, props)


def _write_row(outfile, props, calc_qed, *trailing):
    """Write one tab-separated result row: nine properties, QED, then the trailing text columns."""
    row_format = "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s"
    row_format += "\t%s" * (len(trailing) - 1) + "\n"
    outfile.write(row_format % (tuple(props[:9]) + (calc_qed,) + trailing))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', required=True, help='path to the input file name')
    parser.add_argument("-m", "--method", dest="method", choices=['max', 'mean', 'unweighted'],
                        default="mean",
                        help="Specify the method you want to use.")

    parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'),
                        default=sys.stdout, help="path to the result file, default it sdtout")

    parser.add_argument("--header", dest="header", action="store_true",
                        default=False,
                        help="Write header line.")

    args = parser.parse_args()

    # Elucidate filetype and open supplier.
    # Messages are written with sys.stdout.write so that the script parses
    # on Python 2 and 3; the emitted text is the same as the former print
    # statements produced.
    ifile = os.path.abspath(args.input)
    # isfile() is already False for a missing path, so no separate
    # existence check is required.
    if not os.path.isfile(ifile):
        sys.stdout.write("Error:  %s  is not a file or cannot be found.\n" % ifile)
        sys.exit(1)
    if not os.access(ifile, os.R_OK):
        sys.stdout.write("Error:  %s  is not readable.\n" % ifile)
        sys.exit(1)

    filetype = check_filetype(ifile)

    # We want to store the original SMILES in the output. So in case of a
    # SMILES file iterate over the file and convert each line separately.
    if filetype == 'sdf':
        supplier = Chem.SDMolSupplier(ifile)
        # Process file
        if args.header:
            args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\n")
        count = 0
        for mol in supplier:
            count += 1
            if mol is None:
                sys.stdout.write("Warning: skipping molecule  %s  and continuing with next.\n" % count)
                continue
            props = properties(mol)
            calc_qed = _qed_for_method(args.method, mol, props)
            _write_row(args.outfile, props, calc_qed, mol.GetProp("_Name"))
    elif filetype == 'smi':
        # Process file
        if args.header:
            args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\tSMILES\n")
        count = 0
        with open(ifile) as handle:
            for line in handle:
                tokens = line.strip().split('\t')
                smiles = tokens[0]
                # Only the first two columns are used; additional columns
                # no longer abort the run with a ValueError.
                title = tokens[1] if len(tokens) > 1 else ''
                mol = Chem.MolFromSmiles(smiles)
                count += 1
                if mol is None:
                    sys.stdout.write("Warning: skipping molecule  %s  and continuing with next.\n" % count)
                    continue
                props = properties(mol)
                calc_qed = _qed_for_method(args.method, mol, props)
                _write_row(args.outfile, props, calc_qed, title, smiles)
    else:
        # Report the detected type; the name formerly used here was never
        # defined, so this branch died with a NameError instead.
        sys.stdout.write("Error: unknown file extension:  %s\n" % filetype)
        sys.exit(1)

    sys.exit(0)
405 sys.exit(0)