0
|
1 package Fingerprints::TopologicalAtomPairsFingerprints;
|
|
2 #
|
|
3 # $RCSfile: TopologicalAtomPairsFingerprints.pm,v $
|
|
4 # $Date: 2015/02/28 20:48:54 $
|
|
5 # $Revision: 1.30 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Fingerprints::Fingerprints;
|
|
33 use TextUtil ();
|
|
34 use Molecule;
|
|
35 use AtomTypes::AtomicInvariantsAtomTypes;
|
|
36 use AtomTypes::DREIDINGAtomTypes;
|
|
37 use AtomTypes::EStateAtomTypes;
|
|
38 use AtomTypes::FunctionalClassAtomTypes;
|
|
39 use AtomTypes::MMFF94AtomTypes;
|
|
40 use AtomTypes::SLogPAtomTypes;
|
|
41 use AtomTypes::SYBYLAtomTypes;
|
|
42 use AtomTypes::TPSAAtomTypes;
|
|
43 use AtomTypes::UFFAtomTypes;
|
|
44
|
|
# Inheritance and export configuration for the package...
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = ('Fingerprints::Fingerprints', 'Exporter');

# Nothing is exported, either by default or on request...
@EXPORT = ();
@EXPORT_OK = ();

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Class name used in error and warning messages; assigned by _InitializeClass...
my($ClassName);
_InitializeClass();

# Use StringifyTopologicalAtomPairsFingerprints whenever an object is used as a string...
use overload '""' => 'StringifyTopologicalAtomPairsFingerprints';
|
|
59
|
|
# Class constructor: creates the base object, blesses it into this class, sets
# default data and applies the caller specified name and value pairs...
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Let the parent class create the underlying object...
  my $This = $Class->SUPER::new();

  # Invocant may be a class name or an existing object...
  my $TargetClass = ref($Class) || $Class;
  bless $This, $TargetClass;

  # Defaults first, followed by explicitly specified properties...
  $This->_InitializeTopologicalAtomPairsFingerprints();
  $This->_InitializeTopologicalAtomPairsFingerprintsProperties(%NamesAndValues);

  return $This;
}
|
|
73
|
|
# Initialize object data to default values...
#
sub _InitializeTopologicalAtomPairsFingerprints {
  my($This) = @_;
  my($Name, %ScalarDefaults);

  %ScalarDefaults = (
    # Type of fingerprint, vector and FingerprintsVector...
    'Type' => 'TopologicalAtomPairs',
    'VectorType' => 'FingerprintsVector',
    'FingerprintsVectorType' => 'NumericalValues',

    # Minimum and maximum bond distance between atom pairs...
    'MinDistance' => 1,
    'MaxDistance' => 10,

    # Atom identifier type to use for atom IDs in atom pairs. It stays empty until
    # set to one of: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
    # FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
    # TPSAAtomTypes, UFFAtomTypes
    'AtomIdentifierType' => '',
  );
  for $Name (keys %ScalarDefaults) {
    $This->{$Name} = $ScalarDefaults{$Name};
  }

  # Atom types assigned to each heavy atom...
  %{$This->{AssignedAtomTypes}} = ();

  # All atom pairs between minimum and maximum distance...
  @{$This->{AtomPairsIDs}} = ();
  %{$This->{AtomPairsCount}} = ();
}
|
|
109
|
|
# Initialize class level data...
#
sub _InitializeClass {
  # Remember the package name for use in error and warning messages...
  $ClassName = __PACKAGE__;
}
|
|
115
|
|
# Initialize object properties using name and value pairs specified during
# instantiation...
#
# Each name is mapped to its Set<Name> method. AtomIdentifierType is always applied
# first and the remaining names follow in sorted order: SetAtomIdentifierType resets
# AtomicInvariantsToUse and FunctionalClassesToUse to their default values, and
# SetFunctionalClassesToUse ignores its values until the identifier type is known.
# Walking the hash in its arbitrary internal order made explicitly specified values
# disappear depending on hash ordering.
#
# Croaks when Molecule or AtomIdentifierType is not specified; returns the object
# after initializing its fingerprints vector.
#
sub _InitializeTopologicalAtomPairsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;
  my($Name, $MethodName, @Names);

  # Set up a deterministic order with AtomIdentifierType at the front...
  @Names = sort grep { $_ ne 'AtomIdentifierType' } keys %NamesAndValues;
  if (exists $NamesAndValues{AtomIdentifierType}) {
    unshift @Names, 'AtomIdentifierType';
  }

  for $Name (@Names) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}
|
|
138
|
|
# Set minimum bond distance for atom pairs...
#
# The value must be a positive integer; anything else is a fatal error. Returns
# the object.
#
sub SetMinDistance {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer...";

  $This->{MinDistance} = $Value;

  return $This;
}
|
|
151
|
|
# Set maximum bond distance for atom pairs...
#
# The value must be a positive integer; anything else is a fatal error. Returns
# the object.
#
sub SetMaxDistance {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer...";

  $This->{MaxDistance} = $Value;

  return $This;
}
|
|
164
|
|
# Set atom identifier type...
#
# The specified type must match, without regard to case, one of the supported atom
# types classes and it may be set only once for an object. After storing the type,
# the type specific information is set up by _InitializeAtomIdentifierTypeInformation.
#
# Returns the object. Croaks for an unsupported value and for an attempt to change
# a type which is already set.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not valid. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  # The default value is an empty string: any true value means it was set earlier...
  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change initial atom identifier type: It's already set...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize atom identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
|
|
185
|
|
# Get fingerprints description...
#
# An explicitly set Description is returned as is. Otherwise, the description is
# assembled from fingerprint type, atom identifier type and the distance range.
#
sub GetDescription {
  my($This) = @_;

  # Explicitly set description takes precedence...
  return $This->{Description} if exists $This->{Description};

  # Generate fingerprints description...
  return join(':', $This->{Type}, $This->{AtomIdentifierType}, "MinDistance$This->{MinDistance}", "MaxDistance$This->{MaxDistance}");
}
|
|
200
|
|
# Generate topological atom pairs [ Ref 57, Ref 59, Ref 72 ] fingerprints...
#
# Methodology:
#   . Generate a distance matrix.
#   . Assign atom types to all the atoms.
#   . Using distance matrix and atom types, count occurrence of unique atom pairs
#     within specified distance range - It corresponds to the correlation-vector
#     for the atom pairs.
#
# Notes:
#   . Hydrogen atoms are ignored during the fingerprint generation.
#   . Croaks when MinDistance is greater than MaxDistance.
#   . A failure to generate distance matrix or to assign atom types is reported
#     as a warning and the object is returned without fingerprints. The cached
#     molecule data is cleared on these paths as well, so that a failed run
#     doesn't leave the atoms cached in the object.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinDistance} > $This->{MaxDistance}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Generate distance matrix...
  if (!$This->_SetupDistanceMatrix()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
    $This->_ClearMoleculeDataCache();
    return $This;
  }

  # Assign atom types to all heavy atoms...
  if (!$This->_AssignAtomTypes()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
    $This->_ClearMoleculeDataCache();
    return $This;
  }

  # Initialize values of topological atom pairs...
  $This->_InitializeToplogicalAtomPairs();

  # Count atom pairs...
  $This->_GenerateAndCountAtomPairs();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
|
|
249
|
|
# Retrieve distance matrix from the molecule and store it in the object...
#
# Returns the object when a matrix is available and undef otherwise.
#
sub _SetupDistanceMatrix {
  my($This) = @_;
  my($Molecule, $Matrix);

  $Molecule = $This->GetMolecule();
  $Matrix = $Molecule->GetDistanceMatrix();

  # The value is stored even when it's false...
  $This->{DistanceMatrix} = $Matrix;

  return $Matrix ? $This : undef;
}
|
|
263
|
|
# Assign atom types, using the atom types class matching AtomIdentifierType, and
# collect the types assigned to heavy atoms in AssignedAtomTypes...
#
# Returns the object on success and undef when the atom types class reports an
# unsuccessful assignment. Croaks for an unknown atom identifier type.
#
sub _AssignAtomTypes {
  my($This) = @_;
  my($AtomTypesClass, $AtomTypes, $TypeName, $MatchedTypeName, $IgnoreHydrogens, $Atom, $AtomID, @SupportedTypeNames, @Parameters);

  %{$This->{AssignedAtomTypes}} = ();
  $IgnoreHydrogens = 1;

  @SupportedTypeNames = qw(AtomicInvariantsAtomTypes DREIDINGAtomTypes EStateAtomTypes FunctionalClassAtomTypes MMFF94AtomTypes SLogPAtomTypes SYBYLAtomTypes TPSAAtomTypes UFFAtomTypes);

  # Find the supported type name matching the specified identifier type...
  $MatchedTypeName = undef;
  TYPENAME: for $TypeName (@SupportedTypeNames) {
    if ($This->{AtomIdentifierType} =~ /^$TypeName$/i) {
      $MatchedTypeName = $TypeName;
      last TYPENAME;
    }
  }
  if (!defined $MatchedTypeName) {
    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  # Set up constructor parameters: TPSA atom types take their own options and the
  # other classes are asked to ignore hydrogens...
  if ($MatchedTypeName eq 'TPSAAtomTypes') {
    @Parameters = ('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
  }
  else {
    @Parameters = ('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
    if ($MatchedTypeName eq 'AtomicInvariantsAtomTypes') {
      push @Parameters, ('AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
    }
    elsif ($MatchedTypeName eq 'FunctionalClassAtomTypes') {
      push @Parameters, ('FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
    }
  }

  $AtomTypesClass = "AtomTypes::${MatchedTypeName}";
  $AtomTypes = $AtomTypesClass->new(@Parameters);

  # Assign atom types...
  $AtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$AtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return undef;
  }

  # Collect assigned atom types for heavy atoms...
  for $Atom (@{$This->{Atoms}}) {
    next if $Atom->IsHydrogen();

    $AtomID = $Atom->GetID();
    $This->{AssignedAtomTypes}{$AtomID} = $AtomTypes->GetAtomType($Atom);
  }

  return $This;
}
|
|
342
|
|
# Reset atom pair IDs and set up an empty count table for each distance in the
# specified distance range...
#
sub _InitializeToplogicalAtomPairs {
  my($This) = @_;

  @{$This->{AtomPairsIDs}} = ();

  # One empty hash for each distance from MinDistance to MaxDistance...
  %{$This->{AtomPairsCount}} = map { ($_ => {}) } ($This->{MinDistance} .. $This->{MaxDistance});

  return $This;
}
|
|
358
|
|
# Count atom pairs at each distance between minimum and maximum distance using
# distance matrix and atom types assigned to each heavy atom...
#
# Notes:
#   . The row and column indices of distance matrix correspond to atom indices.
#   . Distance value of BigNumber implies the atom is not connected to any other atom.
#   . Due to symmetric nature of distance matrix, only the upper triangular matrix
#     is processed during identification and count of atom pairs.
#   . Atoms without an assigned atom type are skipped.
#   . The two atom types are passed to _SetAtomPairsCount in string sorted order.
#
sub _GenerateAndCountAtomPairs {
  my($This) = @_;
  my($Matrix, $RowCount, $ColCount, $Row, $Col, $RowAtomID, $ColAtomID, $RowAtomType, $ColAtomType, $PairDistance, $SkipIndexCheck, @OrderedTypes);

  $Matrix = $This->{DistanceMatrix};
  ($RowCount, $ColCount) = $Matrix->GetSize();
  $SkipIndexCheck = 0;

  ROW: for $Row (0 .. ($RowCount - 1)) {
    $RowAtomID = $This->{AtomIndexToID}{$Row};
    next ROW unless exists $This->{AssignedAtomTypes}{$RowAtomID};
    $RowAtomType = $This->{AssignedAtomTypes}{$RowAtomID};

    COL: for $Col (($Row + 1) .. ($ColCount - 1)) {
      $ColAtomID = $This->{AtomIndexToID}{$Col};
      next COL unless exists $This->{AssignedAtomTypes}{$ColAtomID};

      $PairDistance = $Matrix->GetValue($Row, $Col, $SkipIndexCheck);
      next COL if $PairDistance < $This->{MinDistance};
      next COL if $PairDistance > $This->{MaxDistance};

      $ColAtomType = $This->{AssignedAtomTypes}{$ColAtomID};

      @OrderedTypes = ($RowAtomType le $ColAtomType) ? ($RowAtomType, $ColAtomType) : ($ColAtomType, $RowAtomType);
      $This->_SetAtomPairsCount($PairDistance, @OrderedTypes);
    }
  }
  return $This;
}
|
|
406
|
|
# Increment atom pairs count for a specific atom type pair at a specific distance...
#
# A missing entry, along with any missing intermediate table, is created on first
# use and starts at a count of 1.
#
sub _SetAtomPairsCount {
  my($This, $Distance, $AtomType1, $AtomType2) = @_;

  $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} += 1;

  return $This;
}
|
|
427
|
|
# Set final fingerprints vector...
#
# Atom pair IDs, in the format <AtomType1>-D<Distance>-<AtomType2>, and their counts
# are collected by increasing distance and then by string sorted atom types before
# being added to the fingerprints vector.
#
sub _SetFinalFingerprints {
  my($This) = @_;
  my($Distance, $FirstType, $SecondType, @Counts);

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  @Counts = ();
  @{$This->{AtomPairsIDs}} = ();

  for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
    for $FirstType (sort keys %{$This->{AtomPairsCount}{$Distance}}) {
      for $SecondType (sort keys %{$This->{AtomPairsCount}{$Distance}{$FirstType}}) {
        push @{$This->{AtomPairsIDs}}, join('-', $FirstType, "D${Distance}", $SecondType);
        push @Counts, $This->{AtomPairsCount}{$Distance}{$FirstType}{$SecondType};
      }
    }
  }

  # Add AtomPairsIDs and values to fingerprint vector...
  $This->{FingerprintsVector}->AddValueIDs($This->{AtomPairsIDs});
  $This->{FingerprintsVector}->AddValues(\@Counts);

  return $This;
}
|
|
456
|
|
# Get atom pair IDs corresponding to atom pairs count values in fingerprint vector...
#
# The IDs are returned as a list in list context and as a reference to the array
# stored in the object otherwise. The list of IDs differs between molecules; it is
# generated during finalization of fingerprints so that it matches the count values
# in the fingerprint vector.
#
sub GetAtomPairIDs {
  my($This) = @_;

  if (wantarray) {
    return @{$This->{AtomPairsIDs}};
  }

  return \@{$This->{AtomPairsIDs}};
}
|
|
469
|
|
# Cache atoms of the molecule along with a map of atom indices to atom IDs...
#
# All atoms including hydrogens are retrieved to correctly map atom indices to
# atom IDs for usage of distance matrix. The hydrogen atoms are ignored during
# processing.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;
  my($Index, @AtomIDs);

  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

  # Get all atom IDs...
  @AtomIDs = map { $_->GetID() } @{$This->{Atoms}};

  # Set AtomIndex to AtomID hash...
  %{$This->{AtomIndexToID}} = ();
  for $Index (0 .. $#AtomIDs) {
    $This->{AtomIndexToID}{$Index} = $AtomIDs[$Index];
  }

  return $This;
}
|
|
491
|
|
# Clear cached molecule data by emptying the list of cached atoms...
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # Truncate the array in place...
  $#{$This->{Atoms}} = -1;

  return $This;
}
|
|
501
|
|
# Set atomic invariants to use for atom identifiers...
#
# The values may be specified as a list or as a reference to an array. Without any
# values, a warning is issued and nothing is changed. All values are validated
# before any of them is stored; an unsupported value is a fatal error.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($Invariant, @Candidates);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # An array reference as the first value supplies the whole list...
  @Candidates = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  for $Invariant (@Candidates) {
    AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($Invariant)
      or croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $Invariant, is not supported...\n ";
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @Candidates;

  return $This;
}
|
|
541
|
|
# Set functional classes to use for atom identifiers...
#
# The values may be specified as a list or as a reference to an array. A warning is
# issued and nothing is changed when no values are specified or when the atom
# identifier type is not FunctionalClassAtomTypes. All values are validated before
# any of them is stored; an unsupported value is a fatal error.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;
  my($Candidate, @Candidates);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # An array reference as the first value supplies the whole list...
  @Candidates = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  for $Candidate (@Candidates) {
    AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($Candidate)
      or croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $Candidate, is not supported...\n ";
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @Candidates;

  return $This;
}
|
|
585
|
|
# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Only AtomicInvariantsAtomTypes and FunctionalClassAtomTypes need additional
# information to be set up. Any other value is a fatal error.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;
  my($IdentifierType);

  $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
    return $This;
  }

  croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
}
|
|
611
|
|
612 # Initialize atomic invariants atom types to use for generating atom identifiers...
|
|
613 #
|
|
614 # Let:
|
|
615 # AS = Atom symbol corresponding to element symbol
|
|
616 #
|
|
617 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
618 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
619 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
620 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
621 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
622 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
|
|
623 # H<n> = Number of implicit and explicit hydrogens for atom
|
|
624 # Ar = Aromatic annotation indicating whether atom is aromatic
|
|
625 # RA = Ring atom annotation indicating whether atom is in a ring
|
|
626 # FC<+n/-n> = Formal charge assigned to atom
|
|
627 # MN<n> = Mass number indicating isotope other than most abundant isotope
|
|
628 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
|
|
629 #
|
|
630 # AtomTypeIDx = Atomic invariants atom type for atom x
|
|
631 # AtomTypeIDy = Atomic invariants atom type for atom y
|
|
632 # Dn = Topological distance between atom x and y
|
|
633 #
|
|
634 # Then:
|
|
635 #
|
|
636 # Atom pair AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
|
|
637 #
|
|
638 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
|
|
639 #
|
|
640 # AtomPairID corresponds to:
|
|
641 #
|
|
642 # AtomTypeIDx-D<n>-AtomTypeIDy
|
|
643 #
|
|
644 # Except for AS which is a required atomic invariant in atom pair AtomIDs, all other atomic invariants are
|
|
645 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
|
|
646 # AtomID specification doesn't include atomic invariants with zero or undefined values.
|
|
647 #
|
|
648 # Examples of atom pair AtomIDs:
|
|
649 #
|
|
650 # O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
|
|
651 # O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
|
|
652 # O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
|
|
653 # O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
|
|
654 #
|
|
655 # C.X2.BO3.H1.Ar - Aromatic carbon
|
|
656 #
|
|
657 # Examples of AtomPairIDs:
|
|
658 #
|
|
659 # C.X2.BO2.H3-D1-O.X1.BO1 - Carbon with two heavy atom neighbors attached to oxygen at bond distance 1(methanol)
|
|
660 #
|
|
661 # C.X2.BO3.H1.Ar-D3-C.X2.BO3.H1.Ar - Two aromatic carbons at bond distance 3 where each carbon has
|
|
662 # two heavy atom neighbors and bond order of 3 (benzene)
|
|
663 #
|
|
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;
  my(@DefaultAtomicInvariants);

  # Default atomic invariants to use for generating atom IDs: AS, X, BO, H, FC
  #
  @DefaultAtomicInvariants = qw(AS X BO H FC);

  # Replace contents of the list, discarding any earlier values...
  @{$This->{AtomicInvariantsToUse}} = @DefaultAtomicInvariants;

  return $This;
}
|
|
674
|
|
675 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
|
|
676 # class, to use for generating atom identifiers...
|
|
677 #
|
|
678 # Let:
|
|
679 # HBD: HydrogenBondDonor
|
|
680 # HBA: HydrogenBondAcceptor
|
|
681 # PI : PositivelyIonizable
|
|
682 # NI : NegativelyIonizable
|
|
683 # Ar : Aromatic
|
|
684 # Hal : Halogen
|
|
685 # H : Hydrophobic
|
|
686 # RA : RingAtom
|
|
687 # CA : ChainAtom
|
|
688 #
|
|
689 # Then:
|
|
690 #
|
|
691 # Functional class atom type specification for an atom corresponds to:
|
|
692 #
|
|
693 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
|
|
694 #
|
|
695 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
|
|
696 #
|
|
697 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
|
|
698 #
|
|
699 # HydrogenBondDonor: NH, NH2, OH
|
|
700 # HydrogenBondAcceptor: N[!H], O
|
|
701 # PositivelyIonizable: +, NH2
|
|
702 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
|
|
703 #
|
|
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;
  my(@DefaultFunctionalClasses);

  # Default functional class atom types to use for generating atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  @DefaultFunctionalClasses = qw(HBD HBA PI NI Ar Hal);

  # Replace contents of the list, discarding any earlier values...
  @{$This->{FunctionalClassesToUse}} = @DefaultFunctionalClasses;

  return $This;
}
|
|
715
|
|
# Return a string containing data for TopologicalAtomPairsFingerprints object...
#
# The string is a "; " delimited list of fields: fingerprint type, atom identifier
# type, distance range, atom identifier type specific information for
# AtomicInvariantsAtomTypes and FunctionalClassAtomTypes, number of atom pairs
# and the fingerprints vector.
#
sub StringifyTopologicalAtomPairsFingerprints {
  my($This) = @_;
  my(@Fields);

  # Type of fingerprint along with min and max distance...
  @Fields = (
    "Fingerprint type: $This->{Type}",
    "AtomIdentifierType: $This->{AtomIdentifierType}",
    "MinDistance: $This->{MinDistance}",
    "MaxDistance: $This->{MaxDistance}",
  );

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my(@InvariantsOrder, @InvariantsDescriptions, %AvailableInvariants);

    @InvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    @InvariantsDescriptions = map { "$_: $AvailableInvariants{$_}" } @InvariantsOrder;

    push @Fields, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Fields, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@InvariantsOrder, ", ", 0) . ">";
    push @Fields, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@InvariantsDescriptions, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my(@ClassesOrder, @ClassesDescriptions, %AvailableClasses);

    @ClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    @ClassesDescriptions = map { "$_: $AvailableClasses{$_}" } @ClassesOrder;

    push @Fields, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Fields, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@ClassesOrder, ", ", 0) . ">";
    push @Fields, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@ClassesDescriptions, ", ", 0) . ">";
  }

  # Total number of atom pairs...
  push @Fields, "NumOfAtomPairs: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  push @Fields, "FingerprintsVector: < $This->{FingerprintsVector} >";

  return join('; ', @Fields);
}
|
|
765
|
|
766 1;
|
|
767
|
|
768 __END__
|
|
769
|
|
770 =head1 NAME
|
|
771
|
|
772 TopologicalAtomPairsFingerprints
|
|
773
|
|
774 =head1 SYNOPSIS
|
|
775
|
|
776 use Fingerprints::TopologicalAtomPairsFingerprints;
|
|
777
|
|
778 use Fingerprints::TopologicalAtomPairsFingerprints qw(:all);
|
|
779
|
|
780 =head1 DESCRIPTION
|
|
781
|
|
782 B<TopologicalAtomPairsFingerprints> [ Ref 57, Ref 59, Ref 72 ] class provides the following methods:
|
|
783
|
|
784 new, GenerateFingerprints, GetAtomPairIDs, GetDescription, SetAtomIdentifierType,
|
|
785 SetAtomicInvariantsToUse, SetFunctionalClassesToUse, SetMaxDistance,
|
|
786 SetMinDistance, StringifyTopologicalAtomPairsFingerprints
|
|
787
|
|
788 B<TopologicalAtomPairsFingerprints> is derived from B<Fingerprints> class which in turn
|
|
789 is derived from B<ObjectProperty> base class that provides methods not explicitly defined
|
|
790 in B<TopologicalAtomPairsFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's
|
|
791 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property:
|
|
792
|
|
793 Set<PropertyName>(<PropertyValue>);
|
|
794 $PropertyValue = Get<PropertyName>();
|
|
795 Delete<PropertyName>();
|
|
796
|
|
797 The current release of MayaChemTools supports generation of B<TopologicalAtomPairsFingerprints>
|
|
798 corresponding to the following B<AtomIdentifierTypes>:
|
|
799
|
|
800 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
|
|
801 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
|
|
802 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
|
|
803
|
|
804 Based on the values specified for B<AtomIdentifierType> along with other specified
|
|
805 parameters such as B<AtomicInvariantsToUse> and B<FunctionalClassesToUse>, initial
|
|
806 atom types are assigned to all non-hydrogen atoms in a molecule. Using the distance
|
|
807 matrix for the molecule and initial atom types assigned to non-hydrogen atoms, all unique atom
|
|
808 pairs within B<MinDistance> and B<MaxDistance> are identified and counted. An atom pair
|
|
809 identifier is generated for each unique atom pair; the format of atom pair identifier is:
|
|
810
|
|
811 <AtomType1>-D<n>-<AtomType2>
|
|
812
|
|
813 AtomType1, AtomType2: Atom types assigned to atom1 and atom2
|
|
814 D: Distance between atom1 and atom2
|
|
815
|
|
816 where AtomType1 <= AtomType2
|
|
817
|
|
818 The atom pair identifiers for all unique atom pairs corresponding to non-hydrogen atoms constitute
|
|
819 topological atom pairs fingerprints of the molecule.
|
|
820
|
|
821 The current release of MayaChemTools generates the following types of topological atom pairs
|
|
822 fingerprints vector strings:
|
|
823
|
|
824 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
|
|
825 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1
|
|
826 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3.
|
|
827 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...;
|
|
828 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1
|
|
829 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1...
|
|
830
|
|
831 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
|
|
832 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesPairsString;C.X
|
|
833 1.BO1.H3-D1-C.X3.BO3.H1 2 C.X2.BO2.H2-D1-C.X2.BO2.H2 1 C.X2.BO2.H2-D1-
|
|
834 C.X3.BO3.H1 4 C.X2.BO2.H2-D1-C.X3.BO4 1 C.X2.BO2.H2-D1-N.X3.BO3 1 C.X2
|
|
835 .BO3.H1-D1-C.X2.BO3.H1 10 C.X2.BO3.H1-D1-C.X3.BO4 8 C.X3.BO3.H1-D1-C.X
|
|
836 3.BO4 1 C.X3.BO3.H1-D1-O.X1.BO1.H1 2 C.X3.BO4-D1-C.X3.BO4 6 C.X3.BO...
|
|
837
|
|
838 FingerprintsVector;TopologicalAtomPairs:DREIDINGAtomTypes:MinDistance1
|
|
839 :MaxDistance10;157;NumericalValues;IDsAndValuesString;C_2-D1-C_3 C_2-D
|
|
840 1-C_R C_2-D1-N_3 C_2-D1-O_2 C_2-D1-O_3 C_3-D1-C_3 C_3-D1-C_R C_3-D1-N_
|
|
841 R C_3-D1-O_3 C_R-D1-C_R C_R-D1-F_ C_R-D1-N_3 C_R-D1-N_R C_2-D2-C_3 C_2
|
|
842 1 1 1 2 1 7 1 1 2 23 1 1 2 1 3 5 5 2 1 5 28 2 3 3 1 1 1 2 4 1 1 4 9 3
|
|
843 1 4 24 2 4 3 3 4 5 5 14 1 1 2 3 22 1 3 4 4 1 1 1 1 2 2 5 1 4 21 3 1...
|
|
844
|
|
845 FingerprintsVector;TopologicalAtomPairs:EStateAtomTypes:MinDistance1:M
|
|
846 axDistance10;251;NumericalValues;IDsAndValuesString;aaCH-D1-aaCH aaCH-
|
|
847 D1-aasC aasC-D1-aasC aasC-D1-aasN aasC-D1-dssC aasC-D1-sF aasC-D1-ssNH
|
|
848 aasC-D1-sssCH aasN-D1-ssCH2 dO-D1-dssC dssC-D1-sOH dssC-D1-ssCH2 d...;
|
|
849 10 8 5 2 1 1 1 1 1 2 1 1 1 2 2 1 4 10 12 2 2 6 3 1 3 2 2 1 1 1 1 1 1 1
|
|
850 1 1 5 2 1 1 6 12 2 2 2 2 6 1 3 2 2 5 2 2 1 2 1 1 1 1 1 1 3 1 3 19 2...
|
|
851
|
|
852 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi
|
|
853 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar
|
|
854 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H
|
|
855 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...;
|
|
856 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4
|
|
857 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ...
|
|
858
|
|
859 FingerprintsVector;TopologicalAtomPairs:MMFF94AtomTypes:MinDistance1:M
|
|
860 axDistance10;227;NumericalValues;IDsAndValuesPairsString;C5A-D1-C5B 2
|
|
861 C5A-D1-CB 1 C5A-D1-CR 1 C5A-D1-N5 2 C5B-D1-C5B 1 C5B-D1-C=ON 1 C5B-D1-
|
|
862 CB 1 C=ON-D1-NC=O 1 C=ON-D1-O=CN 1 CB-D1-CB 18 CB-D1-F 1 CB-D1-NC=O 1
|
|
863 COO-D1-CR 1 COO-D1-O=CO 1 COO-D1-OC=O 1 CR-D1-CR 7 CR-D1-N5 1 CR-D1-OR
|
|
864 2 C5A-D2-C5A 1 C5A-D2-C5B 2 C5A-D2-C=ON 1 C5A-D2-CB 3 C5A-D2-CR 4 ...
|
|
865
|
|
866 FingerprintsVector;TopologicalAtomPairs:SLogPAtomTypes:MinDistance1:Ma
|
|
867 xDistance10;329;NumericalValues;IDsAndValuesPairsString;C1-D1-C10 1 C1
|
|
868 -D1-C11 2 C1-D1-C5 1 C1-D1-CS 4 C10-D1-N11 1 C11-D1-C21 1 C14-D1-C18 2
|
|
869 C14-D1-F 1 C18-D1-C18 10 C18-D1-C20 4 C18-D1-C22 2 C20-D1-C20 3 C20-D
|
|
870 1-C21 1 C20-D1-N11 1 C21-D1-C21 1 C21-D1-C5 1 C21-D1-N11 1 C22-D1-N4 1
|
|
871 C5-D1-N4 1 C5-D1-O10 1 C5-D1-O2 1 C5-D1-O9 1 CS-D1-O2 2 C1-D2-C1 3...
|
|
872
|
|
873 FingerprintsVector;TopologicalAtomPairs:SYBYLAtomTypes:MinDistance1:Ma
|
|
874 xDistance10;159;NumericalValues;IDsAndValuesPairsString;C.2-D1-C.3 1 C
|
|
875 .2-D1-C.ar 1 C.2-D1-N.am 1 C.2-D1-O.2 1 C.2-D1-O.co2 2 C.3-D1-C.3 7 C.
|
|
876 3-D1-C.ar 1 C.3-D1-N.ar 1 C.3-D1-O.3 2 C.ar-D1-C.ar 23 C.ar-D1-F 1 C.a
|
|
877 r-D1-N.am 1 C.ar-D1-N.ar 2 C.2-D2-C.3 1 C.2-D2-C.ar 3 C.3-D2-C.3 5 C.3
|
|
878 -D2-C.ar 5 C.3-D2-N.ar 2 C.3-D2-O.3 4 C.3-D2-O.co2 2 C.ar-D2-C.ar 2...
|
|
879
|
|
880 FingerprintsVector;TopologicalAtomPairs:TPSAAtomTypes:MinDistance1:Max
|
|
881 Distance10;64;NumericalValues;IDsAndValuesPairsString;N21-D1-None 3 N7
|
|
882 -D1-None 2 None-D1-None 34 None-D1-O3 2 None-D1-O4 3 N21-D2-None 5 N7-
|
|
883 D2-None 3 N7-D2-O3 1 None-D2-None 44 None-D2-O3 2 None-D2-O4 5 O3-D2-O
|
|
884 4 1 N21-D3-None 7 N7-D3-None 4 None-D3-None 45 None-D3-O3 4 None-D3-O4
|
|
885 5 N21-D4-N7 1 N21-D4-None 5 N21-D4-O3 1 N21-D4-O4 1 N7-D4-None 4 N...
|
|
886
|
|
887 FingerprintsVector;TopologicalAtomPairs:UFFAtomTypes:MinDistance1:MaxD
|
|
888 istance10;157;NumericalValues;IDsAndValuesPairsString;C_2-D1-C_3 1 C_2
|
|
889 -D1-C_R 1 C_2-D1-N_3 1 C_2-D1-O_2 2 C_2-D1-O_3 1 C_3-D1-C_3 7 C_3-D1-C
|
|
890 _R 1 C_3-D1-N_R 1 C_3-D1-O_3 2 C_R-D1-C_R 23 C_R-D1-F_ 1 C_R-D1-N_3 1
|
|
891 C_R-D1-N_R 2 C_2-D2-C_3 1 C_2-D2-C_R 3 C_3-D2-C_3 5 C_3-D2-C_R 5 C_3-D
|
|
892 2-N_R 2 C_3-D2-O_2 1 C_3-D2-O_3 5 C_R-D2-C_R 28 C_R-D2-F_ 2 C_R-D2-...
|
|
893
|
|
894 =head2 METHODS
|
|
895
|
|
896 =over 4
|
|
897
|
|
898 =item B<new>
|
|
899
|
|
900 $NewTopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
|
|
901 %NamesAndValues);
|
|
902
|
|
903 Using specified I<TopologicalAtomPairsFingerprints> property names and values hash, B<new>
|
|
904 method creates a new object and returns a reference to newly created B<TopologicalAtomPairsFingerprints>
|
|
905 object. By default, the following properties are initialized:
|
|
906
|
|
907 Molecule = ''
|
|
908 Type = 'TopologicalAtomPairs'
|
|
909 MinDistance = 1
|
|
910 MaxDistance = 10
|
|
911 AtomIdentifierType = ''
|
|
912 AtomicInvariantsToUse = ['AS', 'X', 'BO', 'H', 'FC']
|
|
913 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']
|
|
914
|
|
915 Examples:
|
|
916
|
|
917 $TopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
|
|
918 'Molecule' => $Molecule,
|
|
919 'AtomIdentifierType' =>
|
|
920 'AtomicInvariantsAtomTypes');
|
|
921
|
|
922 $TopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
|
|
923 'Molecule' => $Molecule,
|
|
924 'MinDistance' => 1,
|
|
925 'MaxDistance' => 10,
|
|
926 'AtomIdentifierType' =>
|
|
927 'AtomicInvariantsAtomTypes',
|
|
928 'AtomicInvariantsToUse' =>
|
|
929 ['AS', 'X', 'BO', 'H', 'FC'] );
|
|
930
|
|
931 $TopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
|
|
932 'Molecule' => $Molecule,
|
|
933 'AtomIdentifierType' =>
|
|
934 'EStateAtomTypes');
|
|
935
|
|
936 $TopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
|
|
937 'Molecule' => $Molecule,
|
|
938 'AtomIdentifierType' =>
|
|
939 'SLogPAtomTypes');
|
|
940
|
|
941 $TopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
|
|
942 'Molecule' => $Molecule,
|
|
943 'MinDistance' => 1,
|
|
944 'MaxDistance' => 10,
|
|
945 'AtomIdentifierType' =>
|
|
946 'FunctionalClassAtomTypes',
|
|
947 'FunctionalClassesToUse' =>
|
|
948 ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']);
|
|
949
|
|
950
|
|
951 $TopologicalAtomPairsFingerprints->GenerateFingerprints();
|
|
952 print "$TopologicalAtomPairsFingerprints\n";
|
|
953
|
|
954 =item B<GetDescription>
|
|
955
|
|
956 $Description = $TopologicalAtomPairsFingerprints->GetDescription();
|
|
957
|
|
958 Returns a string containing description of topological atom pairs fingerprints.
|
|
959
|
|
960 =item B<GenerateFingerprints>
|
|
961
|
|
962 $TopologicalAtomPairsFingerprints->GenerateFingerprints();
|
|
963
|
|
964 Generates topological atom pairs fingerprints and returns I<TopologicalAtomPairsFingerprints>.
|
|
965
|
|
966 =item B<GetAtomPairIDs>
|
|
967
|
|
968 $AtomPairIDsRef = $TopologicalAtomPairsFingerprints->GetAtomPairIDs();
|
|
969 @AtomPairIDs = $TopologicalAtomPairsFingerprints->GetAtomPairIDs();
|
|
970
|
|
971 Returns atom pair IDs corresponding to atom pairs count values in topological atom pairs
|
|
972 fingerprints vector as an array or reference to an array.
|
|
973
|
|
974 =item B<SetAtomIdentifierType>
|
|
975
|
|
976 $TopologicalAtomPairsFingerprints->SetAtomIdentifierType($IdentifierType);
|
|
977
|
|
978 Sets atom I<IdentifierType> to use during atom pairs fingerprints generation and
|
|
979 returns I<TopologicalAtomPairsFingerprints>.
|
|
980
|
|
981 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
|
|
982 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
|
|
983 TPSAAtomTypes, UFFAtomTypes>.
|
|
984
|
|
985 =item B<SetAtomicInvariantsToUse>
|
|
986
|
|
987 $TopologicalAtomPairsFingerprints->SetAtomicInvariantsToUse($ValuesRef);
|
|
988 $TopologicalAtomPairsFingerprints->SetAtomicInvariantsToUse(@Values);
|
|
989
|
|
990 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType>
|
|
991 for topological atom pairs fingerprints generation and returns I<TopologicalAtomPairsFingerprints>.
|
|
992
|
|
993 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB,
|
|
994 H, Ar, RA, FC, MN, SM>. Default value: I<AS,X,BO,H,FC>.
|
|
995
|
|
996 The atomic invariants abbreviations correspond to:
|
|
997
|
|
998 AS = Atom symbol corresponding to element symbol
|
|
999
|
|
1000 X<n> = Number of non-hydrogen atom neighbors or heavy atoms
|
|
1001 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms
|
|
1002 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms
|
|
1003 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1004 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1005 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms
|
|
1006 H<n> = Number of implicit and explicit hydrogens for atom
|
|
1007 Ar = Aromatic annotation indicating whether atom is aromatic
|
|
1008 RA = Ring atom annotation indicating whether atom is in a ring
|
|
1009 FC<+n/-n> = Formal charge assigned to atom
|
|
1010 MN<n> = Mass number indicating isotope other than most abundant isotope
|
|
1011 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or
|
|
1012 3 (triplet)
|
|
1013
|
|
1014 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
|
|
1015
|
|
1016 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
|
|
1017
|
|
1018 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
|
|
1019 optional. Atom type specification doesn't include atomic invariants with zero or undefined values.
|
|
1020
|
|
1021 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
|
|
1022 are also allowed:
|
|
1023
|
|
1024 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
|
|
1025 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
|
|
1026 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
|
|
1027 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
|
|
1028 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
|
|
1029 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
|
|
1030 H : NumOfImplicitAndExplicitHydrogens
|
|
1031 Ar : Aromatic
|
|
1032 RA : RingAtom
|
|
1033 FC : FormalCharge
|
|
1034 MN : MassNumber
|
|
1035 SM : SpinMultiplicity
|
|
1036
|
|
1037 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant
|
|
1038 atom types.
|
|
1039
|
|
1040 =item B<SetFunctionalClassesToUse>
|
|
1041
|
|
1042 $TopologicalAtomPairsFingerprints->SetFunctionalClassesToUse($ValuesRef);
|
|
1043 $TopologicalAtomPairsFingerprints->SetFunctionalClassesToUse(@Values);
|
|
1044
|
|
1045 Sets functional classes to use during I<FunctionalClassAtomTypes> value of I<AtomIdentifierType>
|
|
1046 for topological atom pairs fingerprints generation and returns I<TopologicalAtomPairsFingerprints>.
|
|
1047
|
|
1048 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
|
|
1049 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>.
|
|
1050
|
|
1051 The functional class abbreviations correspond to:
|
|
1052
|
|
1053 HBD: HydrogenBondDonor
|
|
1054 HBA: HydrogenBondAcceptor
|
|
1055 PI : PositivelyIonizable
|
|
1056 NI : NegativelyIonizable
|
|
1057 Ar : Aromatic
|
|
1058 Hal : Halogen
|
|
1059 H : Hydrophobic
|
|
1060 RA : RingAtom
|
|
1061 CA : ChainAtom
|
|
1062
|
|
1063 Functional class atom type specification for an atom corresponds to:
|
|
1064
|
|
1065 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None
|
|
1066
|
|
1067 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom
|
|
1068 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]:
|
|
1069
|
|
1070 HydrogenBondDonor: NH, NH2, OH
|
|
1071 HydrogenBondAcceptor: N[!H], O
|
|
1072 PositivelyIonizable: +, NH2
|
|
1073 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
|
|
1074
|
|
1075 =item B<SetMaxDistance>
|
|
1076
|
|
1077 $TopologicalAtomPairsFingerprints->SetMaxDistance($Distance);
|
|
1078
|
|
1079 Sets maximum distance to use during topological atom pairs fingerprints generation and
|
|
1080 returns I<TopologicalAtomPairsFingerprints>.
|
|
1081
|
|
1082 =item B<SetMinDistance>
|
|
1083
|
|
1084 $TopologicalAtomPairsFingerprints->SetMinDistance($Distance);
|
|
1085
|
|
1086 Sets minimum distance to use during topological atom pairs fingerprints generation and
|
|
1087 returns I<TopologicalAtomPairsFingerprints>.
|
|
1088
|
|
1089 =item B<StringifyTopologicalAtomPairsFingerprints>
|
|
1090
|
|
1091 $String = $TopologicalAtomPairsFingerprints->
|
|
1092 StringifyTopologicalAtomPairsFingerprints();
|
|
1093
|
|
1094 Returns a string containing information about I<TopologicalAtomPairsFingerprints> object.
|
|
1095
|
|
1096 =back
|
|
1097
|
|
1098 =head1 AUTHOR
|
|
1099
|
|
1100 Manish Sud <msud@san.rr.com>
|
|
1101
|
|
1102 =head1 SEE ALSO
|
|
1103
|
|
1104 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm,
|
|
1105 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm,
|
|
1106 MACCSKeys.pm, PathLengthFingerprints.pm, TopologicalAtomTripletsFingerprints.pm,
|
|
1107 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm,
|
|
1108 TopologicalPharmacophoreAtomTripletsFingerprints.pm
|
|
1109
|
|
1110 =head1 COPYRIGHT
|
|
1111
|
|
1112 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
1113
|
|
1114 This file is part of MayaChemTools.
|
|
1115
|
|
1116 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
1117 the terms of the GNU Lesser General Public License as published by the Free
|
|
1118 Software Foundation; either version 3 of the License, or (at your option)
|
|
1119 any later version.
|
|
1120
|
|
1121 =cut
|