comparison mayachemtools/lib/Fingerprints/TopologicalAtomPairsFingerprints.pm @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73ae111cf86f
1 package Fingerprints::TopologicalAtomPairsFingerprints;
2 #
3 # $RCSfile: TopologicalAtomPairsFingerprints.pm,v $
4 # $Date: 2015/02/28 20:48:54 $
5 # $Revision: 1.30 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Fingerprints::Fingerprints;
33 use TextUtil ();
34 use Molecule;
35 use AtomTypes::AtomicInvariantsAtomTypes;
36 use AtomTypes::DREIDINGAtomTypes;
37 use AtomTypes::EStateAtomTypes;
38 use AtomTypes::FunctionalClassAtomTypes;
39 use AtomTypes::MMFF94AtomTypes;
40 use AtomTypes::SLogPAtomTypes;
41 use AtomTypes::SYBYLAtomTypes;
42 use AtomTypes::TPSAAtomTypes;
43 use AtomTypes::UFFAtomTypes;
44
# Package globals consulted by Exporter and by Perl's method resolution...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Inherit from Fingerprints::Fingerprints; no names are exported, neither by
# default nor on request...
@ISA = ('Fingerprints::Fingerprints', 'Exporter');
@EXPORT = ();
@EXPORT_OK = ();

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Class data: the package name, used as prefix in error and warning messages...
my($ClassName);
_InitializeClass();

# Use the stringify method whenever an object is used as a string...
use overload '""' => 'StringifyTopologicalAtomPairsFingerprints';
59
# Class constructor: create the base object, set up default values for this
# class and then apply the properties specified by the caller...
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Let the parent class build the object and bless it into the requested class;
  # $Class may be a class name or an already existing object...
  my $This = $Class->SUPER::new();
  my $TargetClass = ref($Class) || $Class;
  bless $This, $TargetClass;

  $This->_InitializeTopologicalAtomPairsFingerprints();
  $This->_InitializeTopologicalAtomPairsFingerprintsProperties(%NamesAndValues);

  return $This;
}
73
# Set up default values for all the object data used by this class...
#
sub _InitializeTopologicalAtomPairsFingerprints {
  my($This) = @_;

  # Type of fingerprint, type of vector holding it, and type of the values
  # stored in that vector...
  @{$This}{'Type', 'VectorType', 'FingerprintsVectorType'} = ('TopologicalAtomPairs', 'FingerprintsVector', 'NumericalValues');

  # Minimum and maximum bond distance between atom pairs...
  @{$This}{'MinDistance', 'MaxDistance'} = (1, 10);

  # Atom identifier type to use for atom IDs in atom pairs. It starts out empty
  # and is set later using SetAtomIdentifierType.
  #
  # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  #
  $This->{AtomIdentifierType} = '';

  # Atom types assigned to each heavy atom, keyed by atom ID...
  %{$This->{AssignedAtomTypes}} = ();

  # IDs and counts of all atom pairs between minimum and maximum distance...
  @{$This->{AtomPairsIDs}} = ();
  %{$This->{AtomPairsCount}} = ();
}
109
# Initialize class data...
sub _InitializeClass {
  # Remember the package name; it's the prefix of error and warning messages...
  return $ClassName = __PACKAGE__;
}
115
# Initialize object properties using name and value pairs passed to new...
#
# Each pair is applied by calling the corresponding Set<Name> method. Both
# Molecule and AtomIdentifierType must be present; otherwise, it's a fatal error.
#
# AtomIdentifierType is always applied before any other property:
# SetAtomIdentifierType resets AtomicInvariantsToUse and FunctionalClassesToUse
# to their default values, and SetFunctionalClassesToUse ignores its values until
# atom identifier type is known. Perl doesn't guarantee any particular order during
# traversal of a hash, so applying the pairs in hash order would make the final
# object depend on that order. The remaining properties are applied in sorted
# order to keep the behavior reproducible.
#
# Returns the object itself.
#
sub _InitializeTopologicalAtomPairsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;
  my($Name, $MethodName, @OtherNames);

  # Make sure molecule object and atom identifier type were specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  @OtherNames = grep { $_ ne 'AtomIdentifierType' } keys %NamesAndValues;
  @OtherNames = sort @OtherNames;

  for $Name ('AtomIdentifierType', @OtherNames) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}
138
# Set minimum bond distance for atom pairs. The value must be a positive
# integer; anything else is a fatal error. Returns the object itself...
#
sub SetMinDistance {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer..."
    unless TextUtil::IsPositiveInteger($Value);

  $This->{MinDistance} = $Value;

  return $This;
}
151
# Set maximum bond distance for atom pairs. The value must be a positive
# integer; anything else is a fatal error. Returns the object itself...
#
sub SetMaxDistance {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer..."
    unless TextUtil::IsPositiveInteger($Value);

  $This->{MaxDistance} = $Value;

  return $This;
}
164
# Set atom identifier type...
#
# The specified value must match, ignoring case, one of the supported atom
# identifier types; otherwise, it's a fatal error. The atom identifier type can be
# set only once: an attempt to change an already set value is a fatal error.
#
# After the value is stored, the defaults that depend on the atom identifier type
# are set up using _InitializeAtomIdentifierTypeInformation.
#
# Returns the object itself.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not valid. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  # The default value is an empty string, so any true value means it's already set...
  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change initial atom identifier type: It's already set...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize atom identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
185
# Get fingerprints description: the explicitly set description, if there is
# one; otherwise, a description generated from the fingerprints type, atom
# identifier type and distance range...
#
sub GetDescription {
  my($This) = @_;

  # An explicitly set description always wins...
  return $This->{Description} if exists $This->{Description};

  # Generate fingerprints description...
  return join(':', $This->{Type}, $This->{AtomIdentifierType}, "MinDistance$This->{MinDistance}", "MaxDistance$This->{MaxDistance}");
}
200
# Generate topological atom pairs [ Ref 57, Ref 59, Ref 72 ] fingerprints...
#
# Methodology:
#   . Generate a distance matrix.
#   . Assign atom types to all the atoms.
#   . Using distance matrix and atom types, count occurrence of
#     unique atom pairs within specified distance range - It corresponds to the
#     correlation-vector for the atom pairs.
#
# Notes:
#   . Hydrogen atoms are ignored during the fingerprint generation.
#   . MinDistance greater than MaxDistance is a fatal error.
#   . A failure to generate the distance matrix or to assign atom types is
#     reported as a warning and the object is returned without fingerprints.
#   . The cached molecule data is cleared before returning on the failure
#     paths as well, so atoms are not left cached in the object.
#
# Returns the object itself.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinDistance} > $This->{MaxDistance}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Generate distance matrix...
  if (!$This->_SetupDistanceMatrix()) {
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
    return $This;
  }

  # Assign atom types to all heavy atoms...
  if (!$This->_AssignAtomTypes()) {
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
    return $This;
  }

  # Initialize values of topological atom pairs...
  $This->_InitializeToplogicalAtomPairs();

  # Count atom pairs...
  $This->_GenerateAndCountAtomPairs();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
249
# Get the distance matrix from the molecule and store it in the object.
# Returns the object on success and undef when no distance matrix is available...
#
sub _SetupDistanceMatrix {
  my($This) = @_;

  my $DistanceMatrix = $This->GetMolecule()->GetDistanceMatrix();
  $This->{DistanceMatrix} = $DistanceMatrix;

  return $DistanceMatrix ? $This : undef;
}
263
# Assign appropriate atom types to all heavy atoms...
#
# An atom types object of the class corresponding to the atom identifier type is
# created, asked to assign atom types, and the assigned types are collected for
# all non-hydrogen atoms into AssignedAtomTypes keyed by atom ID.
#
# Returns the object on success and undef when the atom types assignment is not
# successful. An unknown atom identifier type is a fatal error.
#
sub _AssignAtomTypes {
  my($This) = @_;
  my($SpecifiedAtomTypes, $SupportedType, $MatchedType, $AtomTypesClass, $Atom, $AtomID, $IgnoreHydrogens, @SupportedTypes, @Parameters);

  %{$This->{AssignedAtomTypes}} = ();
  $IgnoreHydrogens = 1;

  # Names of supported atom identifier types; the corresponding class is named
  # AtomTypes::<Name>. The comparison ignores case...
  @SupportedTypes = qw(AtomicInvariantsAtomTypes DREIDINGAtomTypes EStateAtomTypes FunctionalClassAtomTypes MMFF94AtomTypes SLogPAtomTypes SYBYLAtomTypes TPSAAtomTypes UFFAtomTypes);

  $MatchedType = undef;
  SUPPORTEDTYPE: for $SupportedType (@SupportedTypes) {
    if ($This->{AtomIdentifierType} =~ /^${SupportedType}$/i) {
      $MatchedType = $SupportedType;
      last SUPPORTEDTYPE;
    }
  }
  if (!defined $MatchedType) {
    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }
  $AtomTypesClass = "AtomTypes::${MatchedType}";

  # Set up constructor parameters. TPSA atom types are created with phosphorus
  # and sulfur included and without IgnoreHydrogens; all other atom types are
  # created with hydrogens ignored...
  @Parameters = ('Molecule' => $This->{Molecule});
  if ($MatchedType eq 'TPSAAtomTypes') {
    push @Parameters, ('IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
  }
  else {
    push @Parameters, ('IgnoreHydrogens' => $IgnoreHydrogens);
  }
  if ($MatchedType eq 'AtomicInvariantsAtomTypes') {
    push @Parameters, ('AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
  }
  elsif ($MatchedType eq 'FunctionalClassAtomTypes') {
    push @Parameters, ('FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
  }

  # Use an explicit method call instead of the indirect object syntax; this
  # package defines its own new, which makes "new Class(...)" fragile to parse...
  $SpecifiedAtomTypes = $AtomTypesClass->new(@Parameters);

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return undef;
  }

  # Collect assigned atom types; hydrogens are always skipped...
  ATOM: for $Atom (@{$This->{Atoms}}) {
    if ($Atom->IsHydrogen()) {
      next ATOM;
    }
    $AtomID = $Atom->GetID();
    $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
  }

  return $This;
}
342
# Initialize topological atom pairs between specified distance range: clear
# atom pair IDs and counts, and create an empty count table for each distance...
#
sub _InitializeToplogicalAtomPairs {
  my($This) = @_;

  # Drop IDs and counts left over from any earlier generation...
  @{$This->{AtomPairsIDs}} = ();
  %{$This->{AtomPairsCount}} = ();

  # One empty table per distance from minimum to maximum...
  $This->{AtomPairsCount}{$_} = {} for ($This->{MinDistance} .. $This->{MaxDistance});

  return $This;
}
358
# Count atom pairs between minimum and maximum distance at each
# distance using distance matrix and atom types assigned to each heavy
# atom.
#
# Notes:
#   . The row and column indices of distance matrix correspond to atom indices.
#   . Distance value of BigNumber implies the atom is not connected to any other atom.
#   . Due to symmetric nature of distance matrix, only upper or lower triangular matrix
#     needs to be processed during identification and count of atom pairs.
#   . Atoms without an assigned atom type are skipped.
#   . The two atom types of a pair are passed on in string sorted order.
#
sub _GenerateAndCountAtomPairs {
  my($This) = @_;
  my($Matrix, $RowCount, $ColCount, $Row, $Col, $RowAtomID, $ColAtomID, $RowAtomType, $ColAtomType, $PairDistance, $SkipIndexCheck, @OrderedAtomTypes);

  $Matrix = $This->{DistanceMatrix};
  ($RowCount, $ColCount) = $Matrix->GetSize();
  $SkipIndexCheck = 0;

  ROW: for $Row (0 .. ($RowCount - 1)) {
    $RowAtomID = $This->{AtomIndexToID}{$Row};
    next ROW unless exists $This->{AssignedAtomTypes}{$RowAtomID};

    $RowAtomType = $This->{AssignedAtomTypes}{$RowAtomID};

    # Process only the columns to the right of the diagonal...
    COLUMN: for $Col (($Row + 1) .. ($ColCount - 1)) {
      $ColAtomID = $This->{AtomIndexToID}{$Col};
      next COLUMN unless exists $This->{AssignedAtomTypes}{$ColAtomID};

      $PairDistance = $Matrix->GetValue($Row, $Col, $SkipIndexCheck);
      if ($PairDistance < $This->{MinDistance} || $PairDistance > $This->{MaxDistance}) {
        next COLUMN;
      }

      $ColAtomType = $This->{AssignedAtomTypes}{$ColAtomID};

      @OrderedAtomTypes = ($RowAtomType le $ColAtomType) ? ($RowAtomType, $ColAtomType) : ($ColAtomType, $RowAtomType);
      $This->_SetAtomPairsCount($PairDistance, @OrderedAtomTypes);
    }
  }

  return $This;
}
406
# Set atom pairs count for a specific atom type pair at a specific distance:
# the count starts at 1 for a new pair and goes up by 1 for a known pair...
#
sub _SetAtomPairsCount {
  my($This, $Distance, $AtomType1, $AtomType2) = @_;

  # Missing intermediate tables are created on first use and a count that
  # doesn't exist yet is treated as zero...
  $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} += 1;

  return $This;
}
427
# Set final fingerprints vector: generate atom pair IDs along with their
# counts, ordered by distance and then by sorted atom types, and add them
# to the fingerprints vector...
#
sub _SetFinalFingerprints {
  my($This) = @_;
  my($Distance, $FirstAtomType, $SecondAtomType, @Counts);

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  @Counts = ();
  @{$This->{AtomPairsIDs}} = ();

  for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
    for $FirstAtomType (sort keys %{$This->{AtomPairsCount}{$Distance}}) {
      for $SecondAtomType (sort keys %{$This->{AtomPairsCount}{$Distance}{$FirstAtomType}}) {
        # ID and count are pushed together to keep both lists aligned...
        push @{$This->{AtomPairsIDs}}, join('-', $FirstAtomType, "D${Distance}", $SecondAtomType);
        push @Counts, $This->{AtomPairsCount}{$Distance}{$FirstAtomType}{$SecondAtomType};
      }
    }
  }

  # Add AtomPairsIDs and values to fingerprint vector...
  $This->{FingerprintsVector}->AddValueIDs($This->{AtomPairsIDs});
  $This->{FingerprintsVector}->AddValues(\@Counts);

  return $This;
}
456
# Get atom pair IDs corresponding to atom pairs count values in fingerprint
# vector as an array or reference to an array...
#
# AtomPairIDs list differs in molecules and is generated during finalization
# of fingerprints to make sure the fingerprint vector containing count values
# matches the atom pairs array.
#
sub GetAtomPairIDs {
  my($This) = @_;

  # List context gets a copy of the IDs; any other context gets a reference to
  # the array stored in the object...
  if (wantarray) {
    return @{$This->{AtomPairsIDs}};
  }

  return \@{$This->{AtomPairsIDs}};
}
469
# Cache appropriate molecule data: all the atoms in the molecule and a map of
# atom indices to atom IDs...
#
sub _SetupMoleculeDataCache {
  my($This) = @_;
  my(@AtomIDs);

  # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
  # usage of distance matrix. The hydrogen atoms are ignored during processing...
  #
  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

  # Get all atom IDs in the same order as the atoms...
  @AtomIDs = map { $_->GetID() } @{$This->{Atoms}};

  # Set AtomIndex to AtomID hash: position in the atoms list => atom ID...
  %{$This->{AtomIndexToID}} = ();
  $This->{AtomIndexToID}{$_} = $AtomIDs[$_] for (0 .. $#AtomIDs);

  return $This;
}
491
# Clear cached molecule data: empty the cached list of atoms. Returns the
# object itself...
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # The array is emptied in place instead of being replaced...
  @{ $This->{Atoms} } = ();

  return $This;
}
501
# Set atomic invariants to use for atom identifiers...
#
# The values may be specified as a list or as a reference to an array. Without
# any values, a warning is issued and nothing is changed. A value which is not an
# available atomic invariant is a fatal error. Returns the object itself.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # An array reference as first value means the values are in that array...
  @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  @AtomicInvariantsToUse = ();
  for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)
      or croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    push @AtomicInvariantsToUse, $SpecifiedAtomicInvariant;
  }

  # Set atomic invariants to use; nothing is stored unless all values are valid...
  @{$This->{AtomicInvariantsToUse}} = @AtomicInvariantsToUse;

  return $This;
}
541
# Set functional classes to use for atom identifiers...
#
# The values may be specified as a list or as a reference to an array. A warning
# is issued and nothing is changed when no values are specified or when the atom
# identifier type is not FunctionalClassAtomTypes. A value which is not an available
# functional class is a fatal error. Returns the object itself.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;
  my($SpecifiedFunctionalClass, @SpecifiedFunctionalClasses, @FunctionalClassesToUse);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # An array reference as first value means the values are in that array...
  @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  @FunctionalClassesToUse = ();
  for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
    AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)
      or croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
    push @FunctionalClassesToUse, $SpecifiedFunctionalClass;
  }

  # Set functional classes to use; nothing is stored unless all values are valid...
  @{$This->{FunctionalClassesToUse}} = @FunctionalClassesToUse;

  return $This;
}
585
# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Defaults are set up only for AtomicInvariantsAtomTypes and FunctionalClassAtomTypes;
# an unknown atom identifier type is a fatal error.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  # Nothing to do for the remaining supported types for now...
  if ($This->{AtomIdentifierType} !~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  return $This;
}
611
612 # Initialize atomic invariants atom types to use for generating atom identifiers...
613 #
614 # Let:
615 # AS = Atom symbol corresponding to element symbol
616 #
617 # X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
618 # BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
619 # LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
620 # SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
621 # DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
622 # TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
623 # H<n> = Number of implicit and explicit hydrogens for atom
624 # Ar = Aromatic annotation indicating whether atom is aromatic
625 # RA = Ring atom annotation indicating whether atom is a ring
626 # FC<+n/-n> = Formal charge assigned to atom
627 # MN<n> = Mass number indicating isotope other than most abundant isotope
628 # SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
629 #
630 # AtomTypeIDx = Atomic invariants atom type for atom x
631 # AtomTypeIDy = Atomic invariants atom type for atom y
632 # Dn = Topological distance between atom x and y
633 #
634 # Then:
635 #
636 # Atom pair AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
637 #
638 # AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
639 #
640 # AtomPairID corresponds to:
641 #
642 # AtomTypeIDx-D<n>-AtomTypeIDy
643 #
644 # Except for AS which is a required atomic invariant in atom pair AtomIDs, all other atomic invariants are
645 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
646 # AtomID specification doesn't include atomic invariants with zero or undefined values.
647 #
648 # Examples of atom pair AtomIDs:
649 #
650 # O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
651 # O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
652 # O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
653 # O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
654 #
655 # C.X2.BO3.H1.Ar - Aromatic carbon
656 #
657 # Examples of AtomPairIDs:
658 #
659 # C.X2.BO2.H3-D1-O.X1.BO1 - Carbon with two heavy atom neighbors attached to oxygen at bond distance 1(methanol)
660 #
661 # C.X2.BO3.H1.Ar-D3-C.X2.BO3.H1.Ar - Two aromatic carbons at bond distance 3 where each carbon has
662 # two heavy atom neighbors and bond order of 3 (benzene)
663 #
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  # Replace the current list with the default atomic invariants used for
  # generating atom IDs: AS, X, BO, H, FC
  #
  @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

  return $This;
}
674
675 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
676 # class, to use for generating atom identifiers...
677 #
678 # Let:
679 # HBD: HydrogenBondDonor
680 # HBA: HydrogenBondAcceptor
681 # PI : PositivelyIonizable
682 # NI : NegativelyIonizable
683 # Ar : Aromatic
684 # Hal : Halogen
685 # H : Hydrophobic
686 # RA : RingAtom
687 # CA : ChainAtom
688 #
689 # Then:
690 #
691 # Functional class atom type specification for an atom corresponds to:
692 #
693 # Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
694 #
695 # Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
696 #
697 # FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
698 #
699 # HydrogenBondDonor: NH, NH2, OH
700 # HydrogenBondAcceptor: N[!H], O
701 # PositivelyIonizable: +, NH2
702 # NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
703 #
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Replace the current list with the default functional classes used for
  # generating atom identifiers: HBD, HBA, PI, NI, Ar, Hal
  #
  @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

  return $This;
}
715
# Return a string containing data for TopologicalAtomPairsFingerprints object:
# "; " delimited fields with fingerprints type, atom identifier type, distance
# range, information specific to the atom identifier type, number of atom pairs
# and the fingerprints vector...
#
sub StringifyTopologicalAtomPairsFingerprints {
  my($This) = @_;
  my(@Fields);

  # Type of fingerprint...
  push @Fields, "Fingerprint type: $This->{Type}";
  push @Fields, "AtomIdentifierType: $This->{AtomIdentifierType}";

  # Min and max distance...
  push @Fields, "MinDistance: $This->{MinDistance}";
  push @Fields, "MaxDistance: $This->{MaxDistance}";

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my(@AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);

    @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Pair up each atomic invariant with the value available for it...
    @AtomicInvariants = map { "$_: $AvailableAtomicInvariants{$_}" } @AtomicInvariantsOrder;

    push @Fields, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Fields, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
    push @Fields, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my(@FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);

    @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Pair up each functional class with the value available for it...
    @FunctionalClasses = map { "$_: $AvailableFunctionalClasses{$_}" } @FunctionalClassesOrder;

    push @Fields, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Fields, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
    push @Fields, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
  }

  # Total number of atom pairs...
  push @Fields, "NumOfAtomPairs: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  push @Fields, "FingerprintsVector: < $This->{FingerprintsVector} >";

  return join('; ', @Fields);
}
765
766 1;
767
768 __END__
769
770 =head1 NAME
771
772 TopologicalAtomPairsFingerprints
773
774 =head1 SYNOPSIS
775
776 use Fingerprints::TopologicalAtomPairsFingerprints;
777
778 use Fingerprints::TopologicalAtomPairsFingerprints qw(:all);
779
780 =head1 DESCRIPTION
781
782 B<TopologicalAtomPairsFingerprints> [ Ref 57, Ref 59, Ref 72 ] class provides the following methods:
783
784 new, GenerateFingerprints, GetAtomPairIDs, GetDescription, SetAtomIdentifierType,
785 SetAtomicInvariantsToUse, SetFunctionalClassesToUse, SetMaxDistance,
786 SetMinDistance, StringifyTopologicalAtomPairsFingerprints
787
788 B<TopologicalAtomPairsFingerprints> is derived from B<Fingerprints> class which in turn
789 is derived from B<ObjectProperty> base class that provides methods not explicitly defined
790 in B<TopologicalAtomPairsFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's
791 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property:
792
793 Set<PropertyName>(<PropertyValue>);
794 $PropertyValue = Get<PropertyName>();
795 Delete<PropertyName>();
796
797 The current release of MayaChemTools supports generation of B<TopologicalAtomPairsFingerprints>
798 corresponding to following B<AtomIdentifierTypes>:
799
800 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
801 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
802 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
803
804 Based on the values specified for B<AtomIdentifierType> along with other specified
805 parameters such as B<AtomicInvariantsToUse> and B<FunctionalClassesToUse>, initial
806 atom types are assigned to all non-hydrogen atoms in a molecule. Using the distance
807 matrix for the molecule and initial atom types assigned to non-hydrogen atoms, all unique atom
808 pairs within B<MinDistance> and B<MaxDistance> are identified and counted. An atom pair
809 identifier is generated for each unique atom pair; the format of atom pair identifier is:
810
811 <AtomType1>-D<n>-<AtomType2>
812
813 AtomType1, AtomType2: Atom types assigned to atom1 and atom2
814 D: Distance between atom1 and atom2
815
816 where AtomType1 <= AtomType2
817
818 The atom pair identifiers for all unique atom pairs corresponding to non-hydrogen atoms constitute
819 topological atom pairs fingerprints of the molecule.
820
821 The current release of MayaChemTools generates the following types of topological atom pairs
822 fingerprints vector strings:
823
824 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
825 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1
826 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3.
827 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...;
828 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1
829 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1...
830
831 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
832 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesPairsString;C.X
833 1.BO1.H3-D1-C.X3.BO3.H1 2 C.X2.BO2.H2-D1-C.X2.BO2.H2 1 C.X2.BO2.H2-D1-
834 C.X3.BO3.H1 4 C.X2.BO2.H2-D1-C.X3.BO4 1 C.X2.BO2.H2-D1-N.X3.BO3 1 C.X2
835 .BO3.H1-D1-C.X2.BO3.H1 10 C.X2.BO3.H1-D1-C.X3.BO4 8 C.X3.BO3.H1-D1-C.X
836 3.BO4 1 C.X3.BO3.H1-D1-O.X1.BO1.H1 2 C.X3.BO4-D1-C.X3.BO4 6 C.X3.BO...
837
838 FingerprintsVector;TopologicalAtomPairs:DREIDINGAtomTypes:MinDistance1
839 :MaxDistance10;157;NumericalValues;IDsAndValuesString;C_2-D1-C_3 C_2-D
840 1-C_R C_2-D1-N_3 C_2-D1-O_2 C_2-D1-O_3 C_3-D1-C_3 C_3-D1-C_R C_3-D1-N_
841 R C_3-D1-O_3 C_R-D1-C_R C_R-D1-F_ C_R-D1-N_3 C_R-D1-N_R C_2-D2-C_3 C_2
842 1 1 1 2 1 7 1 1 2 23 1 1 2 1 3 5 5 2 1 5 28 2 3 3 1 1 1 2 4 1 1 4 9 3
843 1 4 24 2 4 3 3 4 5 5 14 1 1 2 3 22 1 3 4 4 1 1 1 1 2 2 5 1 4 21 3 1...
844
845 FingerprintsVector;TopologicalAtomPairs:EStateAtomTypes:MinDistance1:M
846 axDistance10;251;NumericalValues;IDsAndValuesString;aaCH-D1-aaCH aaCH-
847 D1-aasC aasC-D1-aasC aasC-D1-aasN aasC-D1-dssC aasC-D1-sF aasC-D1-ssNH
848 aasC-D1-sssCH aasN-D1-ssCH2 dO-D1-dssC dssC-D1-sOH dssC-D1-ssCH2 d...;
849 10 8 5 2 1 1 1 1 1 2 1 1 1 2 2 1 4 10 12 2 2 6 3 1 3 2 2 1 1 1 1 1 1 1
850 1 1 5 2 1 1 6 12 2 2 2 2 6 1 3 2 2 5 2 2 1 2 1 1 1 1 1 1 3 1 3 19 2...
851
852 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi
853 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar
854 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H
855 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...;
856 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4
857 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ...
858
859 FingerprintsVector;TopologicalAtomPairs:MMFF94AtomTypes:MinDistance1:M
860 axDistance10;227;NumericalValues;IDsAndValuesPairsString;C5A-D1-C5B 2
861 C5A-D1-CB 1 C5A-D1-CR 1 C5A-D1-N5 2 C5B-D1-C5B 1 C5B-D1-C=ON 1 C5B-D1-
862 CB 1 C=ON-D1-NC=O 1 C=ON-D1-O=CN 1 CB-D1-CB 18 CB-D1-F 1 CB-D1-NC=O 1
863 COO-D1-CR 1 COO-D1-O=CO 1 COO-D1-OC=O 1 CR-D1-CR 7 CR-D1-N5 1 CR-D1-OR
864 2 C5A-D2-C5A 1 C5A-D2-C5B 2 C5A-D2-C=ON 1 C5A-D2-CB 3 C5A-D2-CR 4 ...
865
866 FingerprintsVector;TopologicalAtomPairs:SLogPAtomTypes:MinDistance1:Ma
867 xDistance10;329;NumericalValues;IDsAndValuesPairsString;C1-D1-C10 1 C1
868 -D1-C11 2 C1-D1-C5 1 C1-D1-CS 4 C10-D1-N11 1 C11-D1-C21 1 C14-D1-C18 2
869 C14-D1-F 1 C18-D1-C18 10 C18-D1-C20 4 C18-D1-C22 2 C20-D1-C20 3 C20-D
870 1-C21 1 C20-D1-N11 1 C21-D1-C21 1 C21-D1-C5 1 C21-D1-N11 1 C22-D1-N4 1
871 C5-D1-N4 1 C5-D1-O10 1 C5-D1-O2 1 C5-D1-O9 1 CS-D1-O2 2 C1-D2-C1 3...
872
873 FingerprintsVector;TopologicalAtomPairs:SYBYLAtomTypes:MinDistance1:Ma
874 xDistance10;159;NumericalValues;IDsAndValuesPairsString;C.2-D1-C.3 1 C
875 .2-D1-C.ar 1 C.2-D1-N.am 1 C.2-D1-O.2 1 C.2-D1-O.co2 2 C.3-D1-C.3 7 C.
876 3-D1-C.ar 1 C.3-D1-N.ar 1 C.3-D1-O.3 2 C.ar-D1-C.ar 23 C.ar-D1-F 1 C.a
877 r-D1-N.am 1 C.ar-D1-N.ar 2 C.2-D2-C.3 1 C.2-D2-C.ar 3 C.3-D2-C.3 5 C.3
878 -D2-C.ar 5 C.3-D2-N.ar 2 C.3-D2-O.3 4 C.3-D2-O.co2 2 C.ar-D2-C.ar 2...
879
880 FingerprintsVector;TopologicalAtomPairs:TPSAAtomTypes:MinDistance1:Max
881 Distance10;64;NumericalValues;IDsAndValuesPairsString;N21-D1-None 3 N7
882 -D1-None 2 None-D1-None 34 None-D1-O3 2 None-D1-O4 3 N21-D2-None 5 N7-
883 D2-None 3 N7-D2-O3 1 None-D2-None 44 None-D2-O3 2 None-D2-O4 5 O3-D2-O
884 4 1 N21-D3-None 7 N7-D3-None 4 None-D3-None 45 None-D3-O3 4 None-D3-O4
885 5 N21-D4-N7 1 N21-D4-None 5 N21-D4-O3 1 N21-D4-O4 1 N7-D4-None 4 N...
886
887 FingerprintsVector;TopologicalAtomPairs:UFFAtomTypes:MinDistance1:MaxD
888 istance10;157;NumericalValues;IDsAndValuesPairsString;C_2-D1-C_3 1 C_2
889 -D1-C_R 1 C_2-D1-N_3 1 C_2-D1-O_2 2 C_2-D1-O_3 1 C_3-D1-C_3 7 C_3-D1-C
890 _R 1 C_3-D1-N_R 1 C_3-D1-O_3 2 C_R-D1-C_R 23 C_R-D1-F_ 1 C_R-D1-N_3 1
891 C_R-D1-N_R 2 C_2-D2-C_3 1 C_2-D2-C_R 3 C_3-D2-C_3 5 C_3-D2-C_R 5 C_3-D
892 2-N_R 2 C_3-D2-O_2 1 C_3-D2-O_3 5 C_R-D2-C_R 28 C_R-D2-F_ 2 C_R-D2-...
893
894 =head2 METHODS
895
896 =over 4
897
898 =item B<new>
899
900 $NewTopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
901 %NamesAndValues);
902
903 Using the specified I<TopologicalAtomPairsFingerprints> property names and values hash, the B<new>
904 method creates a new object and returns a reference to the newly created B<TopologicalAtomPairsFingerprints>
905 object. By default, the following properties are initialized:
906
907 Molecule = ''
908 Type = 'TopologicalAtomPairs'
909 MinDistance = 1
910 MaxDistance = 10
911 AtomIdentifierType = ''
912 AtomicInvariantsToUse = ['AS', 'X', 'BO', 'H', 'FC']
913 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']
914
915 Examples:
916
917 $TopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
918 'Molecule' => $Molecule,
919 'AtomIdentifierType' =>
920 'AtomicInvariantsAtomTypes');
921
922 $TopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
923 'Molecule' => $Molecule,
924 'MinDistance' => 1,
925 'MaxDistance' => 10,
926 'AtomIdentifierType' =>
927 'AtomicInvariantsAtomTypes',
928 'AtomicInvariantsToUse' =>
929 ['AS', 'X', 'BO', 'H', 'FC'] );
930
931 $TopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
932 'Molecule' => $Molecule,
933 'AtomIdentifierType' =>
934 'EStateAtomTypes');
935
936 $TopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
937 'Molecule' => $Molecule,
938 'AtomIdentifierType' =>
939 'SLogPAtomTypes');
940
941 $TopologicalAtomPairsFingerprints = new TopologicalAtomPairsFingerprints(
942 'Molecule' => $Molecule,
943 'MinDistance' => 1,
944 'MaxDistance' => 10,
945 'AtomIdentifierType' =>
946 'FunctionalClassAtomTypes',
947 'FunctionalClassesToUse' =>
948 ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal']);
949
950
951 $TopologicalAtomPairsFingerprints->GenerateFingerprints();
952 print "$TopologicalAtomPairsFingerprints\n";
953
954 =item B<GetDescription>
955
956 $Description = $TopologicalAtomPairsFingerprints->GetDescription();
957
958 Returns a string containing description of topological atom pairs fingerprints.
959
960 =item B<GenerateFingerprints>
961
962 $TopologicalAtomPairsFingerprints->GenerateFingerprints();
963
964 Generates topological atom pairs fingerprints and returns I<TopologicalAtomPairsFingerprints>.
965
966 =item B<GetAtomPairIDs>
967
968 $AtomPairIDsRef = $TopologicalAtomPairsFingerprints->GetAtomPairIDs();
969 @AtomPairIDs = $TopologicalAtomPairsFingerprints->GetAtomPairIDs();
970
971 Returns atom pair IDs corresponding to atom pairs count values in topological atom pairs
972 fingerprints vector as an array or reference to an array.
973
974 =item B<SetAtomIdentifierType>
975
976 $TopologicalAtomPairsFingerprints->SetAtomIdentifierType($IdentifierType);
977
978 Sets atom I<IdentifierType> to use during atom pairs fingerprints generation and
979 returns I<TopologicalAtomPairsFingerprints>.
980
981 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
982 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
983 TPSAAtomTypes, UFFAtomTypes>.
984
985 =item B<SetAtomicInvariantsToUse>
986
987 $TopologicalAtomPairsFingerprints->SetAtomicInvariantsToUse($ValuesRef);
988 $TopologicalAtomPairsFingerprints->SetAtomicInvariantsToUse(@Values);
989
990 Sets atomic invariants to use when I<AtomIdentifierType> is set to I<AtomicInvariantsAtomTypes>
991 during topological atom pairs fingerprints generation and returns I<TopologicalAtomPairsFingerprints>.
992
993 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB,
994 H, Ar, RA, FC, MN, SM>. Default value: I<AS,X,BO,H,FC>.
995
996 The atomic invariants abbreviations correspond to:
997
998 AS = Atom symbol corresponding to element symbol
999
1000 X<n> = Number of non-hydrogen atom neighbors or heavy atoms
1001 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms
1002 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms
1003 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms
1004 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms
1005 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms
1006 H<n> = Number of implicit and explicit hydrogens for atom
1007 Ar = Aromatic annotation indicating whether atom is aromatic
1008 RA = Ring atom annotation indicating whether atom is in a ring
1009 FC<+n/-n> = Formal charge assigned to atom
1010 MN<n> = Mass number indicating isotope other than most abundant isotope
1011 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or
1012 3 (triplet)
1013
1014 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
1015
1016 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
1017
1018 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
1019 optional. Atom type specification doesn't include atomic invariants with zero or undefined values.
1020
1021 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
1022 are also allowed:
1023
1024 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
1025 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
1026 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
1027 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
1028 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
1029 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
1030 H : NumOfImplicitAndExplicitHydrogens
1031 Ar : Aromatic
1032 RA : RingAtom
1033 FC : FormalCharge
1034 MN : MassNumber
1035 SM : SpinMultiplicity
1036
1037 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant
1038 atom types.
1039
1040 =item B<SetFunctionalClassesToUse>
1041
1042 $TopologicalAtomPairsFingerprints->SetFunctionalClassesToUse($ValuesRef);
1043 $TopologicalAtomPairsFingerprints->SetFunctionalClassesToUse(@Values);
1044
1045 Sets functional classes to use when I<AtomIdentifierType> is set to I<FunctionalClassAtomTypes>
1046 during topological atom pairs fingerprints generation and returns I<TopologicalAtomPairsFingerprints>.
1047
1048 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
1049 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>.
1050
1051 The functional class abbreviations correspond to:
1052
1053 HBD: HydrogenBondDonor
1054 HBA: HydrogenBondAcceptor
1055 PI : PositivelyIonizable
1056 NI : NegativelyIonizable
1057 Ar : Aromatic
1058 Hal : Halogen
1059 H : Hydrophobic
1060 RA : RingAtom
1061 CA : ChainAtom
1062
1063 Functional class atom type specification for an atom corresponds to:
1064
1065 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None
1066
1067 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom
1068 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]:
1069
1070 HydrogenBondDonor: NH, NH2, OH
1071 HydrogenBondAcceptor: N[!H], O
1072 PositivelyIonizable: +, NH2
1073 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
1074
1075 =item B<SetMaxDistance>
1076
1077 $TopologicalAtomPairsFingerprints->SetMaxDistance($Distance);
1078
1079 Sets maximum distance to use during topological atom pairs fingerprints generation and
1080 returns I<TopologicalAtomPairsFingerprints>.
1081
1082 =item B<SetMinDistance>
1083
1084 $TopologicalAtomPairsFingerprints->SetMinDistance($Distance);
1085
1086 Sets minimum distance to use during topological atom pairs fingerprints generation and
1087 returns I<TopologicalAtomPairsFingerprints>.
1088
1089 =item B<StringifyTopologicalAtomPairsFingerprints>
1090
1091 $String = $TopologicalAtomPairsFingerprints->
1092 StringifyTopologicalAtomPairsFingerprints();
1093
1094 Returns a string containing information about I<TopologicalAtomPairsFingerprints> object.
1095
1096 =back
1097
1098 =head1 AUTHOR
1099
1100 Manish Sud <msud@san.rr.com>
1101
1102 =head1 SEE ALSO
1103
1104 Fingerprints.pm, FingerprintsStringUtil.pm, AtomNeighborhoodsFingerprints.pm,
1105 AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, ExtendedConnectivityFingerprints.pm,
1106 MACCSKeys.pm, PathLengthFingerprints.pm, TopologicalAtomTripletsFingerprints.pm,
1107 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm,
1108 TopologicalPharmacophoreAtomTripletsFingerprints.pm
1109
1110 =head1 COPYRIGHT
1111
1112 Copyright (C) 2015 Manish Sud. All rights reserved.
1113
1114 This file is part of MayaChemTools.
1115
1116 MayaChemTools is free software; you can redistribute it and/or modify it under
1117 the terms of the GNU Lesser General Public License as published by the Free
1118 Software Foundation; either version 3 of the License, or (at your option)
1119 any later version.
1120
1121 =cut