Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/Fingerprints/AtomNeighborhoodsFingerprints.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 package Fingerprints::AtomNeighborhoodsFingerprints; | |
2 # | |
3 # $RCSfile: AtomNeighborhoodsFingerprints.pm,v $ | |
4 # $Date: 2015/02/28 20:48:53 $ | |
5 # $Revision: 1.27 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability of fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use Fingerprints::Fingerprints; | |
33 use TextUtil (); | |
34 use Molecule; | |
35 use AtomTypes::AtomicInvariantsAtomTypes; | |
36 use AtomTypes::DREIDINGAtomTypes; | |
37 use AtomTypes::EStateAtomTypes; | |
38 use AtomTypes::FunctionalClassAtomTypes; | |
39 use AtomTypes::MMFF94AtomTypes; | |
40 use AtomTypes::SLogPAtomTypes; | |
41 use AtomTypes::SYBYLAtomTypes; | |
42 use AtomTypes::TPSAAtomTypes; | |
43 use AtomTypes::UFFAtomTypes; | |
44 | |
45 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
46 | |
47 @ISA = qw(Fingerprints::Fingerprints Exporter); | |
48 @EXPORT = qw(); | |
49 @EXPORT_OK = qw(); | |
50 | |
51 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); | |
52 | |
53 # Setup class variables... | |
54 my($ClassName); | |
55 _InitializeClass(); | |
56 | |
57 # Overload Perl functions... | |
58 use overload '""' => 'StringifyAtomNeighborhoodsFingerprints'; | |
59 | |
# Class constructor...
#
# Creates the object through the parent class constructor, re-blesses it into
# the requested class, installs default settings and then applies the
# name/value pairs supplied by the caller.
#
# Returns the newly created object.
#
sub new {
  my $Class = shift;
  my %NamesAndValues = @_;

  # Let the parent class build the base object...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;

  # Defaults first, caller specified properties afterwards...
  $This->_InitializeAtomNeighborhoodsFingerprints();
  $This->_InitializeAtomNeighborhoodsFingerprintsProperties(%NamesAndValues);

  return $This;
}
73 | |
# Initialize object data...
#
# Installs default values for the fingerprints type, vector type, neighborhood
# radii and atom identifier type, and empties the containers used during
# fingerprints generation.
#
sub _InitializeAtomNeighborhoodsFingerprints {
  my($This) = @_;
  my($HashProperty);

  # Type of fingerprint, type of vector and type of FingerprintsVector...
  @{$This}{qw(Type VectorType FingerprintsVectorType)} = ('AtomNeighborhoods', 'FingerprintsVector', 'AlphaNumericalValues');

  # Minimum and maximum atomic neighborhoods radii...
  @{$This}{qw(MinNeighborhoodRadius MaxNeighborhoodRadius)} = (0, 2);

  # Atom identifier type to use for atom IDs in atom neighborhood atoms...
  #
  # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  #
  $This->{AtomIdentifierType} = '';

  # Containers emptied in place:
  #   AssignedAtomTypes - atom types assigned to each heavy atom
  #   AtomNeighborhoods - atom neighborhoods within specified atom radii
  #   NeighborhoodAtomTypesCount - atom types count at different neighborhoods
  #
  for $HashProperty (qw(AssignedAtomTypes AtomNeighborhoods NeighborhoodAtomTypesCount)) {
    %{$This->{$HashProperty}} = ();
  }

  # Atom neighborhood identifiers using specified atom identifier types methodology...
  @{$This->{AtomNeighborhoodsIdentifiers}} = ();
}
112 | |
# Initialize class...
#
# Records the package name in the file-scoped $ClassName variable, which is
# interpolated into the error and warning messages of this class.
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}
118 | |
# Initialize object properties...
#
# Applies the name/value pairs passed to the constructor by calling the
# corresponding Set<Name> method for each pair.
#
# Notes:
#   . Required and disallowed parameters are checked before any setter is
#     called, so an invalid parameter list never leaves a partially
#     configured object behind.
#   . AtomIdentifierType is always applied first: SetAtomIdentifierType resets
#     AtomicInvariantsToUse and FunctionalClassesToUse to their defaults, and
#     SetFunctionalClassesToUse ignores its values until the identifier type
#     is set. Applying the pairs in hash order would make the outcome depend
#     on Perl's unspecified hash iteration order.
#   . Remaining properties are applied in sorted name order to keep object
#     construction deterministic.
#
# Croaks when Molecule or AtomIdentifierType is missing or when Size is
# specified. Returns the object.
#
sub _InitializeAtomNeighborhoodsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;
  my($Name, $MethodName, @OrderedNames);

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }
  if (exists $NamesAndValues{Size}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated with a user specified size: It's an arbitrary length vector...";
  }
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  # AtomIdentifierType goes first; everything else follows in sorted order...
  @OrderedNames = ('AtomIdentifierType', grep { $_ ne 'AtomIdentifierType' } sort keys %NamesAndValues);

  for $Name (@OrderedNames) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}
144 | |
# Set atom identifier type...
#
# Accepts one of the supported atom identifier type names (matched case
# insensitively), stores it and initializes the type specific defaults.
#
# Croaks when the value is not a supported type or when an atom identifier
# type has already been set. Returns the object.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  unless ($IdentifierType =~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  # The identifier type may be set only once per object...
  croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type: It's already set..."
    if $This->{AtomIdentifierType};

  $This->{AtomIdentifierType} = $IdentifierType;

  # Set up defaults specific to the chosen atom identifier type...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}
165 | |
# Set minimum atom neighborhood radius...
#
# Croaks unless the value is an integer >= 0. Returns the object.
#
sub SetMinNeighborhoodRadius {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMinNeighborhoodRadius: MinNeighborhoodRadius value, $Value, is not valid: It must be an integer..."
    unless TextUtil::IsInteger($Value);

  croak "Error: ${ClassName}->SetMinNeighborhoodRadius: MinNeighborhoodRadius value, $Value, is not valid: It must be >= 0..."
    if $Value < 0;

  $This->{MinNeighborhoodRadius} = $Value;

  return $This;
}
182 | |
# Set maximum atom neighborhood radius...
#
# Croaks unless the value is an integer >= 0. Returns the object.
#
sub SetMaxNeighborhoodRadius {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMaxNeighborhoodRadius: MaxNeighborhoodRadius value, $Value, is not valid: It must be an integer..."
    unless TextUtil::IsInteger($Value);

  croak "Error: ${ClassName}->SetMaxNeighborhoodRadius: MaxNeighborhoodRadius value, $Value, is not valid: It must be >= 0..."
    if $Value < 0;

  $This->{MaxNeighborhoodRadius} = $Value;

  return $This;
}
199 | |
# Generate fingerprints description...
#
# Returns the explicitly set description when the Description key exists;
# otherwise builds one from the fingerprints type, atom identifier type and
# the minimum and maximum neighborhood radii:
#
#   <Type>:<AtomIdentifierType>:MinRadius<n>:MaxRadius<n>
#
sub GetDescription {
  my($This) = @_;

  # An explicitly set description always wins...
  return $This->{Description} if exists $This->{Description};

  return join(":",
    $This->{Type},
    $This->{AtomIdentifierType},
    "MinRadius" . $This->{MinNeighborhoodRadius},
    "MaxRadius" . $This->{MaxNeighborhoodRadius});
}
214 | |
# Generate atom neighborhood [ Ref 53-56, Ref 73 ] fingerprints...
#
# Methodology:
#   . Assign atom types to all non-hydrogen atoms in the molecule
#   . Get atom neighborhoods up to MaxNeighborhoodRadius
#   . Count unique atom types at each neighborhood radius for all heavy atoms
#   . Generate neighborhood identifiers for all neighborhoods around central
#     heavy atom
#   . Atom neighborhood identifier for a specific radius is generated using neighborhood
#     radius, assigned atom type and its count as follows:
#
#     NR<n>-<AtomType>-ATC<n>
#
#   . Atom neighborhood identifier for a central atom at all specified radii is generated
#     by concatenating neighborhood identifiers at each radius by colon:
#
#     NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>:
#
#   . Set final fingerprints as list of neighborhood atom identifiers
#
# Croaks when MinNeighborhoodRadius is greater than MaxNeighborhoodRadius.
# When atom types can't be assigned to all atoms, a warning is issued and the
# object is returned without fingerprints. Returns the object.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Equal radii are valid; only Min > Max is rejected...
  if ($This->{MinNeighborhoodRadius} > $This->{MaxNeighborhoodRadius}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinNeighborhoodRadius, $This->{MinNeighborhoodRadius}, must be less than or equal to MaxNeighborhoodRadius, $This->{MaxNeighborhoodRadius}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign atom types to all heavy atoms...
  if (!$This->_AssignAtomTypes()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";

    # Release the cached atoms on the failure path as well, so that the
    # object doesn't keep holding on to atom objects after giving up...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Initialize atom neighborhoods information...
  $This->_InitializeAtomNeighborhoods();

  # Identify atom neighborhoods within specified radii...
  $This->_GetAtomNeighborhoods();

  # Count atom neighborhoods atom types...
  $This->_CountAtomNeighborhoodsAtomTypes();

  # Generate atom neighborhood identifiers...
  $This->_GenerateAtomNeighborhoodIdentifiers();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}
271 | |
# Assign appropriate atom types to all heavy atoms...
#
# Instantiates the AtomTypes::<AtomIdentifierType> class corresponding to the
# atom identifier type of the object, asks it to assign atom types and stores
# the assigned type of each cached non-hydrogen atom in AssignedAtomTypes,
# keyed by atom ID.
#
# Returns the object on success and nothing (false) when the atom types
# assignment is not successful. Croaks for an unknown atom identifier type.
#
sub _AssignAtomTypes {
  my($This) = @_;
  my($SpecifiedAtomTypes, $Atom, $IgnoreHydrogens, $TypeName, $MatchedTypeName, $AtomTypesClass, %ExtraParametersMap);

  %{$This->{AssignedAtomTypes}} = ();
  $IgnoreHydrogens = 1;

  # Constructor parameters, in addition to Molecule, for each supported atom
  # types class. TPSAAtomTypes is not passed IgnoreHydrogens; it is passed
  # IgnorePhosphorus and IgnoreSulfur instead...
  #
  %ExtraParametersMap = (
    'AtomicInvariantsAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}],
    'DREIDINGAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'EStateAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'FunctionalClassAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}],
    'MMFF94AtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'SLogPAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'SYBYLAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'TPSAAtomTypes' => ['IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0],
    'UFFAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
  );

  # Identify the supported type name using a case insensitive match...
  $MatchedTypeName = undef;
  TYPENAME: for $TypeName (sort keys %ExtraParametersMap) {
    if ($This->{AtomIdentifierType} =~ /^\Q$TypeName\E$/i) {
      $MatchedTypeName = $TypeName;
      last TYPENAME;
    }
  }
  if (!defined $MatchedTypeName) {
    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  # Use direct method call syntax instead of indirect object syntax...
  $AtomTypesClass = "AtomTypes::${MatchedTypeName}";
  $SpecifiedAtomTypes = $AtomTypesClass->new('Molecule' => $This->{Molecule}, @{$ExtraParametersMap{$MatchedTypeName}});

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    # Bare return is false in both scalar and list context...
    return;
  }

  # Collect assigned atom types...
  ATOM: for $Atom (@{$This->{Atoms}}) {
    if ($Atom->IsHydrogen()) {
      next ATOM;
    }
    $This->{AssignedAtomTypes}{$Atom->GetID()} = $SpecifiedAtomTypes->GetAtomType($Atom);
  }

  return $This;
}
350 | |
# Initialize atom neighborhoods information...
#
# Empties NeighborhoodAtomTypesCount and AtomNeighborhoods and creates an
# empty entry for each radius:
#
#   . NeighborhoodAtomTypesCount: MinNeighborhoodRadius .. MaxNeighborhoodRadius
#   . AtomNeighborhoods: 0 .. MaxNeighborhoodRadius
#
# Returns the object.
#
sub _InitializeAtomNeighborhoods {
  my($This) = @_;
  my($MinRadius, $MaxRadius) = ($This->{MinNeighborhoodRadius}, $This->{MaxNeighborhoodRadius});

  # Atom neighborhood count information between specified radii...
  %{$This->{NeighborhoodAtomTypesCount}} = ();
  $This->{NeighborhoodAtomTypesCount}{$_} = {} for ($MinRadius .. $MaxRadius);

  # Atom neighborhoods atoms information at all radii up to the maximum...
  %{$This->{AtomNeighborhoods}} = ();
  $This->{AtomNeighborhoods}{$_} = {} for (0 .. $MaxRadius);

  return $This;
}
374 | |
# Collect atom neighborhoods up to maximum neighborhood radius...
#
# Stores, for each cached atom, a reference to the list of neighborhood atoms
# at each radius in AtomNeighborhoods, keyed by radius and atom ID.
#
# Notes:
#   . Fingerprints are only generated for neighborhoods between specified minimum
#     and maximum neighborhood radii.
#
# Returns the object.
#
sub _GetAtomNeighborhoods {
  my($This) = @_;
  my($CentralAtom, $CentralAtomID, $RadiusLevel, $NeighborhoodAtomsRef);

  my $MaxRadius = $This->{MaxNeighborhoodRadius};
  my $Molecule = $This->GetMolecule();

  if ($MaxRadius == 0) {
    # Each atom is its own neighborhood at 0 radius...
    for $CentralAtom (@{$This->{Atoms}}) {
      $CentralAtomID = $CentralAtom->GetID();
      $This->{AtomNeighborhoods}{0}{$CentralAtomID} = [$CentralAtom];
    }
    return $This;
  }

  # Collect available atom neighborhoods at different neighborhood radii levels...
  for $CentralAtom (@{$This->{Atoms}}) {
    $CentralAtomID = $CentralAtom->GetID();
    $RadiusLevel = 0;

    # Neighborhoods are stored in the order returned, starting at radius 0...
    for $NeighborhoodAtomsRef ($Molecule->GetAtomNeighborhoodsWithRadiusUpto($CentralAtom, $MaxRadius)) {
      $This->{AtomNeighborhoods}{$RadiusLevel}{$CentralAtomID} = $NeighborhoodAtomsRef;
      $RadiusLevel += 1;
    }
  }

  return $This;
}
414 | |
# Count atom neighborhoods atom types for each non-hydrogen central atom with
# neighborhoods in specified radii range...
#
# For each radius between MinNeighborhoodRadius and MaxNeighborhoodRadius and
# each central atom, counts how many times each assigned atom type occurs
# among the non-hydrogen atoms of the neighborhood. Counts are stored in
# NeighborhoodAtomTypesCount keyed by radius, central atom ID and atom type.
#
# Returns the object.
#
sub _CountAtomNeighborhoodsAtomTypes {
  my($This) = @_;
  my($Radius, $CentralAtomID, $NeighborhoodAtom, $AtomType, @RadiiInRange);

  # Only the radii within the specified range contribute to the counts...
  @RadiiInRange = grep { $_ >= $This->{MinNeighborhoodRadius} && $_ <= $This->{MaxNeighborhoodRadius} }
    sort { $a <=> $b } keys %{$This->{AtomNeighborhoods}};

  for $Radius (@RadiiInRange) {
    # Go over the neighborhoods of each atom at the current radius...
    for $CentralAtomID (keys %{$This->{AtomNeighborhoods}{$Radius}}) {
      for $NeighborhoodAtom (@{$This->{AtomNeighborhoods}{$Radius}{$CentralAtomID}}) {
        next if $NeighborhoodAtom->IsHydrogen();

        $AtomType = $This->{AssignedAtomTypes}{$NeighborhoodAtom->GetID()};

        # Autovivification creates the per atom hash and starts the count at 1...
        $This->{NeighborhoodAtomTypesCount}{$Radius}{$CentralAtomID}{$AtomType}++;
      }
    }
  }

  return $This;
}
451 | |
# Generate atom neighborhood identifiers for each non-hydrogen atom using atom
# neighborhood atom types and their count information...
#
# Let:
#   NR<n> = Neighborhood radius
#   AtomType = Assigned atom type
#   ATC<n> = AtomType count
#
# Then:
#
#   AtomNeighborhoodAtomIdentifier for a neighborhood atom corresponds to:
#
#     NR<n>-<AtomType>-ATC<n>
#
#   AtomNeighborhoodsIdentifier for all specified atom neighborhoods of an atom
#   is the list of these identifiers, ordered by radius and then by atom type,
#   joined by colon:
#
#     NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>:...
#
# One identifier string, possibly empty, is stored for each cached atom in
# AtomNeighborhoodsIdentifiers. Returns the object.
#
sub _GenerateAtomNeighborhoodIdentifiers {
  my($This) = @_;
  my($CentralAtom, $CentralAtomID, $Radius, $CountsRef, @Identifiers);

  @{$This->{AtomNeighborhoodsIdentifiers}} = ();

  for $CentralAtom (@{$This->{Atoms}}) {
    $CentralAtomID = $CentralAtom->GetID();
    @Identifiers = ();

    for $Radius ($This->{MinNeighborhoodRadius} .. $This->{MaxNeighborhoodRadius}) {
      # Skip radii without any counted atom types for this atom...
      next unless exists $This->{NeighborhoodAtomTypesCount}{$Radius}{$CentralAtomID};

      $CountsRef = $This->{NeighborhoodAtomTypesCount}{$Radius}{$CentralAtomID};
      push @Identifiers, map { "NR${Radius}-${_}-ATC" . $CountsRef->{$_} } sort keys %{$CountsRef};
    }

    push @{$This->{AtomNeighborhoodsIdentifiers}}, join(":", @Identifiers);
  }

  return $This;
}
496 | |
# Set final fingerprints vector...
#
# Marks fingerprints as generated, sorts the atom neighborhood identifiers as
# strings and adds them to the FingerprintsVector of the object.
#
# Returns the object.
#
sub _SetFinalFingerprints {
  my($This) = @_;
  my(@SortedIdentifiers);

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  # Keep the identifiers stored in the object in sorted order...
  @SortedIdentifiers = sort @{$This->{AtomNeighborhoodsIdentifiers}};
  @{$This->{AtomNeighborhoodsIdentifiers}} = @SortedIdentifiers;

  # FingerprintsVector is already defined during initialization and contains
  # AlphaNumericalValues...
  $This->{FingerprintsVector}->AddValues(\@{$This->{AtomNeighborhoodsIdentifiers}});

  return $This;
}
516 | |
# Cache appropriate molecule data...
#
# Stores the atoms returned by the molecule's GetAtoms method, called with the
# "IsHydrogen" check method name and the negate flag set, in Atoms.
#
# Returns the object.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all non-hydrogen atoms by negating the IsHydrogen check...
  my $Molecule = $This->GetMolecule();
  my @HeavyAtoms = $Molecule->GetAtoms("IsHydrogen", 1);

  @{$This->{Atoms}} = @HeavyAtoms;

  return $This;
}
529 | |
# Clear cached molecule data...
#
# Empties the list of atoms cached in Atoms. Returns the object.
#
sub _ClearMoleculeDataCache {
  my $This = shift;

  # Empty the list in place...
  @{$This->{Atoms}} = ();

  return $This;
}
539 | |
# Set atomic invariants to use for atom identifiers...
#
# Accepts atomic invariants either as a list of values or as a reference to
# an array of values. Each value is checked using
# AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable before any
# value is stored.
#
# Issues a warning and returns nothing when no values are specified. Croaks
# for an unsupported atomic invariant. Returns the object otherwise.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($Invariant, @RequestedInvariants);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # First value decides whether an array reference or a plain list was passed...
  @RequestedInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  for $Invariant (@RequestedInvariants) {
    croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $Invariant, is not supported...\n "
      unless AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($Invariant);
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @RequestedInvariants;

  return $This;
}
579 | |
# Set functional classes to use for atom identifiers...
#
# Accepts functional classes either as a list of values or as a reference to
# an array of values. Each value is checked using
# AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable before any
# value is stored.
#
# Issues a warning and returns nothing when no values are specified or when
# the atom identifier type of the object is not FunctionalClassAtomTypes.
# Croaks for an unsupported functional class. Returns the object otherwise.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;
  my($RequestedClass, @RequestedClasses);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  # Functional classes only apply to the FunctionalClassAtomTypes identifier type...
  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # First value decides whether an array reference or a plain list was passed...
  @RequestedClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  for $RequestedClass (@RequestedClasses) {
    croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $RequestedClass, is not supported...\n "
      unless AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($RequestedClass);
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @RequestedClasses;

  return $This;
}
623 | |
# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Only AtomicInvariantsAtomTypes and FunctionalClassAtomTypes need additional
# defaults. Croaks for an unknown atom identifier type. Returns the object.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;
  my $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  # Nothing to do for the remaining supported types for now...
  if ($IdentifierType !~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  return $This;
}
649 | |
# Initialize atomic invariants atom types to use for generating atom identifiers...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n> = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n> = Number of implicit and explicit hydrogens for atom
#   Ar = Aromatic annotation indicating whether atom is aromatic
#   RA = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
# Then:
#
#   Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
# Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
# optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
# AtomID specification doesn't include atomic invariants with zero or undefined values.
#
# Returns the object.
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  # Default atomic invariants to use for generating atom neighborhood atom IDs...
  @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

  return $This;
}
688 | |
# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI : PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
# Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
# FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#   HydrogenBondDonor: NH, NH2, OH
#   HydrogenBondAcceptor: N[!H], O
#   PositivelyIonizable: +, NH2
#   NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
# Returns the object.
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Default functional class atom types to use for generating atom identifiers...
  @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

  return $This;
}
729 | |
# Return a string containing data for AtomNeighborhoodsFingerprints object...
#
# The string is a list of "Name: Value" sections separated by "; ". It covers
# the fingerprints type, atom identifier type and neighborhood radii, followed
# by atomic invariants or functional classes information for the
# AtomicInvariantsAtomTypes and FunctionalClassAtomTypes identifier types, the
# number of values in the fingerprints vector and the fingerprints vector
# itself.
#
# This method is also used to overload the stringification operator.
#
sub StringifyAtomNeighborhoodsFingerprints {
  my($This) = @_;
  my(@Sections);

  # Type of fingerprint along with radii information...
  @Sections = (
    "Fingerprint type: $This->{Type}",
    "AtomIdentifierType: $This->{AtomIdentifierType}",
    "MinNeighborhoodRadius: $This->{MinNeighborhoodRadius}",
    "MaxNeighborhoodRadius: $This->{MaxNeighborhoodRadius}",
  );

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my(@InvariantsOrder, @InvariantsDescriptions, %AvailableInvariants);

    @InvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Abbreviation and its description for each available atomic invariant...
    @InvariantsDescriptions = map { "$_: $AvailableInvariants{$_}" } @InvariantsOrder;

    push @Sections, "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Sections, "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@InvariantsOrder, ", ", 0) . ">";
    push @Sections, "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@InvariantsDescriptions, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my(@ClassesOrder, @ClassesDescriptions, %AvailableClasses);

    @ClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Abbreviation and its description for each available functional class...
    @ClassesDescriptions = map { "$_: $AvailableClasses{$_}" } @ClassesOrder;

    push @Sections, "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Sections, "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@ClassesOrder, ", ", 0) . ">";
    push @Sections, "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@ClassesDescriptions, ", ", 0) . ">";
  }

  # Total number of atom neighborhood atom IDs...
  push @Sections, "NumOfAtomNeighborhoodAtomIdentifiers: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  push @Sections, "FingerprintsVector: < $This->{FingerprintsVector} >";

  return join("; ", @Sections);
}
776 | |
777 1; | |
778 | |
779 __END__ | |
780 | |
781 =head1 NAME | |
782 | |
783 AtomNeighborhoodsFingerprints | |
784 | |
785 =head1 SYNOPSIS | |
786 | |
787 use Fingerprints::AtomNeighborhoodsFingerprints; | |
788 | |
789 use Fingerprints::AtomNeighborhoodsFingerprints qw(:all); | |
790 | |
791 =head1 DESCRIPTION | |
792 | |
793 B<AtomNeighborhoodsFingerprints> [ Ref 53-56, Ref 73 ] class provides the following methods: | |
794 | |
795 new, GenerateFingerprints, GetDescription, SetAtomIdentifierType, | |
796 SetAtomicInvariantsToUse, SetFunctionalClassesToUse, SetMaxNeighborhoodRadius, | |
797 SetMinNeighborhoodRadius, StringifyAtomNeighborhoodsFingerprints | |
798 | |
799 B<AtomNeighborhoodsFingerprints> is derived from B<Fingerprints> class which in turn | |
800 is derived from B<ObjectProperty> base class that provides methods not explicitly defined | |
801 in B<AtomNeighborhoodsFingerprints>, B<Fingerprints> or B<ObjectProperty> classes using Perl's | |
802 AUTOLOAD functionality. These methods are generated on-the-fly for a specified object property: | |
803 | |
804 Set<PropertyName>(<PropertyValue>); | |
805 $PropertyValue = Get<PropertyName>(); | |
806 Delete<PropertyName>(); | |
807 | |
808 The current release of MayaChemTools supports generation of B<AtomNeighborhoodsFingerprints> | |
809 corresponding to the following B<AtomIdentifierTypes>: | |
810 | |
811 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
812 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, | |
813 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
814 | |
815 Based on the values specified for B<AtomIdentifierType> along with other specified | |
816 parameters such as B<AtomicInvariantsToUse> and B<FunctionalClassesToUse>, initial atom types are | |
817 assigned to all non-hydrogen atoms in a molecule. Using atom neighborhoods | |
818 around each non-hydrogen central atom corresponding to radii between specified values | |
819 B<MinNeighborhoodRadius> and B<MaxNeighborhoodRadius>, unique atom types at each radius | |
820 level are counted and an atom neighborhood identifier is generated. | |
821 | |
822 The format of an atom neighborhood identifier around a central non-hydrogen atom at a | |
823 specific radius is: | |
824 | |
825 NR<n>-<AtomType>-ATC<n> | |
826 | |
827 NR: Neighborhood radius | |
828 AtomType: Assigned atom type | |
829 ATC: Atom type count | |
830 | |
831 The atom neighborhood identifier for a non-hydrogen central atom corresponding to all specified radii | |
832 is generated by concatenating neighborhood identifiers at each radius using colon as a delimiter: | |
833 | |
834 NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>:... | |
835 | |
836 The atom neighborhood identifiers for all non-hydrogen central atoms at all specified radii are | |
837 concatenated using space as a delimiter and constitute atom neighborhood fingerprint of the molecule. | |
838 | |
839 The current release of MayaChemTools generates the following types of atom neighborhoods | |
840 fingerprints vector strings: | |
841 | |
842 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi | |
843 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT | |
844 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X | |
845 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A | |
846 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2 | |
847 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B... | |
848 | |
849 FingerprintsVector;AtomNeighborhoods:DREIDINGAtomTypes:MinRadius0:MaxR | |
850 adius2;41;AlphaNumericalValues;ValuesString;NR0-C_2-ATC1:NR1-C_3-ATC1: | |
851 NR1-O_2-ATC1:NR1-O_3-ATC1:NR2-C_3-ATC1 NR0-C_2-ATC1:NR1-C_R-ATC1:NR1-N | |
852 _3-ATC1:NR1-O_2-ATC1:NR2-C_R-ATC3 NR0-C_3-ATC1:NR1-C_2-ATC1:NR1-C_3-AT | |
853 C1:NR2-C_3-ATC1:NR2-O_2-ATC1:NR2-O_3-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR | |
854 1-N_R-ATC1:NR2-C_3-ATC1:NR2-C_R-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR2-... | |
855 | |
856 FingerprintsVector;AtomNeighborhoods:EStateAtomTypes:MinRadius0:MaxRad | |
857 ius2;41;AlphaNumericalValues;ValuesString;NR0-aaCH-ATC1:NR1-aaCH-ATC1: | |
858 NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC1:NR2-sF-ATC1 NR0-aaCH-ATC1:NR | |
859 1-aaCH-ATC1:NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC1:NR2-sF-ATC1 NR0- | |
860 aaCH-ATC1:NR1-aaCH-ATC1:NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC2 NR0- | |
861 aaCH-ATC1:NR1-aaCH-ATC1:NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC2 N... | |
862 | |
863 FingerprintsVector;AtomNeighborhoods:FunctionalClassAtomTypes:MinRadiu | |
864 s0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-Ar-ATC1:NR1-Ar- | |
865 ATC1:NR1-Ar.HBA-ATC1:NR1-None-ATC1:NR2-Ar-ATC2:NR2-None-ATC4 NR0-Ar-AT | |
866 C1:NR1-Ar-ATC2:NR1-Ar.HBA-ATC1:NR2-Ar-ATC5:NR2-None-ATC1 NR0-Ar-ATC1:N | |
867 R1-Ar-ATC2:NR1-HBD-ATC1:NR2-Ar-ATC2:NR2-None-ATC1 NR0-Ar-ATC1:NR1-Ar-A | |
868 TC2:NR1-Hal-ATC1:NR2-Ar-ATC2 NR0-Ar-ATC1:NR1-Ar-ATC2:NR1-None-ATC1:... | |
869 | |
870 FingerprintsVector;AtomNeighborhoods:MMFF94AtomTypes:MinRadius0:MaxRad | |
871 ius2;41;AlphaNumericalValues;ValuesString;NR0-C5A-ATC1:NR1-C5B-ATC1:NR | |
872 1-CB-ATC1:NR1-N5-ATC1:NR2-C5A-ATC1:NR2-C5B-ATC1:NR2-CB-ATC3:NR2-CR-ATC | |
873 1 NR0-C5A-ATC1:NR1-C5B-ATC1:NR1-CR-ATC1:NR1-N5-ATC1:NR2-C5A-ATC1:NR2-C | |
874 5B-ATC1:NR2-C=ON-ATC1:NR2-CR-ATC3 NR0-C5B-ATC1:NR1-C5A-ATC1:NR1-C5B-AT | |
875 C1:NR1-C=ON-ATC1:NR2-C5A-ATC1:NR2-CB-ATC1:NR2-CR-ATC1:NR2-N5-ATC1:N... | |
876 | |
877 FingerprintsVector;AtomNeighborhoods:SLogPAtomTypes:MinRadius0:MaxRadi | |
878 us2;41;AlphaNumericalValues;ValuesString;NR0-C1-ATC1:NR1-C10-ATC1:NR1- | |
879 CS-ATC1:NR2-C1-ATC1:NR2-N11-ATC1:NR2-O2-ATC1 NR0-C1-ATC1:NR1-C11-ATC1: | |
880 NR2-C1-ATC1:NR2-C21-ATC1 NR0-C1-ATC1:NR1-C11-ATC1:NR2-C1-ATC1:NR2-C21- | |
881 ATC1 NR0-C1-ATC1:NR1-C5-ATC1:NR1-CS-ATC1:NR2-C1-ATC1:NR2-O2-ATC2:NR2-O | |
882 9-ATC1 NR0-C1-ATC1:NR1-CS-ATC2:NR2-C1-ATC2:NR2-O2-ATC2 NR0-C10-ATC1... | |
883 | |
884 FingerprintsVector;AtomNeighborhoods:SYBYLAtomTypes:MinRadius0:MaxRadi | |
885 us2;41;AlphaNumericalValues;ValuesString;NR0-C.2-ATC1:NR1-C.3-ATC1:NR1 | |
886 -O.co2-ATC2:NR2-C.3-ATC1 NR0-C.2-ATC1:NR1-C.ar-ATC1:NR1-N.am-ATC1:NR1- | |
887 O.2-ATC1:NR2-C.ar-ATC3 NR0-C.3-ATC1:NR1-C.2-ATC1:NR1-C.3-ATC1:NR2-C.3- | |
888 ATC1:NR2-O.3-ATC1:NR2-O.co2-ATC2 NR0-C.3-ATC1:NR1-C.3-ATC1:NR1-N.ar-AT | |
889 C1:NR2-C.3-ATC1:NR2-C.ar-ATC2 NR0-C.3-ATC1:NR1-C.3-ATC1:NR2-C.3-ATC... | |
890 | |
891 FingerprintsVector;AtomNeighborhoods:TPSAAtomTypes:MinRadius0:MaxRadiu | |
892 s2;41;AlphaNumericalValues;ValuesString;NR0-N21-ATC1:NR1-None-ATC3:NR2 | |
893 -None-ATC5 NR0-N7-ATC1:NR1-None-ATC2:NR2-None-ATC3:NR2-O3-ATC1 NR0-Non | |
894 e-ATC1:NR1-N21-ATC1:NR1-None-ATC1:NR2-None-ATC3 NR0-None-ATC1:NR1-N21- | |
895 ATC1:NR1-None-ATC2:NR2-None-ATC6 NR0-None-ATC1:NR1-N21-ATC1:NR1-None-A | |
896 TC2:NR2-None-ATC6 NR0-None-ATC1:NR1-N7-ATC1:NR1-None-ATC1:NR1-O3-AT... | |
897 | |
898 FingerprintsVector;AtomNeighborhoods:UFFAtomTypes:MinRadius0:MaxRadius | |
899 2;41;AlphaNumericalValues;ValuesString;NR0-C_2-ATC1:NR1-C_3-ATC1:NR1-O | |
900 _2-ATC1:NR1-O_3-ATC1:NR2-C_3-ATC1 NR0-C_2-ATC1:NR1-C_R-ATC1:NR1-N_3-AT | |
901 C1:NR1-O_2-ATC1:NR2-C_R-ATC3 NR0-C_3-ATC1:NR1-C_2-ATC1:NR1-C_3-ATC1:NR | |
902 2-C_3-ATC1:NR2-O_2-ATC1:NR2-O_3-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR1-N_R | |
903 -ATC1:NR2-C_3-ATC1:NR2-C_R-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR2-C_3-A... | |
904 | |
905 =head2 METHODS | |
906 | |
907 =over 4 | |
908 | |
909 =item B<new> | |
910 | |
911 $NewAtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
912 %NamesAndValues); | |
913 | |
914 Using specified I<AtomNeighborhoodsFingerprints> property names and values hash, B<new> | |
915 method creates a new object and returns a reference to newly created B<AtomNeighborhoodsFingerprints> | |
916 object. By default, the following properties are initialized: | |
917 | |
918 Molecule = '' | |
919 Type = 'AtomNeighborhoods' | |
920 MinNeighborhoodRadius = 0 | |
921 MaxNeighborhoodRadius = 2 | |
922 AtomIdentifierType = '' | |
923 AtomicInvariantsToUse = ['AS', 'X', 'BO', 'H', 'FC', 'MN'] | |
924 FunctionalClassesToUse = ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'] | |
925 | |
926 Examples: | |
927 | |
928 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
929 'Molecule' => $Molecule, | |
930 'AtomIdentifierType' => | |
931 "AtomicInvariantsAtomTypes"); | |
932 | |
933 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
934 'Molecule' => $Molecule, | |
935 'MinNeighborhoodRadius' => 0, | |
936 'MaxNeighborhoodRadius' => 2, | |
937 'AtomIdentifierType' => | |
938 'AtomicInvariantsAtomTypes', | |
939 'AtomicInvariantsToUse' => | |
940 ['AS', 'X', 'BO', 'H', 'FC'] ); | |
941 | |
942 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
943 'Molecule' => $Molecule, | |
944 'AtomIdentifierType' => | |
945 'SYBYLAtomTypes'); | |
946 | |
947 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
948 'Molecule' => $Molecule, | |
949 'AtomIdentifierType' => | |
950 'MMFF94AtomTypes'); | |
951 | |
952 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
953 'Molecule' => $Molecule, | |
954 'AtomIdentifierType' => | |
955 'AtomicInvariantsAtomTypes'); | |
956 | |
957 $AtomNeighborhoodsFingerprints = new AtomNeighborhoodsFingerprints( | |
958 'Molecule' => $Molecule, | |
959 'MinNeighborhoodRadius' => 0, | |
960 'MaxNeighborhoodRadius' => 2, | |
961 'AtomIdentifierType' => | |
962 'FunctionalClassAtomTypes', | |
963 'FunctionalClassesToUse' => | |
964 ['HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal'] ); | |
965 | |
966 $AtomNeighborhoodsFingerprints->GenerateFingerprints(); | |
967 print "$AtomNeighborhoodsFingerprints\n"; | |
968 | |
969 =item B<GenerateFingerprints> | |
970 | |
971 $AtomNeighborhoodsFingerprints->GenerateFingerprints(); | |
972 | |
973 Generates atom neighborhood fingerprints and returns I<AtomNeighborhoodsFingerprints>. | |
974 | |
975 =item B<GetDescription> | |
976 | |
977 $Description = $AtomNeighborhoodsFingerprints->GetDescription(); | |
978 | |
979 Returns a string containing description of atom neighborhood fingerprints. | |
980 | |
981 =item B<SetAtomIdentifierType> | |
982 | |
983 $AtomNeighborhoodsFingerprints->SetAtomIdentifierType($IdentifierType); | |
984 | |
985 Sets atom I<IdentifierType> to use during atom neighborhood fingerprints generation and | |
986 returns I<AtomNeighborhoodsFingerprints>. | |
987 | |
988 Possible values: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
989 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, | |
990 TPSAAtomTypes, UFFAtomTypes>. | |
991 | |
992 =item B<SetAtomicInvariantsToUse> | |
993 | |
994 $AtomNeighborhoodsFingerprints->SetAtomicInvariantsToUse($ValuesRef); | |
995 $AtomNeighborhoodsFingerprints->SetAtomicInvariantsToUse(@Values); | |
996 | |
997 Sets atomic invariants to use during I<AtomicInvariantsAtomTypes> value of I<AtomIdentifierType> | |
998 for atom neighborhood fingerprints generation and returns I<AtomNeighborhoodsFingerprints>. | |
999 | |
1000 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB, | |
1001 H, Ar, RA, FC, MN, SM>. Default value: I<AS,X,BO,H,FC>. | |
1002 | |
1003 The atomic invariants abbreviations correspond to: | |
1004 | |
1005 AS = Atom symbol corresponding to element symbol | |
1006 | |
1007 X<n> = Number of non-hydrogen atom neighbors or heavy atoms | |
1008 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms | |
1009 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms | |
1010 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms | |
1011 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms | |
1012 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms | |
1013 H<n> = Number of implicit and explicit hydrogens for atom | |
1014 Ar = Aromatic annotation indicating whether atom is aromatic | |
1015 RA = Ring atom annotation indicating whether atom is in a ring | |
1016 FC<+n/-n> = Formal charge assigned to atom | |
1017 MN<n> = Mass number indicating isotope other than most abundant isotope | |
1018 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or | |
1019 3 (triplet) | |
1020 | |
1021 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
1022 | |
1023 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n> | |
1024 | |
1025 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are | |
1026 optional. Atom type specification doesn't include atomic invariants with zero or undefined values. | |
1027 | |
1028 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
1029 are also allowed: | |
1030 | |
1031 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
1032 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
1033 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
1034 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
1035 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
1036 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
1037 H : NumOfImplicitAndExplicitHydrogens | |
1038 Ar : Aromatic | |
1039 RA : RingAtom | |
1040 FC : FormalCharge | |
1041 MN : MassNumber | |
1042 SM : SpinMultiplicity | |
1043 | |
1044 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant | |
1045 atom types. | |
1046 | |
1047 =item B<SetFunctionalClassesToUse> | |
1048 | |
1049 $AtomNeighborhoodsFingerprints->SetFunctionalClassesToUse($ValuesRef); | |
1050 $AtomNeighborhoodsFingerprints->SetFunctionalClassesToUse(@Values); | |
1051 | |
1052 Sets functional classes to use during I<FunctionalClassAtomTypes> value of I<AtomIdentifierType> | |
1053 for atom neighborhoods fingerprints generation and returns I<AtomNeighborhoodsFingerprints>. | |
1054 | |
1055 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>. | |
1056 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>. | |
1057 | |
1058 The functional class abbreviations correspond to: | |
1059 | |
1060 HBD: HydrogenBondDonor | |
1061 HBA: HydrogenBondAcceptor | |
1062 PI : PositivelyIonizable | |
1063 NI : NegativelyIonizable | |
1064 Ar : Aromatic | |
1065 Hal : Halogen | |
1066 H : Hydrophobic | |
1067 RA : RingAtom | |
1068 CA : ChainAtom | |
1069 | |
1070 Functional class atom type specification for an atom corresponds to: | |
1071 | |
1072 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA or None | |
1073 | |
1074 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom | |
1075 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]: | |
1076 | |
1077 HydrogenBondDonor: NH, NH2, OH | |
1078 HydrogenBondAcceptor: N[!H], O | |
1079 PositivelyIonizable: +, NH2 | |
1080 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH | |
1081 | |
1082 =item B<SetMaxNeighborhoodRadius> | |
1083 | |
1084 $AtomNeighborhoodsFingerprints->SetMaxNeighborhoodRadius($Radius); | |
1085 | |
1086 Sets maximum neighborhood radius to use during atom neighborhood fingerprints generation and | |
1087 returns I<AtomNeighborhoodsFingerprints>. | |
1088 | |
1089 =item B<SetMinNeighborhoodRadius> | |
1090 | |
1091 $AtomNeighborhoodsFingerprints->SetMinNeighborhoodRadius($Radius); | |
1092 | |
1093 Sets minimum neighborhood radius to use during atom neighborhood fingerprints generation and | |
1094 returns I<AtomNeighborhoodsFingerprints>. | |
1095 | |
1096 =item B<StringifyAtomNeighborhoodsFingerprints> | |
1097 | |
1098 $String = $Fingerprints->StringifyAtomNeighborhoodsFingerprints(); | |
1099 | |
1100 Returns a string containing information about I<AtomNeighborhoodsFingerprints> object. | |
1101 | |
1102 =back | |
1103 | |
1104 =head1 AUTHOR | |
1105 | |
1106 Manish Sud <msud@san.rr.com> | |
1107 | |
1108 =head1 SEE ALSO | |
1109 | |
1110 Fingerprints.pm, FingerprintsStringUtil.pm, AtomTypesFingerprints.pm, EStateIndiciesFingerprints.pm, | |
1111 ExtendedConnectivityFingerprints.pm, MACCSKeys.pm, PathLengthFingerprints.pm, | |
1112 TopologicalAtomPairsFingerprints.pm, TopologicalAtomTripletsFingerprints.pm, | |
1113 TopologicalAtomTorsionsFingerprints.pm, TopologicalPharmacophoreAtomPairsFingerprints.pm, | |
1114 TopologicalPharmacophoreAtomTripletsFingerprints.pm | |
1115 | |
1116 =head1 COPYRIGHT | |
1117 | |
1118 Copyright (C) 2015 Manish Sud. All rights reserved. | |
1119 | |
1120 This file is part of MayaChemTools. | |
1121 | |
1122 MayaChemTools is free software; you can redistribute it and/or modify it under | |
1123 the terms of the GNU Lesser General Public License as published by the Free | |
1124 Software Foundation; either version 3 of the License, or (at your option) | |
1125 any later version. | |
1126 | |
1127 =cut |