comparison lib/MolecularDescriptors/MolecularDescriptorsGenerator.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 package MolecularDescriptors::MolecularDescriptorsGenerator;
2 #
3 # $RCSfile: MolecularDescriptorsGenerator.pm,v $
4 # $Date: 2015/02/28 20:49:20 $
5 # $Revision: 1.13 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Scalar::Util ();
33 use ObjectProperty;
34 use TextUtil ();
35 use FileUtil ();
36 use Molecule;
37 use MolecularDescriptors::MolecularDescriptors;
38
# Package globals required by Exporter and for inheritance...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = ('ObjectProperty', 'Exporter');

# Nothing is exported by default; class functions are exported on request...
@EXPORT = ();
@EXPORT_OK = (
    'GetAvailableDescriptorClassNames',
    'GetAvailableClassAndDescriptorNames',
    'GetAvailableDescriptorNames',
    'GetAvailableDescriptorNamesForDescriptorClass',
    'GetAvailableClassNameForDescriptorName',
    'GetRuleOf5DescriptorNames',
    'GetRuleOf3DescriptorNames',
    'IsDescriptorClassNameAvailable',
    'IsDescriptorNameAvailable',
);

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# File-scoped class data: the package name and the map describing all
# available descriptor classes. Both are filled in by _InitializeClass...
my $ClassName;
my %DescriptorsDataMap;
_InitializeClass();
50
51 # Overload Perl functions...
52 use overload '""' => 'StringifyMolecularDescriptorsGenerator';
53
# Class constructor...
#
# Creates a new generator object, resets its internal data and then applies the
# specified property names and values through the corresponding Set<Name> methods.
#
# Returns the newly created object.
#
sub new {
    my($Class, %NamesAndValues) = @_;

    # Allow invocation on either a class name or an existing object...
    my $ObjectClass = ref($Class) || $Class;
    my $This = bless {}, $ObjectClass;

    $This->_InitializeMolecularDescriptorsGenerator();
    $This->_InitializeMolecularDescriptorsGeneratorProperties(%NamesAndValues);

    return $This;
}
67
# Initialize class data...
#
# Records the package name used in error messages and loads the data about
# available molecular descriptor classes.
#
sub _InitializeClass {
    # Class name...
    ($ClassName) = (__PACKAGE__);

    # Detect and load available molecular descriptor classes...
    _LoadMolecularDescriptorsData();
}
77
# Initialize object data...
#
# Resets all generator properties to their empty state and returns the object:
#
#   Mode - Type of descriptors to generate. Possible values: All, RuleOf5,
#     RuleOf3 or Specify. It's left empty here; the default value of All is
#     applied by _InitializeMolecularDescriptorsGeneratorProperties.
#
#     RuleOf5 [ Ref 91 ] descriptor names: MolecularWeight, HydrogenBondDonors,
#     HydrogenBondAcceptors, SLogP. RuleOf5 states: MolecularWeight <= 500,
#     HydrogenBondDonors <= 5, HydrogenBondAcceptors <= 10, and logP <= 5.
#
#     RuleOf3 [ Ref 92 ] descriptor names: MolecularWeight, RotatableBonds,
#     HydrogenBondDonors, HydrogenBondAcceptors, SLogP, TPSA. RuleOf3 states:
#     MolecularWeight <= 300, RotatableBonds <= 3, HydrogenBondDonors <= 3,
#     HydrogenBondAcceptors <= 3, logP <= 3, and TPSA <= 60.
#
#     For Specify value of Mode, a set of descriptor names must be specified
#     using DescriptorNames parameter.
#
#   DescriptorNames - Descriptor names used to generate descriptor values.
#
#   DescriptorClassParameters - Calculation control parameters, keyed by descriptor
#     class name, passed on to descriptor classes during their instantiation.
#
#   DescriptorClassesInstantiated - Flag indicating whether descriptor class
#     objects have been created.
#
#   DescriptorClassNames, DescriptorClassObjects - Descriptor class names and
#     objects corresponding to specified descriptor names.
#
#   DescriptorValues - Descriptor values generated for specified descriptor names.
#
sub _InitializeMolecularDescriptorsGenerator {
    my($This) = @_;

    # Scalar properties...
    $This->{Mode} = '';
    $This->{DescriptorClassesInstantiated} = 0;

    # Array properties...
    for my $ArrayProperty (qw(DescriptorNames DescriptorClassNames DescriptorValues)) {
        @{$This->{$ArrayProperty}} = ();
    }

    # Hash properties...
    for my $HashProperty (qw(DescriptorClassParameters DescriptorClassObjects)) {
        %{$This->{$HashProperty}} = ();
    }

    return $This;
}
131
# Initialize object properties...
#
# Each specified property name is applied by calling its Set<Name> method with the
# associated value. Mode defaults to All when no mode has been set, after which the
# descriptor names are checked and initialized for the selected mode.
#
# Returns the object.
#
sub _InitializeMolecularDescriptorsGeneratorProperties {
    my($This, %NamesAndValues) = @_;

    for my $PropertyName (keys %NamesAndValues) {
        my $SetterName = "Set${PropertyName}";
        $This->$SetterName($NamesAndValues{$PropertyName});
    }

    # Set default value for Mode...
    $This->{Mode} ||= 'All';

    $This->_CheckAndInitializeDescriptorNames();

    return $This;
}
152
# Set descriptors generation mode...
#
# Supported values, matched without regard to case:
#
#   All     - all available descriptors
#   RuleOf5 - rule of 5 descriptors
#   RuleOf3 - rule of 3 descriptors
#   Specify - specified set of descriptors
#
# Croaks for any other value; otherwise stores the value as specified and
# returns the object.
#
sub SetMode {
    my($This, $Value) = @_;

    my $IsSupportedMode = ($Value =~ /^(All|RuleOf5|RuleOf3|Specify)$/i) ? 1 : 0;

    croak "Error: ${ClassName}->SetMode: Mode value, $Value, is not valid; Supported values: All, RuleOf5, RuleOf3 or Specify..."
        unless $IsSupportedMode;

    $This->{Mode} = $Value;

    return $This;
}
169
# Set descriptor names to use for generating descriptor values using an array
# or reference to an array...
#
# Croaks when Mode is All, or when any specified name is rejected by
# _ValidateDescriptorNames. An empty list of values leaves the object unchanged
# and returns nothing; otherwise the object is returned.
#
# Notes:
#   . Previously stored names are restored when validation fails, so a caller
#     trapping the error doesn't end up with invalid names in the object.
#   . Descriptor class objects are marked for re-instantiation, as the objects
#     created for an earlier set of names may not cover the new names.
#
sub SetDescriptorNames {
    my($This, @Values) = @_;

    if ($This->{Mode} =~ /^All$/i) {
        croak "Error: ${ClassName}->SetDescriptorNames: Descriptor names cann't be specified during \"All\" value of descsriptors generation \"Mode\"...";
    }

    if (!@Values) {
        return;
    }

    # Names are specified either as a reference to an array or as a list of values...
    my $FirstValue = $Values[0];
    my @NewDescriptorNames = (ref($FirstValue) =~ /^ARRAY/) ? @{$FirstValue} : @Values;

    my @PreviousDescriptorNames = @{$This->{DescriptorNames}};
    @{$This->{DescriptorNames}} = @NewDescriptorNames;

    # Make sure specified descriptor names are valid...
    if (!eval { $This->_ValidateDescriptorNames(); 1 }) {
        my $ValidationError = $@;
        @{$This->{DescriptorNames}} = @PreviousDescriptorNames;

        # Propagate the original error as is...
        die $ValidationError;
    }

    # Force instantiation of descriptor classes matching the new names during
    # the next call to GenerateDescriptors...
    $This->{DescriptorClassesInstantiated} = 0;

    return $This;
}
205
# Get descriptor names...
#
# Returns the descriptor names as an array in list context and the number of
# descriptor names in scalar context.
#
sub GetDescriptorNames {
    my($This) = @_;
    my $NamesRef = $This->{DescriptorNames};

    if (wantarray) {
        return @{$NamesRef};
    }
    return scalar @{$NamesRef};
}
213
# Get all descriptor values...
#
# After descriptors have been generated, the stored descriptor values are returned.
# Otherwise, a value of 'None' is returned for each descriptor name.
#
# Returns an array in list context and the number of values in scalar context.
#
sub GetDescriptorValues {
    my($This) = @_;

    my @Values = $This->{DescriptorsGenerated}
        ? @{$This->{DescriptorValues}}
        : (('None') x scalar(@{$This->{DescriptorNames}}));

    return scalar @Values unless wantarray;
    return @Values;
}
230
# Get descriptor value for a specified descriptor name...
#
# Returns 'None' when the name isn't among the descriptor names of this object.
#
sub GetDescriptorValueByName {
    my($This, $Name) = @_;

    my %ValueOfName = $This->GetDescriptorNamesAndValues();

    if (exists $ValueOfName{$Name}) {
        return $ValueOfName{$Name};
    }
    return 'None';
}
242
# Get calculated molecular descriptor names and values as a hash with names
# and values as key/value pairs...
#
# Names are paired by position with the values returned by GetDescriptorValues.
#
sub GetDescriptorNamesAndValues {
    my($This) = @_;

    my @Names = @{$This->{DescriptorNames}};
    my @Values = $This->GetDescriptorValues();

    my %NamesAndValues = ();
    for my $Index (0 .. $#Names) {
        $NamesAndValues{$Names[$Index]} = $Values[$Index];
    }

    return %NamesAndValues;
}
255
# Set up descriptor calculation control parameters for a specified descriptor class name...
#
# The specified parameter names and values are simply passed on to the specified descriptor
# class during instantiation of the descriptor class object without performing any validation
# of parameter names and associated values. It's up to the appropriate descriptor class methods
# to validate these parameters and values.
#
# In addition to specified parameter names and values, the parameter hash must also contain
# the descriptor class name as a key/value pair, with DescriptorClassName as key and the
# class name as value.
#
# Croaks when DescriptorClassName is missing. Warns and returns the object unchanged when
# the class name isn't available. Warns when parameters for the class have already been
# specified and replaces them. Returns the object.
#
# Notes:
#   . Descriptor class objects are marked for re-instantiation, so that parameters
#     specified after an earlier call to GenerateDescriptors take effect.
#
sub SetDescriptorClassParameters {
    my($This, %NamesAndValues) = @_;

    if (!exists $NamesAndValues{DescriptorClassName}) {
        croak "Error: ${ClassName}->SetDescriptorClassParameters: Can't set descriptor class name paramaters: DescriptorClassName is not specified...";
    }

    my $DescriptorClassName = $NamesAndValues{DescriptorClassName};
    if (!IsDescriptorClassNameAvailable($DescriptorClassName)) {
        carp "Warning: ${ClassName}->SetDescriptorClassParameters: Can't set descriptor class name paramaters: Specified descriptor class name, $DescriptorClassName, is not available...";
        return $This;
    }

    if (exists $This->{DescriptorClassParameters}{$DescriptorClassName}) {
        carp "Warning: ${ClassName}->SetDescriptorClassParameters: Class name parameters for $DescriptorClassName have already been specified: Replacing previous values...";
    }

    # Store everything except the class name itself...
    %{$This->{DescriptorClassParameters}{$DescriptorClassName}} = ();
    NAME: for my $Name (keys %NamesAndValues) {
        if ($Name =~ /^DescriptorClassName$/) {
            next NAME;
        }
        $This->{DescriptorClassParameters}{$DescriptorClassName}{$Name} = $NamesAndValues{$Name};
    }

    # Parameters are only used while creating descriptor class objects...
    $This->{DescriptorClassesInstantiated} = 0;

    return $This;
}
295
# Get descriptor class parameters...
#
# Returns a reference to the hash of hashes held by the object, with hash keys
# corresponding to class name and class parameter name, and hash value as the
# class parameter value. The returned reference is not a copy.
#
sub GetDescriptorClassParameters {
    my($This) = @_;

    my $ParametersRef = \%{$This->{DescriptorClassParameters}};

    return $ParametersRef;
}
305
# Get available descriptor class names...
#
# Returns an array in list context and the number of class names in scalar context.
#
# This functionality can be either invoked as a class function or an
# object method.
#
sub GetAvailableDescriptorClassNames {
    my $ClassNamesRef = $DescriptorsDataMap{ClassNames};

    return @{$ClassNamesRef} if wantarray;
    return scalar @{$ClassNamesRef};
}
315
# Get available descriptor class and descriptor names as a hash containing key/value
# pairs corresponding to class name and a reference to an array of descriptor names
# available for the class. The arrays are copies of the class data.
#
# This functionality can be either invoked as a class function or an
# object method.
#
sub GetAvailableClassAndDescriptorNames {
    my %ClassAndDescriptorNames = ();

    for my $AvailableClassName (@{$DescriptorsDataMap{ClassNames}}) {
        my @NamesForClass = @{$DescriptorsDataMap{ClassToDescriptorNames}{$AvailableClassName}};
        $ClassAndDescriptorNames{$AvailableClassName} = [@NamesForClass];
    }

    return %ClassAndDescriptorNames;
}
334
# Get available descriptor names...
#
# Names are collected class by class, in the order of available class names.
# Returns an array in list context and the number of names in scalar context.
#
# This functionality can be either invoked as a class function or an
# object method.
#
sub GetAvailableDescriptorNames {
    my @DescriptorNames = ();

    for my $AvailableClassName (@{$DescriptorsDataMap{ClassNames}}) {
        push @DescriptorNames, @{$DescriptorsDataMap{ClassToDescriptorNames}{$AvailableClassName}};
    }

    return @DescriptorNames if wantarray;
    return scalar @DescriptorNames;
}
348
# Is it a valid descriptors class name?
#
# Returns 1 when the class name is among the available descriptor classes; otherwise 0.
#
# This functionality can be either invoked as a class function or an
# object method.
#
sub IsDescriptorClassNameAvailable {
    my($FirstParameter, $SecondParameter) = @_;

    # During invocation as an object method, the name is the second parameter...
    my $InvokedAsMethod = ((@_ == 2) && (_IsMolecularDescriptorsGenerator($FirstParameter))) ? 1 : 0;
    my $DescriptorClassName = $InvokedAsMethod ? $SecondParameter : $FirstParameter;

    if (exists $DescriptorsDataMap{ClassToDescriptorNames}{$DescriptorClassName}) {
        return 1;
    }
    return 0;
}
367
# Is it a valid descriptor name?
#
# Returns 1 when the descriptor name is provided by an available descriptor class;
# otherwise 0.
#
# This functionality can be either invoked as a class function or an
# object method.
#
sub IsDescriptorNameAvailable {
    my($FirstParameter, $SecondParameter) = @_;

    # During invocation as an object method, the name is the second parameter...
    my $InvokedAsMethod = ((@_ == 2) && (_IsMolecularDescriptorsGenerator($FirstParameter))) ? 1 : 0;
    my $DescriptorName = $InvokedAsMethod ? $SecondParameter : $FirstParameter;

    if (exists $DescriptorsDataMap{DescriptorToClassName}{$DescriptorName}) {
        return 1;
    }
    return 0;
}
386
# Get available descriptor names for a descriptor class...
#
# Returns an array in list context and the number of names in scalar context. An
# unknown class name yields an empty array.
#
# This functionality can be either invoked as a class function or an
# object method.
#
sub GetAvailableDescriptorNamesForDescriptorClass {
    my($FirstParameter, $SecondParameter) = @_;

    # During invocation as an object method, the class name is the second parameter...
    my $InvokedAsMethod = ((@_ == 2) && (_IsMolecularDescriptorsGenerator($FirstParameter))) ? 1 : 0;
    my $DescriptorClassName = $InvokedAsMethod ? $SecondParameter : $FirstParameter;

    my @DescriptorNames = (exists $DescriptorsDataMap{ClassToDescriptorNames}{$DescriptorClassName})
        ? @{$DescriptorsDataMap{ClassToDescriptorNames}{$DescriptorClassName}}
        : ();

    return @DescriptorNames if wantarray;
    return scalar @DescriptorNames;
}
410
# Get available descriptor class name for a descriptor name...
#
# Returns an empty string when no available class provides the descriptor name.
#
# This functionality can be either invoked as a class function or an
# object method.
#
sub GetAvailableClassNameForDescriptorName {
    my($FirstParameter, $SecondParameter) = @_;

    # During invocation as an object method, the name is the second parameter...
    my $InvokedAsMethod = ((@_ == 2) && (_IsMolecularDescriptorsGenerator($FirstParameter))) ? 1 : 0;
    my $DescriptorName = $InvokedAsMethod ? $SecondParameter : $FirstParameter;

    if (!exists $DescriptorsDataMap{DescriptorToClassName}{$DescriptorName}) {
        return '';
    }
    return $DescriptorsDataMap{DescriptorToClassName}{$DescriptorName};
}
434
# Get RuleOf5 descriptor names...
#
# Returns an array in list context and the number of names in scalar context.
#
# This functionality can be either invoked as a class function or an
# object method.
#
sub GetRuleOf5DescriptorNames {
    my @RuleOf5Names = (
        'MolecularWeight',
        'HydrogenBondDonors',
        'HydrogenBondAcceptors',
        'SLogP',
    );

    return @RuleOf5Names if wantarray;
    return scalar @RuleOf5Names;
}
447
# Get RuleOf3 descriptor names...
#
# Returns an array in list context and the number of names in scalar context.
#
# This functionality can be either invoked as a class function or an
# object method.
#
sub GetRuleOf3DescriptorNames {
    my @RuleOf3Names = (
        'MolecularWeight',
        'RotatableBonds',
        'HydrogenBondDonors',
        'HydrogenBondAcceptors',
        'SLogP',
        'TPSA',
    );

    return @RuleOf3Names if wantarray;
    return scalar @RuleOf3Names;
}
460
461
# Set molecule object...
#
# The molecule is stored as a weak reference, so holding it in the generator
# doesn't increment the reference count of the molecule.
#
# Returns the object.
#
sub SetMolecule {
    my($This, $Molecule) = @_;

    for my $PropertyName ('Molecule') {
        $This->{$PropertyName} = $Molecule;

        # Weaken the stored reference...
        Scalar::Util::weaken($This->{$PropertyName});
    }

    return $This;
}
474
# Generate specified molecular descriptors...
#
# After instantiating descriptor class objects at first invocation and initializing
# descriptor values during subsequent invocations, GenerateDescriptors method
# provided by each descriptor class is used to calculate descriptor values for
# specified descriptors.
#
# Returns the object after successful generation. Warns and returns undef when no
# molecule has been set. Returns undef as soon as any descriptor class reports
# that its generation didn't succeed, in which case the descriptor values stay
# marked as not generated.
#
sub GenerateDescriptors {
    my($This) = @_;

    # Initialize descriptor values...
    $This->_InitializeDescriptorValues();

    # Instantiate descriptor classes corresponding to specified descriptors...
    if (!$This->{DescriptorClassesInstantiated}) {
        $This->_InstantiateDescriptorClasses();
    }

    # Check availability of molecule...
    if (!$This->{Molecule}) {
        carp "Warning: ${ClassName}->GenerateDescriptors: Molecular descriptors generation didn't succeed: Molecule data is not available: Molecule object hasn't been set...";
        return undef;
    }

    # Calculate descriptor values...
    for my $DescriptorClassName (@{$This->{DescriptorClassNames}}) {
        my $DescriptorClassObject = $This->{DescriptorClassObjects}{$DescriptorClassName};

        $DescriptorClassObject->SetMolecule($This->{Molecule});
        $DescriptorClassObject->GenerateDescriptors();

        if (!$DescriptorClassObject->IsDescriptorsGenerationSuccessful()) {
            return undef;
        }
    }

    # Set final descriptor values...
    $This->_SetFinalDescriptorValues();

    return $This;
}
517
# Initialize descriptor values...
#
# Discards previously generated values and marks descriptors as not generated.
# Returns the object.
#
sub _InitializeDescriptorValues {
    my($This) = @_;

    @{$This->{DescriptorValues}} = ();
    $This->{DescriptorsGenerated} = 0;

    return $This;
}
529
# Setup final descriptor values...
#
# Marks descriptors as generated and collects descriptor values from descriptor
# class objects: all values of each class during All mode; otherwise the value of
# each specified descriptor name, retrieved from the class providing it.
#
# Returns the object.
#
sub _SetFinalDescriptorValues {
    my($This) = @_;

    $This->{DescriptorsGenerated} = 1;
    @{$This->{DescriptorValues}} = ();

    if ($This->{Mode} !~ /^All$/i) {
        # Set descriptor values for a subset of available descriptors...
        for my $Name (@{$This->{DescriptorNames}}) {
            my $OwnerClassName = $DescriptorsDataMap{DescriptorToClassName}{$Name};
            my $OwnerClassObject = $This->{DescriptorClassObjects}{$OwnerClassName};

            push @{$This->{DescriptorValues}}, $OwnerClassObject->GetDescriptorValueByName($Name);
        }
        return $This;
    }

    # Set descriptor values for all available descriptors...
    for my $InstantiatedClassName (@{$This->{DescriptorClassNames}}) {
        my $InstantiatedClassObject = $This->{DescriptorClassObjects}{$InstantiatedClassName};

        push @{$This->{DescriptorValues}}, $InstantiatedClassObject->GetDescriptorValues();
    }

    return $This;
}
560
# Is descriptors generation successful?
#
# Returns 1 when DescriptorsGenerated is set; otherwise 0.
#
# Notes:
#   . DescriptorsGenerated is set to 1 while setting up final descriptor values
#     and is reset to 0 while initializing descriptor values.
#
sub IsDescriptorsGenerationSuccessful {
    my($This) = @_;

    if ($This->{DescriptorsGenerated}) {
        return 1;
    }
    return 0;
}
573
# Check and set default descriptor names for generating descriptor values...
#
# For All, RuleOf5 and RuleOf3 modes, descriptor names must not have been specified
# and are set to the names corresponding to the mode. For Specify mode, descriptor
# names must have been specified already. Croaks on any violation and for an
# unsupported mode value.
#
# Returns the object, consistent with the other initialization methods.
#
sub _CheckAndInitializeDescriptorNames {
    my($This) = @_;

    if ($This->{Mode} =~ /^(All|RuleOf5|RuleOf3)$/i) {
        if (@{$This->{DescriptorNames}}) {
            croak "Error: ${ClassName}->_CheckAndInitializeDescriptorNames: Descriptor names can't be specified during \"All, RuleOf5 or RuleOf3\" values of descsriptors generation \"Mode\"...";
        }
    }

    if ($This->{Mode} =~ /^All$/i) {
        @{$This->{DescriptorNames}} = GetAvailableDescriptorNames();
    }
    elsif ($This->{Mode} =~ /^RuleOf5$/i) {
        @{$This->{DescriptorNames}} = GetRuleOf5DescriptorNames();
    }
    elsif ($This->{Mode} =~ /^RuleOf3$/i) {
        @{$This->{DescriptorNames}} = GetRuleOf3DescriptorNames();
    }
    elsif ($This->{Mode} =~ /^Specify$/i) {
        if (!@{$This->{DescriptorNames}}) {
            croak "Error: ${ClassName}->_CheckAndInitializeDescriptorNames: DescriptorNames must be specified during Specify value for Mode...";
        }
    }
    else {
        croak "Error: ${ClassName}->_CheckAndInitializeDescriptorNames: Mode value, $This->{Mode}, is not valid...";
    }

    return $This;
}
603
# Instantiate descriptor classes corresponding to specified descriptor names...
#
# One object is created for each distinct descriptor class providing any of the
# descriptor names, in order of first appearance. Parameters specified for a class
# through SetDescriptorClassParameters are passed on to its constructor.
#
# Returns the object.
#
# Notes:
#   . DescriptorClassesInstantiated is set only after all class objects have been
#     created. A constructor failure trapped by the caller therefore doesn't leave
#     the object marked as instantiated with a partial set of class objects.
#
sub _InstantiateDescriptorClasses {
    my($This) = @_;

    $This->{DescriptorClassesInstantiated} = 0;

    @{$This->{DescriptorClassNames}} = ();
    %{$This->{DescriptorClassObjects}} = ();

    NAME: for my $DescriptorName (@{$This->{DescriptorNames}}) {
        my $DescriptorClassName = $DescriptorsDataMap{DescriptorToClassName}{$DescriptorName};

        # Several descriptor names may be provided by the same class...
        if (exists $This->{DescriptorClassObjects}{$DescriptorClassName}) {
            next NAME;
        }

        my $DescriptorClassPath = $DescriptorsDataMap{ClassNameToClassPath}{$DescriptorClassName};

        my %ClassParameters = (exists $This->{DescriptorClassParameters}{$DescriptorClassName})
            ? %{$This->{DescriptorClassParameters}{$DescriptorClassName}}
            : ();

        $This->{DescriptorClassObjects}{$DescriptorClassName} = $DescriptorClassPath->new(%ClassParameters);

        # Track the class name only after its object exists...
        push @{$This->{DescriptorClassNames}}, $DescriptorClassName;
    }

    $This->{DescriptorClassesInstantiated} = 1;

    return $This;
}
635
# Return a string containing data for MolecularDescriptorsGenerator object...
#
# The string lists the mode, the specified descriptor names, the available
# descriptor class names and the descriptor names along with their values, as
# sections separated by semicolons. Empty values and values of None are shown
# as 'None'.
#
sub StringifyMolecularDescriptorsGenerator {
    my($This) = @_;

    # Mode, specified descriptor names and available descriptor classes...
    my $TheString = "MolecularDescriptorsGenerator: Mode - $This->{Mode}; SpecifiedDescriptorNames - < @{$This->{DescriptorNames}} >; AvailableMolecularDescriptorClassNames - < @{$DescriptorsDataMap{ClassNames}} >";

    # Descriptor names and values...
    my %NamesAndValues = $This->GetDescriptorNamesAndValues();
    my @NamesAndValuesInfo = ();

    for my $Name (@{$This->{DescriptorNames}}) {
        my $Value = $NamesAndValues{$Name};
        $Value = (TextUtil::IsEmpty($Value) || $Value =~ /^None$/i) ? 'None' : $Value;
        push @NamesAndValuesInfo, "$Name - $Value";
    }

    # Separate this section from the preceding one like the other sections...
    if (@NamesAndValuesInfo) {
        $TheString .= "; Names - Values: <" . TextUtil::JoinWords(\@NamesAndValuesInfo, ", ", 0) . ">";
    }
    else {
        $TheString .= "; Names - Values: < None>";
    }

    return $TheString;
}
662
# Is it a MolecularDescriptorsGenerator object?
#
# Returns 1 for a blessed object belonging to, or derived from, this class;
# otherwise 0.
#
sub _IsMolecularDescriptorsGenerator {
    my($Object) = @_;

    if (!Scalar::Util::blessed($Object)) {
        return 0;
    }
    return $Object->isa($ClassName) ? 1 : 0;
}
669
# Validate descriptor names for generating descriptor values...
#
# Croaks at the first descriptor name held by the object that isn't provided by
# any available descriptor class. Returns the object when all names are valid.
#
sub _ValidateDescriptorNames {
    my($This) = @_;

    for my $DescriptorName (@{$This->{DescriptorNames}}) {
        if (!exists $DescriptorsDataMap{DescriptorToClassName}{$DescriptorName}) {
            croak "Error: ${ClassName}->_ValidateDescriptorNames: Specified descriptor name, $DescriptorName, is not valid...";
        }
    }

    return $This;
}
684
#
# Load available molecular descriptors data...
#
# All available molecular descriptors classes are automatically detected in
# MolecularDescriptors directory under <MayaChemTools>/lib directory and
# information about available descriptor names is retrieved from each descriptor
# class using function GetDescriptorNames. The following %DescriptorsDataMap
# entries are populated with available molecular descriptors data:
#
#   @{$DescriptorsDataMap{ClassNames}}
#   %{$DescriptorsDataMap{ClassNameToClassPath}}
#   %{$DescriptorsDataMap{ClassToDescriptorNames}}
#   %{$DescriptorsDataMap{DescriptorToClassName}}
#
# Any previously loaded data is discarded first.
#
sub _LoadMolecularDescriptorsData {
    # Start out with an empty map...
    %DescriptorsDataMap = ();

    # Class names and paths must be available before descriptor names are mapped...
    _RetrieveAndLoadDescriptorClasses();
    _SetupDescriptorsDataMap();
}
712
#
# Retrieve available molecular descriptors classes from MolecularDescriptors directory under
# <MayaChemTools>/lib directory and load them...
#
# The following %DescriptorsDataMap entries are set up:
#
#   @{$DescriptorsDataMap{ClassNames}} - Names of loaded descriptor classes
#   %{$DescriptorsDataMap{ClassNameToClassPath}} - Class name to package name map
#
# Croaks when the directory is missing, doesn't contain any class file, contains a
# file name without .pm, or when loading of a class fails.
#
# NOTE(review): FileUtil::ExpandFileNames is presumably returning file names without
# directory names for a zero value of its third parameter; the file name checks below
# rely on it - confirm against FileUtil.
#
sub _RetrieveAndLoadDescriptorClasses {
    my($DescriptorsDirName, $MayaChemToolsLibDir, $DescriptorsDirPath, $IncludeDirName, @FileNames, @DescriptorsClassFileNames);

    # Initialize the same map entries which are populated below and used by other methods...
    @{$DescriptorsDataMap{ClassNames}} = ();
    %{$DescriptorsDataMap{ClassNameToClassPath}} = ();

    $DescriptorsDirName = "MolecularDescriptors";
    $MayaChemToolsLibDir = FileUtil::GetMayaChemToolsLibDirName();

    $DescriptorsDirPath = "$MayaChemToolsLibDir/$DescriptorsDirName";

    if (! -d "$DescriptorsDirPath") {
        croak "Error: ${ClassName}::_RetrieveAndLoadDescriptorClasses: MayaChemTools package molecular descriptors directory, $DescriptorsDirPath, is missing: Possible installation problems...";
    }

    @FileNames = ("$DescriptorsDirPath/*");
    $IncludeDirName = 0;
    @DescriptorsClassFileNames = FileUtil::ExpandFileNames(\@FileNames, "pm", $IncludeDirName);

    if (!@DescriptorsClassFileNames) {
        croak "Error: ${ClassName}::_RetrieveAndLoadDescriptorClasses: MayaChemTools package molecular descriptors directory, $DescriptorsDirPath, doesn't contain any molecular descriptor class: Possible installation problems...";
    }

    # Process class files in sorted order to get a reproducible order of class names...
    FILENAME: for my $DescriptorsClassFileName (sort @DescriptorsClassFileNames) {
        if ($DescriptorsClassFileName !~ /\.pm/) {
            croak "Error: ${ClassName}::_RetrieveAndLoadDescriptorClasses: MayaChemTools package molecular descriptors directory, $DescriptorsDirPath, contains invalid class file name $DescriptorsClassFileName: Possible installation problems...";
        }

        # Ignore base class and descriptors generator class...
        if ($DescriptorsClassFileName =~ /^(MolecularDescriptorsGenerator\.pm|MolecularDescriptors\.pm)$/) {
            next FILENAME;
        }

        # Class name is the file name up to the first period...
        my($DescriptorClassName) = split /\./, $DescriptorsClassFileName;
        my $DescriptorClassPath = "${DescriptorsDirName}::${DescriptorClassName}";

        # Load descriptors class. The string form of eval is needed as the package
        # name is only known at run time; the trailing true value makes the outcome
        # independent of the state of $@...
        if (!eval "use $DescriptorClassPath; 1") {
            croak "Error: ${ClassName}::_RetrieveAndLoadDescriptorClasses: use $DescriptorClassPath failed: $@ ...";
        }

        push @{$DescriptorsDataMap{ClassNames}}, $DescriptorClassName;

        $DescriptorsDataMap{ClassNameToClassPath}{$DescriptorClassName} = $DescriptorClassPath;
    }
}
765
#
# Setup descriptors data map using loaded descriptor classes...
#
# For each loaded descriptor class, descriptor names are retrieved using its
# GetDescriptorNames function and the following maps are filled in:
#
#   %{$DescriptorsDataMap{ClassToDescriptorNames}} - Class name to descriptor names
#   %{$DescriptorsDataMap{DescriptorToClassName}} - Descriptor name to class name
#
# Croaks when a class doesn't provide any descriptor names, when a class is
# processed more than once, or when a descriptor name is provided by more than
# one class.
#
sub _SetupDescriptorsDataMap {
    # Class to descriptor names map...
    %{$DescriptorsDataMap{ClassToDescriptorNames}} = ();

    # Descriptor to class name map...
    %{$DescriptorsDataMap{DescriptorToClassName}} = ();

    for my $DescriptorClassName (@{$DescriptorsDataMap{ClassNames}}) {
        my $LoadedClassPath = $DescriptorsDataMap{ClassNameToClassPath}{$DescriptorClassName};
        my @ProvidedNames = $LoadedClassPath->GetDescriptorNames();

        @ProvidedNames
            or croak "Error: ${ClassName}::_SetupDescriptorsDataMap: Molecular descriptor class $DescriptorClassName doesn't provide any descriptor names...";

        !exists $DescriptorsDataMap{ClassToDescriptorNames}{$DescriptorClassName}
            or croak "Error: ${ClassName}::_SetupDescriptorsDataMap: Molecular descriptor class $DescriptorClassName has already been processed...";

        @{$DescriptorsDataMap{ClassToDescriptorNames}{$DescriptorClassName}} = @ProvidedNames;

        for my $DescriptorName (@ProvidedNames) {
            # A descriptor name must be unique across all descriptor classes...
            if (exists $DescriptorsDataMap{DescriptorToClassName}{$DescriptorName}) {
                croak "Error: ${ClassName}::_SetupDescriptorsDataMap: Molecular descriptor name, $DescriptorName, in class name, $DescriptorClassName, has already been provided by class name $DescriptorsDataMap{DescriptorToClassName}{$DescriptorName}...";
            }

            $DescriptorsDataMap{DescriptorToClassName}{$DescriptorName} = $DescriptorClassName;
        }
    }
}
803
804 1;
805
806 __END__
807
808 =head1 NAME
809
810 MolecularDescriptorsGenerator
811
812 =head1 SYNOPSIS
813
814 use MolecularDescriptors::MolecularDescriptorsGenerator;
815
816 use MolecularDescriptors::MolecularDescriptorsGenerator qw(:all);
817
818 =head1 DESCRIPTION
819
820 B<MolecularDescriptorsGenerator> class provides the following methods:
821
822 new, GenerateDescriptors, GetAvailableClassAndDescriptorNames,
823 GetAvailableClassNameForDescriptorName, GetAvailableDescriptorClassNames,
824 GetAvailableDescriptorNames, GetAvailableDescriptorNamesForDescriptorClass,
825 GetDescriptorClassParameters, GetDescriptorNames, GetDescriptorNamesAndValues,
826 GetDescriptorValueByName, GetDescriptorValues, GetRuleOf3DescriptorNames,
827 GetRuleOf5DescriptorNames, IsDescriptorClassNameAvailable,
828 IsDescriptorNameAvailable, IsDescriptorsGenerationSuccessful,
829 SetDescriptorClassParameters, SetDescriptorNames, SetMode, SetMolecule,
830 StringifyMolecularDescriptorsGenerator
831
832 B<MolecularDescriptorsGenerator> is derived from B<ObjectProperty>
833 base class that provides methods not explicitly defined in B<MolecularDescriptorsGenerator>
834 or B<ObjectProperty> classes using Perl's AUTOLOAD functionality. These methods are
835 generated on-the-fly for a specified object property:
836
837 Set<PropertyName>(<PropertyValue>);
838 $PropertyValue = Get<PropertyName>();
839 Delete<PropertyName>();
840
841 B<MolecularDescriptorsGenerator> is designed to provide a plug-in environment for
842 molecular descriptors development. The molecular descriptor class modules available
843 in B<MolecularDescriptors> directory under B<MayaChemTools/lib> directory are
844 automatically detected and loaded into the system. The descriptor names provided
845 by each descriptor class module through its B<GetDescriptorNames> function are
846 retrieved and are made available for calculations of their values for a specified
847 molecule.
848
849 Any combination of available descriptor names can be specified during calculation
850 of descriptor values using B<GenerateDescriptors> method. The current release of
851 MayaChemTools supports generation of four sets of descriptors: All available
852 descriptors, rule of 5 or 3 descriptors, or a specified set of descriptor names.
853
854 RuleOf5 [ Ref 91 ] descriptor names are: MolecularWeight, HydrogenBondDonors,
855 HydrogenBondAcceptors, SLogP. RuleOf5 states: MolecularWeight <= 500,
856 HydrogenBondDonors <= 5, HydrogenBondAcceptors <= 10, and logP <= 5.
857
858 RuleOf3 [ Ref 92 ] descriptor names are: MolecularWeight, RotatableBonds,
859 HydrogenBondDonors, HydrogenBondAcceptors, SLogP, TPSA. RuleOf3 states:
860 MolecularWeight <= 300, RotatableBonds <= 3, HydrogenBondDonors <= 3,
861 HydrogenBondAcceptors <= 3, logP <= 3, and TPSA <= 60.
862
863 Before calculation of a specified set of descriptors by B<GenerateDescriptors>
864 method, a set of descriptor calculation control parameters for a specific descriptor
865 class name can be set using B<SetDescriptorClassParameters> method. The specified
866 control parameter names and values are simply passed on to specified descriptor
867 class during instantiation of descriptor class object without performing any validation
868 of parameter names and associated values. It's up to the appropriate descriptor class methods
869 to validate these parameters and values. In addition to specified parameter names and
870 values, the parameter hash must also contain descriptor class name as key and
871 value pair with DescriptorClassName as key with class name as value.
872
873 =head2 METHODS
874
875 =over 4
876
877 =item B<new>
878
879 $NewMolecularDescriptorsGenerator = new MolecularDescriptors::
880 MolecularDescriptorsGenerator(
881 %NamesAndValues);
882
883 Using specified I<MolecularDescriptorsGenerator> property names and values hash, B<new>
884 method creates a new object and returns a reference to newly created B<MolecularDescriptorsGenerator>
885 object. By default, the following properties are initialized:
886
887 Mode = 'All'
888 @{$This->{DescriptorNames}} = ()
889 %{$This->{DescriptorClassParameters}} = ()
890 @{$This->{DescriptorClassNames}} = ()
891 %{$This->{DescriptorClassObjects}} = ()
892 @{$This->{DescriptorValues}} = ()
893
894 Examples:
895
896 $MolecularDescriptorsGenerator = new MolecularDescriptors::
897 MolecularDescriptorsGenerator(
898 'Molecule' => $Molecule);
899
900 @DescriptorNames = qw(MolecularWeight HydrogenBondDonors Fsp3Carbons);
901 $MolecularDescriptorsGenerator = new MolecularDescriptors::
902 MolecularDescriptorsGenerator(
903 'Mode' => 'Specify',
904 'DescriptorNames' => \@DescriptorNames);
905
906 $MolecularDescriptorsGenerator->SetDescriptorClassParameters(
907 'DescriptorClassName' => 'WeightAndMassDescriptors',
908 'WeightPrecision' => 2,
909 'MassPrecision' => 2);
910
911 $MolecularDescriptorsGenerator->SetDescriptorClassParameters(
912 'DescriptorClassName' => 'HydrogenBondsDescriptors',
913 'HydrogenBondsType' => 'HBondsType1');
914
915 $MolecularDescriptorsGenerator->SetMolecule($Molecule);
916 $MolecularDescriptorsGenerator->GenerateDescriptors();
917 print "MolecularDescriptorsGenerator: $MolecularDescriptorsGenerator\n";
918
919
920 =item B<GenerateDescriptors>
921
922 $MolecularDescriptorsGenerator->GenerateDescriptors();
923
924 Calculates descriptor values for specified descriptors and returns I<MolecularDescriptorsGenerator>.
925
926 Descriptor class objects are instantiated only once at first invocation. During
927 subsequent calls to B<GenerateDescriptors> method, descriptor values are
928 initialized and B<GenerateDescriptors> method provided by descriptor class is
929 used to calculate descriptor values for specified descriptors.
930
931 =item B<GetAvailableClassAndDescriptorNames>
932
933 %ClassAndDescriptorNames = $MolecularDescriptorsGenerator->
934 GetAvailableClassAndDescriptorNames();
935 %ClassAndDescriptorNames = MolecularDescriptors::
936 MolecularDescriptorsGenerator::
937 GetAvailableClassAndDescriptorNames();
938
939 Returns available descriptor class and descriptor names as a hash containing key
940 and value pairs corresponding to class name and an array of descriptor names
941 available for the class.
942
943 =item B<GetAvailableClassNameForDescriptorName>
944
945 $DescriptorClassName = $MolecularDescriptorsGenerator->
946 GetAvailableClassNameForDescriptorName($DescriptorName);
947
948 $DescriptorClassName = MolecularDescriptors::MolecularDescriptorsGenerator::
949 GetAvailableClassNameForDescriptorName($DescriptorName);
950
951 Returns available descriptor class name for a descriptor name.
952
953 =item B<GetAvailableDescriptorClassNames>
954
955 $Return = $MolecularDescriptorsGenerator->GetAvailableDescriptorClassNames();
956
957 @DescriptorClassNames = $MolecularDescriptorsGenerator->
958 GetAvailableDescriptorClassNames();
959 @DescriptorClassNames = MolecularDescriptors::
960 MolecularDescriptorsGenerator::
961 GetAvailableDescriptorClassNames();
962
963 Returns available descriptor class names as an array or number of available descriptor
964 class names in scalar context.
965
966 =item B<GetAvailableDescriptorNames>
967
968 @DescriptorNames = $MolecularDescriptorsGenerator->
969 GetAvailableDescriptorNames();
970 @DescriptorNames = MolecularDescriptors::
971 MolecularDescriptorsGenerator::
972 GetAvailableDescriptorNames();
973
974 Returns available descriptor names as an array or number of available descriptor
975 names in scalar context.
976
977 =item B<GetAvailableDescriptorNamesForDescriptorClass>
978
979 @DescriptorNames = $MolecularDescriptorsGenerator->
980 GetAvailableDescriptorNamesForDescriptorClass($DescriptorClassName);
981 @DescriptorNames = MolecularDescriptors::
982 MolecularDescriptorsGenerator::
983 GetAvailableDescriptorNamesForDescriptorClass($DescriptorClassName);
984
985 Returns available descriptor names for a descriptor class as an array or number
986 of available descriptor names in scalar context.
987
988 =item B<GetDescriptorClassParameters>
989
990 $DescriptorClassParametersRef = $MolecularDescriptorsGenerator->
991 GetDescriptorClassParameters();
992 $DescriptorClassParametersRef = MolecularDescriptors::
993 MolecularDescriptorsGenerator::
994 GetDescriptorClassParameters();
995
996 Returns descriptor class parameters as a reference to a hash of hashes with hash
997 keys corresponding to class name and class parameter name with hash value
998 as class parameter value.
999
1000 =item B<GetDescriptorNames>
1001
1002 @DescriptorNames = $MolecularDescriptorsGenerator->GetDescriptorNames();
1003 @DescriptorNames = MolecularDescriptors::MolecularDescriptorsGenerator::
1004 GetDescriptorNames();
1005
1006 Returns all available descriptor names as an array or number of available descriptors
1007 in scalar context.
1008
1009 =item B<GetDescriptorNamesAndValues>
1010
1011 %NamesAndValues = $MolecularDescriptorsGenerator->
1012 GetDescriptorNamesAndValues();
1013
1014 Returns calculated molecular descriptor names and values as a hash with descriptor
1015 names and values as hash key and value pairs.
1016
1017 =item B<GetDescriptorValueByName>
1018
1019 $Value = $MolecularDescriptorsGenerator->
1020 GetDescriptorValueByName($Name);
1021
1022 Returns calculated descriptor value for a specified descriptor name.
1023
1024 =item B<GetDescriptorValues>
1025
1026 @DescriptorValues = $MolecularDescriptorsGenerator->GetDescriptorValues();
1027
1028 Returns all calculated descriptor values as an array corresponding to specified
1029 descriptor names.
1030
1031 =item B<GetRuleOf3DescriptorNames>
1032
1033 @DescriptorNames = $MolecularDescriptorsGenerator->
1034 GetRuleOf3DescriptorNames();
1035 @DescriptorNames = MolecularDescriptors::
1036 MolecularDescriptorsGenerator::
1037 GetRuleOf3DescriptorNames();
1038
1039 Returns rule of 3 descriptor names as an array or number of rule of 3 descriptors in scalar
1040 context.
1041
1042 RuleOf3 [ Ref 92 ] descriptor names are: MolecularWeight, RotatableBonds,
1043 HydrogenBondDonors, HydrogenBondAcceptors, SLogP, TPSA. RuleOf3 states:
1044 MolecularWeight <= 300, RotatableBonds <= 3, HydrogenBondDonors <= 3,
1045 HydrogenBondAcceptors <= 3, logP <= 3, and TPSA <= 60.
1046
1047 =item B<GetRuleOf5DescriptorNames>
1048
1049 @DescriptorNames = $MolecularDescriptorsGenerator->
1050 GetRuleOf5DescriptorNames();
1051 @DescriptorNames = MolecularDescriptors::MolecularDescriptorsGenerator::
1052 GetRuleOf5DescriptorNames();
1053
1054 Returns rule of 5 descriptor names as an array or number of rule of 5 descriptors in scalar
1055 context.
1056
1057 RuleOf5 [ Ref 91 ] descriptor names are: MolecularWeight, HydrogenBondDonors,
1058 HydrogenBondAcceptors, SLogP. RuleOf5 states: MolecularWeight <= 500,
1059 HydrogenBondDonors <= 5, HydrogenBondAcceptors <= 10, and logP <= 5.
1060
1061 =item B<IsDescriptorClassNameAvailable>
1062
1063 $Status = $MolecularDescriptorsGenerator->
1064 IsDescriptorClassNameAvailable($ClassName);
1065 $Status = MolecularDescriptors::
1066 MolecularDescriptorsGenerator::
1067 IsDescriptorClassNameAvailable($ClassName);
1068
1069 Returns 1 or 0 based on whether specified descriptor class name is available.
1070
1071 =item B<IsDescriptorNameAvailable>
1072
1073 $Status = $MolecularDescriptorsGenerator->
1074 IsDescriptorNameAvailable($DescriptorName);
1075 $Status = MolecularDescriptors::
1076 MolecularDescriptorsGenerator::
1077 IsDescriptorNameAvailable($DescriptorName);
1078
1079 Returns 1 or 0 based on whether specified descriptor name is available.
1080
1081 =item B<IsDescriptorsGenerationSuccessful>
1082
1083 $Status = $MolecularDescriptorsGenerator->
1084 IsDescriptorsGenerationSuccessful();
1085
1086 Returns 1 or 0 based on whether descriptors generation is successful.
1087
1088 =item B<SetDescriptorClassParameters>
1089
1090 $MolecularDescriptorsGenerator->SetDescriptorClassParameters(
1091 %NamesAndValues);
1092
1093 Sets descriptor calculation control parameters for a specified descriptor class name
1094 and returns I<MolecularDescriptorsGenerator>.
1095
1096 The specified parameter names and values are simply passed on to specified descriptor
1097 class during instantiation of descriptor class object without performing any validation
1098 of parameter names and associated values. It's up to the appropriate descriptor class methods
1099 to validate these parameters and values.
1100
1101 In addition to the specified parameter names and values, the parameter hash must also contain
1102 a key and value pair identifying the descriptor class: DescriptorClassName as the key and the
1103 class name as the value.
1104
1105 =item B<SetDescriptorNames>
1106
1107 $MolecularDescriptorsGenerator->SetDescriptorNames(@Names);
1108 $MolecularDescriptorsGenerator->SetDescriptorNames(\@Names);
1109
1110 Sets descriptor names to use for generating descriptor values using an array
1111 or reference to an array and returns I<MolecularDescriptorsGenerator>.
1112
1113 =item B<SetMode>
1114
1115 $MolecularDescriptorsGenerator->SetMode($Mode);
1116
1117 Sets descriptors generation mode and returns I<MolecularDescriptorsGenerator>.
1118 Possible I<Mode> values: I<All, RuleOf5, RuleOf3, Specify>.
1119
1120 =item B<SetMolecule>
1121
1122 $MolecularDescriptorsGenerator->SetMolecule($Molecule);
1123
1124 Sets molecule to use during calculation of molecular descriptors and returns
1125 I<MolecularDescriptorsGenerator>.
1126
1127 =item B<StringifyMolecularDescriptorsGenerator>
1128
1129 $String = $MolecularDescriptorsGenerator->StringifyMolecularDescriptorsGenerator();
1130
1131 Returns a string containing information about I<MolecularDescriptorsGenerator> object.
1132
1133 =back
1134
1135 =head1 AUTHOR
1136
1137 Manish Sud <msud@san.rr.com>
1138
1139 =head1 SEE ALSO
1140
1141 MolecularDescriptors.pm
1142
1143 =head1 COPYRIGHT
1144
1145 Copyright (C) 2015 Manish Sud. All rights reserved.
1146
1147 This file is part of MayaChemTools.
1148
1149 MayaChemTools is free software; you can redistribute it and/or modify it under
1150 the terms of the GNU Lesser General Public License as published by the Free
1151 Software Foundation; either version 3 of the License, or (at your option)
1152 any later version.
1153
1154 =cut