MayaChemTools

   1 package Fingerprints::FingerprintsVector;
   2 #
   3 # $RCSfile: FingerprintsVector.pm,v $
   4 # $Date: 2015/02/28 20:48:54 $
   5 # $Revision: 1.31 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Scalar::Util ();
  33 use MathUtil ();
  34 use TextUtil ();
  35 use StatisticsUtil ();
  36 use BitVector;
  37 use Vector;
  38 
  39 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  40 
  41 @ISA = qw(Exporter);
  42 
  # Names of the distance coefficient methods; also the members of the
  # ':distancecoefficients' export tag...
  my(@DistanceCoefficients) = qw(CityBlockDistanceCoefficient EuclideanDistanceCoefficient HammingDistanceCoefficient ManhattanDistanceCoefficient SoergelDistanceCoefficient);

  # Names of the similarity coefficient methods; also the members of the
  # ':similaritycoefficients' export tag...
  my(@SimilarityCoefficients) = qw(CosineSimilarityCoefficient CzekanowskiSimilarityCoefficient DiceSimilarityCoefficient OchiaiSimilarityCoefficient JaccardSimilarityCoefficient SorensonSimilarityCoefficient TanimotoSimilarityCoefficient);

  # Names of the constructors creating a vector from a string; also the members
  # of the ':new' export tag...
  my(@NewFromString) = qw(NewFromValuesString NewFromValuesAndIDsString NewFromIDsAndValuesString NewFromValuesAndIDsPairsString NewFromIDsAndValuesPairsString);

  # Symbols which have always been importable by name. Inside qw() the entries
  # '@DistanceCoefficients' and '@SimilarityCoefficients' are literal strings naming
  # package arrays; they are not expanded to the contents of the lexical arrays
  # declared above. The list is kept as is to stay backward compatible...
  my(@NamedExports) = qw(GetSupportedDistanceCoefficients GetSupportedSimilarityCoefficients GetSupportedDistanceAndSimilarityCoefficients @DistanceCoefficients @SimilarityCoefficients);

  @EXPORT = qw(IsFingerprintsVector);

  # Exporter rejects a tag containing a symbol which is missing from both @EXPORT
  # and @EXPORT_OK. Register the members of all the tags defined below to make
  # ':new', ':distancecoefficients' and ':similaritycoefficients' importable...
  @EXPORT_OK = (@NamedExports, @NewFromString, @DistanceCoefficients, @SimilarityCoefficients);

  # The ':all' tag keeps its earlier membership instead of picking up the names
  # newly added to @EXPORT_OK...
  %EXPORT_TAGS = (
                  new => [@NewFromString],
                  distancecoefficients => [@DistanceCoefficients],
                  similaritycoefficients => [@SimilarityCoefficients],
                  all  => [@EXPORT, @NamedExports]
                 );

  # Setup class variables...
  my($ClassName);
  _InitializeClass();

  # Use StringifyFingerprintsVector method to convert an object into a string...
  use overload '""' => 'StringifyFingerprintsVector';
  68 
  # Class constructor. Creates an object with empty data and then applies the
  # specified property names and values to it. It can be invoked on the class or
  # on an existing object; an object invocant supplies only its class name.
  #
  sub new {
    my($Class, %NamesAndValues) = @_;

    # Create the object in the class of the invocant...
    my $This = bless {}, ref($Class) || $Class;

    # Start with empty data and set the specified properties...
    $This->_InitializeFingerprintsVector();
    $This->_InitializeFingerprintsVectorProperties(%NamesAndValues);

    return $This;
  }
  83 
  # Initialize object data: an empty type along with empty lists of values and
  # value IDs. Any existing values and value IDs arrays are emptied in place.
  #
  sub _InitializeFingerprintsVector {
    my($This) = @_;

    # Type of fingerprint vector; it's empty until SetType is called...
    $This->{Type} = '';

    # Fingerprint vector values and value IDs...
    for my $Key ('Values', 'ValueIDs') {
      @{$This->{$Key}} = ();
    }

    return $This;
  }
 100 
 # Initialize class level data...
 sub _InitializeClass {
   # Keep the package name around; it's used as a prefix in error and warning messages...
   return $ClassName = __PACKAGE__;
 }
 106 
 # Initialize object properties by calling Set<Name> method for each specified
 # name and value pair. It croaks after setting the properties when no Type has
 # been specified.
 #
 sub _InitializeFingerprintsVectorProperties {
   my($This, %NamesAndValues) = @_;

   for my $Name (keys %NamesAndValues) {
     # Property name determines the name of the method used to set it...
     my $SetterName = "Set" . $Name;
     $This->$SetterName($NamesAndValues{$Name});
   }

   unless (exists $NamesAndValues{Type}) {
     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying type...";
   }
   return $This;
 }
 122 
 # Create a new fingerprints vector of a specified type using a string containing
 # space delimited values. An undefined or empty string, or the string None in any
 # case, corresponds to an empty list of values.
 #
 # This functionality can be either invoked as a class function or an object method.
 # During invocation with three parameters, the first one is the invocant and is ignored.
 #
 sub NewFromValuesString ($$;$) {
   # Skip the invocant for the method form...
   my($Type, $ValuesString) = (@_ == 3) ? @_[1, 2] : @_[0, 1];

   my @Values = ();
   unless (!defined($ValuesString) || !length($ValuesString) || $ValuesString =~ /^None$/i) {
     @Values = split(' ', $ValuesString);
   }

   my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values);

   return $FingerprintsVector;
 }
 147 
 # Create a new fingerprints vector using a string of the form 'Values;ValueIDs'. The
 # values and value IDs on either side of the semicolon are delimited by spaces; a
 # missing or empty part, or the string None in any case, corresponds to an empty list.
 #
 # A warning is issued and undef is returned when number of values is different from
 # number of value IDs.
 #
 # This functionality can be either invoked as a class function or an object method.
 # During invocation with three parameters, the first one is the invocant and is ignored.
 #
 sub NewFromValuesAndIDsString ($$;$) {
   # Skip the invocant for the method form...
   my($Type, $ValuesAndIDsString) = (@_ == 3) ? @_[1, 2] : @_[0, 1];

   # Values come first...
   my($ValuesString, $ValueIDsString) = split(';', $ValuesAndIDsString);

   # Convert a space delimited string into a list...
   my $GetList = sub {
     my($Text) = @_;
     return (defined($Text) && length($Text) && $Text !~ /^None$/i) ? split(' ', $Text) : ();
   };
   my @Values = $GetList->($ValuesString);
   my @ValueIDs = $GetList->($ValueIDsString);

   if (@Values != @ValueIDs) {
     carp "Warning: ${ClassName}->NewFromValuesAndIDsString: Object can't be instantiated: Number specified values, " . scalar(@Values) . ", must be equal to number of specified value IDs, " . scalar(@ValueIDs) .  "...";
     return undef;
   }

   my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

   return $FingerprintsVector;
 }
 186 
 # Create a new fingerprints vector using a string of the form 'ValueIDs;Values'. The
 # value IDs and values on either side of the semicolon are delimited by spaces; a
 # missing or empty part, or the string None in any case, corresponds to an empty list.
 #
 # A warning is issued and undef is returned when number of values is different from
 # number of value IDs.
 #
 # This functionality can be either invoked as a class function or an object method.
 # During invocation with three parameters, the first one is the invocant and is ignored.
 #
 sub NewFromIDsAndValuesString ($$;$) {
   # Skip the invocant for the method form...
   my($Type, $IDsAndValuesString) = (@_ == 3) ? @_[1, 2] : @_[0, 1];

   # Value IDs come first...
   my($ValueIDsString, $ValuesString) = split(';', $IDsAndValuesString);

   # Convert a space delimited string into a list...
   my $GetList = sub {
     my($Text) = @_;
     return (defined($Text) && length($Text) && $Text !~ /^None$/i) ? split(' ', $Text) : ();
   };
   my @Values = $GetList->($ValuesString);
   my @ValueIDs = $GetList->($ValueIDsString);

   if (@Values != @ValueIDs) {
     carp "Warning: ${ClassName}->NewFromIDsAndValuesString: Object can't be instantiated: Number specified values, " . scalar(@Values) . ", must be equal to number of specified value IDs, " . scalar(@ValueIDs) .  "...";
     return undef;
   }

   my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

   return $FingerprintsVector;
 }
 225 
 # Create a new fingerprints vector using a string containing space delimited value
 # and value ID pairs with value coming first in each pair. A string containing only
 # the pair 'None None', in any case, corresponds to empty lists of values and value IDs.
 #
 # A warning is issued and undef is returned for a string containing odd number of items.
 #
 # This functionality can be either invoked as a class function or an object method.
 # During invocation with three parameters, the first one is the invocant and is ignored.
 #
 sub NewFromValuesAndIDsPairsString ($$;$) {
   # Skip the invocant for the method form...
   my($Type, $ValuesAndIDsPairsString) = (@_ == 3) ? @_[1, 2] : @_[0, 1];

   my @Items = split(' ', $ValuesAndIDsPairsString);
   if (@Items % 2) {
     carp "Warning: ${ClassName}->NewFromValuesAndIDsPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
     return undef;
   }

   my(@Values, @ValueIDs);
   my $OnlyNonePair = (@Items == 2 && $Items[0] =~ /^None$/i && $Items[1] =~ /^None$/i) ? 1 : 0;
   if (!$OnlyNonePair) {
     # Take two items at a time: value and its ID...
     my @Remaining = @Items;
     while (@Remaining) {
       my($Value, $ValueID) = splice(@Remaining, 0, 2);
       push @Values, $Value;
       push @ValueIDs, $ValueID;
     }
   }

   my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

   return $FingerprintsVector;
 }
 260 
 # Create a new fingerprints vector using a string containing space delimited value
 # ID and value pairs with value ID coming first in each pair. A string containing only
 # the pair 'None None', in any case, corresponds to empty lists of values and value IDs.
 #
 # Unlike NewFromValuesAndIDsPairsString, which issues a warning and returns undef,
 # this function croaks for a string containing odd number of items.
 #
 # This functionality can be either invoked as a class function or an object method.
 # During invocation with three parameters, the first one is the invocant and is ignored.
 #
 sub NewFromIDsAndValuesPairsString ($$;$) {
   # Skip the invocant for the method form...
   my($Type, $IDsAndValuesPairsString) = (@_ == 3) ? @_[1, 2] : @_[0, 1];

   my @IDsAndValuesPairs = split(' ', $IDsAndValuesPairsString);
   if (@IDsAndValuesPairs % 2) {
     # croak doesn't return; no value is handed back to the caller...
     croak "Error: ${ClassName}->NewFromIDsAndValuesPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
   }

   my(@Values, @ValueIDs);
   if (!(@IDsAndValuesPairs == 2 && $IDsAndValuesPairs[0] =~ /^None$/i && $IDsAndValuesPairs[1] =~ /^None$/i)) {
     # Number of items is even: step through value ID and value pairs...
     for (my $Index = 0; $Index < $#IDsAndValuesPairs; $Index += 2) {
       push @ValueIDs, $IDsAndValuesPairs[$Index];
       push @Values, $IDsAndValuesPairs[$Index + 1];
     }
   }

   my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

   return $FingerprintsVector;
 }
 295 
 # Set type of fingerprints vector. Supported types are: OrderedNumericalValues,
 # NumericalValues, and AlphaNumericalValues. The type name is matched without regard
 # to its case and stored as specified. It croaks for an unsupported type and during
 # an attempt to set the type again.
 #
 #  .  For OrderedNumericalValues type, both vectors must be of the same size and contain similar
 #     types of numerical values in the same order.
 #
 #  .  For NumericalValues type, vector value IDs for both vectors must be specified; however, their
 #     size and order of IDs and numerical values may be different. For each vector, value IDs must
 #     correspond to vector values.
 #
 #  .  For AlphaNumericalValues type, vectors may contain both numerical and alphanumerical values
 #     and their sizes may be different.
 #
 sub SetType {
   my($This, $Type) = @_;

   unless ($Type =~ /^(OrderedNumericalValues|NumericalValues|AlphaNumericalValues)$/i) {
     croak "Error: ${ClassName}->SetType: Specified value, $Type, for Type is not vaild. Supported types in current release of MayaChemTools: OrderedNumericalValues, NumericalValues or AlphaNumericalValues";
   }

   # A non-empty type implies it has been set earlier...
   croak "Error: ${ClassName}->SetType: Can't change intial fingerprints vector type:  It's already set..." if $This->{Type};

   $This->{Type} = $Type;

   return $This;
 }
 323 
 # Get fingerprints vector type as stored in the object...
 #
 sub GetType {
   my $This = shift;

   return $This->{Type};
 }
 331 
 # Set ID of fingerprints vector and return the object...
 sub SetID {
   my $This = shift;

   $This->{ID} = shift;

   return $This;
 }
 340 
 # Get ID of fingerprints vector; the string None is returned for an object without ID...
 sub GetID {
   my $This = shift;

   return $This->{ID} if exists $This->{ID};

   return 'None';
 }
 347 
 # Set description of fingerprints vector and return the object...
 sub SetDescription {
   my $This = shift;

   $This->{Description} = shift;

   return $This;
 }
 356 
 # Get description of fingerprints vector; a default text is returned for an object
 # without description...
 sub GetDescription {
   my $This = shift;

   return $This->{Description} if exists $This->{Description};

   return 'No description available';
 }
 363 
 # Set vector type and return the object. The value is stored under the VectorType
 # key, which is different from the Type key set by SetType...
 sub SetVectorType {
   my $This = shift;

   $This->{VectorType} = shift;

   return $This;
 }
 372 
 # Get vector type; the string FingerprintsVector is returned for an object without
 # vector type...
 sub GetVectorType {
   my $This = shift;

   return $This->{VectorType} if exists $This->{VectorType};

   return 'FingerprintsVector';
 }
 379 
 # Replace values of a fingerprints vector using a vector, reference to an array or
 # a list of values. The object is returned.
 #
 sub SetValues {
   my $This = shift;

   # The remaining parameters describe the new values...
   $This->_SetOrAddValuesOrValueIDs("SetValues", @_);

   return $This;
 }
 389 
 # Replace value IDs of a fingerprints vector using a vector, reference to an array
 # or a list of value IDs. The object is returned.
 #
 sub SetValueIDs {
   my $This = shift;

   # The remaining parameters describe the new value IDs...
   $This->_SetOrAddValuesOrValueIDs("SetValueIDs", @_);

   return $This;
 }
 399 
 # Append values to a fingerprints vector using a vector, reference to an array or
 # a list of values. The object is returned.
 #
 sub AddValues {
   my $This = shift;

   # The remaining parameters describe the additional values...
   $This->_SetOrAddValuesOrValueIDs("AddValues", @_);

   return $This;
 }
 409 
 # Append value IDs to a fingerprints vector using a vector, reference to an array
 # or a list of value IDs. The object is returned.
 #
 sub AddValueIDs {
   my $This = shift;

   # The remaining parameters describe the additional value IDs...
   $This->_SetOrAddValuesOrValueIDs("AddValueIDs", @_);

   return $This;
 }
 419 
 # Set or add values or value IDs according to the specified mode: SetValues,
 # SetValueIDs, AddValues or AddValueIDs. The data may be specified as:
 #
 #    o List of values or value IDs
 #    o Reference to a list of values or value IDs
 #    o A vector containing values or value IDs
 #
 # Only the first data parameter is examined to figure out the form of the data.
 # Nothing is changed and an empty return takes place when no data is specified.
 #
 sub _SetOrAddValuesOrValueIDs {
   my($This, $Mode, @Values) = @_;

   return unless @Values;

   # Figure out where the specified values or value IDs are...
   my $FirstValue = $Values[0];
   my $FirstValueType = ref $FirstValue;
   if ($FirstValueType =~ /^(SCALAR|HASH|CODE|REF|GLOB)/) {
     croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Trying to add values to vector object with a reference to unsupported value format...";
   }

   # It's a vector, an array reference or a list of values...
   my $ValuesRef = Vector::IsVector($FirstValue) ? $FirstValue->GetValues()
                   : ($FirstValueType =~ /^ARRAY/) ? $FirstValue
                   : \@Values;

   # Pick the list to change and whether to empty it first...
   my($Key, $ClearFirst);
   if ($Mode =~ /^SetValues$/i) { ($Key, $ClearFirst) = ('Values', 1); }
   elsif ($Mode =~ /^SetValueIDs$/i) { ($Key, $ClearFirst) = ('ValueIDs', 1); }
   elsif ($Mode =~ /^AddValues$/i) { ($Key, $ClearFirst) = ('Values', 0); }
   elsif ($Mode =~ /^AddValueIDs$/i) { ($Key, $ClearFirst) = ('ValueIDs', 0); }
   else {
     croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Unknown mode $Mode...";
   }

   # The list is emptied before the specified data is read...
   @{$This->{$Key}} = () if $ClearFirst;
   push @{$This->{$Key}}, @{$ValuesRef};

   return $This;
 }
 465 
 # Set a specific value in fingerprints vector with indices starting from 0. It croaks
 # for a negative index or an index beyond the last value unless a true value is
 # specified for SkipCheck.
 #
 sub SetValue {
   my($This, $Index, $Value, $SkipCheck) = @_;

   if (!$SkipCheck) {
     # Make sure index corresponds to an existing value...
     croak "Error: ${ClassName}->SetValue: Index value must be a positive number..." if $Index < 0;
     croak "Error: ${ClassName}->SetValue: Index vaue must be less than number of values..." if $Index >= $This->GetNumOfValues();
   }

   return $This->_SetValue($Index, $Value);
 }
 486 
 # Set a fingerprints vector value at a specified index without performing any
 # checks and return the object...
 #
 sub _SetValue {
   my($This, $Index, $Value) = @_;

   $This->{Values}->[$Index] = $Value;

   return $This;
 }
 496 
 # Get a specific value from fingerprints vector with indices starting from 0. It
 # croaks for a negative index or an index beyond the last value.
 #
 sub GetValue {
   my($This, $Index) = @_;

   croak "Error: ${ClassName}->GetValue: Index value must be a positive number..." if $Index < 0;
   croak "Error: ${ClassName}->GetValue: Index value must be less than number of values..." if $Index >= $This->GetNumOfValues();

   return $This->_GetValue($Index);
 }
 510 
 # Get a fingerprints vector value at a specified index without performing any checks...
 sub _GetValue {
   my($This, $Index) = @_;

   return $This->{Values}->[$Index];
 }
 517 
 # Return vector values as a list in list context; otherwise, a reference to the
 # array of values kept in the object is returned...
 #
 sub GetValues {
   my($This) = @_;

   if (wantarray) {
     return @{$This->{Values}};
   }
   return \@{$This->{Values}};
 }
 525 
 # Set a specific value ID in fingerprints vector with indices starting from 0. It
 # croaks for a negative index or an index beyond the last value ID unless a true
 # value is specified for SkipCheck.
 #
 sub SetValueID {
   my($This, $Index, $Value, $SkipCheck) = @_;

   if (!$SkipCheck) {
     # Make sure index corresponds to an existing value ID...
     croak "Error: ${ClassName}->SetValueID: Index value must be a positive number..." if $Index < 0;
     croak "Error: ${ClassName}->SetValueID: Index vaue must be less than number of value IDs..." if $Index >= $This->GetNumOfValueIDs();
   }

   return $This->_SetValueID($Index, $Value);
 }
 546 
 # Set a fingerprints vector value ID at a specified index without performing any
 # checks and return the object...
 #
 sub _SetValueID {
   my($This, $Index, $Value) = @_;

   $This->{ValueIDs}->[$Index] = $Value;

   return $This;
 }
 556 
 # Get a specific value ID from fingerprints vector with indices starting from 0. It
 # croaks for a negative index or an index beyond the last value ID.
 #
 sub GetValueID {
   my($This, $Index) = @_;

   croak "Error: ${ClassName}->GetValueID: Index value must be a positive number..." if $Index < 0;
   croak "Error: ${ClassName}->GetValueID: Index value must be less than number of value IDs..." if $Index >= $This->GetNumOfValueIDs();

   return $This->_GetValueID($Index);
 }
 570 
 # Get a fingerprints vector value ID at a specified index without performing any checks...
 #
 sub _GetValueID {
   my($This, $Index) = @_;

   return $This->{ValueIDs}->[$Index];
 }
 578 
 # Return vector value IDs as a list in list context; otherwise, a reference to the
 # array of value IDs kept in the object is returned...
 #
 sub GetValueIDs {
   my($This) = @_;

   if (wantarray) {
     return @{$This->{ValueIDs}};
   }
   return \@{$This->{ValueIDs}};
 }
 586 
 # Get fingerprints vector string containing values and/or IDs in a specified format.
 #
 # Supported formats, matched without regard to case: IDsAndValuesString, IDsAndValues,
 # IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs,
 # ValuesAndIDsPairsString, ValuesAndIDsPairs, ValueIDsString, ValueIDs, ValuesString
 # and Values. The string is generated by the corresponding Get<Format>String method.
 #
 # It croaks for any other format including a missing one; there is no fallback value.
 #
 sub GetFingerprintsVectorString {
   my($This, $Format) = @_;

   # A missing format is reported like any other unsupported format...
   $Format = '' unless defined $Format;

   # Format patterns along with the names of the methods generating the strings...
   my @FormatsAndMethods = (
     [qr/^(IDsAndValuesString|IDsAndValues)$/i, 'GetIDsAndValuesString'],
     [qr/^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i, 'GetIDsAndValuesPairsString'],
     [qr/^(ValuesAndIDsString|ValuesAndIDs)$/i, 'GetValuesAndIDsString'],
     [qr/^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i, 'GetValuesAndIDsPairsString'],
     [qr/^(ValueIDsString|ValueIDs)$/i, 'GetValueIDsString'],
     [qr/^(ValuesString|Values)$/i, 'GetValuesString'],
   );

   for my $FormatAndMethod (@FormatsAndMethods) {
     my($Pattern, $MethodName) = @{$FormatAndMethod};
     if ($Format =~ $Pattern) {
       return $This->$MethodName();
     }
   }

   croak "Error: ${ClassName}->GetFingerprintsVectorString: Specified vector string format, $Format, is not supported. Value values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValueIDsString, ValueIDs, ValuesString, Values...";
 }
 # Get a string containing space delimited value IDs followed by space delimited
 # values with a semicolon between the two lists. The string None takes the place of
 # missing value IDs; 'None;None' is returned for a vector without values, whether or
 # not it contains any value IDs.
 #
 sub GetIDsAndValuesString {
   my($This) = @_;

   my $ValueIDsPresent = @{$This->{ValueIDs}} ? 1 : 0;
   my $ValuesPresent = @{$This->{Values}} ? 1 : 0;

   # Values are not available...
   return "None;None" unless $ValuesPresent;

   my $ValueIDsString = $ValueIDsPresent ? join(' ', @{$This->{ValueIDs}}) : "None";

   return $ValueIDsString . ";" . join(' ', @{$This->{Values}});
 }
 622 
 # Get a string containing space delimited value ID and value pairs, one pair for
 # each value. The string None takes the place of each value ID for a vector without
 # any value IDs; 'None None' is returned for a vector without values.
 #
 sub GetIDsAndValuesPairsString {
   my($This) = @_;

   my @Values = @{$This->{Values}};

   # Values are unavailable...
   return "None None" unless @Values;

   my @ValueIDs = @{$This->{ValueIDs}};
   my $ValueIDsPresent = @ValueIDs ? 1 : 0;

   # Value ID comes first in each pair...
   my @IDsAndValuesPairs = map { ($ValueIDsPresent ? $ValueIDs[$_] : 'None', $Values[$_]) } (0 .. $#Values);

   return join(' ', @IDsAndValuesPairs);
 }
 647 
 # Get a string containing space delimited values followed by space delimited value
 # IDs with a semicolon between the two lists. The string None takes the place of
 # missing value IDs; 'None;None' is returned for a vector without values, whether or
 # not it contains any value IDs.
 #
 sub GetValuesAndIDsString {
   my($This) = @_;

   my $ValueIDsPresent = @{$This->{ValueIDs}} ? 1 : 0;
   my $ValuesPresent = @{$This->{Values}} ? 1 : 0;

   # Values are not available...
   return "None;None" unless $ValuesPresent;

   my $ValueIDsString = $ValueIDsPresent ? join(' ', @{$This->{ValueIDs}}) : "None";

   return join(' ', @{$This->{Values}}) . ";" . $ValueIDsString;
 }
 667 
 # Get a string containing space delimited value and value ID pairs, one pair for
 # each value. The string None takes the place of each value ID for a vector without
 # any value IDs; 'None None' is returned for a vector without values.
 #
 sub GetValuesAndIDsPairsString {
   my($This) = @_;

   my @Values = @{$This->{Values}};

   # Values are unavailable...
   return "None None" unless @Values;

   my @ValueIDs = @{$This->{ValueIDs}};
   my $ValueIDsPresent = @ValueIDs ? 1 : 0;

   # Value comes first in each pair...
   my @ValuesAndIDsPairs = map { ($Values[$_], $ValueIDsPresent ? $ValueIDs[$_] : 'None') } (0 .. $#Values);

   return join(' ', @ValuesAndIDsPairs);
 }
 692 
 # Get a string containing space delimited value IDs; the string None is returned
 # for a vector without value IDs...
 #
 sub GetValueIDsString {
   my($This) = @_;

   if (@{$This->{ValueIDs}}) {
     return join(' ', @{$This->{ValueIDs}});
   }
   return 'None';
 }
 700 
 # Get a string containing space delimited values; the string None is returned for
 # a vector without values...
 #
 sub GetValuesString {
   my($This) = @_;

   if (@{$This->{Values}}) {
     return join(' ', @{$This->{Values}});
   }
   return 'None';
 }
 708 
 # Get number of values in fingerprints vector...
 sub GetNumOfValues {
   my($This) = @_;

   my $NumOfValues = @{$This->{Values}};

   return $NumOfValues;
 }
 715 
 # Get number of values which are not numerically equal to zero...
 sub GetNumOfNonZeroValues {
   my($This) = @_;

   my $Size = $This->GetNumOfValues();

   # Count positions with values different from zero...
   return scalar grep { $This->{Values}[$_] != 0 } (0 .. ($Size - 1));
 }
 731 
 # Get number of value IDs in fingerprints vector...
 sub GetNumOfValueIDs {
   my($This) = @_;

   my $NumOfValueIDs = @{$This->{ValueIDs}};

   return $NumOfValueIDs;
 }
 738 
 739 # FingerprintsVector class provides methods to calculate similarity between vectors
 740 # containing three different types of values:
 741 #
 742 # Type I: OrderedNumericalValues
 743 #
 744 #   . Size of two vectors are same
 745 #   . Vectors contain real values in a specific order. For example: MACCS keys count, Topological
 746 #     pharmacophore atom pairs and so on.
 747 #   . Option to calculate similarity value using continuous values or binary values
 748 #
 749 # Type II: UnorderedNumericalValues (specified as NumericalValues type in SetType)
 750 #
 751 #   . Size of two vectors might not be same
 752 #   . Vectors contain unordered real values identified by value IDs. For example: Topological atom pairs,
 753 #     Topological atom torsions and so on
 754 #   . Option to calculate similarity value using continuous values or binary values
 755 #
 756 # Type III: AlphaNumericalValues
 757 #
 758 #   . Size of two vectors might not be same
 759 #   . Vectors contain unordered alphanumerical values. For example: Extended connectivity fingerprints,
 760 #     atom neighborhood fingerprints.
 761 #   . The vector values are treated as keys or bit indices and similarity value is calculated accordingly.
 762 #
 763 # Before performing similarity or distance calculations between vectors containing UnorderedNumericalValues
 764 # or AlphaNumericalValues, the vectors are transformed into vectors containing unique OrderedNumericalValues
 765 # using value IDs for UnorderedNumericalValues and the values themselves for AlphaNumericalValues.
 766 #
 767 # Three forms of similarity or distance calculation between two vectors are supported: AlgebraicForm,
 768 # BinaryForm or SetTheoreticForm.
 769 #
 770 # The value of an extra parameter, CalculationMode, passed to each similarity or distance function
 771 # controls the calculation. Supported values for CalculationMode: AlgebraicForm, BinaryForm and
 772 # SetTheoreticForm. Default: AlgebraicForm.
 773 #
 774 # For BinaryForm CalculationMode, the ordered list of processed final vector values containing the value or
 775 # count of each unique value type is simply converted into a binary vector containing 1s and 0s
 776 # corresponding to presence or absence of values before calculating similarity or distance between
 777 # two vectors.
 778 #
 779 # For two fingerprint vectors A and B of same size containing OrderedNumericalValues, let:
 780 #
 781 #  N = Number of values in A or B
 782 #
 783 #  Xa = Values of vector A
 784 #  Xb = Values of vector B
 785 #
 786 #  Xai = Value of ith element in A
 787 #  Xbi = Value of ith element in B
 788 #
 789 #  SUM = Sum of i over N values
 790 #
 791 # For SetTheoreticForm of calculation between two vectors, let:
 792 #
 793 #  SetIntersectionXaXb = SUM ( MIN ( Xai, Xbi ) )
 794 #  SetDifferenceXaXb = SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) )
 795 #
 796 # For BinaryForm of calculation between two vectors, let:
 797 #
 798 #  Na = Number of bits set to "1" in A = SUM ( Xai )
 799 #  Nb = Number of bits set to "1" in B = SUM ( Xbi )
 800 #  Nc = Number of bits set to "1" in both A and B = SUM ( Xai * Xbi )
 801 #  Nd = Number of bits set to "0" in both A and B = SUM ( 1 - Xai - Xbi + Xai * Xbi)
 802 #
 803 #  N = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
 804 #
 805 # Additionally, for BinaryForm various values also correspond to:
 806 #
 807 #  Na = | Xa |
 808 #  Nb = | Xb |
 809 #  Nc = | SetIntersectionXaXb |
 810 #  Nd = N - | SetDifferenceXaXb |
 811 #
 812 #  | SetDifferenceXaXb | = N - Nd = Na + Nb - Nc + Nd - Nd = Na + Nb - Nc
 813 #                        =  | Xa | + | Xb | - | SetIntersectionXaXb |
 814 #
 815 # Various distance coefficients and similarity coefficients [ Ref 40, Ref 62, Ref 64 ] for a pair of vectors A and B
 816 # in AlgebraicForm, BinaryForm and SetTheoreticForm are defined as follows:
 817 #
 818 # . CityBlockDistanceCoefficient: ( same as HammingDistanceCoefficient and ManhattanDistanceCoefficient)
 819 #
 820 #     . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
 821 #
 822 #     . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
 823 #
 824 #     . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
 825 #                        = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
 826 #
 827 # . CosineSimilarityCoefficient:  ( same as OchiaiSimilarityCoefficient)
 828 #
 829 #     . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
 830 #
 831 #     . BinaryForm: Nc / SQRT ( Na * Nb)
 832 #
 833 #     . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| )
 834 #                        = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
 835 #
 836 # . CzekanowskiSimilarityCoefficient: ( same as DiceSimilarityCoefficient and SorensonSimilarityCoefficient)
 837 #
 838 #     . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) )  ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
 839 #
 840 #     . BinaryForm: 2 * Nc / ( Na + Nb )
 841 #
 842 #     . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
 843 #                        = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
 844 #
 845 # . DiceSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and SorensonSimilarityCoefficient)
 846 #
 847 #     . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) )  ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
 848 #
 849 #     . BinaryForm: 2 * Nc / ( Na + Nb )
 850 #
 851 #     . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
 852 #                        = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
 853 #
 854 # . EuclideanDistanceCoefficient:
 855 #
 856 #     . AlgebraicForm: SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) )
 857 #
 858 #     . BinaryForm: SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc )
 859 #
 860 #     . SetTheoreticForm: SQRT ( | SetDifferenceXaXb | - | SetIntersectionXaXb | )
 861 #                        = SQRT (  SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
 862 #
 863 # . HammingDistanceCoefficient:  ( same as CityBlockDistanceCoefficient and ManhattanDistanceCoefficient)
 864 #
 865 #     . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
 866 #
 867 #     . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
 868 #
 869 #     . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
 870 #                        = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
 871 #
 872 # . JaccardSimilarityCoefficient: ( same as TanimotoSimilarityCoefficient)
 873 #
 874 #     . AlgebraicForm:  SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
 875 #
 876 #     . BinaryForm:  Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
 877 #
 878 #     . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb |
 879 #                        = SUM ( MIN ( Xai, Xbi ) ) / (  SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
 880 #
 881 # . ManhattanDistanceCoefficient:  ( same as CityBlockDistanceCoefficient and HammingDistanceCoefficient)
 882 #
 883 #     . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
 884 #
 885 #     . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
 886 #
 887 #     . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
 888 #                        = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
 889 #
 890 # . OchiaiSimilarityCoefficient:  ( same as CosineSimilarityCoefficient)
 891 #
 892 #     . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
 893 #
 894 #     . BinaryForm: Nc / SQRT ( Na * Nb)
 895 #
 896 #     . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| )
 897 #                        = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
 898 #
 899 # . SorensonSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and DiceSimilarityCoefficient)
 900 #
 901 #     . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) )  ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
 902 #
 903 #     . BinaryForm: 2 * Nc / ( Na + Nb )
 904 #
 905 #     . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
 906 #                        = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
 907 #
 908 # . SoergelDistanceCoefficient:
 909 #
 910 #     . AlgebraicForm:  SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
 911 #
 912 #     . BinaryForm: 1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
 913 #
 914 #     . SetTheoreticForm: ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) / | SetDifferenceXaXb |
 915 #                        = ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
 916 #
 917 # . TanimotoSimilarityCoefficient:  ( same as JaccardSimilarityCoefficient)
 918 #
 919 #     . AlgebraicForm:  SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
 920 #
 921 #     . BinaryForm:  Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
 922 #
 923 #     . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb |
 924 #                        = SUM ( MIN ( Xai, Xbi ) ) / (  SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
 925 #
 926 #
 927 
 928 # Calculate Hamming distance coefficient between two fingerprint vectors.
 929 #
 930 # This functionality can be either invoked as a class function or an object method.
 931 #
 932 sub HammingDistanceCoefficient ($$;$$) {
 933   my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;
 934 
 935   return CityBlockDistanceCoefficient($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);
 936 }
 937 
 938 # Calculate Hamming distance coefficient between two fingerprint vectors.
 939 #
 940 # This functionality can be either invoked as a class function or an object method.
 941 #
 942 sub ManhattanDistanceCoefficient ($$;$$) {
 943   my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;
 944 
 945   return CityBlockDistanceCoefficient($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);
 946 }
 947 
 948 # Calculate CityBlock distance coefficient between two fingerprint vectors.
 949 #
 950 # This functionality can be either invoked as a class function or an object method.
 951 #
 952 sub CityBlockDistanceCoefficient ($$;$$) {
 953   my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;
 954 
 955   $CalculationMode = defined $CalculationMode ? $CalculationMode : 'AlgebraicForm';
 956   $SkipValuesCheck = defined $SkipValuesCheck ? $SkipValuesCheck : 0;
 957 
 958   # Validate and process fingerprints vectors for similarity calculations...
 959   #
 960   _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("CityBlockDistanceCoefficient: Calculation failed", $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);
 961 
 962   # Perform the calculation...
 963   if ($CalculationMode =~ /^AlgebraicForm$/i) {
 964     return _CityBlockDistanceCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB);
 965   }
 966   elsif ($CalculationMode =~ /^BinaryForm$/i) {
 967     return _CityBlockDistanceCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB);
 968   }
 969   elsif ($CalculationMode =~ /^SetTheoreticForm$/i) {
 970     return _CityBlockDistanceCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB);
 971   }
 972   else {
 973     return undef;
 974   }
 975 }
 976 
 977 # Calculate CityBlock distance coefficient using algebraic form...
 978 #
 979 sub _CityBlockDistanceCoefficientUsingAlgebraicForm {
 980   my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
 981   my($SumAbsSubtractionXaiXbi);
 982 
 983   $SumAbsSubtractionXaiXbi = _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($FingerprintsVectorA, $FingerprintsVectorB);
 984 
 985   return $SumAbsSubtractionXaiXbi;
 986 }
 987 
 988 # Calculate CityBlock distance coefficient using binary form...
 989 #
 990 sub _CityBlockDistanceCoefficientUsingBinaryForm {
 991   my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
 992   my($Na, $Nb, $Nc);
 993 
 994   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsVectorA, $FingerprintsVectorB);
 995 
 996   return  ($Na + $Nb - 2 * $Nc);
 997 }
 998 
 999 # Calculate  CityBlock distance coefficient using set theoretic form...
1000 #
1001 sub _CityBlockDistanceCoefficientUsingSetTheoreticForm {
1002   my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
1003   my($SumMinXaiXbi, $SumXai, $SumXbi);
1004 
1005   $SumXai = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorA);
1006   $SumXbi = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorB);
1007   $SumMinXaiXbi = _GetSumOfMinimumOfFingerprintsOrderdedValues($FingerprintsVectorA, $FingerprintsVectorB);
1008 
1009   return  ($SumXai + $SumXbi - 2 * $SumMinXaiXbi);
1010 }
1011 
# Calculate Ochiai similarity coefficient between two fingerprint vectors.
#
# Ochiai similarity is an alias for Cosine similarity: the four arguments are
# forwarded unchanged to CosineSimilarityCoefficient and its result is
# returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub OchiaiSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return CosineSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1021 
# Calculate Cosine similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   . Two fingerprint vectors to compare.
#   . Optional calculation mode: AlgebraicForm (default), BinaryForm or
#     SetTheoreticForm; matched case insensitively.
#   . Optional flag to skip validation of the vectors (default: 0).
#
# Returns the value calculated by the helper for the selected mode, or undef
# for an unrecognized mode. With validation enabled, an unrecognized mode
# makes the validation step croak before this point is reached.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CosineSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate vectors and set up ordered values used by the helpers...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("CosineSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the helper matching the calculation mode...
  return _CosineSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _CosineSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _CosineSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  # Explicit undef is kept on purpose to match existing return behavior...
  return undef;
}
1050 
# Calculate Cosine similarity coefficient using algebraic form:
#
#   SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
#
# Returns 0 when the denominator is zero.
#
sub _CosineSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumSquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SumSquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $SumProducts = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Divisor = sqrt($SumSquaresA * $SumSquaresB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $SumProducts / $Divisor;
}
1066 
# Calculate Cosine similarity coefficient using binary form:
#
#   Nc / SQRT ( Na * Nb )
#
# Na, Nb and Nc are the three values returned by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is zero.
#
sub _CosineSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($BitsA, $BitsB, $BitsCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = sqrt($BitsA * $BitsB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $BitsCommon / $Divisor;
}
1080 
# Calculate Cosine similarity coefficient using set theoretic form:
#
#   SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
#
# Returns 0 when the denominator is zero.
#
sub _CosineSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalMin = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Divisor = sqrt($TotalA * $TotalB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $TotalMin / $Divisor;
}
1096 
# Calculate Czekanowski similarity coefficient between two fingerprint vectors.
#
# Czekanowski similarity is an alias for Dice similarity: the four arguments
# are forwarded unchanged to DiceSimilarityCoefficient and its result is
# returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CzekanowskiSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1106 
# Calculate Sorenson similarity coefficient between two fingerprint vectors.
#
# Sorenson similarity is an alias for Dice similarity: the four arguments are
# forwarded unchanged to DiceSimilarityCoefficient and its result is returned
# as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SorensonSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1116 
# Calculate Dice similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   . Two fingerprint vectors to compare.
#   . Optional calculation mode: AlgebraicForm (default), BinaryForm or
#     SetTheoreticForm; matched case insensitively.
#   . Optional flag to skip validation of the vectors (default: 0).
#
# Returns the value calculated by the helper for the selected mode, or undef
# for an unrecognized mode. With validation enabled, an unrecognized mode
# makes the validation step croak before this point is reached.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate vectors and set up ordered values used by the helpers...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("DiceSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the helper matching the calculation mode...
  return _DiceSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _DiceSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _DiceSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  # Explicit undef is kept on purpose to match existing return behavior...
  return undef;
}
1145 
# Calculate Dice similarity coefficient using algebraic form:
#
#   ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi ** 2 ) )
#
# Returns 0 when the denominator is zero.
#
sub _DiceSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumSquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SumSquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $SumProducts = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Dividend = 2 * $SumProducts;
  my $Divisor = $SumSquaresA + $SumSquaresB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1161 
# Calculate Dice similarity coefficient using binary form:
#
#   2 * Nc / ( Na + Nb )
#
# Na, Nb and Nc are the three values returned by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is zero.
#
sub _DiceSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($BitsA, $BitsB, $BitsCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Dividend = 2 * $BitsCommon;
  my $Divisor = $BitsA + $BitsB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1175 
# Calculate Dice similarity coefficient using set theoretic form:
#
#   2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
#
# Returns 0 when the denominator is zero.
#
sub _DiceSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalMin = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Dividend = 2 * $TotalMin;
  my $Divisor = $TotalA + $TotalB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1191 
1192 
# Calculate Euclidean distance coefficient between two fingerprint vectors.
#
# Arguments:
#   . Two fingerprint vectors to compare.
#   . Optional calculation mode: AlgebraicForm (default), BinaryForm or
#     SetTheoreticForm; matched case insensitively.
#   . Optional flag to skip validation of the vectors (default: 0).
#
# Returns the value calculated by the helper for the selected mode, or undef
# for an unrecognized mode. With validation enabled, an unrecognized mode
# makes the validation step croak before this point is reached.
#
# This functionality can be either invoked as a class function or an object method.
#
sub EuclideanDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate vectors and set up ordered values used by the helpers...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("EuclideanDistanceCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the helper matching the calculation mode...
  return _EuclideanDistanceCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _EuclideanDistanceCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _EuclideanDistanceCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  # Explicit undef is kept on purpose to match existing return behavior...
  return undef;
}
1221 
# Calculate Euclidean distance coefficient using algebraic form:
#
#   SQRT ( SUM ( ( Xai - Xbi ) ** 2 ) )
#
sub _EuclideanDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  # sqrt evaluates its argument in scalar context...
  return sqrt(_GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB));
}
1232 
# Calculate Euclidean distance coefficient using binary form:
#
#   SQRT ( Na + Nb - 2 * Nc )
#
# Na, Nb and Nc are the three values returned by
# _GetNumOfIndividualAndCommonSetBits for the two vectors.
#
sub _EuclideanDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($BitsA, $BitsB, $BitsCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $BitsDifferent = $BitsA + $BitsB - 2 * $BitsCommon;

  return sqrt($BitsDifferent);
}
1243 
# Calculate Euclidean distance coefficient using set theoretic form:
#
#   SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
#
sub _EuclideanDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalMin = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Radicand = $TotalA + $TotalB - 2 * $TotalMin;

  return sqrt($Radicand);
}
1256 
# Calculate Jaccard similarity coefficient between two fingerprint vectors.
#
# Jaccard similarity is an alias for Tanimoto similarity: the four arguments
# are forwarded unchanged to TanimotoSimilarityCoefficient and its result is
# returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub JaccardSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return TanimotoSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1266 
# Calculate Tanimoto similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   . Two fingerprint vectors to compare.
#   . Optional calculation mode: AlgebraicForm (default), BinaryForm or
#     SetTheoreticForm; matched case insensitively.
#   . Optional flag to skip validation of the vectors (default: 0).
#
# Returns the value calculated by the helper for the selected mode, or undef
# for an unrecognized mode. With validation enabled, an unrecognized mode
# makes the validation step croak before this point is reached.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TanimotoSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate vectors and set up ordered values used by the helpers...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("TanimotoSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the helper matching the calculation mode...
  return _TanimotoSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _TanimotoSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _TanimotoSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  # Explicit undef is kept on purpose to match existing return behavior...
  return undef;
}
1295 
# Calculate Tanimoto similarity coefficient using algebraic form:
#
#   SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
#
# Returns 0 when the denominator is zero.
#
sub _TanimotoSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumSquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SumSquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $SumProducts = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Divisor = $SumSquaresA + $SumSquaresB - $SumProducts;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $SumProducts / $Divisor;
}
1311 
# Calculate Tanimoto similarity coefficient using binary form:
#
#   Nc / ( Na + Nb - Nc )
#
# Na, Nb and Nc are the three values returned by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is zero.
#
sub _TanimotoSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($BitsA, $BitsB, $BitsCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $BitsA + $BitsB - $BitsCommon;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $BitsCommon / $Divisor;
}
1325 
# Calculate Tanimoto similarity coefficient using set theoretic form:
#
#   SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# Returns 0 when the denominator is zero.
#
sub _TanimotoSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalMin = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Divisor = $TotalA + $TotalB - $TotalMin;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $TotalMin / $Divisor;
}
1341 
1342 
# Calculate Soergel distance coefficient between two fingerprint vectors.
#
# Arguments:
#   . Two fingerprint vectors to compare.
#   . Optional calculation mode: AlgebraicForm (default), BinaryForm or
#     SetTheoreticForm; matched case insensitively.
#   . Optional flag to skip validation of the vectors (default: 0).
#
# Returns the value calculated by the helper for the selected mode, or undef
# for an unrecognized mode. With validation enabled, an unrecognized mode
# makes the validation step croak before this point is reached.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SoergelDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate vectors and set up ordered values used by the helpers...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("SoergelDistanceCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the helper matching the calculation mode...
  return _SoergelDistanceCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _SoergelDistanceCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _SoergelDistanceCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  # Explicit undef is kept on purpose to match existing return behavior...
  return undef;
}
1371 
# Calculate Soergel distance coefficient using algebraic form:
#
#   SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
#
# Returns 0 when the denominator is zero.
#
sub _SoergelDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $TotalAbsDifference = _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);
  my $TotalMax = _GetSumOfMaximumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  # Avoid division by zero...
  return 0 unless $TotalMax;

  return $TotalAbsDifference / $TotalMax;
}
1386 
# Calculate Soergel distance coefficient using binary form:
#
#   ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
#
# Na, Nb and Nc are the three values returned by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is zero.
#
sub _SoergelDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($BitsA, $BitsB, $BitsCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Dividend = $BitsA + $BitsB - 2 * $BitsCommon;
  my $Divisor = $BitsA + $BitsB - $BitsCommon;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1400 
# Calculate Soergel distance coefficient using set theoretic form:
#
#   ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
#     / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# Returns 0 when the denominator is zero.
#
sub _SoergelDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalMin = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Dividend = $TotalA + $TotalB - 2 * $TotalMin;
  my $Divisor = $TotalA + $TotalB - $TotalMin;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1416 
# Validate and process fingerprints vectors for similarity calculations...
#
# Arguments: error message prefix, the two fingerprint vectors, calculation
# mode (default: AlgebraicForm) and skip values check flag (default: 0).
#
# Validation is performed unless the skip flag is true; processing of the
# vectors is always performed.
#
sub _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  _ValidateFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $VectorA, $VectorB, $Mode) unless $SkipCheck;

  # Kept as the final statement so its value stays the sub's return value...
  _ProcessFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $VectorA, $VectorB, $Mode);
}
1430 
# Make sure fingerprint vectors are good for performing similarity/distance calculation...
#
# Arguments: error message prefix used in all error messages, the two
# fingerprint vectors and the calculation mode.
#
# Croaks when any of the following checks fails:
#   . Both arguments are fingerprint vectors as reported by IsFingerprintsVector.
#   . Both vectors have the same Type.
#   . Calculation mode is AlgebraicForm, BinaryForm or SetTheoreticForm.
#   . Both vectors contain at least one value.
#   . OrderedNumericalValues: both vectors contain same number of values.
#   . NumericalValues: each vector has value IDs, and its number of value IDs
#     equals its number of values.
#   . AlphaNumericalValues: neither vector has any value IDs.
#   . Type is one of the three types listed above.
#
sub _ValidateFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode) = @_;

  # Make sure both are fingerprint vectors..
  if (!(IsFingerprintsVector($FingerprintsVectorA) && IsFingerprintsVector($FingerprintsVectorB))) {
    croak "Error: ${ClassName}->${ErrorMsg}: Both objects must be fingerprint vectors...";
  }

  # Check types...
  if ($FingerprintsVectorA->{Type} ne $FingerprintsVectorB->{Type}) {
    croak "Error: ${ClassName}->${ErrorMsg}: Type of first fingerprint vector, $FingerprintsVectorA->{Type}, must be same as type of second fingerprint vector, $FingerprintsVectorB->{Type}...";
  }

  # Check calculation mode...
  if ($CalculationMode !~ /^(AlgebraicForm|BinaryForm|SetTheoreticForm)$/i) {
    croak "Error: ${ClassName}->${ErrorMsg}: Specified similarity calculation mode, $CalculationMode, is not valid. Supported values: AlgebraicForm, BinaryForm, and SetTheoreticForm...";
  }

  # Check values and value IDs...
  my($Na, $Nb, $NIDa, $NIDb);
  $Na = $FingerprintsVectorA->GetNumOfValues(); $Nb = $FingerprintsVectorB->GetNumOfValues();
  $NIDa = $FingerprintsVectorA->GetNumOfValueIDs(); $NIDb = $FingerprintsVectorB->GetNumOfValueIDs();

  if ($Na == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
  }
  if ($Nb == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in second fingerprint vector, $Nb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
  }

  if ($FingerprintsVectorA->{Type} =~ /^OrderedNumericalValues$/i) {
    if ($Na != $Nb) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be equal to number of values, $Nb, in second fingerprint vector for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^NumericalValues$/i) {
    if ($NIDa == 0) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb == 0) {
      # Message refers to the second vector, whose count is being reported...
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }

    if ($NIDa != $Na) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be equal to its number of values, $Na, for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb != $Nb) {
      # Both types are equal at this point; second vector's type is used to
      # keep the message consistent with the vector it describes...
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be equal to its number of values, $Nb, for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^AlphaNumericalValues$/i) {
    if ($NIDa || $NIDb) {
      croak "Error: ${ClassName}->${ErrorMsg}: ValueIDs cann't be specified for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  else {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $FingerprintsVectorA->{Type} is not valid...";
  }
}
1492 
# Process fingerprints vectors for similarity calculation by generating vectors
# containing ordered list of values...
#
# Arguments: error message prefix, the two fingerprint vectors and the
# calculation mode.
#
# OrderedValuesRef and BitVector of both vectors are first reset to undef.
# The processing routine is then picked using Type of the first vector;
# croaks for an unsupported Type. For BinaryForm calculation mode, the
# ordered values are additionally transformed into bit vectors.
#
sub _ProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode) = @_;

  # Clear out results of any earlier processing...
  for my $Vector ($FingerprintsVectorA, $FingerprintsVectorB) {
    $Vector->{OrderedValuesRef} = undef;
  }
  for my $Vector ($FingerprintsVectorA, $FingerprintsVectorB) {
    $Vector->{BitVector} = undef;
  }

  # Pick processing routine for the vector type...
  my $Type = $FingerprintsVectorA->{Type};
  my $ProcessingRoutine =
      $Type =~ /^OrderedNumericalValues$/i ? \&_ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation
    : $Type =~ /^NumericalValues$/i        ? \&_ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation
    : $Type =~ /^AlphaNumericalValues$/i   ? \&_ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation
    :                                        undef;

  if (!defined $ProcessingRoutine) {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $FingerprintsVectorA->{Type} is not valid...";
  }
  $ProcessingRoutine->($FingerprintsVectorA, $FingerprintsVectorB);

  # Binary form works on bit vectors instead of ordered values...
  _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation($FingerprintsVectorA, $FingerprintsVectorB) if $CalculationMode =~ /^BinaryForm$/i;
}
1518 
# Process fingerprints vectors with ordered numerical values for similarity calculations...
#
# The values are already ordered, so OrderedValuesRef of each vector is simply
# pointed at its own Values array; no copy is made.
#
sub _ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  for my $Vector ($FingerprintsVectorA, $FingerprintsVectorB) {
    $Vector->{OrderedValuesRef} = \@{$Vector->{Values}};
  }

  # Same value the final assignment used to provide as implicit return value...
  return $FingerprintsVectorB->{OrderedValuesRef};
}
1527 
# Set up ordered values for a pair of fingerprints vectors containing numerical
# values identified by value IDs...
#
# For each vector, values sharing the same value ID are added together. The value
# IDs present in either vector are then sorted using the default sort order, and
# OrderedValuesRef in each vector is set to a new array holding the vector's
# summed value for each sorted value ID, with 0 used for IDs missing from that
# vector. Both arrays consequently have the same size and ID order.
#
sub _ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my(%ValueIDsInEitherVector, @SummedValuesByVector);

  # Sum up values by value ID for each vector and collect all value IDs...
  for my $Vector ($FingerprintsVectorA, $FingerprintsVectorB) {
    my(%SummedValues);

    for my $Position (0 .. $#{$Vector->{ValueIDs}}) {
      my $ID = $Vector->{ValueIDs}[$Position];
      my $Amount = $Vector->{Values}[$Position];

      if (exists $SummedValues{$ID}) {
        $SummedValues{$ID} += $Amount;
      }
      else {
        # First occurrence: keep the value exactly as stored...
        $SummedValues{$ID} = $Amount;
      }
      $ValueIDsInEitherVector{$ID} = 1;
    }
    push @SummedValuesByVector, \%SummedValues;
  }

  # Line up values of both vectors against the sorted list of all value IDs...
  my @SortedValueIDs = sort keys %ValueIDsInEitherVector;
  my($SummedValuesA, $SummedValuesB) = @SummedValuesByVector;

  $FingerprintsVectorA->{OrderedValuesRef} = [ map { exists $SummedValuesA->{$_} ? $SummedValuesA->{$_} : 0 } @SortedValueIDs ];
  $FingerprintsVectorB->{OrderedValuesRef} = [ map { exists $SummedValuesB->{$_} ? $SummedValuesB->{$_} : 0 } @SortedValueIDs ];
}
1585 
# Set up ordered values for a pair of fingerprints vectors containing alpha
# numerical values...
#
# The number of occurrences of each distinct value is counted for each vector.
# The distinct values present in either vector are then sorted using the default
# sort order, and OrderedValuesRef in each vector is set to a new array holding
# the vector's occurrence count for each sorted value, with 0 used for values
# missing from that vector.
#
sub _ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my(%OccurrencesInA, %OccurrencesInB, %SeenInEitherVector);

  # Count occurrences of values in each vector and collect all distinct values...
  for my $VectorAndOccurrences ([$FingerprintsVectorA, \%OccurrencesInA], [$FingerprintsVectorB, \%OccurrencesInB]) {
    my($Vector, $OccurrencesRef) = @{$VectorAndOccurrences};

    for my $Item (@{$Vector->{Values}}) {
      $OccurrencesRef->{$Item}++;
      $SeenInEitherVector{$Item} = 1;
    }
  }

  # Line up counts of both vectors against the sorted list of all distinct values.
  # A stored count is always at least 1, so a false count means a missing value...
  my @SortedItems = sort keys %SeenInEitherVector;

  $FingerprintsVectorA->{OrderedValuesRef} = [ map { $OccurrencesInA{$_} || 0 } @SortedItems ];
  $FingerprintsVectorB->{OrderedValuesRef} = [ map { $OccurrencesInB{$_} || 0 } @SortedItems ];
}
1640 
# Convert ordered values of a pair of fingerprints vectors into bit vectors for
# similarity calculation...
#
# A new BitVector object, sized by the number of ordered values in the first
# vector, is stored under the BitVector key of each fingerprints vector. A bit is
# set for each position whose ordered value evaluates to true.
#
sub _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  # Both bit vectors use the size of the first vector's ordered values...
  my $NumOfBits = scalar @{$FingerprintsVectorA->{OrderedValuesRef}};

  $FingerprintsVectorA->{BitVector} = BitVector->new($NumOfBits);
  $FingerprintsVectorB->{BitVector} = BitVector->new($NumOfBits);

  # Passed as second argument to SetBit...
  my $SkipCheck = 1;

  for my $BitNum (0 .. ($NumOfBits - 1)) {
    for my $FingerprintsVector ($FingerprintsVectorA, $FingerprintsVectorB) {
      if ($FingerprintsVector->{OrderedValuesRef}[$BitNum]) {
        $FingerprintsVector->{BitVector}->SetBit($BitNum, $SkipCheck);
      }
    }
  }
}
1664 
# Return the result of StatisticsUtil::Sum applied to the ordered values of a
# fingerprints vector...
#
sub _GetSumOfFingerprintsOrderedValues {
  my $FingerprintVector = shift;

  return StatisticsUtil::Sum($FingerprintVector->{OrderedValuesRef});
}
1672 
# Return the result of StatisticsUtil::SumOfSquares applied to the ordered values
# of a fingerprints vector...
#
sub _GetSumOfSquaresOfFingerprintsOrderedValues {
  my $FingerprintVector = shift;

  return StatisticsUtil::SumOfSquares($FingerprintVector->{OrderedValuesRef});
}
1680 
# Return the sum of products of corresponding ordered values in two fingerprints
# vectors...
#
# The positions covered are those of the first vector's ordered values.
#
sub _GetSumOfProductOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $SumOfProducts = 0;
  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    my $Xai = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $Xbi = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $SumOfProducts += $Xai * $Xbi;
  }
  return $SumOfProducts;
}
1693 
# Return the sum of absolute differences between corresponding ordered values in
# two fingerprints vectors...
#
# The positions covered are those of the first vector's ordered values.
#
sub _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $SumOfAbsoluteDifferences = 0;
  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    my $Xai = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $Xbi = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $SumOfAbsoluteDifferences += abs($Xai - $Xbi);
  }
  return $SumOfAbsoluteDifferences;
}
1706 
# Return the sum of squared differences between corresponding ordered values in
# two fingerprints vectors...
#
# The positions covered are those of the first vector's ordered values.
#
sub _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $SumOfSquaredDifferences = 0;
  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    my $Xai = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $Xbi = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $SumOfSquaredDifferences += ($Xai - $Xbi)**2;
  }
  return $SumOfSquaredDifferences;
}
1719 
# Return the sum, over corresponding ordered values in two fingerprints vectors,
# of the result of MathUtil::min for each pair of values...
#
# The positions covered are those of the first vector's ordered values.
#
sub _GetSumOfMinimumOfFingerprintsOrderdedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $SumOfMinimums = 0;
  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    $SumOfMinimums += MathUtil::min($FingerprintsVectorA->{OrderedValuesRef}[$Position], $FingerprintsVectorB->{OrderedValuesRef}[$Position]);
  }
  return $SumOfMinimums;
}
1732 
# Return the sum, over corresponding ordered values in two fingerprints vectors,
# of the result of MathUtil::max for each pair of values...
#
# The positions covered are those of the first vector's ordered values.
#
sub _GetSumOfMaximumOfFingerprintsOrderdedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $SumOfMaximums = 0;
  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    $SumOfMaximums += MathUtil::max($FingerprintsVectorA->{OrderedValuesRef}[$Position], $FingerprintsVectorB->{OrderedValuesRef}[$Position]);
  }
  return $SumOfMaximums;
}
1745 
# Count set bits for BinaryForm calculation using the bit vectors stored under
# the BitVector key of two fingerprints vectors...
#
# Returns a list containing three values:
#   . Na: number of bits set in the bit vector of A
#   . Nb: number of bits set in the bit vector of B
#   . Nc: number of bits set in the result of applying & to both bit vectors
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $BitsInA = $FingerprintsVectorA->{BitVector};
  my $BitsInB = $FingerprintsVectorB->{BitVector};

  my $Na = $BitsInA->GetNumOfSetBits();
  my $Nb = $BitsInB->GetNumOfSetBits();

  # Bits set in both A and B...
  my $Nc = ($BitsInA & $BitsInB)->GetNumOfSetBits();

  return ($Na, $Nb, $Nc);
}
1768 
# Return the contents of the file level list of distance coefficient names.
# Takes no arguments...
#
sub GetSupportedDistanceCoefficients () {
  return @DistanceCoefficients;
}
1775 
# Return the contents of the file level list of similarity coefficient names.
# Takes no arguments...
#
sub GetSupportedSimilarityCoefficients () {
  return @SimilarityCoefficients;
}
1782 
# Return the names of distance and similarity coefficients combined into one
# list and sorted using the default sort order. Takes no arguments...
#
sub GetSupportedDistanceAndSimilarityCoefficients () {
  my @DistanceAndSimilarityCoefficients = (@DistanceCoefficients, @SimilarityCoefficients);

  return sort @DistanceAndSimilarityCoefficients;
}
1794 
# Is it a fingerprints vector object?
#
# Hands its argument over to _IsFingerprintsVector and returns the result...
#
sub IsFingerprintsVector ($) {
  my $Object = shift;

  return _IsFingerprintsVector($Object);
}
1801 
# Is it a fingerprints vector object?
#
# Returns 1 for a blessed reference whose isa method reports the class named by
# $ClassName; returns 0 for everything else...
#
sub _IsFingerprintsVector {
  my($Object) = @_;

  # Anything not blessed can't be an object of any class...
  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
1808 
# Return a string describing a fingerprints vector...
#
# The string contains the type, the number of values, the number of non-zero
# values (only for OrderedNumericalValues and NumericalValues types), the values,
# the number of value IDs and the value IDs, in the following format:
#
#   Type: <Type>; NumOfValues: <N>[; NumOfNonZeroValues: <N>]; Values: <...>;
#   NumOfValueIDs: <N>; ValueIDs: <...>
#
# Values and value IDs are formatted independently of each other. A list is shown
# as 'None' when it's empty and in full when it contains no more than 500
# entries. A longer list is cut off after its first 500 entries; it's followed by
# ' ...' and its label is changed to indicate truncation, for example:
# 'Values (Truncated after 500): <...>'.
#
sub StringifyFingerprintsVector {
  my($This) = @_;
  my($MaxValuesToStringify, $NumOfValues, $NumOfValueIDs, $FingerprintsVectorString);

  $MaxValuesToStringify = 500;

  # Format a labeled list. The truncation label and the trailing ' ...' are used
  # only when entries are actually left out, and each list is judged by its own
  # size instead of the number of values...
  my $StringifyList = sub {
    my($Label, $NumOfEntries, $EntriesRef) = @_;

    if ($NumOfEntries > $MaxValuesToStringify) {
      my(@Entries);
      @Entries = @{$EntriesRef}[0 .. ($MaxValuesToStringify - 1)];
      return "${Label} (Truncated after ${MaxValuesToStringify}): <" . join(' ', @Entries) . " ...>";
    }
    return "${Label}: <" . ($NumOfEntries ? join(' ', @{$EntriesRef}) : 'None') . ">";
  };

  $NumOfValues = $This->GetNumOfValues();
  $NumOfValueIDs = $This->GetNumOfValueIDs();

  $FingerprintsVectorString = "Type: $This->{Type}; NumOfValues: $NumOfValues";

  # Number of non-zero values is reported only for numerical types...
  if ($This->{Type} =~ /^(OrderedNumericalValues|NumericalValues)$/i) {
    my($NumOfNonZeroValues);
    $NumOfNonZeroValues = $This->GetNumOfNonZeroValues();
    $FingerprintsVectorString .= "; NumOfNonZeroValues: $NumOfNonZeroValues";
  }

  # Append values and value IDs...
  $FingerprintsVectorString .= "; " . $StringifyList->('Values', $NumOfValues, $This->{Values});
  $FingerprintsVectorString .= "; NumOfValueIDs: $NumOfValueIDs; " . $StringifyList->('ValueIDs', $NumOfValueIDs, $This->{ValueIDs});

  return $FingerprintsVectorString;
}
1865