MayaChemTools

   1 package Fingerprints::FingerprintsBitVector;
   2 #
   3 # $RCSfile: FingerprintsBitVector.pm,v $
   4 # $Date: 2015/02/28 20:48:54 $
   5 # $Revision: 1.27 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Scalar::Util ();
  33 use BitVector;
  34 use MathUtil;
  35 use TextUtil ();
  36 
  37 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  38 
  39 @ISA = qw(BitVector Exporter);
  40 
  41 # Similiarity coefficients...
  42 my(@SimilarityCoefficients) = qw(BaroniUrbaniSimilarityCoefficient BuserSimilarityCoefficient CosineSimilarityCoefficient DiceSimilarityCoefficient DennisSimilarityCoefficient ForbesSimilarityCoefficient FossumSimilarityCoefficient HamannSimilarityCoefficient JacardSimilarityCoefficient Kulczynski1SimilarityCoefficient Kulczynski2SimilarityCoefficient MatchingSimilarityCoefficient McConnaugheySimilarityCoefficient OchiaiSimilarityCoefficient PearsonSimilarityCoefficient RogersTanimotoSimilarityCoefficient RussellRaoSimilarityCoefficient SimpsonSimilarityCoefficient SkoalSneath1SimilarityCoefficient SkoalSneath2SimilarityCoefficient SkoalSneath3SimilarityCoefficient TanimotoSimilarityCoefficient TverskySimilarityCoefficient YuleSimilarityCoefficient WeightedTanimotoSimilarityCoefficient WeightedTverskySimilarityCoefficient);
  43 
  44 # New from string...
  45 my(@NewFromString) = qw(NewFromBinaryString NewFromHexadecimalString NewFromRawBinaryString);
  46 
  47 @EXPORT = qw(IsFingerprintsBitVector);
  48 @EXPORT_OK = qw(GetSupportedSimilarityCoefficients @NewFromString @SimilarityCoefficients);
  49 
  50 %EXPORT_TAGS = (
  51                 new => [@NewFromString],
  52                 coefficients => [@SimilarityCoefficients],
  53                 all  => [@EXPORT, @EXPORT_OK]
  54                );
  55 
  56 # Setup class variables...
  57 my($ClassName);
  58 _InitializeClass();
  59 
  60 use overload '""' => 'StringifyFingerprintsBitVector';
  61 
  62 # Class constructor...
  63 sub new {
  64   my($Class, $Size) = @_;
  65 
  66   # Initialize object...
  67   my $This = $Class->SUPER::new($Size);
  68   bless $This, ref($Class) || $Class;
  69   $This->_InitializeFingerprintsBitVector($Size);
  70 
  71   return $This;
  72 }
  73 
  74 # Initialize object data...
  75 #
  76 # Note:
  77 #  . The class, BitVector, used to derive this class provides all the functionality to
  78 #    manipulate bits.
  79 #  . Irrespective of specified size, Perl functions used to handle bit data in
  80 #    BitVector class automatically sets the size to the next nearest power of 2.
  81 #    SpecifiedSize is used by this class to process any aribitray size during similarity
  82 #    coefficient calculations.
  83 #
  84 sub _InitializeFingerprintsBitVector {
  85   my($This, $Size) = @_;
  86 
  87   if (!defined $Size) {
  88     croak "Error: ${ClassName}->new: FingerprintsBitVector object instantiated without specifying its size ...";
  89   }
  90   if ($Size <=0) {
  91     croak "Error: ${ClassName}->new: Fingerprints bit vector size, $Size, must be a positive integer...";
  92   }
  93 
  94   # Specified size of fingerprints...
  95   $This->{SpecifiedSize} = $Size;
  96 
  97 }
  98 
  99 # Initialize class ...
 100 sub _InitializeClass {
 101   #Class name...
 102   $ClassName = __PACKAGE__;
 103 }
 104 
 105 # Set specified size...
 106 #
 107 # Notes:
 108 #   Irrespective of specified size, Perl functions used to handle bit data in
 109 #   BitVector class automatically sets the size to the next nearest power of 2.
 110 #   SpecifiedSize is used by this class to process any aribitray size during similarity
 111 #   coefficient calculations.
 112 #
 113 sub SetSpecifiedSize {
 114   my($This, $SpecifiedSize) = @_;
 115 
 116   if (!($SpecifiedSize > 0 && $SpecifiedSize <= $This->{Size})) {
 117     croak "Error: ${ClassName}->SetSpecifiedSize: Specified size, $SpecifiedSize, is not valid:  It must be > 0 && <= ", $This->GetSize()," ...";
 118   }
 119   $This->{SpecifiedSize} = $SpecifiedSize;
 120 }
 121 
 122 # Get specified size...
 123 sub GetSpecifiedSize {
 124   my($This) = @_;
 125 
 126   return $This->{SpecifiedSize};
 127 }
 128 
 129 # Set ID...
 130 sub SetID {
 131   my($This, $Value) = @_;
 132 
 133   $This->{ID} = $Value;
 134 
 135   return $This;
 136 }
 137 
 138 # Get ID...
 139 sub GetID {
 140   my($This) = @_;
 141 
 142   return exists $This->{ID} ? $This->{ID} : 'None';
 143 }
 144 
 145 # Set description...
 146 sub SetDescription {
 147   my($This, $Value) = @_;
 148 
 149   $This->{Description} = $Value;
 150 
 151   return $This;
 152 }
 153 
 154 # Get description...
 155 sub GetDescription {
 156   my($This) = @_;
 157 
 158   return exists $This->{Description} ? $This->{Description} : 'No description available';
 159 }
 160 
 161 # Set vector type...
 162 sub SetVectorType {
 163   my($This, $Value) = @_;
 164 
 165   $This->{VectorType} = $Value;
 166 
 167   return $This;
 168 }
 169 
 170 # Get vector type...
 171 sub GetVectorType {
 172   my($This) = @_;
 173 
 174   return exists $This->{VectorType} ? $This->{VectorType} : 'FingerprintsBitVector';
 175 }
 176 
 177 # Create a new fingerprints bit vector using binary string. This functionality can be
 178 # either invoked as a class function or an object method.
 179 #
 180 sub NewFromBinaryString ($;$) {
 181   my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;
 182 
 183   if (_IsFingerprintsBitVector($FirstParameter)) {
 184     return _NewFingerptinsBitVectorFromString('Binary', $SecondParameter, $ThirdParameter);
 185   }
 186   else {
 187     return _NewFingerptinsBitVectorFromString( 'Binary', $FirstParameter, $SecondParameter);
 188   }
 189 }
 190 
 191 # Create a new fingerprints bit vector using hexadecimal string. This functionality can be
 192 # either invoked as a class function or an object method.
 193 #
 194 sub NewFromHexadecimalString ($;$) {
 195   my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;
 196 
 197   if (_IsFingerprintsBitVector($FirstParameter)) {
 198     return _NewFingerptinsBitVectorFromString('Hexadecimal', $SecondParameter, $ThirdParameter);
 199   }
 200   else {
 201     return _NewFingerptinsBitVectorFromString( 'Hexadecimal', $FirstParameter, $SecondParameter);
 202   }
 203 }
 204 
 205 # Create a new fingerprints bit vector using octal string. This functionality can be
 206 # either invoked as a class function or an object method.
 207 #
 208 #
 209 sub NewFromOctalString ($) {
 210   croak "Error: ${ClassName}->NewFromOctalString: Creation of fingerprits bit vector from an octal string is not supported ...";
 211 }
 212 
 213 # Create a new fingerprints bit vector using decimal string. This functionality can be
 214 # either invoked as a class function or an object method.
 215 #
 216 sub NewFromDecimalString ($;$) {
 217   croak "Error: ${ClassName}->NewFromDecimalString: Creation of fingerprits bit vector from a decimal string is not supported ...";
 218 }
 219 
 220 # Create a new fingerprints bit vector using raw binary string. This functionality can be
 221 # either invoked as a class function or an object method.
 222 #
 223 sub NewFromRawBinaryString ($;$) {
 224   my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;
 225 
 226   if (_IsFingerprintsBitVector($FirstParameter)) {
 227     return _NewFingerptinsBitVectorFromString('RawBinary', $SecondParameter, $ThirdParameter);
 228   }
 229   else {
 230     return _NewFingerptinsBitVectorFromString( 'RawBinary', $FirstParameter, $SecondParameter);
 231   }
 232 }
 233 
 234 # Create a new fingerprints bit vector from a string...
 235 #
 236 #
 237 sub _NewFingerptinsBitVectorFromString ($$;$) {
 238   my($Format, $String, $BitsOrder) = @_;
 239   my($FingerprintsBitVector, $Size);
 240 
 241   $Size = BitVector::_CalculateStringSizeInBits($Format, $String);
 242 
 243   $FingerprintsBitVector = new Fingerprints::FingerprintsBitVector($Size);
 244   $FingerprintsBitVector->_SetBitsAsString($Format, $String, $BitsOrder);
 245 
 246   return $FingerprintsBitVector;
 247 }
 248 
 249 # Get fingerprint bits as a hexadecimal string...
 250 #
 251 sub GetBitsAsHexadecimalString {
 252   my($This, $BitsOrder) = @_;
 253 
 254   return $This->_GetFingerprintBitsAsString('Hexadecimal', $BitsOrder);
 255 }
 256 
 257 # Get fingerprint bits as an octal string...
 258 #
 259 sub GetBitsAsOctalString {
 260   my($This, $BitsOrder) = @_;
 261 
 262   croak "Error: ${ClassName}->GetBitsAsOctalString: Retrieval of fingerprits bits as an octal string is not supported ...";
 263 }
 264 
 265 # Get fingerprint bits as an decimal string...
 266 #
 267 sub GetBitsAsDecimalString {
 268   my($This, $BitsOrder) = @_;
 269 
 270   croak "Error: ${ClassName}->GetBitsAsOctalString: Retrieval of fingerprits bits as a decimal string is not supported ...";
 271 }
 272 
 273 # Get fingerprint bits as a binary string conatning 1s and 0s...
 274 #
 275 sub GetBitsAsBinaryString {
 276   my($This, $BitsOrder) = @_;
 277 
 278   return $This->_GetFingerprintBitsAsString('Binary', $BitsOrder);
 279 }
 280 
 281 # Get fingerprint bits as a binary string conatning 1s and 0s...
 282 #
 283 sub GetBitsAsRawBinaryString {
 284   my($This) = @_;
 285 
 286   return $This->_GetFingerprintBitsAsString('RawBinary');
 287 }
 288 
 289 # Return fingerprint bits as a string...
 290 #
 291 sub _GetFingerprintBitsAsString {
 292   my($This, $Format, $BitsOrder) = @_;
 293 
 294   $BitsOrder = (defined($BitsOrder) && $BitsOrder) ? $BitsOrder : 'Ascending';
 295 
 296   return $This->_GetBitsAsString($Format, $BitsOrder);
 297 }
 298 
 299 # Is it a fingerprints bit vector object?
 300 sub IsFingerprintsBitVector ($) {
 301   my($Object) = @_;
 302 
 303   return _IsFingerprintsBitVector($Object);
 304 }
 305 
 306 # Is it a fingerprints bit vector object?
 307 sub _IsFingerprintsBitVector {
 308   my($Object) = @_;
 309 
 310   return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
 311 }
 312 
 313 # Return a list of supported similarity coefficients...
 314 sub GetSupportedSimilarityCoefficients () {
 315 
 316   return @SimilarityCoefficients;
 317 }
 318 
 319 # Get bit density for fingerprints bit vector corresponding to on bits...
 320 #
 321 sub GetFingerprintsBitDensity {
 322   my($This) = @_;
 323   my($BitDensity);
 324 
 325   $BitDensity = $This->GetDensityOfSetBits();
 326 
 327   return round($BitDensity, 2);
 328 }
 329 
 330 # Fold fingerprints bit vector by recursively reducing its size by half untill size is less than or equal to
 331 # specified size...
 332 #
 333 sub FoldFingerprintsBitVectorBySize {
 334   my($This, $Size) = @_;
 335 
 336   if (!($Size > 0 && $Size <= $This->GetSize())) {
 337     croak "Error: ${ClassName}->FoldFingerprintsBitVectorBySize: Specified size, $Size, is not valid:  It must be > 0 && <= ", $This->GetSize()," ...";
 338   }
 339 
 340   if ($This->GetSize() <= $Size) {
 341     return $This;
 342   }
 343   return $This->_FoldFingerprintsBitVector('BySize', $Size);
 344 }
 345 
 346 # Fold fingerprints bit vector by recursively reducing its size by half untill bit density of set bits is greater than
 347 #  or equal to specified density...
 348 #
 349 sub FoldFingerprintsBitVectorByDensity {
 350   my($This, $Density) = @_;
 351 
 352   if (!($Density > 0 && $Density <= 1)) {
 353     croak "Error: ${ClassName}->FoldFingerprintsBitVectorByDensity: Specified bit density, $Density, is not valid:  It must be > 0 && <= 1 ...";
 354   }
 355 
 356   if ($This->GetDensityOfSetBits() >= $Density) {
 357     return $This;
 358   }
 359   return $This->_FoldFingerprintsBitVector('ByDensity', $Density);
 360 }
 361 
 362 # Fold fingerprints bit vector using size or density and return folded fingerprint bit vector...
 363 #
 364 sub _FoldFingerprintsBitVector {
 365   my($This, $Mode, $Value) = @_;
 366 
 367   # Fold upto size of 8 bits...
 368   if ($This->GetSize() <= 8) {
 369     return $This;
 370   }
 371 
 372   # Check size or density....
 373   if ($Mode =~ /^BySize$/i) {
 374     if ($This->GetSize() <= $Value) {
 375       return $This;
 376     }
 377   }
 378   elsif ($Mode =~ /^ByDensity$/i) {
 379     if ($This->GetDensityOfSetBits() >= $Value) {
 380       return $This;
 381     }
 382   }
 383   else {
 384     return $This;
 385   }
 386 
 387   # Recursively reduce its size by half...
 388   my($FirstHalfBinaryString, $SecondHalfBinaryString, $FirstHalfFingerprintsBitVector, $SecondHalfFingerprintsBitVector, $FoldedFingerprintsBitVector, $BinaryString, $StringLength);
 389 
 390   $BinaryString = $This->GetBitsAsBinaryString();
 391   $StringLength = length $BinaryString;
 392 
 393   $FirstHalfBinaryString = substr($BinaryString, 0, $StringLength/2);
 394   $SecondHalfBinaryString = substr($BinaryString, $StringLength/2);
 395 
 396   $FirstHalfFingerprintsBitVector = NewFromBinaryString($FirstHalfBinaryString);
 397   $SecondHalfFingerprintsBitVector = NewFromBinaryString($SecondHalfBinaryString);
 398 
 399   $FoldedFingerprintsBitVector = $FirstHalfFingerprintsBitVector | $SecondHalfFingerprintsBitVector;
 400 
 401   return $FoldedFingerprintsBitVector->_FoldFingerprintsBitVector($Mode, $Value);
 402 }
 403 
 404 # Is first bit vector subset of second bit vector?
 405 #
 406 # For a bit vector to be a subset of another bit vector, both vectors must be of
 407 # the same size and the bit positions set in first vector must also be set in the
 408 # secons bit vector.
 409 #
 410 # This functionality can be either invoked as a class function or an object method.
 411 #
 412 sub IsSubSet ($$) {
 413   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 414 
 415   if ($FingerprintsBitVectorA->GetSize() != $FingerprintsBitVectorB->GetSize()) {
 416     return 0;
 417   }
 418   my($AndFingerprintsBitVector);
 419 
 420   $AndFingerprintsBitVector = $FingerprintsBitVectorA & $FingerprintsBitVectorB;
 421 
 422   return ($FingerprintsBitVectorA->GetNumOfSetBits() == $AndFingerprintsBitVector->GetNumOfSetBits()) ? 1 : 0;
 423 }
 424 
 425 # Return a string containing vector values...
 426 sub StringifyFingerprintsBitVector {
 427   my($This) = @_;
 428   my($FingerprintsBitVectorString);
 429 
 430   # BitVector size information...
 431   #
 432   if ($This->{SpecifiedSize} != $This->GetSize()) {
 433     $FingerprintsBitVectorString = "SpecifiedSize: " . $This->{SpecifiedSize} . "; BitVectorSize: " . $This->GetSize();
 434   }
 435   else {
 436     $FingerprintsBitVectorString = "BitVectorSize: " . $This->GetSize();
 437   }
 438   my($NumOfSetBits, $BitDensity);
 439   $NumOfSetBits = $This->GetNumOfSetBits();
 440   $BitDensity = $This->GetFingerprintsBitDensity();
 441 
 442   $FingerprintsBitVectorString .= "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";
 443 
 444   # BitVector values...
 445   $FingerprintsBitVectorString .= "; BitVector: " . $This->StringifyBitVector();
 446 
 447   return $FingerprintsBitVectorString;
 448 }
 449 
 450 # For two fingerprints bit vectors A and B of same size, let:
 451 #
 452 #  Na = Number of bits set to "1" in A
 453 #  Nb = Number of bits set to "1" in B
 454 #  Nc = Number of bits set to "1" in both A and B
 455 #  Nd = Number of bits set to "0" in both A and B
 456 #
 457 #  Nt = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
 458 #
 459 #  Na - Nc = Number of bits set to "1" in A but not in B
 460 #  Nb - Nc = Number of bits set to "1" in B but not in A
 461 #
 462 # Various similarity coefficients [ Ref 40 - 42 ] for a pair of bit vectors A and B are
 463 # defined as follows:
 464 #
 465 # . BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / (  SQRT ( Nc * Nd ) + Nc + ( Na - Nc )  + ( Nb - Nc ) ) ( same as Buser )
 466 #
 467 # . Buser: ( SQRT ( Nc * Nd ) + Nc ) / (  SQRT ( Nc * Nd ) + Nc + ( Na - Nc )  + ( Nb - Nc ) ) ( same as BaroniUrbani )
 468 #
 469 # . Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai)
 470 #
 471 # . Dice: (2 * Nc) / ( Na + Nb )
 472 #
 473 # . Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb)
 474 #
 475 # . Forbes: ( Nt * Nc ) / ( Na * Nb )
 476 #
# . Fossum: ( Nt * ( ( Nc - 1/2 ) ** 2 ) ) / ( Na * Nb )
 478 #
 479 # . Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
 480 #
 481 # . Jaccard: Nc /  ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto)
 482 #
 483 # . Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc )
 484 #
 485 # . Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) ) = 0.5 * ( Nc / Na + Nc / Nb )
 486 #
 487 # . Matching: ( Nc + Nd ) / Nt
 488 #
 489 # . McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / (  Na * Nb )
 490 #
 491 # . Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine)
 492 #
# . Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * (  Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
 494 #
 495 # . RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc)  + ( Nb  - Nc) + Nt) = ( Nc + Nd ) / ( Na  + Nb  - 2Nc + Nt)
 496 #
 497 # . RussellRao: Nc / Nt
 498 #
 499 # . Simpson: Nc / MIN ( Na, Nb)
 500 #
 501 # . SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc)  + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc )
 502 #
 503 # . SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
 504 #
 505 # . SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc  )
 506 #
 507 # . Tanimoto: Nc /  ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard)
 508 #
 509 # . Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb )  + Nb)
 510 #
 511 # . Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) )  )
 512 #
 513 #
# Values of Tanimoto/Jaccard and Tversky coefficients are dependent on only those bits which
 515 # are set to "1" in both A and B. In order to take into account all bit positions, modified versions
 516 # of Tanimoto [ Ref. 42 ] and Tversky [  Ref. 43 ] have been developed.
 517 #
 518 # Let:
 519 #
 520 #  Na' = Number of bits set to "0" in A
 521 #  Nb' = Number of bits set to "0" in B
 522 #  Nc' = Number of bits set to "0" in both A and B
 523 #
 524 # . Tanimoto': Nc' /  ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' )
 525 #
 526 # . Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' )  + Nb')
 527 #
 528 # Then:
 529 #
 530 # . WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto'
 531 #
 532 # . WeightedTversky = beta * Tversky + (1 - beta) * Tversky'
 533 #
 534 #
 535 
 536 # Calculate BaroniUrbani similarity coefficient for two same size bit vectors.
 537 #
 538 # This functionality can be either invoked as a class function or an object method.
 539 #
 540 sub BaroniUrbaniSimilarityCoefficient ($$) {
 541   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 542 
 543   return BuserSimilarityCoefficient($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 544 }
 545 
 546 # Calculate Buser similarity coefficient for two same size bit vectors.
 547 #
 548 # This functionality can be either invoked as a class function or an object method.
 549 #
 550 sub BuserSimilarityCoefficient ($$) {
 551   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 552   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 553 
 554   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 555   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 556   $Nt = $Na + $Nb - $Nc + $Nd;
 557 
 558   $Numerator = sqrt($Nc*$Nd) + $Nc;
 559   $Denominator = sqrt($Nc*$Nd) + ($Na - $Nc)  + ($Nb - $Nc ) + $Nc;
 560 
 561   return  $Denominator ? ($Numerator/$Denominator) : 0;
 562 }
 563 
 564 # Calculate Cosine similarity coefficient for two same size bit vectors.
 565 #
 566 # This functionality can be either invoked as a class function or an object method.
 567 #
 568 sub CosineSimilarityCoefficient ($$) {
 569   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 570   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 571 
 572   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 573 
 574   $Numerator = $Nc;
 575   $Denominator = sqrt($Na*$Nb);
 576 
 577   return  $Denominator ? ($Numerator/$Denominator) : 0;
 578 }
 579 
 580 # Calculate Dice similarity coefficient for two same size bit vectors.
 581 #
 582 # This functionality can be either invoked as a class function or an object method.
 583 #
 584 sub DiceSimilarityCoefficient ($$) {
 585   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 586   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 587 
 588   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 589 
 590   $Numerator = 2*$Nc;
 591   $Denominator = $Na + $Nb;
 592 
 593   return  $Denominator ? ($Numerator/$Denominator) : 0;
 594 }
 595 
 596 # Calculate Dennis similarity coefficient for two same size bit vectors.
 597 #
 598 # This functionality can be either invoked as a class function or an object method.
 599 #
 600 sub DennisSimilarityCoefficient ($$) {
 601   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 602   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 603 
 604   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 605   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 606   $Nt = $Na + $Nb - $Nc + $Nd;
 607 
 608   $Numerator = $Nc*$Nd - (($Na - $Nc)*($Nb - $Nc));
 609   $Denominator = sqrt($Nt*$Na*$Nb);
 610 
 611   return  $Denominator ? ($Numerator/$Denominator) : 0;
 612 }
 613 
 614 # Calculate Forbes similarity coefficient for two same size bit vectors.
 615 #
 616 # This functionality can be either invoked as a class function or an object method.
 617 #
 618 sub ForbesSimilarityCoefficient ($$) {
 619   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 620   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 621 
 622   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 623   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 624   $Nt = $Na + $Nb - $Nc + $Nd;
 625 
 626   $Numerator = $Nt*$Nc;
 627   $Denominator = $Na*$Nb;
 628 
 629   return  $Denominator ? ($Numerator/$Denominator) : 0;
 630 }
 631 
 632 # Calculate Fossum similarity coefficient for two same size bit vectors.
 633 #
 634 # This functionality can be either invoked as a class function or an object method.
 635 #
 636 sub FossumSimilarityCoefficient ($$) {
 637   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 638   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 639 
 640   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 641   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 642   $Nt = $Na + $Nb - $Nc + $Nd;
 643 
 644   $Numerator =  $Nt*(($Nc - 0.5)** 2);
 645   $Denominator =  $Na*$Nb ;
 646 
 647   return  $Denominator ? ($Numerator/$Denominator) : 0;
 648 }
 649 
 650 # Calculate Hamann similarity coefficient for two same size bit vectors.
 651 #
 652 # This functionality can be either invoked as a class function or an object method.
 653 #
 654 sub HamannSimilarityCoefficient ($$) {
 655   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 656   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 657 
 658   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 659   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 660   $Nt = $Na + $Nb - $Nc + $Nd;
 661 
 662   $Numerator =  ($Nc + $Nd ) - ($Na - $Nc) - ($Nb - $Nc) ;
 663   $Denominator = $Nt;
 664 
 665   return  $Denominator ? ($Numerator/$Denominator) : 0;
 666 }
 667 
 668 # Calculate Jacard similarity coefficient for two same size bit vectors.
 669 #
 670 # This functionality can be either invoked as a class function or an object method.
 671 #
 672 sub JacardSimilarityCoefficient ($$) {
 673   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 674 
 675   return TanimotoSimilarityCoefficient($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 676 }
 677 
 678 # Calculate Kulczynski1 similarity coefficient for two same size bit vectors.
 679 #
 680 # This functionality can be either invoked as a class function or an object method.
 681 #
 682 sub Kulczynski1SimilarityCoefficient ($$) {
 683   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 684   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 685 
 686   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 687 
 688   $Numerator = $Nc;
 689   $Denominator =  $Na + $Nb - 2*$Nc;
 690 
 691   return  $Denominator ? ($Numerator/$Denominator) : 0;
 692 }
 693 
 694 # Calculate Kulczynski2 similarity coefficient for two same size bit vectors.
 695 #
 696 # This functionality can be either invoked as a class function or an object method.
 697 #
 698 sub Kulczynski2SimilarityCoefficient ($$) {
 699   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 700   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 701 
 702   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 703 
 704   $Numerator = 0.5*($Na*$Nc + $Nb*$Nc);
 705   $Denominator = $Na*$Nb;
 706 
 707   return  $Denominator ? ($Numerator/$Denominator) : 0;
 708 }
 709 
 710 # Calculate Matching similarity coefficient for two same size bit vectors.
 711 #
 712 # This functionality can be either invoked as a class function or an object method.
 713 #
 714 sub MatchingSimilarityCoefficient ($$) {
 715   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 716   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 717 
 718   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 719   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 720   $Nt = $Na + $Nb - $Nc + $Nd;
 721 
 722   $Numerator =  $Nc + $Nd;
 723   $Denominator = $Nt;
 724 
 725   return  $Denominator ? ($Numerator/$Denominator) : 0;
 726 }
 727 
 728 # Calculate McConnaughey similarity coefficient for two same size bit vectors.
 729 #
 730 # This functionality can be either invoked as a class function or an object method.
 731 #
 732 sub McConnaugheySimilarityCoefficient ($$) {
 733   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 734   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 735 
 736   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 737 
 738   $Numerator =  $Nc**2 - (($Na - $Nc)*($Nb - $Nc));
 739   $Denominator = $Na*$Nb ;
 740 
 741   return  $Denominator ? ($Numerator/$Denominator) : 0;
 742 }
 743 
 744 # Calculate Ochiai similarity coefficient for two same size bit vectors.
 745 #
 746 # This functionality can be either invoked as a class function or an object method.
 747 #
 748 sub OchiaiSimilarityCoefficient ($$) {
 749   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 750 
 751   return CosineSimilarityCoefficient($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 752 }
 753 
 754 # Calculate Pearson similarity coefficient for two same size bit vectors.
 755 #
 756 # This functionality can be either invoked as a class function or an object method.
 757 #
 758 sub PearsonSimilarityCoefficient ($$) {
 759   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 760   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 761 
 762   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 763   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 764   $Nt = $Na + $Nb - $Nc + $Nd;
 765 
 766   $Numerator = ($Nc*$Nd ) - (($Na - $Nc)*($Nb - $Nc));
 767   $Denominator =  sqrt($Na*$Nb*($Na - $Nc + $Nd )*($Nb - $Nc + $Nd));
 768 
 769   return  $Denominator ? ($Numerator/$Denominator) : 0;
 770 }
 771 
 772 # Calculate RogersTanimoto similarity coefficient for two same size bit vectors.
 773 #
 774 # This functionality can be either invoked as a class function or an object method.
 775 #
 776 sub RogersTanimotoSimilarityCoefficient ($$) {
 777   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 778   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 779 
 780   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 781   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 782   $Nt = $Na + $Nb - $Nc + $Nd;
 783 
 784   $Numerator = $Nc + $Nd;
 785   $Denominator =  ($Na - $Nc)  + ($Nb  - $Nc) + $Nt;
 786 
 787   return  $Denominator ? ($Numerator/$Denominator) : 0;
 788 }
 789 
 790 # Calculate RussellRao similarity coefficient for two same size bit vectors.
 791 #
 792 # This functionality can be either invoked as a class function or an object method.
 793 #
 794 sub RussellRaoSimilarityCoefficient ($$) {
 795   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 796   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 797 
 798   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 799   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 800   $Nt = $Na + $Nb - $Nc + $Nd;
 801 
 802   $Numerator = $Nc;
 803   $Denominator = $Nt;
 804 
 805   return  $Denominator ? ($Numerator/$Denominator) : 0;
 806 }
 807 
 808 # Calculate Simpson similarity coefficient for two same size bit vectors.
 809 #
 810 # This functionality can be either invoked as a class function or an object method.
 811 #
 812 sub SimpsonSimilarityCoefficient ($$) {
 813   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 814   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 815 
 816   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 817 
 818   $Numerator = $Nc;
 819   $Denominator =  min($Na, $Nb);
 820 
 821   return  $Denominator ? ($Numerator/$Denominator) : 0;
 822 }
 823 
 824 # Calculate SkoalSneath1 similarity coefficient for two same size bit vectors.
 825 #
 826 # This functionality can be either invoked as a class function or an object method.
 827 #
 828 sub SkoalSneath1SimilarityCoefficient ($$) {
 829   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 830   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 831 
 832   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 833 
 834   $Numerator = $Nc;
 835   $Denominator = $Nc + 2*($Na - $Nc)  + 2*($Nb - $Nc);
 836 
 837   return  $Denominator ? ($Numerator/$Denominator) : 0;
 838 }
 839 
 840 # Calculate SkoalSneath2 similarity coefficient for two same size bit vectors.
 841 #
 842 # This functionality can be either invoked as a class function or an object method.
 843 #
 844 sub SkoalSneath2SimilarityCoefficient ($$) {
 845   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 846   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 847 
 848   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 849   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 850   $Nt = $Na + $Nb - $Nc + $Nd;
 851 
 852   $Numerator = 2*$Nc + 2*$Nd  ;
 853   $Denominator = $Nc + $Nd + $Nt ;
 854 
 855   return  $Denominator ? ($Numerator/$Denominator) : 0;
 856 }
 857 
 858 # Calculate SkoalSneath3 similarity coefficient for two same size bit vectors.
 859 #
 860 # This functionality can be either invoked as a class function or an object method.
 861 #
 862 sub SkoalSneath3SimilarityCoefficient ($$) {
 863   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 864   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 865 
 866   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 867   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 868   $Nt = $Na + $Nb - $Nc + $Nd;
 869 
 870   $Numerator =  $Nc + $Nd;
 871   $Denominator = ($Na - $Nc) + ($Nb - $Nc ) ;
 872 
 873   return  $Denominator ? ($Numerator/$Denominator) : 0;
 874 }
 875 
 876 # Calculate Tanimoto similarity coefficient for two same size bit vectors.
 877 #
 878 # This functionality can be either invoked as a class function or an object method.
 879 #
 880 sub TanimotoSimilarityCoefficient ($$) {
 881   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 882   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 883 
 884   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 885 
 886   $Numerator = $Nc;
 887   $Denominator = $Na + $Nb - $Nc;
 888 
 889   return  $Denominator ? ($Numerator/$Denominator) : 0;
 890 }
 891 
 892 # Calculate Tversky similarity coefficient for two same size bit vectors.
 893 #
 894 # This functionality can be either invoked as a class function or an object method.
 895 #
 896 sub TverskySimilarityCoefficient ($$$) {
 897   my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Alpha) = @_;
 898   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 899 
 900   if (!(defined($Alpha) && ($Alpha >= 0 && $Alpha <= 1))) {
 901     croak "Error: ${ClassName}->TverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
 902   }
 903 
 904   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 905 
 906   $Numerator = $Nc;
 907   $Denominator =  $Alpha*($Na - $Nb )  + $Nb;
 908 
 909   return  $Denominator ? ($Numerator/$Denominator) : 0;
 910 }
 911 
 912 # Calculate Yule similarity coefficient for two same size bit vectors.
 913 #
 914 # This functionality can be either invoked as a class function or an object method.
 915 #
 916 sub YuleSimilarityCoefficient ($$) {
 917   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 918   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 919 
 920   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 921   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 922   $Nt = $Na + $Nb - $Nc + $Nd;
 923 
 924   $Numerator = ($Nc*$Nd) - (($Na - $Nc)*($Nb - $Nc)) ;
 925   $Denominator = ($Nc*$Nd) + (($Na - $Nc)*($Nb - $Nc))  ;
 926 
 927   return  $Denominator ? ($Numerator/$Denominator) : 0;
 928 }
 929 
 930 # Calculate WeightedTanimoto similarity coefficient for two same size bit vectors.
 931 #
 932 # This functionality can be either invoked as a class function or an object method.
 933 #
 934 sub WeightedTanimotoSimilarityCoefficient ($$$) {
 935   my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Beta) = @_;
 936   my($Na, $Nb, $Nc, $TanimotoForSetBits, $TanimotoForClearBits, $Numerator, $Denominator, $WeightedTanimoto);
 937 
 938   if (!(defined($Beta) && ($Beta >= 0 && $Beta <= 1))) {
 939     croak "Error: ${ClassName}->WeightedTanimotoSimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
 940   }
 941 
 942   # Get Tanimoto for set bits...
 943   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 944 
 945   $Numerator = $Nc;
 946   $Denominator = $Na + $Nb - $Nc;
 947   $TanimotoForSetBits = $Denominator ? ($Numerator/$Denominator) : 0;
 948 
 949   # Get Tanimoto for clear bits...
 950   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 951 
 952   $Numerator = $Nc;
 953   $Denominator = $Na + $Nb - $Nc;
 954   $TanimotoForClearBits = $Denominator ? ($Numerator/$Denominator) : 0;
 955 
 956   $WeightedTanimoto = $Beta*$TanimotoForSetBits + (1 - $Beta)*$TanimotoForClearBits;
 957 
 958   return  $WeightedTanimoto;
 959 }
 960 
 961 # Calculate WeightedTversky similarity coefficient for two same size bit vectors.
 962 #
 963 # This functionality can be either invoked as a class function or an object method.
 964 #
 965 sub WeightedTverskySimilarityCoefficient ($$$) {
 966   my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Alpha, $Beta) = @_;
 967   my($Na, $Nb, $Nc, $TverskyForSetBits, $TverskyForClearBits, $Numerator, $Denominator, $WeightedTversky);
 968 
 969   if (!(defined($Alpha) && ($Alpha >= 0 && $Alpha <= 1))) {
 970     croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
 971   }
 972   if (!(defined($Beta) && ($Beta >= 0 && $Beta <= 1))) {
 973     croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
 974   }
 975 
 976   # Get Tversky for set bits...
 977   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 978 
 979   $Numerator = $Nc;
 980   $Denominator =  $Alpha*($Na - $Nb )  + $Nb;
 981   $TverskyForSetBits =  $Denominator ? ($Numerator/$Denominator) : 0;
 982 
 983   # Get Tversky for clear bits...
 984   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 985 
 986   $Numerator = $Nc;
 987   $Denominator =  $Alpha*($Na - $Nb )  + $Nb;
 988   $TverskyForClearBits =  $Denominator ? ($Numerator/$Denominator) : 0;
 989 
 990   $WeightedTversky = $Beta*$TverskyForSetBits + (1 - $Beta)*$TverskyForClearBits;
 991 
 992   return  $WeightedTversky;
 993 }
 994 
 995 # Get number of Na, Nb and Nc bits in bit vector A and B to be used for similarity coefficient calculations...
 996 #
 997 sub _GetNumOfIndividualAndCommonSetBits ($$) {
 998   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 999   my($Na, $Nb, $Nc, $Nd);
1000 
1001   # Number of bits set to "1" in A
1002   $Na = $FingerprintsBitVectorA->GetNumOfSetBits();
1003 
1004   # Number of bits set to "1" in B
1005   $Nb = $FingerprintsBitVectorB->GetNumOfSetBits();
1006 
1007   # Number of bits set to "1" in both A and B
1008   my($NcBitVector);
1009   $NcBitVector = $FingerprintsBitVectorA & $FingerprintsBitVectorB;
1010   $Nc = $NcBitVector->GetNumOfSetBits();
1011 
1012   return ($Na, $Nb, $Nc);
1013 }
1014 
# Count Nd, the number of bits set to "0" in both A and B, for use in the
# similarity coefficient calculations.
#
# The count is taken from the bitwise AND of the two complemented vectors and is
# then reduced by the clear bits correction of A when A needs one, or else by
# that of B when B needs one. At most one correction is applied.
#
sub _GetNumOfCommonClearBits ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  # A bit is set here only where it is clear in both A and B...
  my $CommonClearBitVector = ~$BitVectorA & ~$BitVectorB;
  my $CommonClearBits = $CommonClearBitVector->GetNumOfSetBits();

  # Correct for the clear bits used for padding: A is considered first and B
  # only when A needs no correction...
  foreach my $BitVector ($BitVectorA, $BitVectorB) {
    next unless _IsNumOfClearBitsCorrectionRequired($BitVector);

    $CommonClearBits -= _GetNumOfClearBitsCorrection($BitVector);
    last;
  }

  return $CommonClearBits;
}
1035 
# Count the clear bits needed by the similarity coefficient calculations.
#
# Returns a three element list (Na, Nb, Nc):
#
#   Na - number of bits set to "0" in A, corrected for padding when required
#   Nb - number of bits set to "0" in B, corrected for padding when required
#   Nc - number of bits set to "0" in both A and B
#
sub _GetNumOfIndividualAndCommonClearBits ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  # Handle A first and B second; both go through the same count and correct steps...
  my @IndividualClearBits;
  foreach my $BitVector ($BitVectorA, $BitVectorB) {
    my $ClearBits = $BitVector->GetNumOfClearBits();

    # Leave out the clear bits used for padding...
    if (_IsNumOfClearBitsCorrectionRequired($BitVector)) {
      $ClearBits -= _GetNumOfClearBitsCorrection($BitVector);
    }
    push @IndividualClearBits, $ClearBits;
  }
  my($ClearBitsInA, $ClearBitsInB) = @IndividualClearBits;

  # The common count is corrected for padding by the helper itself...
  my $CommonClearBits = _GetNumOfCommonClearBits($BitVectorA, $BitVectorB);

  return ($ClearBitsInA, $ClearBitsInB, $CommonClearBits);
}
1063 
# Irrespective of the specified size, the Perl functions used to handle bit data
# in the BitVector class automatically set the size to the next nearest power of 2
# and clear the extra bits.
#
# SpecifiedSize is used by this class to process any arbitrary size during
# similarity coefficient calculations.
#
# Assuming the FingerprintsBitVector class only manipulates bits up to the
# specified size, a correction for the extra bits added by the BitVector class
# needs to be applied to the number of clear bits.
#
# Returns that correction: the difference between Size and SpecifiedSize.
#
sub _GetNumOfClearBitsCorrection {
  my($FingerprintsBitVector) = @_;

  my($ActualSize, $RequestedSize) = @{$FingerprintsBitVector}{qw(Size SpecifiedSize)};

  return $ActualSize - $RequestedSize;
}
1080 
# Is a correction to the number of clear bits required?
#
# Returns 1 when Size is larger than SpecifiedSize and 0 otherwise.
#
sub _IsNumOfClearBitsCorrectionRequired {
  my($FingerprintsBitVector) = @_;

  return 1 if $FingerprintsBitVector->{Size} > $FingerprintsBitVector->{SpecifiedSize};

  return 0;
}
1088 
1089