1 package Fingerprints::FingerprintsBitVector; 2 # 3 # $RCSfile: FingerprintsBitVector.pm,v $ 4 # $Date: 2015/02/28 20:48:54 $ 5 # $Revision: 1.27 $ 6 # 7 # Author: Manish Sud <msud@san.rr.com> 8 # 9 # Copyright (C) 2015 Manish Sud. All rights reserved. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 use strict; 30 use Carp; 31 use Exporter; 32 use Scalar::Util (); 33 use BitVector; 34 use MathUtil; 35 use TextUtil (); 36 37 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 38 39 @ISA = qw(BitVector Exporter); 40 41 # Similiarity coefficients... 
my(@SimilarityCoefficients) = qw(
  BaroniUrbaniSimilarityCoefficient BuserSimilarityCoefficient CosineSimilarityCoefficient
  DiceSimilarityCoefficient DennisSimilarityCoefficient ForbesSimilarityCoefficient
  FossumSimilarityCoefficient HamannSimilarityCoefficient JacardSimilarityCoefficient
  Kulczynski1SimilarityCoefficient Kulczynski2SimilarityCoefficient MatchingSimilarityCoefficient
  McConnaugheySimilarityCoefficient OchiaiSimilarityCoefficient PearsonSimilarityCoefficient
  RogersTanimotoSimilarityCoefficient RussellRaoSimilarityCoefficient SimpsonSimilarityCoefficient
  SkoalSneath1SimilarityCoefficient SkoalSneath2SimilarityCoefficient SkoalSneath3SimilarityCoefficient
  TanimotoSimilarityCoefficient TverskySimilarityCoefficient YuleSimilarityCoefficient
  WeightedTanimotoSimilarityCoefficient WeightedTverskySimilarityCoefficient
);

# New from string...
my(@NewFromString) = qw(NewFromBinaryString NewFromHexadecimalString NewFromRawBinaryString);

# Exported by default...
@EXPORT = qw(IsFingerprintsBitVector);

# Exported on request. The list is built from the arrays themselves: qw() does not
# interpolate, so naming the arrays inside qw() would export the literal symbols
# '@NewFromString' and '@SimilarityCoefficients' instead of the functions they list,
# leaving the members of the 'new' and 'coefficients' tags unexportable.
@EXPORT_OK = ('GetSupportedSimilarityCoefficients', @NewFromString, @SimilarityCoefficients);

%EXPORT_TAGS = (
  new => [@NewFromString],
  coefficients => [@SimilarityCoefficients],
  all => [@EXPORT, @EXPORT_OK]
);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Objects stringify through StringifyFingerprintsBitVector...
use overload '""' => 'StringifyFingerprintsBitVector';

# Class constructor...
#
# Parameters:
#   $Class - class name or an existing object whose class is reused
#   $Size  - number of bits; validated by _InitializeFingerprintsBitVector
#
# Returns the newly created object.
#
sub new {
  my($Class, $Size) = @_;

  # Build the object through the parent class constructor and rebless it so that
  # invocation through either a class name or an object yields the right class...
  my $This = $Class->SUPER::new($Size);
  bless $This, ref($Class) || $Class;

  # Validates $Size and records it as the specified size...
  $This->_InitializeFingerprintsBitVector($Size);

  return $This;
}

# Initialize object data...
#
# Note:
#   . The class, BitVector, used to derive this class provides all the functionality to
#     manipulate bits.
#   . Irrespective of specified size, Perl functions used to handle bit data in
#     BitVector class automatically sets the size to the next nearest power of 2.
#     SpecifiedSize is used by this class to process any arbitrary size during similarity
#     coefficient calculations.
#
sub _InitializeFingerprintsBitVector {
  my($This, $Size) = @_;

  # A size is mandatory and must be greater than zero...
  croak "Error: ${ClassName}->new: FingerprintsBitVector object instantiated without specifying its size ..."
    if !defined $Size;
  croak "Error: ${ClassName}->new: Fingerprints bit vector size, $Size, must be a positive integer..."
    if $Size <=0;

  # Specified size of fingerprints...
  $This->{SpecifiedSize} = $Size;
}

# Initialize class ...
sub _InitializeClass {
  # Class name used as a prefix in error messages...
  $ClassName = __PACKAGE__;
}

# Set specified size...
#
# Notes:
#   Irrespective of specified size, Perl functions used to handle bit data in
#   BitVector class automatically sets the size to the next nearest power of 2.
#   SpecifiedSize is used by this class to process any arbitrary size during similarity
#   coefficient calculations.
#
# The new value must be > 0 and must not exceed the size stored in the object;
# anything else is reported through croak.
#
sub SetSpecifiedSize {
  my($This, $SpecifiedSize) = @_;

  my $IsValidSize = ($SpecifiedSize > 0 && $SpecifiedSize <= $This->{Size});
  if (!$IsValidSize) {
    croak "Error: ${ClassName}->SetSpecifiedSize: Specified size, $SpecifiedSize, is not valid:  It must be > 0 && <= ", $This->GetSize()," ...";
  }

  $This->{SpecifiedSize} = $SpecifiedSize;
}

# Get specified size...
sub GetSpecifiedSize {
  my($This) = @_;

  return $This->{SpecifiedSize};
}

# Set ID and return the object to allow chaining...
sub SetID {
  my($This, $Value) = @_;

  $This->{ID} = $Value;

  return $This;
}

# Get ID; 'None' is returned when no ID has been set...
sub GetID {
  my($This) = @_;

  if (exists $This->{ID}) {
    return $This->{ID};
  }
  return 'None';
}

# Set description and return the object to allow chaining...
sub SetDescription {
  my($This, $Value) = @_;

  $This->{Description} = $Value;

  return $This;
}

# Get description; a fixed placeholder is returned when none has been set...
sub GetDescription {
  my($This) = @_;

  if (exists $This->{Description}) {
    return $This->{Description};
  }
  return 'No description available';
}

# Set vector type and return the object to allow chaining...
sub SetVectorType {
  my($This, $Value) = @_;

  $This->{VectorType} = $Value;

  return $This;
}

# Get vector type; 'FingerprintsBitVector' is returned when none has been set...
sub GetVectorType {
  my($This) = @_;

  if (exists $This->{VectorType}) {
    return $This->{VectorType};
  }
  return 'FingerprintsBitVector';
}

# Create a new fingerprints bit vector using binary string. This functionality can be
# either invoked as a class function or an object method.
#
# When the first argument is a fingerprints bit vector object (method invocation), it
# is skipped and the string and bits order are taken from the arguments after it.
#
sub NewFromBinaryString ($;$) {
  my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;

  my($String, $BitsOrder) = _IsFingerprintsBitVector($FirstParameter)
    ? ($SecondParameter, $ThirdParameter)
    : ($FirstParameter, $SecondParameter);

  return _NewFingerptinsBitVectorFromString('Binary', $String, $BitsOrder);
}

# Create a new fingerprints bit vector using hexadecimal string. This functionality can be
# either invoked as a class function or an object method.
#
# When the first argument is a fingerprints bit vector object (method invocation), it
# is skipped and the string and bits order are taken from the arguments after it.
#
sub NewFromHexadecimalString ($;$) {
  my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;

  my($String, $BitsOrder) = _IsFingerprintsBitVector($FirstParameter)
    ? ($SecondParameter, $ThirdParameter)
    : ($FirstParameter, $SecondParameter);

  return _NewFingerptinsBitVectorFromString('Hexadecimal', $String, $BitsOrder);
}

# Create a new fingerprints bit vector using octal string. This functionality can be
# either invoked as a class function or an object method.
#
# Not supported: always croaks.
#
sub NewFromOctalString ($) {
  croak "Error: ${ClassName}->NewFromOctalString: Creation of fingerprits bit vector from an octal string is not supported ...";
}

# Create a new fingerprints bit vector using decimal string. This functionality can be
# either invoked as a class function or an object method.
#
# Not supported: always croaks.
#
sub NewFromDecimalString ($;$) {
  croak "Error: ${ClassName}->NewFromDecimalString: Creation of fingerprits bit vector from a decimal string is not supported ...";
}

# Create a new fingerprints bit vector using raw binary string. This functionality can be
# either invoked as a class function or an object method.
#
# When the first argument is a fingerprints bit vector object (method invocation), it
# is skipped and the string and bits order are taken from the arguments after it.
#
sub NewFromRawBinaryString ($;$) {
  my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;

  if (_IsFingerprintsBitVector($FirstParameter)) {
    return _NewFingerptinsBitVectorFromString('RawBinary', $SecondParameter, $ThirdParameter);
  }
  else {
    return _NewFingerptinsBitVectorFromString( 'RawBinary', $FirstParameter, $SecondParameter);
  }
}

# Create a new fingerprints bit vector from a string...
#
# Parameters:
#   $Format    - string format name passed on to the BitVector helpers
#                ('Binary', 'Hexadecimal' or 'RawBinary' in this file)
#   $String    - bit data in the specified format
#   $BitsOrder - optional bits order passed on unchanged to _SetBitsAsString
#
# Returns the new fingerprints bit vector object.
#
sub _NewFingerptinsBitVectorFromString ($$;$) {
  my($Format, $String, $BitsOrder) = @_;

  my $Size = BitVector::_CalculateStringSizeInBits($Format, $String);

  # Explicit method call instead of indirect object syntax: inside a package that
  # defines its own new(), "new Class(...)" depends on parser heuristics...
  my $FingerprintsBitVector = Fingerprints::FingerprintsBitVector->new($Size);
  $FingerprintsBitVector->_SetBitsAsString($Format, $String, $BitsOrder);

  return $FingerprintsBitVector;
}

# Get fingerprint bits as a hexadecimal string...
#
sub GetBitsAsHexadecimalString {
  my($This, $BitsOrder) = @_;

  return $This->_GetFingerprintBitsAsString('Hexadecimal', $BitsOrder);
}

# Get fingerprint bits as an octal string...
#
# Not supported: always croaks.
#
sub GetBitsAsOctalString {
  my($This, $BitsOrder) = @_;

  croak "Error: ${ClassName}->GetBitsAsOctalString: Retrieval of fingerprits bits as an octal string is not supported ...";
}

# Get fingerprint bits as a decimal string...
#
# Not supported: always croaks. The error message names this method, so the failure
# is reported against the call the user actually made.
#
sub GetBitsAsDecimalString {
  my($This, $BitsOrder) = @_;

  croak "Error: ${ClassName}->GetBitsAsDecimalString: Retrieval of fingerprits bits as a decimal string is not supported ...";
}

# Get fingerprint bits as a binary string containing 1s and 0s...
#
sub GetBitsAsBinaryString {
  my($This, $BitsOrder) = @_;

  return $This->_GetFingerprintBitsAsString('Binary', $BitsOrder);
}

# Get fingerprint bits as a string in the 'RawBinary' format...
#
# No bits order is passed on, so the default of _GetFingerprintBitsAsString applies.
#
sub GetBitsAsRawBinaryString {
  my($This) = @_;

  return $This->_GetFingerprintBitsAsString('RawBinary');
}

# Return fingerprint bits as a string...
#
# Parameters:
#   $Format    - string format name passed on to _GetBitsAsString
#   $BitsOrder - optional bits order; 'Ascending' is used when it is missing or false
#
sub _GetFingerprintBitsAsString {
  my($This, $Format, $BitsOrder) = @_;

  $BitsOrder = (defined($BitsOrder) && $BitsOrder) ? $BitsOrder : 'Ascending';

  return $This->_GetBitsAsString($Format, $BitsOrder);
}

# Is it a fingerprints bit vector object?
#
# Returns 1 or 0.
#
sub IsFingerprintsBitVector ($) {
  my($Object) = @_;

  return _IsFingerprintsBitVector($Object);
}

# Is it a fingerprints bit vector object?
#
# Returns 1 only for a blessed reference that isa this class; 0 for anything else,
# including plain scalars and unblessed references.
#
sub _IsFingerprintsBitVector {
  my($Object) = @_;

  return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
}

# Return a list of supported similarity coefficients...
sub GetSupportedSimilarityCoefficients () {

  return @SimilarityCoefficients;
}

# Get bit density for fingerprints bit vector corresponding to on bits...
#
# The density reported by GetDensityOfSetBits is rounded to 2 decimal places.
#
sub GetFingerprintsBitDensity {
  my($This) = @_;
  my($BitDensity);

  $BitDensity = $This->GetDensityOfSetBits();

  return round($BitDensity, 2);
}

# Fold fingerprints bit vector by recursively reducing its size by half until size is less than or equal to
# specified size...
#
# The requested size must be > 0 and must not exceed the current size; anything else
# is reported through croak. The object itself is returned when no folding is needed.
#
sub FoldFingerprintsBitVectorBySize {
  my($This, $Size) = @_;

  unless ($Size > 0 && $Size <= $This->GetSize()) {
    croak "Error: ${ClassName}->FoldFingerprintsBitVectorBySize: Specified size, $Size, is not valid:  It must be > 0 && <= ", $This->GetSize()," ...";
  }

  # Already small enough...
  return $This if $This->GetSize() <= $Size;

  return $This->_FoldFingerprintsBitVector('BySize', $Size);
}

# Fold fingerprints bit vector by recursively reducing its size by half until bit density of set bits is greater than
# or equal to specified density...
#
# The requested density must be > 0 and <= 1; anything else is reported through croak.
# The object itself is returned when no folding is needed.
#
sub FoldFingerprintsBitVectorByDensity {
  my($This, $Density) = @_;

  unless ($Density > 0 && $Density <= 1) {
    croak "Error: ${ClassName}->FoldFingerprintsBitVectorByDensity: Specified bit density, $Density, is not valid:  It must be > 0 && <= 1 ...";
  }

  # Already dense enough...
  return $This if $This->GetDensityOfSetBits() >= $Density;

  return $This->_FoldFingerprintsBitVector('ByDensity', $Density);
}

# Fold fingerprints bit vector using size or density and return folded fingerprint bit vector...
#
# Parameters:
#   $Mode  - 'BySize' or 'ByDensity' (case insensitive); any other mode returns the
#            object unchanged
#   $Value - target size or target density, depending on the mode
#
sub _FoldFingerprintsBitVector {
  my($This, $Mode, $Value) = @_;

  # Fold upto size of 8 bits...
  return $This if $This->GetSize() <= 8;

  # Check size or density....
  if ($Mode =~ /^BySize$/i) {
    return $This if $This->GetSize() <= $Value;
  }
  elsif ($Mode =~ /^ByDensity$/i) {
    return $This if $This->GetDensityOfSetBits() >= $Value;
  }
  else {
    return $This;
  }

  # Recursively reduce its size by half: split the binary string in the middle, build
  # a bit vector from each half and combine the two halves with the | operator...
  my $BinaryString = $This->GetBitsAsBinaryString();
  my $HalfLength = length($BinaryString)/2;

  my $FirstHalfBinaryString = substr($BinaryString, 0, $HalfLength);
  my $SecondHalfBinaryString = substr($BinaryString, $HalfLength);

  my $FirstHalf = NewFromBinaryString($FirstHalfBinaryString);
  my $SecondHalf = NewFromBinaryString($SecondHalfBinaryString);

  my $Folded = $FirstHalf | $SecondHalf;

  return $Folded->_FoldFingerprintsBitVector($Mode, $Value);
}

# Is first bit vector subset of second bit vector?
#
# For a bit vector to be a subset of another bit vector, both vectors must be of
# the same size and the bit positions set in first vector must also be set in the
# second bit vector.
#
# This functionality can be either invoked as a class function or an object method.
#
# Returns 1 or 0.
#
sub IsSubSet ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  return 0 if $FingerprintsBitVectorA->GetSize() != $FingerprintsBitVectorB->GetSize();

  # A is a subset of B when A & B keeps every bit that is set in A...
  my $CommonBitsVector = $FingerprintsBitVectorA & $FingerprintsBitVectorB;

  return ($FingerprintsBitVectorA->GetNumOfSetBits() == $CommonBitsVector->GetNumOfSetBits()) ? 1 : 0;
}

# Return a string containing vector values...
#
# The specified size is reported only when it differs from the bit vector size.
#
sub StringifyFingerprintsBitVector {
  my($This) = @_;

  # BitVector size information...
  my $SizeInfo = ($This->{SpecifiedSize} != $This->GetSize())
    ? "SpecifiedSize: " . $This->{SpecifiedSize} . "; BitVectorSize: " . $This->GetSize()
    : "BitVectorSize: " . $This->GetSize();

  my $NumOfSetBits = $This->GetNumOfSetBits();
  my $BitDensity = $This->GetFingerprintsBitDensity();

  # Size information, set bits summary and BitVector values...
  return $SizeInfo . "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity" . "; BitVector: " . $This->StringifyBitVector();
}

# For two fingerprints bit vectors A and B of same size, let:
#
#  Na = Number of bits set to "1" in A
#  Nb = Number of bits set to "1" in B
#  Nc = Number of bits set to "1" in both A and B
#  Nd = Number of bits set to "0" in both A and B
#
#  Nt = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
#
#  Na - Nc = Number of bits set to "1" in A but not in B
#  Nb - Nc = Number of bits set to "1" in B but not in A
#
# Various similarity coefficients [ Ref 40 - 42 ] for a pair of bit vectors A and B are
# defined as follows:
#
# . BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / (  SQRT ( Nc * Nd ) + Nc + ( Na - Nc )  + ( Nb - Nc ) ) ( same as Buser )
#
# . Buser: ( SQRT ( Nc * Nd ) + Nc ) / (  SQRT ( Nc * Nd ) + Nc + ( Na - Nc )  + ( Nb - Nc ) ) ( same as BaroniUrbani )
#
# . Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai)
#
# . Dice: (2 * Nc) / ( Na + Nb )
#
# . Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb)
#
# . Forbes: ( Nt * Nc ) / ( Na * Nb )
#
# . Fossum: ( Nt * ( ( Nc - 1/2 ) ** 2 ) ) / ( Na * Nb )
#
# . Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
#
# . Jaccard: Nc /  ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto)
#
# . Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc )
#
# . Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) ) = 0.5 * ( Nc / Na + Nc / Nb )
#
# . Matching: ( Nc + Nd ) / Nt
#
# . McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / (  Na * Nb )
#
# . Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine)
#
# . Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * (  Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
#
# . RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc)  + ( Nb  - Nc) + Nt) = ( Nc + Nd ) / ( Na  + Nb  - 2Nc + Nt)
#
# . RussellRao: Nc / Nt
#
# . Simpson: Nc / MIN ( Na, Nb)
#
# . SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc)  + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc )
#
# . SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
#
# . SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc  )
#
# . Tanimoto: Nc /  ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard)
#
# . Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb )  + Nb)
#
# . Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) )  )
#
#
# Values of Tanimoto/Jaccard and Tversky coefficients depend only on those bits which
# are set to "1" in both A and B. In order to take into account all bit positions, modified versions
# of Tanimoto [ Ref. 42 ] and Tversky [  Ref. 43 ] have been developed.
#
# Let:
#
#  Na' = Number of bits set to "0" in A
#  Nb' = Number of bits set to "0" in B
#  Nc' = Number of bits set to "0" in both A and B
#
# . Tanimoto': Nc' /  ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' )
#
# . Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' )  + Nb')
#
# Then:
#
# . WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto'
#
# . WeightedTversky = beta * Tversky + (1 - beta) * Tversky'
#
#

# Calculate BaroniUrbani similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Same as the Buser coefficient, to which the calculation is delegated.
#
sub BaroniUrbaniSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return BuserSimilarityCoefficient($VectorA, $VectorB);
}

# Calculate Buser similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Buser: ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) )
# Returns 0 when the denominator is 0.
#
sub BuserSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);

  my $Numerator = sqrt($Nc*$Nd) + $Nc;
  my $Denominator = sqrt($Nc*$Nd) + ($Na - $Nc)  + ($Nb - $Nc ) + $Nc;

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate Cosine similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Cosine: Nc / SQRT ( Na * Nb )
# Returns 0 when the denominator is 0.
#
sub CosineSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = sqrt($Na*$Nb);

  return 0 unless $Denominator;
  return $Nc/$Denominator;
}

# Calculate Dice similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  # Dice: (2 * Nc) / ( Na + Nb ); 0 when the denominator is 0...
  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Numerator = 2*$Nc;
  my $Denominator = $Na + $Nb;

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate Dennis similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb )
# Returns 0 when the denominator is 0.
#
sub DennisSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Numerator = $Nc*$Nd - (($Na - $Nc)*($Nb - $Nc));
  my $Denominator = sqrt($Nt*$Na*$Nb);

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate Forbes similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Forbes: ( Nt * Nc ) / ( Na * Nb )
# Returns 0 when the denominator is 0.
#
sub ForbesSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Numerator = $Nt*$Nc;
  my $Denominator = $Na*$Nb;

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate Fossum similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Fossum: ( Nt * ( ( Nc - 1/2 ) ** 2 ) ) / ( Na * Nb )
# Returns 0 when the denominator is 0.
#
sub FossumSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Numerator = $Nt*(($Nc - 0.5)** 2);
  my $Denominator = $Na*$Nb;

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate Hamann similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
# Returns 0 when the denominator is 0.
#
sub HamannSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Numerator = ($Nc + $Nd ) - ($Na - $Nc) - ($Nb - $Nc);
  my $Denominator = $Nt;

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate Jacard similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Same as the Tanimoto coefficient, to which the calculation is delegated.
#
sub JacardSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return TanimotoSimilarityCoefficient($VectorA, $VectorB);
}

# Calculate Kulczynski1 similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub Kulczynski1SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  # Kulczynski1: Nc / ( Na + Nb - 2Nc ); 0 when the denominator is 0...
  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Na + $Nb - 2*$Nc;

  return 0 unless $Denominator;
  return $Nc/$Denominator;
}

# Calculate Kulczynski2 similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Kulczynski2: 0.5 * ( Nc / Na + Nc / Nb ), evaluated over the common denominator Na * Nb.
# Returns 0 when the denominator is 0.
#
sub Kulczynski2SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Numerator = 0.5*($Na*$Nc + $Nb*$Nc);
  my $Denominator = $Na*$Nb;

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate Matching similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Matching: ( Nc + Nd ) / Nt
# Returns 0 when the denominator is 0.
#
sub MatchingSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Numerator = $Nc + $Nd;
  my $Denominator = $Nt;

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate McConnaughey similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub McConnaugheySimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  # McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc ) ) / ( Na * Nb ); 0 when the
  # denominator is 0...
  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Numerator = $Nc**2 - (($Na - $Nc)*($Nb - $Nc));
  my $Denominator = $Na*$Nb;

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate Ochiai similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Same as the Cosine coefficient, to which the calculation is delegated.
#
sub OchiaiSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return CosineSimilarityCoefficient($VectorA, $VectorB);
}

# Calculate Pearson similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
# Returns 0 when the denominator is 0.
#
sub PearsonSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);

  my $Numerator = ($Nc*$Nd ) - (($Na - $Nc)*($Nb - $Nc));
  my $Denominator = sqrt($Na*$Nb*($Na - $Nc + $Nd )*($Nb - $Nc + $Nd));

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate RogersTanimoto similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub RogersTanimotoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  # RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) + Nt ); 0 when the
  # denominator is 0...
  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Numerator = $Nc + $Nd;
  my $Denominator = ($Na - $Nc)  + ($Nb  - $Nc) + $Nt;

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate RussellRao similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# RussellRao: Nc / Nt
# Returns 0 when the denominator is 0.
#
sub RussellRaoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  return 0 unless $Nt;
  return $Nc/$Nt;
}

# Calculate Simpson similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Simpson: Nc / MIN ( Na, Nb )
# Returns 0 when the denominator is 0.
#
sub SimpsonSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = min($Na, $Nb);

  return 0 unless $Denominator;
  return $Nc/$Denominator;
}

# Calculate SkoalSneath1 similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath1SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  # SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc ) + 2 * ( Nb - Nc ) ); 0 when the
  # denominator is 0...
  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Nc + 2*($Na - $Nc) + 2*($Nb - $Nc);

  return 0 unless $Denominator;
  return $Nc/$Denominator;
}

# Calculate SkoalSneath2 similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
# Returns 0 when the denominator is 0.
#
sub SkoalSneath2SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Numerator = 2*$Nc + 2*$Nd;
  my $Denominator = $Nc + $Nd + $Nt;

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate SkoalSneath3 similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) )
# Returns 0 when the denominator is 0.
#
sub SkoalSneath3SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);

  my $Numerator = $Nc + $Nd;
  my $Denominator = ($Na - $Nc) + ($Nb - $Nc );

  return 0 unless $Denominator;
  return $Numerator/$Denominator;
}

# Calculate Tanimoto similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Tanimoto: Nc / ( Na + Nb - Nc )
# Returns 0 when the denominator is 0.
#
sub TanimotoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Na + $Nb - $Nc;

  return 0 unless $Denominator;
  return $Nc/$Denominator;
}

# Calculate Tversky similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Tversky: Nc / ( alpha * ( Na - Nb ) + Nb )
# Alpha must be defined and lie in the range 0 to 1; anything else is reported
# through croak. Returns 0 when the denominator is 0.
#
sub TverskySimilarityCoefficient ($$$) {
  my($VectorA, $VectorB, $Alpha) = @_;

  unless (defined($Alpha) && ($Alpha >= 0 && $Alpha <= 1)) {
    croak "Error: ${ClassName}->TverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Alpha*($Na - $Nb ) + $Nb;

  return 0 unless $Denominator;
  return $Nc/$Denominator;
}

# Calculate Yule similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Formula, with Na/Nb the set bits in A/B, Nc the bits set in both and Nd the
# bits clear in both (corrected for padding):
#
#   (Nc*Nd - (Na - Nc)*(Nb - Nc)) / (Nc*Nd + (Na - Nc)*(Nb - Nc))
#
# Returns 0 when the denominator is zero.
#
sub YuleSimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
  my($Na, $Nb, $Nc, $Nd, $Numerator, $Denominator);

  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = ($Nc*$Nd) - (($Na - $Nc)*($Nb - $Nc)) ;
  $Denominator = ($Nc*$Nd) + (($Na - $Nc)*($Nb - $Nc)) ;

  return $Denominator ? ($Numerator/$Denominator) : 0;
}

# Calculate WeightedTanimoto similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# The result is Beta*TanimotoForSetBits + (1 - Beta)*TanimotoForClearBits, where
# each Tanimoto term is Nc / (Na + Nb - Nc) computed over set bits and over
# clear bits respectively; a term with a zero denominator contributes 0.
#
# Parameters:
#   . FingerprintsBitVectorA, FingerprintsBitVectorB - bit vectors to compare
#   . Beta - weight of the set bits term; must be defined and >= 0 and <= 1
#
# Croaks when Beta is undefined or outside the allowed range.
#
sub WeightedTanimotoSimilarityCoefficient ($$$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Beta) = @_;
  my($Na, $Nb, $Nc, $TanimotoForSetBits, $TanimotoForClearBits, $Numerator, $Denominator, $WeightedTanimoto);

  if (!(defined($Beta) && ($Beta >= 0 && $Beta <= 1))) {
    croak "Error: ${ClassName}->WeightedTanimotoSimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  # Get Tanimoto for set bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Na + $Nb - $Nc;
  $TanimotoForSetBits = $Denominator ? ($Numerator/$Denominator) : 0;

  # Get Tanimoto for clear bits; Na, Nb and Nc now hold clear bit counts...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Na + $Nb - $Nc;
  $TanimotoForClearBits = $Denominator ? ($Numerator/$Denominator) : 0;

  $WeightedTanimoto = $Beta*$TanimotoForSetBits + (1 - $Beta)*$TanimotoForClearBits;

  return $WeightedTanimoto;
}

# Calculate WeightedTversky similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# The result is Beta*TverskyForSetBits + (1 - Beta)*TverskyForClearBits, where
# each Tversky term is Nc / (Alpha*(Na - Nb) + Nb) computed over set bits and
# over clear bits respectively; a term with a zero denominator contributes 0.
#
# Parameters:
#   . FingerprintsBitVectorA, FingerprintsBitVectorB - bit vectors to compare
#   . Alpha - Tversky weight; must be defined and >= 0 and <= 1
#   . Beta - weight of the set bits term; must be defined and >= 0 and <= 1
#
# Croaks when Alpha or Beta is undefined or outside the allowed range.
#
# Note: The prototype declares four scalars to match the four arguments unpacked
# below. A three scalar prototype makes any function style call supplying both
# Alpha and Beta fail to compile with a "Too many arguments" error.
#
sub WeightedTverskySimilarityCoefficient ($$$$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Alpha, $Beta) = @_;
  my($Na, $Nb, $Nc, $TverskyForSetBits, $TverskyForClearBits, $Numerator, $Denominator, $WeightedTversky);

  if (!(defined($Alpha) && ($Alpha >= 0 && $Alpha <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }
  if (!(defined($Beta) && ($Beta >= 0 && $Beta <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  # Get Tversky for set bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Alpha*($Na - $Nb ) + $Nb;
  $TverskyForSetBits = $Denominator ? ($Numerator/$Denominator) : 0;

  # Get Tversky for clear bits; Na, Nb and Nc now hold clear bit counts...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Alpha*($Na - $Nb ) + $Nb;
  $TverskyForClearBits = $Denominator ? ($Numerator/$Denominator) : 0;

  $WeightedTversky = $Beta*$TverskyForSetBits + (1 - $Beta)*$TverskyForClearBits;

  return $WeightedTversky;
}

# Get number of Na, Nb and Nc bits in bit vector A and B to be used for similarity coefficient calculations...
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # Bits set to "1" in A and in B...
  my $SetBitsInA = $FingerprintsBitVectorA->GetNumOfSetBits();
  my $SetBitsInB = $FingerprintsBitVectorB->GetNumOfSetBits();

  # Bits set to "1" in both A and B...
  my $CommonSetBitsVector = $FingerprintsBitVectorA & $FingerprintsBitVectorB;
  my $SetBitsInBoth = $CommonSetBitsVector->GetNumOfSetBits();

  return ($SetBitsInA, $SetBitsInB, $SetBitsInBoth);
}

# Get number of bits which are clear in both bit vector A and B, i.e. Nd, to be
# used for similarity coefficient calculations...
#
sub _GetNumOfCommonClearBits ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # A bit set in the AND of the complements is "0" in both A and B...
  my $CommonClearBitsVector = ~$FingerprintsBitVectorA & ~$FingerprintsBitVectorB;
  my $CommonClearBits = $CommonClearBitsVector->GetNumOfSetBits();

  # Take out the clear bits used for padding. The correction is applied only once,
  # using the first bit vector which needs it...
  for my $Candidate ($FingerprintsBitVectorA, $FingerprintsBitVectorB) {
    next unless _IsNumOfClearBitsCorrectionRequired($Candidate);

    $CommonClearBits -= _GetNumOfClearBitsCorrection($Candidate);
    last;
  }

  return $CommonClearBits;
}

# Get number of clear bits in bit vector A, in bit vector B and in both of them,
# with padding bits excluded, to be used for similarity coefficient calculations...
#
sub _GetNumOfIndividualAndCommonClearBits ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # Bits set to "0" in A, without the padding bits...
  my $ClearBitsInA = $FingerprintsBitVectorA->GetNumOfClearBits();
  $ClearBitsInA -= _GetNumOfClearBitsCorrection($FingerprintsBitVectorA)
    if _IsNumOfClearBitsCorrectionRequired($FingerprintsBitVectorA);

  # Bits set to "0" in B, without the padding bits...
  my $ClearBitsInB = $FingerprintsBitVectorB->GetNumOfClearBits();
  $ClearBitsInB -= _GetNumOfClearBitsCorrection($FingerprintsBitVectorB)
    if _IsNumOfClearBitsCorrectionRequired($FingerprintsBitVectorB);

  # Bits set to "0" in both A and B...
  my $ClearBitsInBoth = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  return ($ClearBitsInA, $ClearBitsInB, $ClearBitsInBoth);
}

# Irrespective of specified size, Perl functions used to handle bit data in
# BitVector class automatically set the size to the next nearest power of 2
# and clear the extra bits.
#
# SpecifiedSize is used by this class to process any arbitrary size during
# similarity coefficient calculations.
#
# Assuming the FingerprintsBitVector class only manipulates bits up to the
# specified size, the number of extra bits added by BitVector class is the
# correction to apply to the number of clear bits.
#
sub _GetNumOfClearBitsCorrection {
  my($FingerprintsBitVector) = @_;

  my $NumOfPaddingBits = $FingerprintsBitVector->{Size} - $FingerprintsBitVector->{SpecifiedSize};

  return $NumOfPaddingBits;
}

# Does the bit vector contain padding bits which need to be left out of the
# number of clear bits? Returns 1 for yes and 0 for no.
#
sub _IsNumOfClearBitsCorrectionRequired {
  my($FingerprintsBitVector) = @_;

  if ($FingerprintsBitVector->{Size} > $FingerprintsBitVector->{SpecifiedSize}) {
    return 1;
  }
  return 0;
}
