Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/Fingerprints/FingerprintsBitVector.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:23:18 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4816e4a8ae95 |
|---|---|
| 1 package Fingerprints::FingerprintsBitVector; | |
| 2 # | |
| 3 # $RCSfile: FingerprintsBitVector.pm,v $ | |
| 4 # $Date: 2015/02/28 20:48:54 $ | |
| 5 # $Revision: 1.27 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability or fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use Carp; | |
| 31 use Exporter; | |
| 32 use Scalar::Util (); | |
| 33 use BitVector; | |
| 34 use MathUtil; | |
| 35 use TextUtil (); | |
| 36 | |
| 37 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
| 38 | |
| 39 @ISA = qw(BitVector Exporter); | |
| 40 | |
# Names of the similarity coefficient functions provided by this class...
my(@SimilarityCoefficients) = qw(BaroniUrbaniSimilarityCoefficient BuserSimilarityCoefficient CosineSimilarityCoefficient DiceSimilarityCoefficient DennisSimilarityCoefficient ForbesSimilarityCoefficient FossumSimilarityCoefficient HamannSimilarityCoefficient JacardSimilarityCoefficient Kulczynski1SimilarityCoefficient Kulczynski2SimilarityCoefficient MatchingSimilarityCoefficient McConnaugheySimilarityCoefficient OchiaiSimilarityCoefficient PearsonSimilarityCoefficient RogersTanimotoSimilarityCoefficient RussellRaoSimilarityCoefficient SimpsonSimilarityCoefficient SkoalSneath1SimilarityCoefficient SkoalSneath2SimilarityCoefficient SkoalSneath3SimilarityCoefficient TanimotoSimilarityCoefficient TverskySimilarityCoefficient YuleSimilarityCoefficient WeightedTanimotoSimilarityCoefficient WeightedTverskySimilarityCoefficient);

# Names of the functions which create a new object from a string...
my(@NewFromString) = qw(NewFromBinaryString NewFromHexadecimalString NewFromRawBinaryString);

@EXPORT = qw(IsFingerprintsBitVector);

# Every name used in an export tag must also be listed in @EXPORT or @EXPORT_OK.
# qw() does not interpolate arrays, so the function names are spliced in using a
# regular list instead of the literal words '@NewFromString' and
# '@SimilarityCoefficients'.
@EXPORT_OK = ('GetSupportedSimilarityCoefficients', @NewFromString, @SimilarityCoefficients);

%EXPORT_TAGS = (
  new => [@NewFromString],
  coefficients => [@SimilarityCoefficients],
  all => [@EXPORT, @EXPORT_OK]
);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Use StringifyFingerprintsBitVector whenever an object is interpolated into a string...
use overload '""' => 'StringifyFingerprintsBitVector';
| 61 | |
# Class constructor...
#
# Creates the underlying object using the parent class constructor, blesses it
# into the invoking class (or the class of the invoking object) and records the
# specified size.
#
sub new {
  my $Class = shift;
  my($Size) = @_;

  # Let the parent class build the object...
  my $This = $Class->SUPER::new($Size);

  my $TargetClass = ref($Class) || $Class;
  bless $This, $TargetClass;

  $This->_InitializeFingerprintsBitVector($Size);

  return $This;
}
| 73 | |
# Initialize object data...
#
# Validates the size passed to the constructor and stores it as SpecifiedSize.
# Croaks when the size is undefined or is not greater than zero.
#
# Notes (carried over from the original documentation):
#   . The class, BitVector, used to derive this class provides all the functionality
#     to manipulate bits.
#   . Irrespective of specified size, Perl functions used to handle bit data in
#     BitVector class automatically set the size to the next nearest power of 2.
#     SpecifiedSize is used by this class to process any arbitrary size during
#     similarity coefficient calculations.
#
sub _InitializeFingerprintsBitVector {
  my($This, $Size) = @_;

  unless (defined $Size) {
    croak "Error: ${ClassName}->new: FingerprintsBitVector object instantiated without specifying its size ...";
  }
  croak "Error: ${ClassName}->new: Fingerprints bit vector size, $Size, must be a positive integer..." if $Size <= 0;

  # Specified size of fingerprints...
  $This->{SpecifiedSize} = $Size;
}
| 98 | |
# Initialize class...
#
# Stores the name of this package in the file level variable $ClassName which is
# used in error messages and in class membership checks.
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}
| 104 | |
# Set specified size...
#
# The new value must be greater than zero and must not exceed the Size stored in
# the object; otherwise this method croaks.
#
# Notes (carried over from the original documentation):
#   Irrespective of specified size, Perl functions used to handle bit data in
#   BitVector class automatically set the size to the next nearest power of 2.
#   SpecifiedSize is used by this class to process any arbitrary size during
#   similarity coefficient calculations.
#
sub SetSpecifiedSize {
  my($This, $SpecifiedSize) = @_;

  unless ($SpecifiedSize > 0 && $SpecifiedSize <= $This->{Size}) {
    croak "Error: ${ClassName}->SetSpecifiedSize: Specified size, $SpecifiedSize, is not valid: It must be > 0 && <= ", $This->GetSize()," ...";
  }

  $This->{SpecifiedSize} = $SpecifiedSize;
}
| 121 | |
# Get specified size...
#
# Returns the value stored under SpecifiedSize in the object.
#
sub GetSpecifiedSize {
  my $This = shift;

  return $This->{SpecifiedSize};
}
| 128 | |
# Set ID...
#
# Stores the value as the ID of the object and returns the object itself.
#
sub SetID {
  my $This = shift;

  $This->{ID} = shift;

  return $This;
}
| 137 | |
# Get ID...
#
# Returns the stored ID, or the string 'None' when no ID has been set.
#
sub GetID {
  my($This) = @_;

  if (exists $This->{ID}) {
    return $This->{ID};
  }
  return 'None';
}
| 144 | |
# Set description...
#
# Stores the value as the description of the object and returns the object itself.
#
sub SetDescription {
  my $This = shift;

  $This->{Description} = shift;

  return $This;
}
| 153 | |
# Get description...
#
# Returns the stored description, or the string 'No description available' when
# no description has been set.
#
sub GetDescription {
  my($This) = @_;

  if (exists $This->{Description}) {
    return $This->{Description};
  }
  return 'No description available';
}
| 160 | |
# Set vector type...
#
# Stores the value as the vector type of the object and returns the object itself.
#
sub SetVectorType {
  my $This = shift;

  $This->{VectorType} = shift;

  return $This;
}
| 169 | |
# Get vector type...
#
# Returns the stored vector type, or the string 'FingerprintsBitVector' when no
# vector type has been set.
#
sub GetVectorType {
  my($This) = @_;

  if (exists $This->{VectorType}) {
    return $This->{VectorType};
  }
  return 'FingerprintsBitVector';
}
| 176 | |
# Create a new fingerprints bit vector using a binary string and an optional bits
# order. This functionality can be either invoked as a class function or an
# object method.
#
# When the first parameter is a fingerprints bit vector object, it is treated as
# the invocant and the string and bits order are taken from the parameters after it.
#
sub NewFromBinaryString ($;$) {
  my(@Parameters) = @_;

  # Drop the invocant for object method invocation...
  shift @Parameters if _IsFingerprintsBitVector($Parameters[0]);

  my($String, $BitsOrder) = @Parameters;

  return _NewFingerptinsBitVectorFromString('Binary', $String, $BitsOrder);
}
| 190 | |
# Create a new fingerprints bit vector using a hexadecimal string and an optional
# bits order. This functionality can be either invoked as a class function or an
# object method.
#
# When the first parameter is a fingerprints bit vector object, it is treated as
# the invocant and the string and bits order are taken from the parameters after it.
#
sub NewFromHexadecimalString ($;$) {
  my(@Parameters) = @_;

  # Drop the invocant for object method invocation...
  shift @Parameters if _IsFingerprintsBitVector($Parameters[0]);

  my($String, $BitsOrder) = @Parameters;

  return _NewFingerptinsBitVectorFromString('Hexadecimal', $String, $BitsOrder);
}
| 204 | |
# Creation of a fingerprints bit vector from an octal string is not supported:
# this function always croaks.
#
sub NewFromOctalString ($) {
  my $Message = "Error: ${ClassName}->NewFromOctalString: Creation of fingerprits bit vector from an octal string is not supported ...";

  croak $Message;
}
| 212 | |
# Creation of a fingerprints bit vector from a decimal string is not supported:
# this function always croaks.
#
sub NewFromDecimalString ($;$) {
  my $Message = "Error: ${ClassName}->NewFromDecimalString: Creation of fingerprits bit vector from a decimal string is not supported ...";

  croak $Message;
}
| 219 | |
# Create a new fingerprints bit vector using a raw binary string and an optional
# bits order. This functionality can be either invoked as a class function or an
# object method.
#
# When the first parameter is a fingerprints bit vector object, it is treated as
# the invocant and the string and bits order are taken from the parameters after it.
#
sub NewFromRawBinaryString ($;$) {
  my(@Parameters) = @_;

  # Drop the invocant for object method invocation...
  shift @Parameters if _IsFingerprintsBitVector($Parameters[0]);

  my($String, $BitsOrder) = @Parameters;

  return _NewFingerptinsBitVectorFromString('RawBinary', $String, $BitsOrder);
}
| 233 | |
# Create a new fingerprints bit vector from a string...
#
# Parameters:
#   $Format    - String format name passed on to the BitVector helpers; the
#                callers in this file use 'Binary', 'Hexadecimal' and 'RawBinary'.
#   $String    - Bits data in the specified format.
#   $BitsOrder - Optional bits order passed on unchanged to _SetBitsAsString.
#
# Returns a new Fingerprints::FingerprintsBitVector object whose size is calculated
# from the string by BitVector::_CalculateStringSizeInBits and whose bits are set
# from the string.
#
sub _NewFingerptinsBitVectorFromString ($$;$) {
  my($Format, $String, $BitsOrder) = @_;

  my $Size = BitVector::_CalculateStringSizeInBits($Format, $String);

  # Use an explicit class method call instead of the ambiguous indirect object
  # syntax "new Class(...)"...
  my $FingerprintsBitVector = Fingerprints::FingerprintsBitVector->new($Size);
  $FingerprintsBitVector->_SetBitsAsString($Format, $String, $BitsOrder);

  return $FingerprintsBitVector;
}
| 248 | |
# Get fingerprint bits as a hexadecimal string using an optional bits order...
#
sub GetBitsAsHexadecimalString {
  my $This = shift;
  my($BitsOrder) = @_;

  return $This->_GetFingerprintBitsAsString('Hexadecimal', $BitsOrder);
}
| 256 | |
# Retrieval of fingerprint bits as an octal string is not supported: this method
# always croaks.
#
sub GetBitsAsOctalString {
  my $Message = "Error: ${ClassName}->GetBitsAsOctalString: Retrieval of fingerprits bits as an octal string is not supported ...";

  croak $Message;
}
| 264 | |
# Retrieval of fingerprint bits as a decimal string is not supported: this method
# always croaks.
#
# The error message now names this method; it previously reported
# GetBitsAsOctalString as the source of the error.
#
sub GetBitsAsDecimalString {
  my($This, $BitsOrder) = @_;

  croak "Error: ${ClassName}->GetBitsAsDecimalString: Retrieval of fingerprits bits as a decimal string is not supported ...";
}
| 272 | |
# Get fingerprint bits as a binary string containing 1s and 0s using an optional
# bits order...
#
sub GetBitsAsBinaryString {
  my $This = shift;
  my($BitsOrder) = @_;

  return $This->_GetFingerprintBitsAsString('Binary', $BitsOrder);
}
| 280 | |
# Get fingerprint bits as a raw binary string. No bits order is passed on, so the
# default bits order of _GetFingerprintBitsAsString is used...
#
sub GetBitsAsRawBinaryString {
  my $This = shift;

  return $This->_GetFingerprintBitsAsString('RawBinary');
}
| 288 | |
# Return fingerprint bits as a string in the specified format...
#
# A missing, undefined or false bits order defaults to 'Ascending'.
#
sub _GetFingerprintBitsAsString {
  my($This, $Format, $BitsOrder) = @_;

  $BitsOrder ||= 'Ascending';

  return $This->_GetBitsAsString($Format, $BitsOrder);
}
| 298 | |
# Is it a fingerprints bit vector object? Public wrapper around
# _IsFingerprintsBitVector.
#
sub IsFingerprintsBitVector ($) {
  my $Candidate = shift;

  return _IsFingerprintsBitVector($Candidate);
}
| 305 | |
# Is it a fingerprints bit vector object?
#
# Returns 1 when the parameter is a blessed reference whose class is, or is derived
# from, the class named by $ClassName; otherwise returns 0.
#
sub _IsFingerprintsBitVector {
  my($Object) = @_;

  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
| 312 | |
# Return the contents of @SimilarityCoefficients, the list of supported similarity
# coefficient function names...
#
sub GetSupportedSimilarityCoefficients () {
  return @SimilarityCoefficients;
}
| 318 | |
# Get bit density for fingerprints bit vector corresponding to on bits...
#
# Returns the value of GetDensityOfSetBits passed through round() with a second
# argument of 2.
#
sub GetFingerprintsBitDensity {
  my $This = shift;

  my $BitDensity = $This->GetDensityOfSetBits();

  return round($BitDensity, 2);
}
| 329 | |
# Fold fingerprints bit vector by recursively reducing its size by half until size
# is less than or equal to specified size...
#
# Croaks unless the specified size is greater than zero and not larger than the
# current size. Returns the object itself when no folding is needed; otherwise
# returns the result of _FoldFingerprintsBitVector.
#
sub FoldFingerprintsBitVectorBySize {
  my($This, $Size) = @_;

  unless ($Size > 0 && $Size <= $This->GetSize()) {
    croak "Error: ${ClassName}->FoldFingerprintsBitVectorBySize: Specified size, $Size, is not valid: It must be > 0 && <= ", $This->GetSize()," ...";
  }

  return ($This->GetSize() <= $Size) ? $This : $This->_FoldFingerprintsBitVector('BySize', $Size);
}
| 345 | |
# Fold fingerprints bit vector by recursively reducing its size by half until bit
# density of set bits is greater than or equal to specified density...
#
# Croaks unless the specified density is greater than zero and not larger than 1.
# Returns the object itself when its density is already high enough; otherwise
# returns the result of _FoldFingerprintsBitVector.
#
sub FoldFingerprintsBitVectorByDensity {
  my($This, $Density) = @_;

  unless ($Density > 0 && $Density <= 1) {
    croak "Error: ${ClassName}->FoldFingerprintsBitVectorByDensity: Specified bit density, $Density, is not valid: It must be > 0 && <= 1 ...";
  }

  return ($This->GetDensityOfSetBits() >= $Density) ? $This : $This->_FoldFingerprintsBitVector('ByDensity', $Density);
}
| 361 | |
# Fold fingerprints bit vector using size or density and return folded fingerprint
# bit vector...
#
# Parameters:
#   $Mode  - 'BySize' or 'ByDensity' (case insensitive); for any other mode the
#            object is returned without folding.
#   $Value - Target size or target density of set bits.
#
# Each step splits the binary string of the vector into two halves, creates a bit
# vector from each half, and combines them with the "|" operator. The method then
# calls itself on the combined vector until the target is reached or the size is
# down to 8 bits.
#
sub _FoldFingerprintsBitVector {
  my($This, $Mode, $Value) = @_;

  # Fold upto size of 8 bits...
  return $This if $This->GetSize() <= 8;

  # Stop as soon as the requested size or density has been reached...
  if ($Mode =~ /^BySize$/i) {
    return $This if $This->GetSize() <= $Value;
  }
  elsif ($Mode =~ /^ByDensity$/i) {
    return $This if $This->GetDensityOfSetBits() >= $Value;
  }
  else {
    return $This;
  }

  # Reduce the size by half...
  my $BinaryString = $This->GetBitsAsBinaryString();
  my $HalfLength = (length $BinaryString)/2;

  my $FirstHalf = NewFromBinaryString(substr($BinaryString, 0, $HalfLength));
  my $SecondHalf = NewFromBinaryString(substr($BinaryString, $HalfLength));

  my $Folded = $FirstHalf | $SecondHalf;

  return $Folded->_FoldFingerprintsBitVector($Mode, $Value);
}
| 403 | |
# Is first bit vector subset of second bit vector?
#
# For a bit vector to be a subset of another bit vector, both vectors must be of
# the same size and the bit positions set in the first vector must also be set in
# the second bit vector. This is checked by comparing the number of set bits in
# the first vector against the number of set bits in the result of the "&" operator
# applied to both vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub IsSubSet ($$) {
  my($SubSetCandidate, $SuperSetCandidate) = @_;

  return 0 if $SubSetCandidate->GetSize() != $SuperSetCandidate->GetSize();

  my $CommonBits = $SubSetCandidate & $SuperSetCandidate;

  return ($SubSetCandidate->GetNumOfSetBits() == $CommonBits->GetNumOfSetBits()) ? 1 : 0;
}
| 424 | |
# Return a string describing the fingerprints bit vector...
#
# The string contains the size information (SpecifiedSize is included only when it
# differs from the bit vector size), the number of on bits, the bit density, and
# the output of StringifyBitVector.
#
sub StringifyFingerprintsBitVector {
  my($This) = @_;

  # BitVector size information...
  my $Text = ($This->{SpecifiedSize} != $This->GetSize())
    ? "SpecifiedSize: " . $This->{SpecifiedSize} . "; BitVectorSize: " . $This->GetSize()
    : "BitVectorSize: " . $This->GetSize();

  my $NumOfSetBits = $This->GetNumOfSetBits();
  my $BitDensity = $This->GetFingerprintsBitDensity();
  $Text .= "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";

  # BitVector values...
  return $Text . "; BitVector: " . $This->StringifyBitVector();
}
| 449 | |
| 450 # For two fingerprints bit vectors A and B of same size, let: | |
| 451 # | |
| 452 # Na = Number of bits set to "1" in A | |
| 453 # Nb = Number of bits set to "1" in B | |
| 454 # Nc = Number of bits set to "1" in both A and B | |
| 455 # Nd = Number of bits set to "0" in both A and B | |
| 456 # | |
| 457 # Nt = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd | |
| 458 # | |
| 459 # Na - Nc = Number of bits set to "1" in A but not in B | |
| 460 # Nb - Nc = Number of bits set to "1" in B but not in A | |
| 461 # | |
| 462 # Various similarity coefficients [ Ref 40 - 42 ] for a pair of bit vectors A and B are | |
| 463 # defined as follows: | |
| 464 # | |
| 465 # . BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as Buser ) | |
| 466 # | |
| 467 # . Buser: ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as BaroniUrbani ) | |
| 468 # | |
| 469 # . Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai) | |
| 470 # | |
| 471 # . Dice: (2 * Nc) / ( Na + Nb ) | |
| 472 # | |
| 473 # . Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb) | |
| 474 # | |
| 475 # . Forbes: ( Nt * Nc ) / ( Na * Nb ) | |
| 476 # | |
| 477 # . Fossum: ( Nt * ( Nc - 1/2 ) ** 2 ) / ( Na * Nb ) | |
| 478 # | |
| 479 # . Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt | |
| 480 # | |
| 481 # . Jaccard: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto) | |
| 482 # | |
| 483 # . Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc ) | |
| 484 # | |
| 485 # . Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) ) = 0.5 * ( Nc / Na + Nc / Nb ) | |
| 486 # | |
| 487 # . Matching: ( Nc + Nd ) / Nt | |
| 488 # | |
| 489 # . McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / ( Na * Nb ) | |
| 490 # | |
| 491 # . Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine) | |
| 492 # | |
| 493 # . Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) ) | |
| 494 # | |
| 495 # . RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc) + ( Nb - Nc) + Nt) = ( Nc + Nd ) / ( Na + Nb - 2Nc + Nt) | |
| 496 # | |
| 497 # . RussellRao: Nc / Nt | |
| 498 # | |
| 499 # . Simpson: Nc / MIN ( Na, Nb) | |
| 500 # | |
| 501 # . SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc) + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc ) | |
| 502 # | |
| 503 # . SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt ) | |
| 504 # | |
| 505 # . SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc ) | |
| 506 # | |
| 507 # . Tanimoto: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard) | |
| 508 # | |
| 509 # . Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb ) + Nb) | |
| 510 # | |
| 511 # . Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) ) ) | |
| 512 # | |
| 513 # | |
| 514 # Values of Tanimoto/Jaccard and Tversky coefficients are dependent on only those bit which | |
| 515 # are set to "1" in both A and B. In order to take into account all bit positions, modified versions | |
| 516 # of Tanimoto [ Ref. 42 ] and Tversky [ Ref. 43 ] have been developed. | |
| 517 # | |
| 518 # Let: | |
| 519 # | |
| 520 # Na' = Number of bits set to "0" in A | |
| 521 # Nb' = Number of bits set to "0" in B | |
| 522 # Nc' = Number of bits set to "0" in both A and B | |
| 523 # | |
| 524 # . Tanimoto': Nc' / ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' ) | |
| 525 # | |
| 526 # . Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' ) + Nb') | |
| 527 # | |
| 528 # Then: | |
| 529 # | |
| 530 # . WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto' | |
| 531 # | |
| 532 # . WeightedTversky = beta * Tversky + (1 - beta) * Tversky' | |
| 533 # | |
| 534 # | |
| 535 | |
# Calculate BaroniUrbani similarity coefficient for two same size bit vectors. It is
# the same as the Buser similarity coefficient, so the calculation is delegated to
# BuserSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub BaroniUrbaniSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return BuserSimilarityCoefficient($VectorA, $VectorB);
}
| 545 | |
# Calculate Buser similarity coefficient for two same size bit vectors:
#
#   ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) )
#
# where Na and Nb are the numbers of bits set in A and B, Nc is the number of bits
# set in both, and Nd is the number of bits clear in both.
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub BuserSimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  # The square root term is shared by numerator and denominator; calculate it once...
  my $SqrtOfNcNd = sqrt($Nc*$Nd);

  my $Numerator = $SqrtOfNcNd + $Nc;
  my $Denominator = $SqrtOfNcNd + ($Na - $Nc) + ($Nb - $Nc ) + $Nc;

  return $Denominator ? ($Numerator/$Denominator) : 0;
}
| 563 | |
# Calculate Cosine similarity coefficient for two same size bit vectors:
#
#   Nc / SQRT ( Na * Nb )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CosineSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = sqrt($Na*$Nb);
  return 0 unless $Divisor;

  return $Nc/$Divisor;
}
| 579 | |
# Calculate Dice similarity coefficient for two same size bit vectors:
#
#   ( 2 * Nc ) / ( Na + Nb )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $Na + $Nb;
  return 0 unless $Divisor;

  return (2*$Nc)/$Divisor;
}
| 595 | |
# Calculate Dennis similarity coefficient for two same size bit vectors:
#
#   ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb )
#
# where Nt = Na + Nb - Nc + Nd. Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DennisSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Divisor = sqrt($Nt*$Na*$Nb);
  return 0 unless $Divisor;

  return ($Nc*$Nd - (($Na - $Nc)*($Nb - $Nc)))/$Divisor;
}
| 613 | |
# Calculate Forbes similarity coefficient for two same size bit vectors:
#
#   ( Nt * Nc ) / ( Na * Nb )
#
# where Nt = Na + Nb - Nc + Nd. Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub ForbesSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Divisor = $Na*$Nb;
  return 0 unless $Divisor;

  return ($Nt*$Nc)/$Divisor;
}
| 631 | |
# Calculate Fossum similarity coefficient for two same size bit vectors:
#
#   ( Nt * ( Nc - 1/2 ) ** 2 ) / ( Na * Nb )
#
# where Nt = Na + Nb - Nc + Nd. Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub FossumSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Divisor = $Na*$Nb;
  return 0 unless $Divisor;

  return ($Nt*(($Nc - 0.5)** 2))/$Divisor;
}
| 649 | |
# Calculate Hamann similarity coefficient for two same size bit vectors:
#
#   ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
#
# where Nt = Na + Nb - Nc + Nd. Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub HamannSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  return 0 unless $Nt;

  return (($Nc + $Nd ) - ($Na - $Nc) - ($Nb - $Nc))/$Nt;
}
| 667 | |
# Calculate Jacard similarity coefficient for two same size bit vectors. It is the
# same as the Tanimoto similarity coefficient, so the calculation is delegated to
# TanimotoSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub JacardSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return TanimotoSimilarityCoefficient($VectorA, $VectorB);
}
| 677 | |
# Calculate Kulczynski1 similarity coefficient for two same size bit vectors:
#
#   Nc / ( Na + Nb - 2Nc )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub Kulczynski1SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $Na + $Nb - 2*$Nc;
  return 0 unless $Divisor;

  return $Nc/$Divisor;
}
| 693 | |
# Calculate Kulczynski2 similarity coefficient for two same size bit vectors:
#
#   0.5 * ( Na * Nc + Nb * Nc ) / ( Na * Nb ) = 0.5 * ( Nc / Na + Nc / Nb )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub Kulczynski2SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $Na*$Nb;
  return 0 unless $Divisor;

  return (0.5*($Na*$Nc + $Nb*$Nc))/$Divisor;
}
| 709 | |
# Calculate Matching similarity coefficient for two same size bit vectors:
#
#   ( Nc + Nd ) / Nt
#
# where Nt = Na + Nb - Nc + Nd. Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub MatchingSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  return 0 unless $Nt;

  return ($Nc + $Nd)/$Nt;
}
| 727 | |
# Calculate McConnaughey similarity coefficient for two same size bit vectors:
#
#   ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc ) ) / ( Na * Nb )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub McConnaugheySimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $Na*$Nb;
  return 0 unless $Divisor;

  return ($Nc**2 - (($Na - $Nc)*($Nb - $Nc)))/$Divisor;
}
| 743 | |
# Calculate Ochiai similarity coefficient for two same size bit vectors. It is the
# same as the Cosine similarity coefficient, so the calculation is delegated to
# CosineSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub OchiaiSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return CosineSimilarityCoefficient($VectorA, $VectorB);
}
| 753 | |
# Calculate Pearson similarity coefficient for two same size bit vectors:
#
#   ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub PearsonSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);

  my $Divisor = sqrt($Na*$Nb*($Na - $Nc + $Nd )*($Nb - $Nc + $Nd));
  return 0 unless $Divisor;

  return (($Nc*$Nd ) - (($Na - $Nc)*($Nb - $Nc)))/$Divisor;
}
| 771 | |
# Compute RogersTanimoto similarity coefficient for two same size bit vectors:
#
#   ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) + Nt )
#
# A value of 0 is returned when the denominator is zero.
#
# It may be called either as a class function or as an object method.
#
sub RogersTanimotoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $ClearInBoth = _GetNumOfCommonClearBits($VectorA, $VectorB);

  # Nt = Na + Nb - Nc + Nd
  my $TotalBits = $SetInA + $SetInB - $SetInBoth + $ClearInBoth;

  my $Divisor = ($SetInA - $SetInBoth) + ($SetInB - $SetInBoth) + $TotalBits;
  return 0 unless $Divisor;

  return ($SetInBoth + $ClearInBoth)/$Divisor;
}
| 789 | |
# Compute RussellRao similarity coefficient for two same size bit vectors:
#
#   Nc / Nt
#
# A value of 0 is returned when Nt is zero.
#
# It may be called either as a class function or as an object method.
#
sub RussellRaoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $ClearInBoth = _GetNumOfCommonClearBits($VectorA, $VectorB);

  # Nt = Na + Nb - Nc + Nd
  my $TotalBits = $SetInA + $SetInB - $SetInBoth + $ClearInBoth;
  return 0 unless $TotalBits;

  return $SetInBoth/$TotalBits;
}
| 807 | |
# Compute Simpson similarity coefficient for two same size bit vectors:
#
#   Nc / MIN ( Na, Nb )
#
# A value of 0 is returned when the smaller set bit count is zero.
#
# It may be called either as a class function or as an object method.
#
sub SimpsonSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $SmallerSetCount = min($SetInA, $SetInB);
  return 0 unless $SmallerSetCount;

  return $SetInBoth/$SmallerSetCount;
}
| 823 | |
# Compute SkoalSneath1 similarity coefficient for two same size bit vectors:
#
#   Nc / ( Nc + 2 * ( Na - Nc ) + 2 * ( Nb - Nc ) )
#
# A value of 0 is returned when the denominator is zero.
#
# It may be called either as a class function or as an object method.
#
sub SkoalSneath1SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  # Bits set in only one vector carry twice the weight of shared set bits.
  my $Divisor = $SetInBoth + 2*($SetInA - $SetInBoth) + 2*($SetInB - $SetInBoth);
  return 0 unless $Divisor;

  return $SetInBoth/$Divisor;
}
| 839 | |
# Compute SkoalSneath2 similarity coefficient for two same size bit vectors:
#
#   ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
#
# A value of 0 is returned when the denominator is zero.
#
# It may be called either as a class function or as an object method.
#
sub SkoalSneath2SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $ClearInBoth = _GetNumOfCommonClearBits($VectorA, $VectorB);

  # Nt = Na + Nb - Nc + Nd
  my $TotalBits = $SetInA + $SetInB - $SetInBoth + $ClearInBoth;

  my $Divisor = $SetInBoth + $ClearInBoth + $TotalBits;
  return 0 unless $Divisor;

  return (2*$SetInBoth + 2*$ClearInBoth)/$Divisor;
}
| 857 | |
# Calculate SkoalSneath3 similarity coefficient for two same size bit vectors:
#
#   ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) )
#
# Returns 0 when the denominator is zero, i.e. when no bit is set in only one
# of the two vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath3SimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # Na, Nb, Nc: bits set in A, in B and in both; Nd: bits clear in both.
  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  # Total number of bits (Nt) is not part of this formula and is not computed.
  my $Numerator = $Nc + $Nd;
  my $Denominator = ($Na - $Nc) + ($Nb - $Nc);

  return $Denominator ? ($Numerator/$Denominator) : 0;
}
| 875 | |
# Compute Tanimoto similarity coefficient for two same size bit vectors:
#
#   Nc / ( Na + Nb - Nc )
#
# A value of 0 is returned when the denominator is zero.
#
# It may be called either as a class function or as an object method.
#
sub TanimotoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  # Number of bits set in at least one of the two vectors...
  my $SetInEither = $SetInA + $SetInB - $SetInBoth;
  return 0 unless $SetInEither;

  return $SetInBoth/$SetInEither;
}
| 891 | |
# Compute Tversky similarity coefficient for two same size bit vectors:
#
#   Nc / ( alpha * ( Na - Nb ) + Nb )
#
# Alpha must be defined with a value >= 0 and <= 1; otherwise the call croaks.
# A value of 0 is returned when the denominator is zero.
#
# It may be called either as a class function or as an object method.
#
sub TverskySimilarityCoefficient ($$$) {
  my($VectorA, $VectorB, $Alpha) = @_;

  unless (defined($Alpha) && $Alpha >= 0 && $Alpha <= 1) {
    croak "Error: ${ClassName}->TverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $Alpha*($SetInA - $SetInB) + $SetInB;
  return 0 unless $Divisor;

  return $SetInBoth/$Divisor;
}
| 911 | |
# Calculate Yule similarity coefficient for two same size bit vectors:
#
#   ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) /
#       ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) ) )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub YuleSimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # Na, Nb, Nc: bits set in A, in B and in both; Nd: bits clear in both.
  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  # Total number of bits (Nt) is not part of this formula and is not computed.
  my $Numerator = ($Nc*$Nd) - (($Na - $Nc)*($Nb - $Nc));
  my $Denominator = ($Nc*$Nd) + (($Na - $Nc)*($Nb - $Nc));

  return $Denominator ? ($Numerator/$Denominator) : 0;
}
| 929 | |
# Compute WeightedTanimoto similarity coefficient for two same size bit vectors:
#
#   beta * Tanimoto + ( 1 - beta ) * Tanimoto'
#
# where Tanimoto is calculated using set bits and Tanimoto' using clear bits.
# Beta must be defined with a value >= 0 and <= 1; otherwise the call croaks.
#
# It may be called either as a class function or as an object method.
#
sub WeightedTanimotoSimilarityCoefficient ($$$) {
  my($VectorA, $VectorB, $Beta) = @_;

  unless (defined($Beta) && $Beta >= 0 && $Beta <= 1) {
    croak "Error: ${ClassName}->WeightedTanimotoSimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  # Tanimoto for a triplet of counts: in A, in B and in both; 0 for a zero denominator.
  my $TanimotoFromCounts = sub {
    my($CountInA, $CountInB, $CountInBoth) = @_;
    my $CountInEither = $CountInA + $CountInB - $CountInBoth;

    return $CountInEither ? ($CountInBoth/$CountInEither) : 0;
  };

  # Set bits first followed by clear bits...
  my $TanimotoForSetBits = $TanimotoFromCounts->(_GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB));
  my $TanimotoForClearBits = $TanimotoFromCounts->(_GetNumOfIndividualAndCommonClearBits($VectorA, $VectorB));

  return $Beta*$TanimotoForSetBits + (1 - $Beta)*$TanimotoForClearBits;
}
| 960 | |
# Calculate WeightedTversky similarity coefficient for two same size bit vectors:
#
#   beta * Tversky + ( 1 - beta ) * Tversky'
#
# where Tversky is calculated using set bits and Tversky' using clear bits, both
# as Nc / ( alpha * ( Na - Nb ) + Nb ) with the appropriate counts.
#
# Parameters: bit vector A, bit vector B, Alpha and Beta. Alpha and Beta must
# both be defined with values >= 0 and <= 1; otherwise the call croaks.
#
# The prototype accepts the fourth argument (Beta) so that the four argument
# class function call compiles; it is kept optional in the prototype so that
# any existing three argument call still compiles and croaks at run time.
#
# This functionality can be either invoked as a class function or an object method.
#
sub WeightedTverskySimilarityCoefficient ($$$;$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Alpha, $Beta) = @_;
  my($Na, $Nb, $Nc, $TverskyForSetBits, $TverskyForClearBits, $Numerator, $Denominator, $WeightedTversky);

  if (!(defined($Alpha) && ($Alpha >= 0 && $Alpha <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }
  if (!(defined($Beta) && ($Beta >= 0 && $Beta <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  # Get Tversky for set bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Alpha*($Na - $Nb) + $Nb;
  $TverskyForSetBits = $Denominator ? ($Numerator/$Denominator) : 0;

  # Get Tversky for clear bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Alpha*($Na - $Nb) + $Nb;
  $TverskyForClearBits = $Denominator ? ($Numerator/$Denominator) : 0;

  # Combine set and clear bits contributions using Beta as the weight of set bits...
  $WeightedTversky = $Beta*$TverskyForSetBits + (1 - $Beta)*$TverskyForClearBits;

  return $WeightedTversky;
}
| 994 | |
# Get number of set bits in bit vectors A and B to be used for similarity
# coefficient calculations.
#
# Returns a list containing three values:
#
#   Na = Number of bits set to "1" in A
#   Nb = Number of bits set to "1" in B
#   Nc = Number of bits set to "1" in both A and B
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # Number of bits set to "1" in A
  my $Na = $FingerprintsBitVectorA->GetNumOfSetBits();

  # Number of bits set to "1" in B
  my $Nb = $FingerprintsBitVectorB->GetNumOfSetBits();

  # Number of bits set to "1" in both A and B: count set bits in A & B
  my $NcBitVector = $FingerprintsBitVectorA & $FingerprintsBitVectorB;
  my $Nc = $NcBitVector->GetNumOfSetBits();

  return ($Na, $Nb, $Nc);
}
| 1014 | |
# Count Nd, the number of bits clear in both bit vectors A and B, to be used
# for similarity coefficient calculations.
#
sub _GetNumOfCommonClearBits ($$) {
  my($VectorA, $VectorB) = @_;

  # Bits set to "0" in both A and B show up as set bits in ~A & ~B...
  my $ClearInBothVector = ~$VectorA & ~$VectorB;
  my $ClearInBoth = $ClearInBothVector->GetNumOfSetBits();

  # Take out clear bits used for padding: the correction is applied only once
  # using the first vector, A and then B, which requires it.
  for my $Vector ($VectorA, $VectorB) {
    next unless _IsNumOfClearBitsCorrectionRequired($Vector);

    $ClearInBoth = $ClearInBoth - _GetNumOfClearBitsCorrection($Vector);
    last;
  }

  return $ClearInBoth;
}
| 1035 | |
# Get number of clear bits in bit vectors A and B to be used for weighted
# similarity coefficient calculations.
#
# Returns a list containing three values:
#
#   Na' = Number of bits set to "0" in A
#   Nb' = Number of bits set to "0" in B
#   Nc' = Number of bits set to "0" in both A and B
#
# Clear bits used for padding, beyond the specified size, are excluded from
# all three counts.
#
sub _GetNumOfIndividualAndCommonClearBits ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # Number of bits set to "0" in A
  my $Na = $FingerprintsBitVectorA->GetNumOfClearBits();

  # Correct for number of clear bits used for padding...
  if (_IsNumOfClearBitsCorrectionRequired($FingerprintsBitVectorA)) {
    $Na = $Na - _GetNumOfClearBitsCorrection($FingerprintsBitVectorA);
  }

  # Number of bits set to "0" in B
  my $Nb = $FingerprintsBitVectorB->GetNumOfClearBits();

  # Correct for number of clear bits used for padding...
  if (_IsNumOfClearBitsCorrectionRequired($FingerprintsBitVectorB)) {
    $Nb = $Nb - _GetNumOfClearBitsCorrection($FingerprintsBitVectorB);
  }

  # Number of bits set to "0" in both A and B; padding correction is already
  # applied by _GetNumOfCommonClearBits.
  my $Nc = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  return ($Na, $Nb, $Nc);
}
| 1063 | |
| 1064 # Irrespective of specified size, Perl functions used to handle bit data in | |
| 1065 # BitVector class automatically set the size to the next nearest power of 2 | |
| 1066 # and clear the extra bits. | |
| 1067 # | |
| 1068 # SpecifiedSize is used by this class to process any arbitrary size during similarity | |
| 1069 # coefficient calculations. | |
| 1070 # | |
| 1071 # Assuming the FingerprintsBitVector class only manipulates bits up to specified | |
| 1072 # size, a correction for the extra bits added by BitVector class needs to be applied | |
| 1073 # to number of clear bits. | |
| 1074 # | |
sub _GetNumOfClearBitsCorrection {
  my($Vector) = @_;

  # Number of extra bits is the difference between Size and SpecifiedSize
  # values stored in the object.
  my $ActualSize = $Vector->{Size};
  my $RequestedSize = $Vector->{SpecifiedSize};

  return ($ActualSize - $RequestedSize);
}
| 1080 | |
# Check whether a correction to number of clear bits is needed: returns 1 when
# Size of the bit vector is greater than its SpecifiedSize and 0 otherwise.
#
sub _IsNumOfClearBitsCorrectionRequired {
  my($Vector) = @_;

  if ($Vector->{Size} > $Vector->{SpecifiedSize}) {
    return 1;
  }

  return 0;
}
| 1088 | |
| 1089 | |
| 1090 1; | |
| 1091 | |
| 1092 __END__ | |
| 1093 | |
| 1094 =head1 NAME | |
| 1095 | |
| 1096 FingerprintsBitVector | |
| 1097 | |
| 1098 =head1 SYNOPSIS | |
| 1099 | |
| 1100 use Fingerprints::FingerprintsBitVector; | |
| 1101 | |
| 1102 use Fingerprints::FingerprintsBitVector qw(:coefficients); | |
| 1103 | |
| 1104 use Fingerprints::FingerprintsBitVector qw(:all); | |
| 1105 | |
| 1106 =head1 DESCRIPTION | |
| 1107 | |
| 1108 B<FingerprintsBitVector> class provides the following methods: | |
| 1109 | |
| 1110 new, BaroniUrbaniSimilarityCoefficient, BuserSimilarityCoefficient, | |
| 1111 CosineSimilarityCoefficient, DennisSimilarityCoefficient, | |
| 1112 DiceSimilarityCoefficient, FoldFingerprintsBitVectorByDensity, | |
| 1113 FoldFingerprintsBitVectorBySize, ForbesSimilarityCoefficient, | |
| 1114 FossumSimilarityCoefficient, GetBitsAsBinaryString, GetBitsAsDecimalString, | |
| 1115 GetBitsAsHexadecimalString, GetBitsAsOctalString, GetBitsAsRawBinaryString, | |
| 1116 GetDescription, GetFingerprintsBitDensity, GetID, GetSpecifiedSize, | |
| 1117 GetSupportedSimilarityCoefficients, GetVectorType, HamannSimilarityCoefficient, | |
| 1118 IsFingerprintsBitVector, IsSubSet, JacardSimilarityCoefficient, | |
| 1119 Kulczynski1SimilarityCoefficient, Kulczynski2SimilarityCoefficient, | |
| 1120 MatchingSimilarityCoefficient, McConnaugheySimilarityCoefficient, | |
| 1121 NewFromBinaryString, NewFromDecimalString, NewFromHexadecimalString, | |
| 1122 NewFromOctalString, NewFromRawBinaryString, OchiaiSimilarityCoefficient, | |
| 1123 PearsonSimilarityCoefficient, RogersTanimotoSimilarityCoefficient, | |
| 1124 RussellRaoSimilarityCoefficient, SetDescription, SetID, SetSpecifiedSize, | |
| 1125 SetVectorType, SimpsonSimilarityCoefficient, SkoalSneath1SimilarityCoefficient, | |
| 1126 SkoalSneath2SimilarityCoefficient, SkoalSneath3SimilarityCoefficient, | |
| 1127 StringifyFingerprintsBitVector, TanimotoSimilarityCoefficient, | |
| 1128 TverskySimilarityCoefficient, WeightedTanimotoSimilarityCoefficient, | |
| 1129 WeightedTverskySimilarityCoefficient, YuleSimilarityCoefficient | |
| 1130 | |
| 1131 The methods available to create fingerprints bit vector from strings and to calculate similarity | |
| 1132 coefficient between two bit vectors can also be invoked as class functions. | |
| 1133 | |
| 1134 B<FingerprintsBitVector> class is derived from B<BitVector> class which provides the functionality | |
| 1135 to manipulate bits. | |
| 1136 | |
| 1137 For two fingerprints bit vectors A and B of same size, let: | |
| 1138 | |
| 1139 Na = Number of bits set to "1" in A | |
| 1140 Nb = Number of bits set to "1" in B | |
| 1141 Nc = Number of bits set to "1" in both A and B | |
| 1142 Nd = Number of bits set to "0" in both A and B | |
| 1143 | |
| 1144 Nt = Number of bits set to "1" or "0" in A or B (Size of A or B) | |
| 1145 Nt = Na + Nb - Nc + Nd | |
| 1146 | |
| 1147 Na - Nc = Number of bits set to "1" in A but not in B | |
| 1148 Nb - Nc = Number of bits set to "1" in B but not in A | |
| 1149 | |
| 1150 Then, various similarity coefficients [ Ref. 40 - 42 ] for a pair of bit vectors A and B are | |
| 1151 defined as follows: | |
| 1152 | |
| 1153 BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as Buser ) | |
| 1154 | |
| 1155 Buser: ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as BaroniUrbani ) | |
| 1156 | |
| 1157 Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai) | |
| 1158 | |
| 1159 Dice: (2 * Nc) / ( Na + Nb ) | |
| 1160 | |
| 1161 Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb) | |
| 1162 | |
| 1163 Forbes: ( Nt * Nc ) / ( Na * Nb ) | |
| 1164 | |
| 1165 Fossum: ( Nt * ( ( Nc - 1/2 ) ** 2 ) ) / ( Na * Nb ) | |
| 1166 | |
| 1167 Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt | |
| 1168 | |
| 1169 Jaccard: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto) | |
| 1170 | |
| 1171 Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc ) | |
| 1172 | |
| 1173 Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) ) | |
| 1174 = 0.5 * ( Nc / Na + Nc / Nb ) | |
| 1175 | |
| 1176 Matching: ( Nc + Nd ) / Nt | |
| 1177 | |
| 1178 McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / ( Na * Nb ) | |
| 1179 | |
| 1180 Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine) | |
| 1181 | |
| 1182 Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) ) | |
| 1183 | |
| 1184 RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc) + ( Nb - Nc) + Nt) = ( Nc + Nd ) / ( Na + Nb - 2Nc + Nt) | |
| 1185 | |
| 1186 RussellRao: Nc / Nt | |
| 1187 | |
| 1188 Simpson: Nc / MIN ( Na, Nb) | |
| 1189 | |
| 1190 SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc) + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc ) | |
| 1191 | |
| 1192 SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt ) | |
| 1193 | |
| 1194 SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc ) | |
| 1195 | |
| 1196 Tanimoto: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard) | |
| 1197 | |
| 1198 Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb ) + Nb) | |
| 1199 | |
| 1200 Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) ) ) | |
| 1201 | |
| 1202 The values of Tanimoto/Jaccard and Tversky coefficients are dependent on only those bits which | |
| 1203 are set to "1" in both A and B. In order to take into account all bit positions, modified versions | |
| 1204 of Tanimoto [ Ref. 42 ] and Tversky [ Ref. 43 ] have been developed. | |
| 1205 | |
| 1206 Let: | |
| 1207 | |
| 1208 Na' = Number of bits set to "0" in A | |
| 1209 Nb' = Number of bits set to "0" in B | |
| 1210 Nc' = Number of bits set to "0" in both A and B | |
| 1211 | |
| 1212 Tanimoto': Nc' / ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' ) | |
| 1213 | |
| 1214 Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' ) + Nb') | |
| 1215 | |
| 1216 Then: | |
| 1217 | |
| 1218 WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto' | |
| 1219 | |
| 1220 WeightedTversky = beta * Tversky + (1 - beta) * Tversky' | |
| 1221 | |
| 1222 =head2 METHODS | |
| 1223 | |
| 1224 =over 4 | |
| 1225 | |
| 1226 =item B<new> | |
| 1227 | |
| 1228 $NewFPBitVector = new Fingerprints::FingerprintsBitVector($Size); | |
| 1229 | |
| 1230 Creates a new I<FingerprintsBitVector> object of size I<Size> and returns newly created | |
| 1231 B<FingerprintsBitVector>. Bit numbers range from 0 to 1 less than I<Size>. | |
| 1232 | |
| 1233 =item B<BaroniUrbaniSimilarityCoefficient> | |
| 1234 | |
| 1235 $Value = $FingerprintsBitVector->BaroniUrbaniSimilarityCoefficient( | |
| 1236 $OtherFingerprintBitVector); | |
| 1237 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1238 BaroniUrbaniSimilarityCoefficient( | |
| 1239 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1240 | |
| 1241 Returns value of I<BaroniUrbani> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1242 | |
| 1243 =item B<BuserSimilarityCoefficient> | |
| 1244 | |
| 1245 $Value = $FingerprintsBitVector->BuserSimilarityCoefficient( | |
| 1246 $OtherFingerprintBitVector); | |
| 1247 $Value = Fingerprints::FingerprintsBitVector::BuserSimilarityCoefficient( | |
| 1248 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1249 | |
| 1250 Returns value of I<Buser> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1251 | |
| 1252 =item B<CosineSimilarityCoefficient> | |
| 1253 | |
| 1254 $Value = $FingerprintsBitVector->CosineSimilarityCoefficient( | |
| 1255 $OtherFingerprintBitVector); | |
| 1256 $Value = Fingerprints::FingerprintsBitVector::CosineSimilarityCoefficient( | |
| 1257 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1258 | |
| 1259 Returns value of I<Cosine> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1260 | |
| 1261 =item B<DennisSimilarityCoefficient> | |
| 1262 | |
| 1263 $Value = $FingerprintsBitVector->DennisSimilarityCoefficient( | |
| 1264 $OtherFingerprintBitVector); | |
| 1265 $Value = Fingerprints::FingerprintsBitVector::DennisSimilarityCoefficient( | |
| 1266 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1267 | |
| 1268 Returns value of I<Dennis> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1269 | |
| 1270 =item B<DiceSimilarityCoefficient> | |
| 1271 | |
| 1272 $Value = $FingerprintsBitVector->DiceSimilarityCoefficient( | |
| 1273 $OtherFingerprintBitVector); | |
| 1274 $Value = Fingerprints::FingerprintsBitVector::DiceSimilarityCoefficient( | |
| 1275 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1276 | |
| 1277 Returns value of I<Dice> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1278 | |
| 1279 =item B<FoldFingerprintsBitVectorByDensity> | |
| 1280 | |
| 1281 $FingerprintsBitVector->FoldFingerprintsBitVectorByDensity($Density); | |
| 1282 | |
| 1283 Folds I<FingerprintsBitVector> by recursively reducing its size by half until bit density of set bits is | |
| 1284 greater than or equal to specified I<Density> and returns folded I<FingerprintsBitVector>. | |
| 1285 | |
| 1286 =item B<FoldFingerprintsBitVectorBySize> | |
| 1287 | |
| 1288 $FingerprintsBitVector->FoldFingerprintsBitVectorBySize($Size); | |
| 1289 | |
| 1290 Folds I<FingerprintsBitVector> by recursively reducing its size by half until size is less than or equal to | |
| 1291 specified I<Size> and returns folded I<FingerprintsBitVector>. | |
| 1292 | |
| 1293 =item B<ForbesSimilarityCoefficient> | |
| 1294 | |
| 1295 $Value = $FingerprintsBitVector->ForbesSimilarityCoefficient( | |
| 1296 $OtherFingerprintBitVector); | |
| 1297 $Value = Fingerprints::FingerprintsBitVector::ForbesSimilarityCoefficient( | |
| 1298 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1299 | |
| 1300 Returns value of I<Forbes> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1301 | |
| 1302 =item B<FossumSimilarityCoefficient> | |
| 1303 | |
| 1304 $Value = $FingerprintsBitVector->FossumSimilarityCoefficient( | |
| 1305 $OtherFingerprintBitVector); | |
| 1306 $Value = Fingerprints::FingerprintsBitVector::FossumSimilarityCoefficient( | |
| 1307 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1308 | |
| 1309 Returns value of I<Fossum> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1310 | |
| 1311 =item B<GetBitsAsBinaryString> | |
| 1312 | |
| 1313 $BinaryASCIIString = $FingerprintsBitVector->GetBitsAsBinaryString(); | |
| 1314 | |
| 1315 Returns fingerprints as a binary ASCII string containing 0s and 1s. | |
| 1316 | |
| 1317 =item B<GetBitsAsHexadecimalString> | |
| 1318 | |
| 1319 $HexadecimalString = $FingerprintsBitVector->GetBitsAsHexadecimalString(); | |
| 1320 | |
| 1321 Returns fingerprints as a hexadecimal string. | |
| 1322 | |
| 1323 =item B<GetBitsAsRawBinaryString> | |
| 1324 | |
| 1325 $RawBinaryString = $FingerprintsBitVector->GetBitsAsRawBinaryString(); | |
| 1326 | |
| 1327 Returns fingerprints as a raw binary string containing packed bit values for each byte. | |
| 1328 | |
| 1329 =item B<GetDescription> | |
| 1330 | |
| 1331 $Description = $FingerprintsBitVector->GetDescription(); | |
| 1332 | |
| 1333 Returns a string containing description of fingerprints bit vector. | |
| 1334 | |
| 1335 =item B<GetFingerprintsBitDensity> | |
| 1336 | |
| 1337 $BitDensity = $FingerprintsBitVector->GetFingerprintsBitDensity(); | |
| 1338 | |
| 1339 Returns I<BitDensity> of I<FingerprintsBitVector> corresponding to bits set to 1s. | |
| 1340 | |
| 1341 =item B<GetID> | |
| 1342 | |
| 1343 $ID = $FingerprintsBitVector->GetID(); | |
| 1344 | |
| 1345 Returns I<ID> of I<FingerprintsBitVector>. | |
| 1346 | |
| 1347 =item B<GetVectorType> | |
| 1348 | |
| 1349 $VectorType = $FingerprintsBitVector->GetVectorType(); | |
| 1350 | |
| 1351 Returns I<VectorType> of I<FingerprintsBitVector>. | |
| 1352 | |
| 1353 =item B<GetSpecifiedSize> | |
| 1354 | |
| 1355 $Size = $FingerprintsBitVector->GetSpecifiedSize(); | |
| 1356 | |
| 1357 Returns value of specified size for bit vector. | |
| 1358 | |
| 1359 =item B<GetSupportedSimilarityCoefficients> | |
| 1360 | |
| 1361 @SimilarityCoefficient = | |
| 1362 Fingerprints::FingerprintsBitVector::GetSupportedSimilarityCoefficients(); | |
| 1363 | |
| 1364 Returns an array containing names of supported similarity coefficients. | |
| 1365 | |
| 1366 =item B<HamannSimilarityCoefficient> | |
| 1367 | |
| 1368 $Value = $FingerprintsBitVector->HamannSimilarityCoefficient( | |
| 1369 $OtherFingerprintBitVector); | |
| 1370 $Value = Fingerprints::FingerprintsBitVector::HamannSimilarityCoefficient( | |
| 1371 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1372 | |
| 1373 Returns value of I<Hamann> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1374 | |
| 1375 =item B<IsFingerprintsBitVector> | |
| 1376 | |
| 1377 $Status = Fingerprints::FingerprintsBitVector:: | |
| 1378 IsFingerprintsBitVector($Object); | |
| 1379 | |
| 1380 Returns 1 or 0 based on whether I<Object> is a B<FingerprintsBitVector> object. | |
| 1381 | |
| 1382 =item B<IsSubSet> | |
| 1383 | |
| 1384 $Status = $FingerprintsBitVector->IsSubSet($OtherFPBitVector); | |
| 1385 $Status = Fingerprints::FingerprintsBitVector::IsSubSet( | |
| 1386 $FPBitVectorA, $FPBitVectorB); | |
| 1387 | |
| 1388 Returns 1 or 0 based on whether first fingerprints bit vector is a subset of second | |
| 1389 fingerprints bit vector. | |
| 1390 | |
| 1391 For a bit vector to be a subset of another bit vector, both vectors must be of | |
| 1392 the same size and the bit positions set in first vector must also be set in the | |
| 1393 second bit vector. | |
| 1394 | |
| 1395 =item B<JacardSimilarityCoefficient> | |
| 1396 | |
| 1397 $Value = $FingerprintsBitVector->JacardSimilarityCoefficient( | |
| 1398 $OtherFingerprintBitVector); | |
| 1399 $Value = Fingerprints::FingerprintsBitVector::JacardSimilarityCoefficient( | |
| 1400 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1401 | |
| 1402 Returns value of I<Jacard> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1403 | |
| 1404 =item B<Kulczynski1SimilarityCoefficient> | |
| 1405 | |
| 1406 $Value = $FingerprintsBitVector->Kulczynski1SimilarityCoefficient( | |
| 1407 $OtherFingerprintBitVector); | |
| 1408 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1409 Kulczynski1SimilarityCoefficient( | |
| 1410 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1411 | |
| 1412 Returns value of I<Kulczynski1> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1413 | |
| 1414 =item B<Kulczynski2SimilarityCoefficient> | |
| 1415 | |
| 1416 $Value = $FingerprintsBitVector->Kulczynski2SimilarityCoefficient( | |
| 1417 $OtherFingerprintBitVector); | |
| 1418 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1419 Kulczynski2SimilarityCoefficient( | |
| 1420 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1421 | |
| 1422 Returns value of I<Kulczynski2> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1423 | |
| 1424 =item B<MatchingSimilarityCoefficient> | |
| 1425 | |
| 1426 $Value = $FingerprintsBitVector->MatchingSimilarityCoefficient( | |
| 1427 $OtherFingerprintBitVector); | |
| 1428 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1429 MatchingSimilarityCoefficient( | |
| 1430 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1431 | |
| 1432 Returns value of I<Matching> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1433 | |
| 1434 =item B<McConnaugheySimilarityCoefficient> | |
| 1435 | |
| 1436 $Value = $FingerprintsBitVector->McConnaugheySimilarityCoefficient( | |
| 1437 $OtherFingerprintBitVector); | |
| 1438 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1439 McConnaugheySimilarityCoefficient( | |
| 1440 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1441 | |
| 1442 Returns value of I<McConnaughey> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1443 | |
| 1444 =item B<NewFromBinaryString> | |
| 1445 | |
| 1446 $NewFPBitVector = $FingerprintsBitVector->NewFromBinaryString( | |
| 1447 $BinaryString); | |
| 1448 $NewFPBitVector = Fingerprints::FingerprintsBitVector::NewFromBinaryString( | |
| 1449 $BinaryString); | |
| 1450 | |
| 1451 Creates a new I<FingerprintsBitVector> using I<BinaryString> and returns new | |
| 1452 B<FingerprintsBitVector> object. | |
| 1453 | |
| 1454 =item B<NewFromHexadecimalString> | |
| 1455 | |
| 1456 $NewFPBitVector = $FingerprintsBitVector->NewFromHexadecimalString( | |
| 1457 $HexadecimalString); | |
| 1458 $NewFPBitVector = Fingerprints::FingerprintsBitVector:: | |
| 1459 NewFromHexadecimalString( | |
| 1460 $HexadecimalString); | |
| 1461 | |
| 1462 Creates a new I<FingerprintsBitVector> using I<HexadecimalString> and returns new | |
| 1463 B<FingerprintsBitVector> object. | |
| 1464 | |
| 1465 =item B<NewFromRawBinaryString> | |
| 1466 | |
| 1467 $NewFPBitVector = $FingerprintsBitVector->NewFromRawBinaryString( | |
| 1468 $RawBinaryString); | |
| 1469 $NewFPBitVector = Fingerprints::FingerprintsBitVector:: | |
| 1470 NewFromRawBinaryString( | |
| 1471 $RawBinaryString); | |
| 1472 | |
| 1473 Creates a new I<FingerprintsBitVector> using I<RawBinaryString> and returns new | |
| 1474 B<FingerprintsBitVector> object. | |
| 1475 | |
| 1476 =item B<OchiaiSimilarityCoefficient> | |
| 1477 | |
| 1478 $Value = $FingerprintsBitVector->OchiaiSimilarityCoefficient( | |
| 1479 $OtherFingerprintBitVector); | |
| 1480 $Value = Fingerprints::FingerprintsBitVector::OchiaiSimilarityCoefficient( | |
| 1481 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1482 | |
| 1483 Returns value of I<Ochiai> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1484 | |
| 1485 =item B<PearsonSimilarityCoefficient> | |
| 1486 | |
| 1487 $Value = $FingerprintsBitVector->PearsonSimilarityCoefficient( | |
| 1488 $OtherFingerprintBitVector); | |
| 1489 $Value = Fingerprints::FingerprintsBitVector::PearsonSimilarityCoefficient( | |
| 1490 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1491 | |
| 1492 Returns value of I<Pearson> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1493 | |
| 1494 =item B<RogersTanimotoSimilarityCoefficient> | |
| 1495 | |
| 1496 $Value = $FingerprintsBitVector->RogersTanimotoSimilarityCoefficient( | |
| 1497 $OtherFingerprintBitVector); | |
| 1498 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1499 RogersTanimotoSimilarityCoefficient( | |
| 1500 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1501 | |
| 1502 Returns value of I<RogersTanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1503 | |
| 1504 =item B<RussellRaoSimilarityCoefficient> | |
| 1505 | |
| 1506 $Value = $FingerprintsBitVector->RussellRaoSimilarityCoefficient( | |
| 1507 $OtherFingerprintBitVector); | |
| 1508 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1509 RussellRaoSimilarityCoefficient( | |
| 1510 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1511 | |
| 1512 Returns value of I<RussellRao> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1513 | |
| 1514 =item B<SetSpecifiedSize> | |
| 1515 | |
| 1516 $FingerprintsBitVector->SetSpecifiedSize($Size); | |
| 1517 | |
| 1518 Sets specified size for fingerprints bit vector. | |
| 1519 | |
| 1520 Irrespective of specified size, Perl functions used to handle bit data in B<BitVector> class | |
| 1521 automatically set the size to the next nearest power of 2. I<SpecifiedSize> is used by | |
| 1522 B<FingerprintsBitVector> class to process any arbitrary size during similarity coefficient calculations. | |
| 1523 | |
| 1524 =item B<SetDescription> | |
| 1525 | |
| 1526 $FingerprintsBitVector->SetDescription($Description); | |
| 1527 | |
| 1528 Sets I<Description> of fingerprints bit vector and returns I<FingerprintsBitVector>. | |
| 1529 | |
| 1530 =item B<SetID> | |
| 1531 | |
| 1532 $FingerprintsBitVector->SetID($ID); | |
| 1533 | |
| 1534 Sets I<ID> of fingerprints bit vector and returns I<FingerprintsBitVector>. | |
| 1535 | |
| 1536 =item B<SetVectorType> | |
| 1537 | |
| 1538 $FingerprintsBitVector->SetVectorType($VectorType); | |
| 1539 | |
| 1540 Sets I<VectorType> of fingerprints bit vector and returns I<FingerprintsBitVector>. | |
| 1541 | |
| 1542 =item B<SimpsonSimilarityCoefficient> | |
| 1543 | |
| 1544 $Value = $FingerprintsBitVector->SimpsonSimilarityCoefficient( | |
| 1545 $OtherFingerprintBitVector); | |
| 1546 $Value = Fingerprints::FingerprintsBitVector::SimpsonSimilarityCoefficient( | |
| 1547 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1548 | |
| 1549 Returns value of I<Simpson> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1550 | |
| 1551 =item B<SkoalSneath1SimilarityCoefficient> | |
| 1552 | |
| 1553 $Value = $FingerprintsBitVector->SkoalSneath1SimilarityCoefficient( | |
| 1554 $OtherFingerprintBitVector); | |
| 1555 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1556 SkoalSneath1SimilarityCoefficient( | |
| 1557 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1558 | |
| 1559 Returns value of I<SkoalSneath1> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1560 | |
| 1561 =item B<SkoalSneath2SimilarityCoefficient> | |
| 1562 | |
| 1563 $Value = $FingerprintsBitVector->SkoalSneath2SimilarityCoefficient( | |
| 1564 $OtherFingerprintBitVector); | |
| 1565 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1566 SkoalSneath2SimilarityCoefficient( | |
| 1567 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1568 | |
| 1569 Returns value of I<SkoalSneath2> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1570 | |
| 1571 =item B<SkoalSneath3SimilarityCoefficient> | |
| 1572 | |
| 1573 $Value = $FingerprintsBitVector->SkoalSneath3SimilarityCoefficient( | |
| 1574 $OtherFingerprintBitVector); | |
| 1575 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1576 SkoalSneath3SimilarityCoefficient( | |
| 1577 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1578 | |
| 1579 Returns value of I<SkoalSneath3> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1580 | |
| 1581 =item B<StringifyFingerprintsBitVector> | |
| 1582 | |
| 1583 $String = $FingerprintsBitVector->StringifyFingerprintsBitVector(); | |
| 1584 | |
| 1585 Returns a string containing information about I<FingerprintsBitVector> object. | |
| 1586 | |
| 1587 =item B<TanimotoSimilarityCoefficient> | |
| 1588 | |
| 1589 $Value = $FingerprintsBitVector->TanimotoSimilarityCoefficient( | |
| 1590 $OtherFingerprintBitVector); | |
| 1591 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1592 TanimotoSimilarityCoefficient( | |
| 1593 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1594 | |
| 1595 Returns value of I<Tanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1596 | |
| 1597 =item B<TverskySimilarityCoefficient> | |
| 1598 | |
| 1599 $Value = $FingerprintsBitVector->TverskySimilarityCoefficient( | |
| 1600 $OtherFingerprintBitVector, $Alpha); | |
| 1601 $Value = Fingerprints::FingerprintsBitVector:: | |
| 1602 TverskySimilarityCoefficient( | |
| 1603 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Alpha); | |
| 1604 | |
| 1605 Returns value of I<Tversky> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1606 | |
| 1607 =item B<WeightedTanimotoSimilarityCoefficient> | |
| 1608 | |
| 1609 $Value = | |
| 1610 $FingerprintsBitVector->WeightedTanimotoSimilarityCoefficient( | |
| 1611 $OtherFingerprintBitVector, $Beta); | |
| 1612 $Value = | |
| 1613 Fingerprints::FingerprintsBitVector:: | |
| 1614 WeightedTanimotoSimilarityCoefficient( | |
| 1615 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Beta); | |
| 1616 | |
| 1617 Returns value of I<WeightedTanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1618 | |
| 1619 =item B<WeightedTverskySimilarityCoefficient> | |
| 1620 | |
| 1621 $Value = | |
| 1622 $FingerprintsBitVector->WeightedTverskySimilarityCoefficient( | |
| 1623 $OtherFingerprintBitVector, $Alpha, $Beta); | |
| 1624 $Value = | |
| 1625 Fingerprints::FingerprintsBitVector:: | |
| 1626 WeightedTverskySimilarityCoefficient( | |
| 1627 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Alpha, $Beta); | |
| 1628 | |
| 1629 Returns value of I<WeightedTversky> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1630 | |
| 1631 =item B<YuleSimilarityCoefficient> | |
| 1632 | |
| 1633 $Value = $FingerprintsBitVector->YuleSimilarityCoefficient( | |
| 1634 $OtherFingerprintBitVector); | |
| 1635 $Value = Fingerprints::FingerprintsBitVector::YuleSimilarityCoefficient( | |
| 1636 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
| 1637 | |
| 1638 Returns value of I<Yule> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
| 1639 | |
| 1640 =back | |
| 1641 | |
| 1642 =head1 AUTHOR | |
| 1643 | |
| 1644 Manish Sud <msud@san.rr.com> | |
| 1645 | |
| 1646 =head1 SEE ALSO | |
| 1647 | |
| 1648 BitVector.pm, FingerprintsStringUtil.pm, FingerprintsVector.pm, Vector.pm | |
| 1649 | |
| 1650 =head1 COPYRIGHT | |
| 1651 | |
| 1652 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 1653 | |
| 1654 This file is part of MayaChemTools. | |
| 1655 | |
| 1656 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 1657 the terms of the GNU Lesser General Public License as published by the Free | |
| 1658 Software Foundation; either version 3 of the License, or (at your option) | |
| 1659 any later version. | |
| 1660 | |
| 1661 =cut |
