comparison lib/Fingerprints/FingerprintsBitVector.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 package Fingerprints::FingerprintsBitVector;
2 #
3 # $RCSfile: FingerprintsBitVector.pm,v $
4 # $Date: 2015/02/28 20:48:54 $
5 # $Revision: 1.27 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Scalar::Util ();
33 use BitVector;
34 use MathUtil;
35 use TextUtil ();
36
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(BitVector Exporter);

# Names of the similarity coefficient functions offered for export. The
# spellings are the function names and must not be "corrected" here.
my(@SimilarityCoefficients) = qw(
    BaroniUrbaniSimilarityCoefficient
    BuserSimilarityCoefficient
    CosineSimilarityCoefficient
    DiceSimilarityCoefficient
    DennisSimilarityCoefficient
    ForbesSimilarityCoefficient
    FossumSimilarityCoefficient
    HamannSimilarityCoefficient
    JacardSimilarityCoefficient
    Kulczynski1SimilarityCoefficient
    Kulczynski2SimilarityCoefficient
    MatchingSimilarityCoefficient
    McConnaugheySimilarityCoefficient
    OchiaiSimilarityCoefficient
    PearsonSimilarityCoefficient
    RogersTanimotoSimilarityCoefficient
    RussellRaoSimilarityCoefficient
    SimpsonSimilarityCoefficient
    SkoalSneath1SimilarityCoefficient
    SkoalSneath2SimilarityCoefficient
    SkoalSneath3SimilarityCoefficient
    TanimotoSimilarityCoefficient
    TverskySimilarityCoefficient
    YuleSimilarityCoefficient
    WeightedTanimotoSimilarityCoefficient
    WeightedTverskySimilarityCoefficient
);

# Names of the "new from string" constructor functions offered for export.
my(@NewFromString) = qw(NewFromBinaryString NewFromHexadecimalString NewFromRawBinaryString);

@EXPORT = qw(IsFingerprintsBitVector);

# The two name lists must be interpolated as lists. Inside qw() the text
# '@NewFromString' is just a literal word, which would leave none of the
# function names exportable and make the 'new' and 'coefficients' tags below
# refer to symbols missing from @EXPORT_OK.
@EXPORT_OK = (qw(GetSupportedSimilarityCoefficients), @NewFromString, @SimilarityCoefficients);

%EXPORT_TAGS = (
    new => [@NewFromString],
    coefficients => [@SimilarityCoefficients],
    all => [@EXPORT, @EXPORT_OK]
);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Objects stringify through StringifyFingerprintsBitVector.
use overload '""' => 'StringifyFingerprintsBitVector';
61
# Class constructor.
#
# Parameters:
#   $Class - class name or an existing object (its class is reused).
#   $Size  - requested number of bits.
#
# The parent constructor (SUPER::new) creates the object, which is then
# blessed into the invoking class and handed to
# _InitializeFingerprintsBitVector for size validation and bookkeeping.
#
# Returns the new object.
#
sub new {
    my $Invocant = shift;
    my $RequestedSize = shift;

    # Let the parent class build the underlying bit storage...
    my $Object = $Invocant->SUPER::new($RequestedSize);
    bless $Object, (ref($Invocant) || $Invocant);

    $Object->_InitializeFingerprintsBitVector($RequestedSize);

    return $Object;
}
73
# Validate the requested size and record it on the object.
#
# Parameters:
#   $This - object being initialized.
#   $Size - size requested by the caller of new().
#
# Croaks when $Size is undefined or is less than or equal to zero.
#
# Notes (carried over from the original author):
#   . The parent class, BitVector, provides all the bit manipulation.
#   . The bit handling in BitVector may round the allocated size up,
#     irrespective of the specified size. SpecifiedSize keeps the size the
#     caller asked for, so that an arbitrary size can be processed during
#     similarity coefficient calculations.
#
sub _InitializeFingerprintsBitVector {
    my($This, $Size) = @_;

    croak "Error: ${ClassName}->new: FingerprintsBitVector object instantiated without specifying its size ..."
        if !defined $Size;

    croak "Error: ${ClassName}->new: Fingerprints bit vector size, $Size, must be a positive integer..."
        if $Size <= 0;

    # Specified size of fingerprints...
    $This->{SpecifiedSize} = $Size;
}
98
# One-time class setup: remember this package's name in the file-scoped
# $ClassName, which is used in error messages and isa() checks.
#
sub _InitializeClass {
    $ClassName = __PACKAGE__;
}
104
# Set specified size.
#
# Parameters:
#   $This          - fingerprints bit vector object.
#   $SpecifiedSize - new specified size; must be > 0 and no larger than the
#                    object's Size entry.
#
# Croaks when the value is outside that range.
#
# Notes (carried over from the original author):
#   The bit handling in BitVector may round the allocated size up,
#   irrespective of the specified size. SpecifiedSize is what this class uses
#   to process an arbitrary size during similarity coefficient calculations.
#
sub SetSpecifiedSize {
    my($This, $SpecifiedSize) = @_;

    unless ($SpecifiedSize > 0 && $SpecifiedSize <= $This->{Size}) {
        croak "Error: ${ClassName}->SetSpecifiedSize: Specified size, $SpecifiedSize, is not valid: It must be > 0 && <= ", $This->GetSize(), " ...";
    }

    $This->{SpecifiedSize} = $SpecifiedSize;
}
121
# Return the specified size stored on the object (its SpecifiedSize entry).
#
sub GetSpecifiedSize {
    my $This = shift;

    return $This->{SpecifiedSize};
}
128
# Store an identifier on the object.
#
# Returns the object itself, so calls can be chained.
#
sub SetID {
    my $This = shift;
    my $NewID = shift;

    $This->{ID} = $NewID;

    return $This;
}
137
# Return the stored identifier, or the string 'None' when no ID entry exists.
#
sub GetID {
    my $This = shift;

    if (exists $This->{ID}) {
        return $This->{ID};
    }
    return 'None';
}
144
# Store a description on the object.
#
# Returns the object itself, so calls can be chained.
#
sub SetDescription {
    my $This = shift;
    my $Text = shift;

    $This->{Description} = $Text;

    return $This;
}
153
# Return the stored description, or the string 'No description available'
# when no Description entry exists.
#
sub GetDescription {
    my $This = shift;

    if (exists $This->{Description}) {
        return $This->{Description};
    }
    return 'No description available';
}
160
# Store a vector type label on the object.
#
# Returns the object itself, so calls can be chained.
#
sub SetVectorType {
    my $This = shift;
    my $Type = shift;

    $This->{VectorType} = $Type;

    return $This;
}
169
# Return the stored vector type, or the string 'FingerprintsBitVector' when
# no VectorType entry exists.
#
sub GetVectorType {
    my $This = shift;

    if (exists $This->{VectorType}) {
        return $This->{VectorType};
    }
    return 'FingerprintsBitVector';
}
176
# Create a new fingerprints bit vector from a binary string.
#
# May be called as a plain function (String, [BitsOrder]) or as an object
# method ($Object->NewFromBinaryString(String, [BitsOrder])). A leading
# fingerprints bit vector argument is treated as the invocant and ignored.
#
sub NewFromBinaryString ($;$) {
    my @Arguments = @_;

    # Discard the invocant when used as an object method...
    shift @Arguments if _IsFingerprintsBitVector($Arguments[0]);

    my($String, $BitsOrder) = @Arguments;

    return _NewFingerptinsBitVectorFromString('Binary', $String, $BitsOrder);
}
190
# Create a new fingerprints bit vector from a hexadecimal string.
#
# May be called as a plain function (String, [BitsOrder]) or as an object
# method ($Object->NewFromHexadecimalString(String, [BitsOrder])). A leading
# fingerprints bit vector argument is treated as the invocant and ignored.
#
sub NewFromHexadecimalString ($;$) {
    my @Arguments = @_;

    # Discard the invocant when used as an object method...
    shift @Arguments if _IsFingerprintsBitVector($Arguments[0]);

    my($String, $BitsOrder) = @Arguments;

    return _NewFingerptinsBitVectorFromString('Hexadecimal', $String, $BitsOrder);
}
204
# Creation of a fingerprints bit vector from an octal string is not
# supported: this function always croaks.
#
sub NewFromOctalString ($) {
    my $Message = "Error: ${ClassName}->NewFromOctalString: Creation of fingerprits bit vector from an octal string is not supported ...";

    croak $Message;
}
212
# Creation of a fingerprints bit vector from a decimal string is not
# supported: this function always croaks.
#
sub NewFromDecimalString ($;$) {
    my $Message = "Error: ${ClassName}->NewFromDecimalString: Creation of fingerprits bit vector from a decimal string is not supported ...";

    croak $Message;
}
219
# Create a new fingerprints bit vector from a raw binary string.
#
# May be called as a plain function (String, [BitsOrder]) or as an object
# method ($Object->NewFromRawBinaryString(String, [BitsOrder])). A leading
# fingerprints bit vector argument is treated as the invocant and ignored.
#
sub NewFromRawBinaryString ($;$) {
    my @Arguments = @_;

    # Discard the invocant when used as an object method...
    shift @Arguments if _IsFingerprintsBitVector($Arguments[0]);

    my($String, $BitsOrder) = @Arguments;

    return _NewFingerptinsBitVectorFromString('RawBinary', $String, $BitsOrder);
}
233
# Create a new fingerprints bit vector from a string.
#
# Parameters:
#   $Format    - string format name passed by the public constructors:
#                'Binary', 'Hexadecimal' or 'RawBinary'.
#   $String    - bit data in the given format.
#   $BitsOrder - optional bit order, passed through to _SetBitsAsString.
#
# The size is obtained from BitVector::_CalculateStringSizeInBits, a vector
# of that size is created and its bits are set from the string.
#
# Returns the new Fingerprints::FingerprintsBitVector object.
#
sub _NewFingerptinsBitVectorFromString ($$;$) {
    my($Format, $String, $BitsOrder) = @_;

    my $Size = BitVector::_CalculateStringSizeInBits($Format, $String);

    # Direct method call rather than the indirect object form
    # "new Class(...)", whose parsing depends on what else is in scope.
    my $FingerprintsBitVector = Fingerprints::FingerprintsBitVector->new($Size);
    $FingerprintsBitVector->_SetBitsAsString($Format, $String, $BitsOrder);

    return $FingerprintsBitVector;
}
248
# Return the fingerprint bits as a hexadecimal string.
#
# $BitsOrder is optional and is passed to _GetFingerprintBitsAsString, which
# supplies the default.
#
sub GetBitsAsHexadecimalString {
    my($This, $BitsOrder) = @_;

    my $Format = 'Hexadecimal';

    return $This->_GetFingerprintBitsAsString($Format, $BitsOrder);
}
256
# Retrieval of fingerprint bits as an octal string is not supported: this
# method always croaks.
#
sub GetBitsAsOctalString {
    my $Message = "Error: ${ClassName}->GetBitsAsOctalString: Retrieval of fingerprits bits as an octal string is not supported ...";

    croak $Message;
}
264
# Retrieval of fingerprint bits as a decimal string is not supported: this
# method always croaks.
#
# The error message names this method; it previously reported
# GetBitsAsOctalString, which pointed users at the wrong call.
#
sub GetBitsAsDecimalString {
    my($This, $BitsOrder) = @_;

    croak "Error: ${ClassName}->GetBitsAsDecimalString: Retrieval of fingerprits bits as a decimal string is not supported ...";
}
272
# Return the fingerprint bits as a binary string containing 1s and 0s.
#
# $BitsOrder is optional and is passed to _GetFingerprintBitsAsString, which
# supplies the default.
#
sub GetBitsAsBinaryString {
    my($This, $BitsOrder) = @_;

    my $Format = 'Binary';

    return $This->_GetFingerprintBitsAsString($Format, $BitsOrder);
}
280
# Return the fingerprint bits as a raw binary string.
#
# No bit order is accepted here: only the format name is passed on to
# _GetFingerprintBitsAsString.
#
sub GetBitsAsRawBinaryString {
    my $This = shift;

    my $Format = 'RawBinary';

    return $This->_GetFingerprintBitsAsString($Format);
}
288
# Return the fingerprint bits as a string in the requested format.
#
# Parameters:
#   $Format    - format name passed through to _GetBitsAsString.
#   $BitsOrder - bit order; any false value (undef, '', 0) is replaced by
#                'Ascending'.
#
sub _GetFingerprintBitsAsString {
    my($This, $Format, $BitsOrder) = @_;

    $BitsOrder ||= 'Ascending';

    return $This->_GetBitsAsString($Format, $BitsOrder);
}
298
# Is it a fingerprints bit vector object?
#
# Public, exported wrapper around _IsFingerprintsBitVector. Returns 1 or 0.
#
sub IsFingerprintsBitVector ($) {
    my $Candidate = shift;

    return _IsFingerprintsBitVector($Candidate);
}
305
# Is it a fingerprints bit vector object?
#
# Returns 1 when the argument is a blessed reference whose class is, or
# inherits from, this class; 0 otherwise.
#
sub _IsFingerprintsBitVector {
    my $Candidate = shift;

    # Anything that is not an object cannot be a fingerprints bit vector...
    return 0 unless Scalar::Util::blessed($Candidate);

    return $Candidate->isa($ClassName) ? 1 : 0;
}
312
# Return the names of the supported similarity coefficient functions, as
# held in the file-scoped @SimilarityCoefficients array.
#
sub GetSupportedSimilarityCoefficients () {
    return @SimilarityCoefficients;
}
318
# Return the density of set ("on") bits of the fingerprints bit vector.
#
# The value comes from GetDensityOfSetBits and is passed through round()
# with a second argument of 2.
#
sub GetFingerprintsBitDensity {
    my $This = shift;

    my $DensityOfSetBits = $This->GetDensityOfSetBits();

    return round($DensityOfSetBits, 2);
}
329
# Fold the fingerprints bit vector by repeatedly halving its size until the
# size is less than or equal to the specified size.
#
# Parameters:
#   $Size - target size; must be > 0 and no larger than the current size.
#
# Croaks when $Size is outside that range. Returns the object itself when no
# folding is needed, otherwise the folded bit vector produced by
# _FoldFingerprintsBitVector.
#
sub FoldFingerprintsBitVectorBySize {
    my($This, $Size) = @_;

    unless ($Size > 0 && $Size <= $This->GetSize()) {
        croak "Error: ${ClassName}->FoldFingerprintsBitVectorBySize: Specified size, $Size, is not valid: It must be > 0 && <= ", $This->GetSize(), " ...";
    }

    # Already small enough...
    return $This if $This->GetSize() <= $Size;

    return $This->_FoldFingerprintsBitVector('BySize', $Size);
}
345
# Fold the fingerprints bit vector by repeatedly halving its size until the
# density of set bits is greater than or equal to the specified density.
#
# Parameters:
#   $Density - target density; must be > 0 and <= 1.
#
# Croaks when $Density is outside that range. Returns the object itself when
# no folding is needed, otherwise the folded bit vector produced by
# _FoldFingerprintsBitVector.
#
sub FoldFingerprintsBitVectorByDensity {
    my($This, $Density) = @_;

    unless ($Density > 0 && $Density <= 1) {
        croak "Error: ${ClassName}->FoldFingerprintsBitVectorByDensity: Specified bit density, $Density, is not valid: It must be > 0 && <= 1 ...";
    }

    # Already dense enough...
    return $This if $This->GetDensityOfSetBits() >= $Density;

    return $This->_FoldFingerprintsBitVector('ByDensity', $Density);
}
361
# Fold the fingerprints bit vector using size or density and return the
# folded fingerprints bit vector.
#
# Parameters:
#   $Mode  - 'BySize' or 'ByDensity' (matched case-insensitively); any other
#            mode returns the object unchanged.
#   $Value - target size or target density, depending on $Mode.
#
# Each step splits the binary string form of the vector into two halves,
# builds a vector from each half and ORs them together; the result is folded
# again recursively. Folding stops at a size of 8 bits or as soon as the
# target is met.
#
sub _FoldFingerprintsBitVector {
    my($This, $Mode, $Value) = @_;

    # Fold upto size of 8 bits...
    return $This if $This->GetSize() <= 8;

    # Stop as soon as the requested size or density is reached...
    if ($Mode =~ /^BySize$/i) {
        return $This if $This->GetSize() <= $Value;
    }
    elsif ($Mode =~ /^ByDensity$/i) {
        return $This if $This->GetDensityOfSetBits() >= $Value;
    }
    else {
        return $This;
    }

    # Split the bits into two halves...
    my $Bits = $This->GetBitsAsBinaryString();
    my $HalfLength = length($Bits)/2;

    my $LowerHalfBits = substr($Bits, 0, $HalfLength);
    my $UpperHalfBits = substr($Bits, $HalfLength);

    # ... and combine them using bitwise OR.
    my $LowerHalfVector = NewFromBinaryString($LowerHalfBits);
    my $UpperHalfVector = NewFromBinaryString($UpperHalfBits);

    my $FoldedVector = $LowerHalfVector | $UpperHalfVector;

    # Recursively reduce its size by half...
    return $FoldedVector->_FoldFingerprintsBitVector($Mode, $Value);
}
403
# Is the first bit vector a subset of the second bit vector?
#
# For a bit vector to be a subset of another bit vector, both vectors must be
# of the same size and every bit position set in the first vector must also
# be set in the second bit vector.
#
# This functionality can be either invoked as a class function or an object
# method. Returns 1 or 0.
#
sub IsSubSet ($$) {
    my($CandidateSubSet, $CandidateSuperSet) = @_;

    # Different sizes can never be in a subset relationship...
    return 0 if $CandidateSubSet->GetSize() != $CandidateSuperSet->GetSize();

    # The AND of both vectors keeps all set bits of the first one only when
    # each of them is also set in the second one.
    my $CommonBitsVector = $CandidateSubSet & $CandidateSuperSet;

    return ($CandidateSubSet->GetNumOfSetBits() == $CommonBitsVector->GetNumOfSetBits()) ? 1 : 0;
}
424
# Return a string describing the bit vector; also used by the '""' overload.
#
# The string consists of '; ' separated fields: SpecifiedSize (only when it
# differs from the bit vector size), BitVectorSize, NumOfOnBits, BitDensity
# and BitVector (the value of StringifyBitVector).
#
sub StringifyFingerprintsBitVector {
    my($This) = @_;
    my(@Fields);

    # BitVector size information...
    if ($This->{SpecifiedSize} != $This->GetSize()) {
        push @Fields, "SpecifiedSize: " . $This->{SpecifiedSize};
    }
    push @Fields, "BitVectorSize: " . $This->GetSize();

    # Set bits information...
    my $OnBitsCount = $This->GetNumOfSetBits();
    my $OnBitsDensity = $This->GetFingerprintsBitDensity();
    push @Fields, "NumOfOnBits: $OnBitsCount", "BitDensity: $OnBitsDensity";

    # BitVector values...
    push @Fields, "BitVector: " . $This->StringifyBitVector();

    return join('; ', @Fields);
}
449
450 # For two fingerprints bit vectors A and B of same size, let:
451 #
452 # Na = Number of bits set to "1" in A
453 # Nb = Number of bits set to "1" in B
454 # Nc = Number of bits set to "1" in both A and B
455 # Nd = Number of bits set to "0" in both A and B
456 #
457 # Nt = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
458 #
459 # Na - Nc = Number of bits set to "1" in A but not in B
460 # Nb - Nc = Number of bits set to "1" in B but not in A
461 #
462 # Various similarity coefficients [ Ref 40 - 42 ] for a pair of bit vectors A and B are
463 # defined as follows:
464 #
465 # . BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as Buser )
466 #
467 # . Buser: ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as BaroniUrbani )
468 #
469 # . Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai)
470 #
471 # . Dice: (2 * Nc) / ( Na + Nb )
472 #
473 # . Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb)
474 #
475 # . Forbes: ( Nt * Nc ) / ( Na * Nb )
476 #
477 # . Fossum: ( Nt * ( ( Nc - 1/2 ) ** 2 ) ) / ( Na * Nb )
478 #
479 # . Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
480 #
481 # . Jaccard: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto)
482 #
483 # . Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc )
484 #
485 # . Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) ) = 0.5 * ( Nc / Na + Nc / Nb )
486 #
487 # . Matching: ( Nc + Nd ) / Nt
488 #
489 # . McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / ( Na * Nb )
490 #
491 # . Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine)
492 #
493 # . Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
494 #
495 # . RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc) + ( Nb - Nc) + Nt) = ( Nc + Nd ) / ( Na + Nb - 2Nc + Nt)
496 #
497 # . RussellRao: Nc / Nt
498 #
499 # . Simpson: Nc / MIN ( Na, Nb)
500 #
501 # . SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc) + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc )
502 #
503 # . SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
504 #
505 # . SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc )
506 #
507 # . Tanimoto: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard)
508 #
509 # . Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb ) + Nb)
510 #
511 # . Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) ) )
512 #
513 #
514 # Values of Tanimoto/Jaccard and Tversky coefficients are dependent on only those bits which
515 # are set to "1" in both A and B. In order to take into account all bit positions, modified versions
516 # of Tanimoto [ Ref. 42 ] and Tversky [ Ref. 43 ] have been developed.
517 #
518 # Let:
519 #
520 # Na' = Number of bits set to "0" in A
521 # Nb' = Number of bits set to "0" in B
522 # Nc' = Number of bits set to "0" in both A and B
523 #
524 # . Tanimoto': Nc' / ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' )
525 #
526 # . Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' ) + Nb')
527 #
528 # Then:
529 #
530 # . WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto'
531 #
532 # . WeightedTversky = beta * Tversky + (1 - beta) * Tversky'
533 #
534 #
535
# Calculate BaroniUrbani similarity coefficient for two same size bit vectors.
#
# This is the same quantity as the Buser coefficient, so the calculation is
# delegated to BuserSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub BaroniUrbaniSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    return BuserSimilarityCoefficient($VectorA, $VectorB);
}
545
# Calculate Buser similarity coefficient for two same size bit vectors:
#
#   ( SQRT( Nc * Nd ) + Nc ) / ( SQRT( Nc * Nd ) + ( Na - Nc ) + ( Nb - Nc ) + Nc )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits and Nd is the value returned by
# _GetNumOfCommonClearBits. Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub BuserSimilarityCoefficient ($$) {
    my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

    my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
    my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

    # The square root term appears in both numerator and denominator, so it
    # is computed once. The total bit count Nt is not part of this formula.
    my $SqrtNcNd = sqrt($Nc*$Nd);

    my $Numerator = $SqrtNcNd + $Nc;
    my $Denominator = $SqrtNcNd + ($Na - $Nc) + ($Nb - $Nc) + $Nc;

    return $Denominator ? ($Numerator/$Denominator) : 0;
}
563
# Calculate Cosine similarity coefficient for two same size bit vectors:
#
#   Nc / SQRT( Na * Nb )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CosineSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

    my $Divisor = sqrt($SetBitsA*$SetBitsB);
    return 0 unless $Divisor;

    return $CommonSetBits/$Divisor;
}
579
# Calculate Dice similarity coefficient for two same size bit vectors:
#
#   ( 2 * Nc ) / ( Na + Nb )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

    my $Divisor = $SetBitsA + $SetBitsB;
    return 0 unless $Divisor;

    return (2*$CommonSetBits)/$Divisor;
}
595
# Calculate Dennis similarity coefficient for two same size bit vectors:
#
#   ( Nc * Nd - ( Na - Nc ) * ( Nb - Nc ) ) / SQRT( Nt * Na * Nb )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits, Nd is the value returned by
# _GetNumOfCommonClearBits and Nt = Na + Nb - Nc + Nd. Returns 0 when the
# denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DennisSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
    my $CommonClearBits = _GetNumOfCommonClearBits($VectorA, $VectorB);
    my $TotalBits = $SetBitsA + $SetBitsB - $CommonSetBits + $CommonClearBits;

    my $Divisor = sqrt($TotalBits*$SetBitsA*$SetBitsB);
    return 0 unless $Divisor;

    my $Dividend = $CommonSetBits*$CommonClearBits - (($SetBitsA - $CommonSetBits)*($SetBitsB - $CommonSetBits));

    return $Dividend/$Divisor;
}
613
# Calculate Forbes similarity coefficient for two same size bit vectors:
#
#   ( Nt * Nc ) / ( Na * Nb )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits, Nd is the value returned by
# _GetNumOfCommonClearBits and Nt = Na + Nb - Nc + Nd. Returns 0 when the
# denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub ForbesSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
    my $CommonClearBits = _GetNumOfCommonClearBits($VectorA, $VectorB);
    my $TotalBits = $SetBitsA + $SetBitsB - $CommonSetBits + $CommonClearBits;

    my $Divisor = $SetBitsA*$SetBitsB;
    return 0 unless $Divisor;

    return ($TotalBits*$CommonSetBits)/$Divisor;
}
631
# Calculate Fossum similarity coefficient for two same size bit vectors:
#
#   ( Nt * ( Nc - 1/2 ) ** 2 ) / ( Na * Nb )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits, Nd is the value returned by
# _GetNumOfCommonClearBits and Nt = Na + Nb - Nc + Nd. Returns 0 when the
# denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub FossumSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
    my $CommonClearBits = _GetNumOfCommonClearBits($VectorA, $VectorB);
    my $TotalBits = $SetBitsA + $SetBitsB - $CommonSetBits + $CommonClearBits;

    my $Divisor = $SetBitsA*$SetBitsB;
    return 0 unless $Divisor;

    my $Dividend = $TotalBits*(($CommonSetBits - 0.5)** 2);

    return $Dividend/$Divisor;
}
649
# Calculate Hamann similarity coefficient for two same size bit vectors:
#
#   ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits, Nd is the value returned by
# _GetNumOfCommonClearBits and Nt = Na + Nb - Nc + Nd. Returns 0 when the
# denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub HamannSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
    my $CommonClearBits = _GetNumOfCommonClearBits($VectorA, $VectorB);
    my $TotalBits = $SetBitsA + $SetBitsB - $CommonSetBits + $CommonClearBits;

    return 0 unless $TotalBits;

    my $Dividend = ($CommonSetBits + $CommonClearBits) - ($SetBitsA - $CommonSetBits) - ($SetBitsB - $CommonSetBits);

    return $Dividend/$TotalBits;
}
667
# Calculate Jacard similarity coefficient for two same size bit vectors.
#
# This is the same quantity as the Tanimoto coefficient, so the calculation
# is delegated to TanimotoSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub JacardSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    return TanimotoSimilarityCoefficient($VectorA, $VectorB);
}
677
# Calculate Kulczynski1 similarity coefficient for two same size bit vectors:
#
#   Nc / ( Na + Nb - 2 * Nc )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub Kulczynski1SimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

    my $Divisor = $SetBitsA + $SetBitsB - 2*$CommonSetBits;
    return 0 unless $Divisor;

    return $CommonSetBits/$Divisor;
}
693
# Calculate Kulczynski2 similarity coefficient for two same size bit vectors:
#
#   0.5 * ( Na * Nc + Nb * Nc ) / ( Na * Nb ) = 0.5 * ( Nc / Na + Nc / Nb )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub Kulczynski2SimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

    my $Divisor = $SetBitsA*$SetBitsB;
    return 0 unless $Divisor;

    my $Dividend = 0.5*($SetBitsA*$CommonSetBits + $SetBitsB*$CommonSetBits);

    return $Dividend/$Divisor;
}
709
# Calculate Matching similarity coefficient for two same size bit vectors:
#
#   ( Nc + Nd ) / Nt
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits, Nd is the value returned by
# _GetNumOfCommonClearBits and Nt = Na + Nb - Nc + Nd. Returns 0 when the
# denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub MatchingSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
    my $CommonClearBits = _GetNumOfCommonClearBits($VectorA, $VectorB);
    my $TotalBits = $SetBitsA + $SetBitsB - $CommonSetBits + $CommonClearBits;

    return 0 unless $TotalBits;

    return ($CommonSetBits + $CommonClearBits)/$TotalBits;
}
727
# Calculate McConnaughey similarity coefficient for two same size bit vectors:
#
#   ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc ) ) / ( Na * Nb )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub McConnaugheySimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

    my $Divisor = $SetBitsA*$SetBitsB;
    return 0 unless $Divisor;

    my $Dividend = $CommonSetBits**2 - (($SetBitsA - $CommonSetBits)*($SetBitsB - $CommonSetBits));

    return $Dividend/$Divisor;
}
743
# Calculate Ochiai similarity coefficient for two same size bit vectors.
#
# This is the same quantity as the Cosine coefficient, so the calculation is
# delegated to CosineSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub OchiaiSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    return CosineSimilarityCoefficient($VectorA, $VectorB);
}
753
# Calculate Pearson similarity coefficient for two same size bit vectors:
#
#   ( ( Nc * Nd ) - ( Na - Nc ) * ( Nb - Nc ) ) /
#       SQRT( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits and Nd is the value returned by
# _GetNumOfCommonClearBits. Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub PearsonSimilarityCoefficient ($$) {
    my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

    my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
    my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

    # The total bit count Nt is not part of this formula, so it is not
    # computed here.
    my $Numerator = ($Nc*$Nd) - (($Na - $Nc)*($Nb - $Nc));
    my $Denominator = sqrt($Na*$Nb*($Na - $Nc + $Nd)*($Nb - $Nc + $Nd));

    return $Denominator ? ($Numerator/$Denominator) : 0;
}
771
# Calculate RogersTanimoto similarity coefficient for two same size bit vectors:
#
#   ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) + Nt )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits, Nd is the value returned by
# _GetNumOfCommonClearBits and Nt = Na + Nb - Nc + Nd. Returns 0 when the
# denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub RogersTanimotoSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
    my $CommonClearBits = _GetNumOfCommonClearBits($VectorA, $VectorB);
    my $TotalBits = $SetBitsA + $SetBitsB - $CommonSetBits + $CommonClearBits;

    my $Divisor = ($SetBitsA - $CommonSetBits) + ($SetBitsB - $CommonSetBits) + $TotalBits;
    return 0 unless $Divisor;

    return ($CommonSetBits + $CommonClearBits)/$Divisor;
}
789
# Calculate RussellRao similarity coefficient for two same size bit vectors:
#
#   Nc / Nt
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits, Nd is the value returned by
# _GetNumOfCommonClearBits and Nt = Na + Nb - Nc + Nd. Returns 0 when the
# denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub RussellRaoSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
    my $CommonClearBits = _GetNumOfCommonClearBits($VectorA, $VectorB);
    my $TotalBits = $SetBitsA + $SetBitsB - $CommonSetBits + $CommonClearBits;

    return 0 unless $TotalBits;

    return $CommonSetBits/$TotalBits;
}
807
# Calculate Simpson similarity coefficient for two same size bit vectors:
#
#   Nc / MIN( Na, Nb )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SimpsonSimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

    my $Divisor = min($SetBitsA, $SetBitsB);
    return 0 unless $Divisor;

    return $CommonSetBits/$Divisor;
}
823
# Calculate SkoalSneath1 similarity coefficient for two same size bit vectors:
#
#   Nc / ( Nc + 2 * ( Na - Nc ) + 2 * ( Nb - Nc ) )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath1SimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

    my $Divisor = $CommonSetBits + 2*($SetBitsA - $CommonSetBits) + 2*($SetBitsB - $CommonSetBits);
    return 0 unless $Divisor;

    return $CommonSetBits/$Divisor;
}
839
# Calculate SkoalSneath2 similarity coefficient for two same size bit vectors:
#
#   ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits, Nd is the value returned by
# _GetNumOfCommonClearBits and Nt = Na + Nb - Nc + Nd. Returns 0 when the
# denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath2SimilarityCoefficient ($$) {
    my($VectorA, $VectorB) = @_;

    my($SetBitsA, $SetBitsB, $CommonSetBits) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
    my $CommonClearBits = _GetNumOfCommonClearBits($VectorA, $VectorB);
    my $TotalBits = $SetBitsA + $SetBitsB - $CommonSetBits + $CommonClearBits;

    my $Divisor = $CommonSetBits + $CommonClearBits + $TotalBits;
    return 0 unless $Divisor;

    return (2*$CommonSetBits + 2*$CommonClearBits)/$Divisor;
}
857
# Calculate SkoalSneath3 similarity coefficient for two same size bit vectors:
#
#   ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) )
#
# where Na, Nb and Nc are the values returned by
# _GetNumOfIndividualAndCommonSetBits and Nd is the value returned by
# _GetNumOfCommonClearBits. Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath3SimilarityCoefficient ($$) {
    my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

    my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
    my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

    # The total bit count Nt is not part of this formula, so it is not
    # computed here.
    my $Numerator = $Nc + $Nd;
    my $Denominator = ($Na - $Nc) + ($Nb - $Nc);

    return $Denominator ? ($Numerator/$Denominator) : 0;
}
875
# Calculate Tanimoto similarity coefficient for two same size bit vectors:
#
#   Nc / ( Na + Nb - Nc )
#
# where Na and Nb are the numbers of set bits in A and B, and Nc is the number
# of bits set in both. A zero denominator yields 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TanimotoSimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

  # Number of bits set in A or B...
  my $SetInEither = $SetInA + $SetInB - $SetInBoth;

  if ($SetInEither) {
    return $SetInBoth/$SetInEither;
  }
  return 0;
}
891
# Calculate Tversky similarity coefficient for two same size bit vectors:
#
#   Nc / ( alpha * ( Na - Nb ) + Nb )
#
# where Na and Nb are the numbers of set bits in A and B, and Nc is the number
# of bits set in both. A zero denominator yields 0.
#
# Alpha must be defined and lie in the range 0 to 1 inclusive; otherwise the
# call croaks.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TverskySimilarityCoefficient ($$$) {
  my($BitVectorA, $BitVectorB, $Alpha) = @_;

  my $AlphaIsValid = defined($Alpha) && $Alpha >= 0 && $Alpha <= 1;
  if (!$AlphaIsValid) {
    croak "Error: ${ClassName}->TverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

  my $Denominator = $Alpha*($SetInA - $SetInB) + $SetInB;

  return 0 unless $Denominator;
  return $SetInBoth/$Denominator;
}
911
# Calculate Yule similarity coefficient for two same size bit vectors:
#
#   ( Nc * Nd - ( Na - Nc ) * ( Nb - Nc ) ) / ( Nc * Nd + ( Na - Nc ) * ( Nb - Nc ) )
#
# where Na and Nb are the numbers of set bits in A and B, Nc is the number of
# bits set in both, and Nd is the number of bits clear in both. A zero
# denominator yields 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub YuleSimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
  my($Na, $Nb, $Nc, $Nd, $MatchProduct, $MismatchProduct, $Numerator, $Denominator);

  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  # Nt (total number of bits) does not appear in this coefficient, so it is
  # not computed here...
  $MatchProduct = $Nc*$Nd;
  $MismatchProduct = ($Na - $Nc)*($Nb - $Nc);

  $Numerator = $MatchProduct - $MismatchProduct;
  $Denominator = $MatchProduct + $MismatchProduct;

  return $Denominator ? ($Numerator/$Denominator) : 0;
}
929
# Calculate WeightedTanimoto similarity coefficient for two same size bit vectors:
#
#   beta * Tanimoto + ( 1 - beta ) * Tanimoto'
#
# where Tanimoto is calculated using the counts of set bits and Tanimoto' is
# the same expression calculated using the counts of clear bits. Either term
# is taken as 0 when its denominator is zero.
#
# Beta must be defined and lie in the range 0 to 1 inclusive; otherwise the
# call croaks.
#
# This functionality can be either invoked as a class function or an object method.
#
sub WeightedTanimotoSimilarityCoefficient ($$$) {
  my($BitVectorA, $BitVectorB, $Beta) = @_;

  my $BetaIsValid = defined($Beta) && $Beta >= 0 && $Beta <= 1;
  if (!$BetaIsValid) {
    croak "Error: ${ClassName}->WeightedTanimotoSimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  my($InA, $InB, $InBoth, $InEither);

  # Tanimoto using set bits...
  ($InA, $InB, $InBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);
  $InEither = $InA + $InB - $InBoth;
  my $TanimotoForSetBits = $InEither ? ($InBoth/$InEither) : 0;

  # Tanimoto using clear bits...
  ($InA, $InB, $InBoth) = _GetNumOfIndividualAndCommonClearBits($BitVectorA, $BitVectorB);
  $InEither = $InA + $InB - $InBoth;
  my $TanimotoForClearBits = $InEither ? ($InBoth/$InEither) : 0;

  return $Beta*$TanimotoForSetBits + (1 - $Beta)*$TanimotoForClearBits;
}
960
# Calculate WeightedTversky similarity coefficient for two same size bit vectors:
#
#   beta * Tversky + ( 1 - beta ) * Tversky'
#
# where Tversky = Nc / ( alpha * ( Na - Nb ) + Nb ) is calculated using the
# counts of set bits and Tversky' is the same expression calculated using the
# counts of clear bits. Either term is taken as 0 when its denominator is zero.
#
# Both Alpha and Beta must be defined and lie in the range 0 to 1 inclusive;
# otherwise the call croaks.
#
# This functionality can be either invoked as a class function or an object method.
#
# The prototype lists four scalars to match the four arguments unpacked below:
# a three scalar prototype makes Perl reject the four argument class function
# call at compile time.
#
sub WeightedTverskySimilarityCoefficient ($$$$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Alpha, $Beta) = @_;
  my($Na, $Nb, $Nc, $TverskyForSetBits, $TverskyForClearBits, $Numerator, $Denominator, $WeightedTversky);

  if (!(defined($Alpha) && ($Alpha >= 0 && $Alpha <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }
  if (!(defined($Beta) && ($Beta >= 0 && $Beta <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  # Get Tversky for set bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Alpha*($Na - $Nb) + $Nb;
  $TverskyForSetBits = $Denominator ? ($Numerator/$Denominator) : 0;

  # Get Tversky for clear bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Alpha*($Na - $Nb) + $Nb;
  $TverskyForClearBits = $Denominator ? ($Numerator/$Denominator) : 0;

  $WeightedTversky = $Beta*$TverskyForSetBits + (1 - $Beta)*$TverskyForClearBits;

  return $WeightedTversky;
}
994
# Get number of set bits in bit vectors A and B to be used for similarity
# coefficient calculations. Returns a list of three values:
#
#   Na - Number of bits set to "1" in A
#   Nb - Number of bits set to "1" in B
#   Nc - Number of bits set to "1" in both A and B
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my $SetInA = $BitVectorA->GetNumOfSetBits();
  my $SetInB = $BitVectorB->GetNumOfSetBits();

  # Bits set in both A and B are the set bits of the bitwise AND of A and B...
  my $CommonBitVector = $BitVectorA & $BitVectorB;
  my $SetInBoth = $CommonBitVector->GetNumOfSetBits();

  return ($SetInA, $SetInB, $SetInBoth);
}
1014
# Get number of Nd bits, the bits set to "0" in both bit vectors A and B, to be
# used for similarity coefficient calculations.
#
# The count is taken from the set bits of ~A & ~B and then reduced by the
# number of clear bits used for padding. The padding correction is applied at
# most once, taken from the first of A and B which requires it.
#
sub _GetNumOfCommonClearBits ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  # Number of bits set to "0" in both A and B
  my $CommonClearBitVector = ~$BitVectorA & ~$BitVectorB;
  my $ClearInBoth = $CommonClearBitVector->GetNumOfSetBits();

  # Correct for number of clear bits used for padding...
  foreach my $BitVector ($BitVectorA, $BitVectorB) {
    next unless _IsNumOfClearBitsCorrectionRequired($BitVector);

    $ClearInBoth -= _GetNumOfClearBitsCorrection($BitVector);
    last;
  }

  return $ClearInBoth;
}
1035
# Get number of clear bits in bit vectors A and B to be used for similarity
# coefficient calculations. Returns a list of three values:
#
#   Number of bits set to "0" in A
#   Number of bits set to "0" in B
#   Number of bits set to "0" in both A and B
#
# The individual counts are corrected for the clear bits used for padding.
#
sub _GetNumOfIndividualAndCommonClearBits ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  # Number of clear bits in A, followed by number of clear bits in B, each
  # corrected for padding where required...
  my($ClearInA, $ClearInB) = map {
    my $NumOfClearBits = $_->GetNumOfClearBits();
    if (_IsNumOfClearBitsCorrectionRequired($_)) {
      $NumOfClearBits -= _GetNumOfClearBitsCorrection($_);
    }
    $NumOfClearBits;
  } ($BitVectorA, $BitVectorB);

  # Number of bits set to "0" in both A and B
  my $ClearInBoth = _GetNumOfCommonClearBits($BitVectorA, $BitVectorB);

  return ($ClearInA, $ClearInB, $ClearInBoth);
}
1063
# Irrespective of specified size, Perl functions used to handle bit data in
# BitVector class automatically set the size to the next nearest power of 2
# and clear the extra bits.
#
# SpecifiedSize is used by this class to process any arbitrary size during
# similarity coefficient calculations.
#
# Assuming the FingerprintsBitVector class only manipulates bits up to the
# specified size, a correction for the extra bits added by BitVector class
# needs to be applied to the number of clear bits.
#
# Returns the difference between Size and SpecifiedSize of the bit vector.
#
sub _GetNumOfClearBitsCorrection {
  my($FingerprintsBitVector) = @_;

  my($ActualSize, $SpecifiedSize) = @{$FingerprintsBitVector}{qw(Size SpecifiedSize)};

  return $ActualSize - $SpecifiedSize;
}
1080
# Is number of clear bits correction required?
#
# Returns 1 when Size of the bit vector is greater than its SpecifiedSize;
# otherwise, returns 0.
#
sub _IsNumOfClearBitsCorrectionRequired {
  my($FingerprintsBitVector) = @_;

  if ($FingerprintsBitVector->{Size} > $FingerprintsBitVector->{SpecifiedSize}) {
    return 1;
  }
  return 0;
}
1088
1089
1090 1;
1091
1092 __END__
1093
1094 =head1 NAME
1095
1096 FingerprintsBitVector
1097
1098 =head1 SYNOPSIS
1099
1100 use Fingerprints::FingerprintsBitVector;
1101
1102 use Fingerprints::FingerprintsBitVector qw(:coefficients);
1103
1104 use Fingerprints::FingerprintsBitVector qw(:all);
1105
1106 =head1 DESCRIPTION
1107
1108 B<FingerprintsBitVector> class provides the following methods:
1109
1110 new, BaroniUrbaniSimilarityCoefficient, BuserSimilarityCoefficient,
1111 CosineSimilarityCoefficient, DennisSimilarityCoefficient,
1112 DiceSimilarityCoefficient, FoldFingerprintsBitVectorByDensity,
1113 FoldFingerprintsBitVectorBySize, ForbesSimilarityCoefficient,
1114 FossumSimilarityCoefficient, GetBitsAsBinaryString, GetBitsAsDecimalString,
1115 GetBitsAsHexadecimalString, GetBitsAsOctalString, GetBitsAsRawBinaryString,
1116 GetDescription, GetFingerprintsBitDensity, GetID, GetSpecifiedSize,
1117 GetSupportedSimilarityCoefficients, GetVectorType, HamannSimilarityCoefficient,
1118 IsFingerprintsBitVector, IsSubSet, JacardSimilarityCoefficient,
1119 Kulczynski1SimilarityCoefficient, Kulczynski2SimilarityCoefficient,
1120 MatchingSimilarityCoefficient, McConnaugheySimilarityCoefficient,
1121 NewFromBinaryString, NewFromDecimalString, NewFromHexadecimalString,
1122 NewFromOctalString, NewFromRawBinaryString, OchiaiSimilarityCoefficient,
1123 PearsonSimilarityCoefficient, RogersTanimotoSimilarityCoefficient,
1124 RussellRaoSimilarityCoefficient, SetDescription, SetID, SetSpecifiedSize,
1125 SetVectorType, SimpsonSimilarityCoefficient, SkoalSneath1SimilarityCoefficient,
1126 SkoalSneath2SimilarityCoefficient, SkoalSneath3SimilarityCoefficient,
1127 StringifyFingerprintsBitVector, TanimotoSimilarityCoefficient,
1128 TverskySimilarityCoefficient, WeightedTanimotoSimilarityCoefficient,
1129 WeightedTverskySimilarityCoefficient, YuleSimilarityCoefficient
1130
1131 The methods available to create fingerprints bit vector from strings and to calculate similarity
1132 coefficient between two bit vectors can also be invoked as class functions.
1133
1134 B<FingerprintsBitVector> class is derived from B<BitVector> class which provides the functionality
1135 to manipulate bits.
1136
1137 For two fingerprints bit vectors A and B of same size, let:
1138
1139 Na = Number of bits set to "1" in A
1140 Nb = Number of bits set to "1" in B
1141 Nc = Number of bits set to "1" in both A and B
1142 Nd = Number of bits set to "0" in both A and B
1143
1144 Nt = Number of bits set to "1" or "0" in A or B (Size of A or B)
1145 Nt = Na + Nb - Nc + Nd
1146
1147 Na - Nc = Number of bits set to "1" in A but not in B
1148 Nb - Nc = Number of bits set to "1" in B but not in A
1149
1150 Then, various similarity coefficients [ Ref. 40 - 42 ] for a pair of bit vectors A and B are
1151 defined as follows:
1152
1153 BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as Buser )
1154
1155 Buser: ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as BaroniUrbani )
1156
1157 Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai)
1158
1159 Dice: (2 * Nc) / ( Na + Nb )
1160
1161 Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb)
1162
1163 Forbes: ( Nt * Nc ) / ( Na * Nb )
1164
1165 Fossum: ( Nt * ( ( Nc - 1/2 ) ** 2 ) ) / ( Na * Nb )
1166
1167 Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
1168
1169 Jaccard: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto)
1170
1171 Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc )
1172
1173 Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) )
1174 = 0.5 * ( Nc / Na + Nc / Nb )
1175
1176 Matching: ( Nc + Nd ) / Nt
1177
1178 McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / ( Na * Nb )
1179
1180 Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine)
1181
1182 Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
1183
1184 RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc) + ( Nb - Nc) + Nt) = ( Nc + Nd ) / ( Na + Nb - 2Nc + Nt)
1185
1186 RussellRao: Nc / Nt
1187
1188 Simpson: Nc / MIN ( Na, Nb)
1189
1190 SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc) + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc )
1191
1192 SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
1193
1194 SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc )
1195
1196 Tanimoto: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard)
1197
1198 Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb ) + Nb)
1199
1200 Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) ) )
1201
1202 The values of Tanimoto/Jaccard and Tversky coefficients are dependent on only those bits which
1203 are set to "1" in both A and B. In order to take into account all bit positions, modified versions
1204 of Tanimoto [ Ref. 42 ] and Tversky [ Ref. 43 ] have been developed.
1205
1206 Let:
1207
1208 Na' = Number of bits set to "0" in A
1209 Nb' = Number of bits set to "0" in B
1210 Nc' = Number of bits set to "0" in both A and B
1211
1212 Tanimoto': Nc' / ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' )
1213
1214 Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' ) + Nb')
1215
1216 Then:
1217
1218 WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto'
1219
1220 WeightedTversky = beta * Tversky + (1 - beta) * Tversky'
1221
1222 =head2 METHODS
1223
1224 =over 4
1225
1226 =item B<new>
1227
1228 $NewFPBitVector = new Fingerprints::FingerprintsBitVector($Size);
1229
1230 Creates a new I<FingerprintsBitVector> object of size I<Size> and returns newly created
1231 B<FingerprintsBitVector>. Bit numbers range from 0 to 1 less than I<Size>.
1232
1233 =item B<BaroniUrbaniSimilarityCoefficient>
1234
1235 $Value = $FingerprintsBitVector->BaroniUrbaniSimilarityCoefficient(
1236 $OtherFingerprintBitVector);
1237 $Value = Fingerprints::FingerprintsBitVector::
1238 BaroniUrbaniSimilarityCoefficient(
1239 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1240
1241 Returns value of I<BaroniUrbani> similarity coefficient between two same size I<FingerprintsBitVectors>.
1242
1243 =item B<BuserSimilarityCoefficient>
1244
1245 $Value = $FingerprintsBitVector->BuserSimilarityCoefficient(
1246 $OtherFingerprintBitVector);
1247 $Value = Fingerprints::FingerprintsBitVector::BuserSimilarityCoefficient(
1248 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1249
1250 Returns value of I<Buser> similarity coefficient between two same size I<FingerprintsBitVectors>.
1251
1252 =item B<CosineSimilarityCoefficient>
1253
1254 $Value = $FingerprintsBitVector->CosineSimilarityCoefficient(
1255 $OtherFingerprintBitVector);
1256 $Value = Fingerprints::FingerprintsBitVector::CosineSimilarityCoefficient(
1257 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1258
1259 Returns value of I<Cosine> similarity coefficient between two same size I<FingerprintsBitVectors>.
1260
1261 =item B<DennisSimilarityCoefficient>
1262
1263 $Value = $FingerprintsBitVector->DennisSimilarityCoefficient(
1264 $OtherFingerprintBitVector);
1265 $Value = Fingerprints::FingerprintsBitVector::DennisSimilarityCoefficient(
1266 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1267
1268 Returns value of I<Dennis> similarity coefficient between two same size I<FingerprintsBitVectors>.
1269
1270 =item B<DiceSimilarityCoefficient>
1271
1272 $Value = $FingerprintsBitVector->DiceSimilarityCoefficient(
1273 $OtherFingerprintBitVector);
1274 $Value = Fingerprints::FingerprintsBitVector::DiceSimilarityCoefficient(
1275 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1276
1277 Returns value of I<Dice> similarity coefficient between two same size I<FingerprintsBitVectors>.
1278
1279 =item B<FoldFingerprintsBitVectorByDensity>
1280
1281 $FingerprintsBitVector->FoldFingerprintsBitVectorByDensity($Density);
1282
1283 Folds I<FingerprintsBitVector> by recursively reducing its size by half until bit density of set bits is
1284 greater than or equal to specified I<Density> and returns folded I<FingerprintsBitVector>.
1285
1286 =item B<FoldFingerprintsBitVectorBySize>
1287
1288 $FingerprintsBitVector->FoldFingerprintsBitVectorBySize($Size);
1289
1290 Folds I<FingerprintsBitVector> by recursively reducing its size by half until size is less than or equal to
1291 specified I<Size> and returns folded I<FingerprintsBitVector>.
1292
1293 =item B<ForbesSimilarityCoefficient>
1294
1295 $Value = $FingerprintsBitVector->ForbesSimilarityCoefficient(
1296 $OtherFingerprintBitVector);
1297 $Value = Fingerprints::FingerprintsBitVector::ForbesSimilarityCoefficient(
1298 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1299
1300 Returns value of I<Forbes> similarity coefficient between two same size I<FingerprintsBitVectors>.
1301
1302 =item B<FossumSimilarityCoefficient>
1303
1304 $Value = $FingerprintsBitVector->FossumSimilarityCoefficient(
1305 $OtherFingerprintBitVector);
1306 $Value = Fingerprints::FingerprintsBitVector::FossumSimilarityCoefficient(
1307 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1308
1309 Returns value of I<Fossum> similarity coefficient between two same size I<FingerprintsBitVectors>.
1310
1311 =item B<GetBitsAsBinaryString>
1312
1313 $BinaryASCIIString = $FingerprintsBitVector->GetBitsAsBinaryString();
1314
1315 Returns fingerprints as a binary ASCII string containing 0s and 1s.
1316
1317 =item B<GetBitsAsHexadecimalString>
1318
1319 $HexadecimalString = $FingerprintsBitVector->GetBitsAsHexadecimalString();
1320
1321 Returns fingerprints as a hexadecimal string.
1322
1323 =item B<GetBitsAsRawBinaryString>
1324
1325 $RawBinaryString = $FingerprintsBitVector->GetBitsAsRawBinaryString();
1326
1327 Returns fingerprints as a raw binary string containing packed bit values for each byte.
1328
1329 =item B<GetDescription>
1330
1331 $Description = $FingerprintsBitVector->GetDescription();
1332
1333 Returns a string containing description of fingerprints bit vector.
1334
1335 =item B<GetFingerprintsBitDensity>
1336
1337 $BitDensity = $FingerprintsBitVector->GetFingerprintsBitDensity();
1338
1339 Returns I<BitDensity> of I<FingerprintsBitVector> corresponding to bits set to 1s.
1340
1341 =item B<GetID>
1342
1343 $ID = $FingerprintsBitVector->GetID();
1344
1345 Returns I<ID> of I<FingerprintsBitVector>.
1346
1347 =item B<GetVectorType>
1348
1349 $VectorType = $FingerprintsBitVector->GetVectorType();
1350
1351 Returns I<VectorType> of I<FingerprintsBitVector>.
1352
1353 =item B<GetSpecifiedSize>
1354
1355 $Size = $FingerprintsBitVector->GetSpecifiedSize();
1356
1357 Returns value of specified size for bit vector.
1358
1359 =item B<GetSupportedSimilarityCoefficients>
1360
1361 @SimilarityCoefficient =
1362 Fingerprints::FingerprintsBitVector::GetSupportedSimilarityCoefficients();
1363
1364 Returns an array containing names of supported similarity coefficients.
1365
1366 =item B<HamannSimilarityCoefficient>
1367
1368 $Value = $FingerprintsBitVector->HamannSimilarityCoefficient(
1369 $OtherFingerprintBitVector);
1370 $Value = Fingerprints::FingerprintsBitVector::HamannSimilarityCoefficient(
1371 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1372
1373 Returns value of I<Hamann> similarity coefficient between two same size I<FingerprintsBitVectors>.
1374
1375 =item B<IsFingerprintsBitVector>
1376
1377 $Status = Fingerprints::FingerprintsBitVector::
1378 IsFingerprintsBitVector($Object);
1379
1380 Returns 1 or 0 based on whether I<Object> is a B<FingerprintsBitVector> object.
1381
1382 =item B<IsSubSet>
1383
1384 $Status = $FingerprintsBitVector->IsSubSet($OtherFPBitVector);
1385 $Status = Fingerprints::FingerprintsBitVector::IsSubSet(
1386 $FPBitVectorA, $FPBitVectorB);
1387
1388 Returns 1 or 0 based on whether first fingerprints bit vector is a subset of second
1389 fingerprints bit vector.
1390
1391 For a bit vector to be a subset of another bit vector, both vectors must be of
1392 the same size and the bit positions set in first vector must also be set in the
1393 second bit vector.
1394
1395 =item B<JacardSimilarityCoefficient>
1396
1397 $Value = $FingerprintsBitVector->JacardSimilarityCoefficient(
1398 $OtherFingerprintBitVector);
1399 $Value = Fingerprints::FingerprintsBitVector::JacardSimilarityCoefficient(
1400 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1401
1402 Returns value of I<Jacard> similarity coefficient between two same size I<FingerprintsBitVectors>.
1403
1404 =item B<Kulczynski1SimilarityCoefficient>
1405
1406 $Value = $FingerprintsBitVector->Kulczynski1SimilarityCoefficient(
1407 $OtherFingerprintBitVector);
1408 $Value = Fingerprints::FingerprintsBitVector::
1409 Kulczynski1SimilarityCoefficient(
1410 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1411
1412 Returns value of I<Kulczynski1> similarity coefficient between two same size I<FingerprintsBitVectors>.
1413
1414 =item B<Kulczynski2SimilarityCoefficient>
1415
1416 $Value = $FingerprintsBitVector->Kulczynski2SimilarityCoefficient(
1417 $OtherFingerprintBitVector);
1418 $Value = Fingerprints::FingerprintsBitVector::
1419 Kulczynski2SimilarityCoefficient(
1420 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1421
1422 Returns value of I<Kulczynski2> similarity coefficient between two same size I<FingerprintsBitVectors>.
1423
1424 =item B<MatchingSimilarityCoefficient>
1425
1426 $Value = $FingerprintsBitVector->MatchingSimilarityCoefficient(
1427 $OtherFingerprintBitVector);
1428 $Value = Fingerprints::FingerprintsBitVector::
1429 MatchingSimilarityCoefficient(
1430 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1431
1432 Returns value of I<Matching> similarity coefficient between two same size I<FingerprintsBitVectors>.
1433
1434 =item B<McConnaugheySimilarityCoefficient>
1435
1436 $Value = $FingerprintsBitVector->McConnaugheySimilarityCoefficient(
1437 $OtherFingerprintBitVector);
1438 $Value = Fingerprints::FingerprintsBitVector::
1439 McConnaugheySimilarityCoefficient(
1440 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1441
1442 Returns value of I<McConnaughey> similarity coefficient between two same size I<FingerprintsBitVectors>.
1443
1444 =item B<NewFromBinaryString>
1445
1446 $NewFPBitVector = $FingerprintsBitVector->NewFromBinaryString(
1447 $BinaryString);
1448 $NewFPBitVector = Fingerprints::FingerprintsBitVector::NewFromBinaryString(
1449 $BinaryString);
1450
1451 Creates a new I<FingerprintsBitVector> using I<BinaryString> and returns new
1452 B<FingerprintsBitVector> object.
1453
1454 =item B<NewFromHexadecimalString>
1455
1456 $NewFPBitVector = $FingerprintsBitVector->NewFromHexadecimalString(
1457 $HexadecimalString);
1458 $NewFPBitVector = Fingerprints::FingerprintsBitVector::
1459 NewFromHexadecimalString(
1460 $HexadecimalString);
1461
1462 Creates a new I<FingerprintsBitVector> using I<HexadecimalString> and returns new
1463 B<FingerprintsBitVector> object.
1464
1465 =item B<NewFromRawBinaryString>
1466
1467 $NewFPBitVector = $FingerprintsBitVector->NewFromRawBinaryString(
1468 $RawBinaryString);
1469 $NewFPBitVector = Fingerprints::FingerprintsBitVector::
1470 NewFromRawBinaryString(
1471 $RawBinaryString);
1472
1473 Creates a new I<FingerprintsBitVector> using I<RawBinaryString> and returns new
1474 B<FingerprintsBitVector> object.
1475
1476 =item B<OchiaiSimilarityCoefficient>
1477
1478 $Value = $FingerprintsBitVector->OchiaiSimilarityCoefficient(
1479 $OtherFingerprintBitVector);
1480 $Value = Fingerprints::FingerprintsBitVector::OchiaiSimilarityCoefficient(
1481 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1482
1483 Returns value of I<Ochiai> similarity coefficient between two same size I<FingerprintsBitVectors>.
1484
1485 =item B<PearsonSimilarityCoefficient>
1486
1487 $Value = $FingerprintsBitVector->PearsonSimilarityCoefficient(
1488 $OtherFingerprintBitVector);
1489 $Value = Fingerprints::FingerprintsBitVector::PearsonSimilarityCoefficient(
1490 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1491
1492 Returns value of I<Pearson> similarity coefficient between two same size I<FingerprintsBitVectors>.
1493
1494 =item B<RogersTanimotoSimilarityCoefficient>
1495
1496 $Value = $FingerprintsBitVector->RogersTanimotoSimilarityCoefficient(
1497 $OtherFingerprintBitVector);
1498 $Value = Fingerprints::FingerprintsBitVector::
1499 RogersTanimotoSimilarityCoefficient(
1500 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1501
1502 Returns value of I<RogersTanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>.
1503
1504 =item B<RussellRaoSimilarityCoefficient>
1505
1506 $Value = $FingerprintsBitVector->RussellRaoSimilarityCoefficient(
1507 $OtherFingerprintBitVector);
1508 $Value = Fingerprints::FingerprintsBitVector::
1509 RussellRaoSimilarityCoefficient(
1510 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1511
1512 Returns value of I<RussellRao> similarity coefficient between two same size I<FingerprintsBitVectors>.
1513
1514 =item B<SetSpecifiedSize>
1515
1516 $FingerprintsBitVector->SetSpecifiedSize($Size);
1517
1518 Sets specified size for fingerprints bit vector.
1519
1520 Irrespective of specified size, Perl functions used to handle bit data in B<BitVector> class
1521 automatically set the size to the next nearest power of 2. I<SpecifiedSize> is used by
1522 B<FingerprintsBitVector> class to process any arbitrary size during similarity coefficient calculations.
1523
1524 =item B<SetDescription>
1525
1526 $FingerprintsBitVector->SetDescription($Description);
1527
1528 Sets I<Description> of fingerprints bit vector and returns I<FingerprintsBitVector>.
1529
1530 =item B<SetID>
1531
1532 $FingerprintsBitVector->SetID($ID);
1533
1534 Sets I<ID> of fingerprints bit vector and returns I<FingerprintsBitVector>.
1535
1536 =item B<SetVectorType>
1537
1538 $FingerprintsBitVector->SetVectorType($VectorType);
1539
1540 Sets I<VectorType> of fingerprints bit vector and returns I<FingerprintsBitVector>.
1541
1542 =item B<SimpsonSimilarityCoefficient>
1543
1544 $Value = $FingerprintsBitVector->SimpsonSimilarityCoefficient(
1545 $OtherFingerprintBitVector);
1546 $Value = Fingerprints::FingerprintsBitVector::SimpsonSimilarityCoefficient(
1547 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1548
1549 Returns value of I<Simpson> similarity coefficient between two same size I<FingerprintsBitVectors>.
1550
1551 =item B<SkoalSneath1SimilarityCoefficient>
1552
1553 $Value = $FingerprintsBitVector->SkoalSneath1SimilarityCoefficient(
1554 $OtherFingerprintBitVector);
1555 $Value = Fingerprints::FingerprintsBitVector::
1556 SkoalSneath1SimilarityCoefficient(
1557 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1558
1559 Returns value of I<SkoalSneath1> similarity coefficient between two same size I<FingerprintsBitVectors>.
1560
1561 =item B<SkoalSneath2SimilarityCoefficient>
1562
1563 $Value = $FingerprintsBitVector->SkoalSneath2SimilarityCoefficient(
1564 $OtherFingerprintBitVector);
1565 $Value = Fingerprints::FingerprintsBitVector::
1566 SkoalSneath2SimilarityCoefficient(
1567 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1568
1569 Returns value of I<SkoalSneath2> similarity coefficient between two same size I<FingerprintsBitVectors>.
1570
1571 =item B<SkoalSneath3SimilarityCoefficient>
1572
1573 $Value = $FingerprintsBitVector->SkoalSneath3SimilarityCoefficient(
1574 $OtherFingerprintBitVector);
1575 $Value = Fingerprints::FingerprintsBitVector::
1576 SkoalSneath3SimilarityCoefficient(
1577 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1578
1579 Returns value of I<SkoalSneath3> similarity coefficient between two same size I<FingerprintsBitVectors>.
1580
1581 =item B<StringifyFingerprintsBitVector>
1582
1583 $String = $FingerprintsBitVector->StringifyFingerprintsBitVector();
1584
1585 Returns a string containing information about I<FingerprintsBitVector> object.
1586
1587 =item B<TanimotoSimilarityCoefficient>
1588
1589 $Value = $FingerprintsBitVector->TanimotoSimilarityCoefficient(
1590 $OtherFingerprintBitVector);
1591 $Value = Fingerprints::FingerprintsBitVector::
1592 TanimotoSimilarityCoefficient(
1593 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1594
1595 Returns value of I<Tanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>.
1596
1597 =item B<TverskySimilarityCoefficient>
1598
1599 $Value = $FingerprintsBitVector->TverskySimilarityCoefficient(
1600 $OtherFingerprintBitVector, $Alpha);
1601 $Value = Fingerprints::FingerprintsBitVector::
1602 TverskySimilarityCoefficient(
1603 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Alpha);
1604
1605 Returns value of I<Tversky> similarity coefficient between two same size I<FingerprintsBitVectors>.
1606
1607 =item B<WeightedTanimotoSimilarityCoefficient>
1608
1609 $Value =
1610 $FingerprintsBitVector->WeightedTanimotoSimilarityCoefficient(
1611 $OtherFingerprintBitVector, $Beta);
1612 $Value =
1613 Fingerprints::FingerprintsBitVector::
1614 WeightedTanimotoSimilarityCoefficient(
1615 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Beta);
1616
1617 Returns value of I<WeightedTanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>.
1618
1619 =item B<WeightedTverskySimilarityCoefficient>
1620
1621 $Value =
1622 $FingerprintsBitVector->WeightedTverskySimilarityCoefficient(
1623 $OtherFingerprintBitVector, $Alpha, $Beta);
1624 $Value =
1625 Fingerprints::FingerprintsBitVector::
1626 WeightedTverskySimilarityCoefficient(
1627 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Alpha, $Beta);
1628
1629 Returns value of I<WeightedTversky> similarity coefficient between two same size I<FingerprintsBitVectors>.
1630
1631 =item B<YuleSimilarityCoefficient>
1632
1633 $Value = $FingerprintsBitVector->YuleSimilarityCoefficient(
1634 $OtherFingerprintBitVector);
1635 $Value = Fingerprints::FingerprintsBitVector::YuleSimilarityCoefficient(
1636 $FingerprintsBitVectorA, $FingerprintBitVectorB);
1637
1638 Returns value of I<Yule> similarity coefficient between two same size I<FingerprintsBitVectors>.
1639
1640 =back
1641
1642 =head1 AUTHOR
1643
1644 Manish Sud <msud@san.rr.com>
1645
1646 =head1 SEE ALSO
1647
1648 BitVector.pm, FingerprintsStringUtil.pm, FingerprintsVector.pm, Vector.pm
1649
1650 =head1 COPYRIGHT
1651
1652 Copyright (C) 2015 Manish Sud. All rights reserved.
1653
1654 This file is part of MayaChemTools.
1655
1656 MayaChemTools is free software; you can redistribute it and/or modify it under
1657 the terms of the GNU Lesser General Public License as published by the Free
1658 Software Foundation; either version 3 of the License, or (at your option)
1659 any later version.
1660
1661 =cut