0
|
1 package Fingerprints::FingerprintsBitVector;
|
|
2 #
|
|
3 # $RCSfile: FingerprintsBitVector.pm,v $
|
|
4 # $Date: 2015/02/28 20:48:54 $
|
|
5 # $Revision: 1.27 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Scalar::Util ();
|
|
33 use BitVector;
|
|
34 use MathUtil;
|
|
35 use TextUtil ();
|
|
36
|
|
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(BitVector Exporter);

# Names of the similarity coefficient functions provided by this module...
my(@SimilarityCoefficients) = qw(
  BaroniUrbaniSimilarityCoefficient BuserSimilarityCoefficient CosineSimilarityCoefficient
  DiceSimilarityCoefficient DennisSimilarityCoefficient ForbesSimilarityCoefficient
  FossumSimilarityCoefficient HamannSimilarityCoefficient JacardSimilarityCoefficient
  Kulczynski1SimilarityCoefficient Kulczynski2SimilarityCoefficient MatchingSimilarityCoefficient
  McConnaugheySimilarityCoefficient OchiaiSimilarityCoefficient PearsonSimilarityCoefficient
  RogersTanimotoSimilarityCoefficient RussellRaoSimilarityCoefficient SimpsonSimilarityCoefficient
  SkoalSneath1SimilarityCoefficient SkoalSneath2SimilarityCoefficient SkoalSneath3SimilarityCoefficient
  TanimotoSimilarityCoefficient TverskySimilarityCoefficient YuleSimilarityCoefficient
  WeightedTanimotoSimilarityCoefficient WeightedTverskySimilarityCoefficient
);

# Names of the functions creating a new object from a string...
my(@NewFromString) = qw(NewFromBinaryString NewFromHexadecimalString NewFromRawBinaryString);

@EXPORT = qw(IsFingerprintsBitVector);

# qw() does not interpolate arrays: the function names must be appended as lists,
# otherwise the literal strings '@NewFromString' and '@SimilarityCoefficients' end up
# in @EXPORT_OK and the names used in %EXPORT_TAGS are not exportable at all.
@EXPORT_OK = (qw(GetSupportedSimilarityCoefficients), @NewFromString, @SimilarityCoefficients);

%EXPORT_TAGS = (
  new => [@NewFromString],
  coefficients => [@SimilarityCoefficients],
  all => [@EXPORT, @EXPORT_OK]
);
|
|
55
|
|
# Class-level state: $ClassName is filled in by _InitializeClass() and is used in
# error messages and in the object type check...
my $ClassName;
_InitializeClass();

# Objects stringify through StringifyFingerprintsBitVector...
use overload '""' => 'StringifyFingerprintsBitVector';
|
|
61
|
|
# Class constructor...
#
# Calls the base class constructor with the specified size, blesses the result into
# the invoking class (or the class of the invoking object) and records the specified
# size through _InitializeFingerprintsBitVector.
#
sub new {
  my $Class = shift;
  my($Size) = @_;

  my $This = $Class->SUPER::new($Size);
  bless $This, (ref($Class) || $Class);

  # Validates $Size and stores it as SpecifiedSize...
  $This->_InitializeFingerprintsBitVector($Size);

  return $This;
}
|
|
73
|
|
# Initialize object data: validate the requested size and store it as SpecifiedSize...
#
# Notes:
#   . The class, BitVector, used to derive this class provides all the functionality to
#     manipulate bits.
#   . Irrespective of specified size, Perl functions used to handle bit data in
#     BitVector class automatically sets the size to the next nearest power of 2.
#     SpecifiedSize is used by this class to process any arbitrary size during similarity
#     coefficient calculations.
#
# Croaks when the size is undefined or not positive.
#
sub _InitializeFingerprintsBitVector {
  my($This, $Size) = @_;

  croak "Error: ${ClassName}->new: FingerprintsBitVector object instantiated without specifying its size ..."
    if !defined $Size;

  croak "Error: ${ClassName}->new: Fingerprints bit vector size, $Size, must be a positive integer..."
    if $Size <= 0;

  # Specified size of fingerprints...
  $This->{SpecifiedSize} = $Size;
}
|
|
98
|
|
# Initialize class data: remember the package name for use in error messages and
# in object type checks...
sub _InitializeClass {
  $ClassName = __PACKAGE__;

  return $ClassName;
}
|
|
104
|
|
# Set specified size...
#
# Notes:
#   Irrespective of specified size, Perl functions used to handle bit data in
#   BitVector class automatically sets the size to the next nearest power of 2.
#   SpecifiedSize is used by this class to process any arbitrary size during similarity
#   coefficient calculations.
#
# Croaks unless the new value is > 0 and <= the current bit vector size.
#
# NOTE(review): the range check reads $This->{Size} directly while the error message
# uses GetSize(); presumably both refer to the same value - confirm against BitVector.
#
sub SetSpecifiedSize {
  my($This, $SpecifiedSize) = @_;

  unless ($SpecifiedSize > 0 && $SpecifiedSize <= $This->{Size}) {
    croak "Error: ${ClassName}->SetSpecifiedSize: Specified size, $SpecifiedSize, is not valid:  It must be > 0 && <= ", $This->GetSize()," ...";
  }

  $This->{SpecifiedSize} = $SpecifiedSize;
}
|
|
121
|
|
# Return the size recorded as SpecifiedSize for this object...
sub GetSpecifiedSize {
  my $This = shift;

  return $This->{SpecifiedSize};
}
|
|
128
|
|
# Store an ID for this object and return the object to allow call chaining...
sub SetID {
  my($This, $ID) = @_;

  $This->{ID} = $ID;

  return $This;
}
|
|
137
|
|
# Return the stored ID, or the string 'None' when no ID has been set...
sub GetID {
  my $This = shift;

  return $This->{ID} if exists $This->{ID};

  return 'None';
}
|
|
144
|
|
# Store a description for this object and return the object to allow call chaining...
sub SetDescription {
  my($This, $Description) = @_;

  $This->{Description} = $Description;

  return $This;
}
|
|
153
|
|
# Return the stored description, or 'No description available' when none has been set...
sub GetDescription {
  my $This = shift;

  return $This->{Description} if exists $This->{Description};

  return 'No description available';
}
|
|
160
|
|
# Store a vector type for this object and return the object to allow call chaining...
sub SetVectorType {
  my($This, $VectorType) = @_;

  $This->{VectorType} = $VectorType;

  return $This;
}
|
|
169
|
|
# Return the stored vector type, or 'FingerprintsBitVector' when none has been set...
sub GetVectorType {
  my $This = shift;

  return $This->{VectorType} if exists $This->{VectorType};

  return 'FingerprintsBitVector';
}
|
|
176
|
|
# Create a new fingerprints bit vector using binary string. This functionality can be
# either invoked as a class function or an object method.
#
# When the first argument is a fingerprints bit vector object it is skipped and the
# following two arguments are used as the string and the bits order.
#
sub NewFromBinaryString ($;$) {
  my($First, $Second, $Third) = @_;

  my($String, $BitsOrder) = _IsFingerprintsBitVector($First) ? ($Second, $Third) : ($First, $Second);

  return _NewFingerptinsBitVectorFromString('Binary', $String, $BitsOrder);
}
|
|
190
|
|
# Create a new fingerprints bit vector using hexadecimal string. This functionality can be
# either invoked as a class function or an object method.
#
# When the first argument is a fingerprints bit vector object it is skipped and the
# following two arguments are used as the string and the bits order.
#
sub NewFromHexadecimalString ($;$) {
  my($First, $Second, $Third) = @_;

  my($String, $BitsOrder) = _IsFingerprintsBitVector($First) ? ($Second, $Third) : ($First, $Second);

  return _NewFingerptinsBitVectorFromString('Hexadecimal', $String, $BitsOrder);
}
|
|
204
|
|
# Creation of a fingerprints bit vector from an octal string is not supported:
# this function always croaks...
#
sub NewFromOctalString ($) {
  my $Message = "Error: ${ClassName}->NewFromOctalString: Creation of fingerprits bit vector from an octal string is not supported ...";

  croak $Message;
}
|
|
212
|
|
# Creation of a fingerprints bit vector from a decimal string is not supported:
# this function always croaks...
#
sub NewFromDecimalString ($;$) {
  my $Message = "Error: ${ClassName}->NewFromDecimalString: Creation of fingerprits bit vector from a decimal string is not supported ...";

  croak $Message;
}
|
|
219
|
|
# Create a new fingerprints bit vector using raw binary string. This functionality can be
# either invoked as a class function or an object method.
#
# When the first argument is a fingerprints bit vector object it is skipped and the
# following two arguments are used as the string and the bits order.
#
sub NewFromRawBinaryString ($;$) {
  my($First, $Second, $Third) = @_;

  my($String, $BitsOrder) = _IsFingerprintsBitVector($First) ? ($Second, $Third) : ($First, $Second);

  return _NewFingerptinsBitVectorFromString('RawBinary', $String, $BitsOrder);
}
|
|
233
|
|
# Create a new fingerprints bit vector from a string...
#
# Parameters:
#   $Format    - string format name as passed by the callers in this file: 'Binary',
#                'Hexadecimal' or 'RawBinary'
#   $String    - bit data in the specified format
#   $BitsOrder - optional bits order; handed through to _SetBitsAsString unchanged
#
# The size of the new object is whatever BitVector::_CalculateStringSizeInBits reports
# for the string. Returns the new object.
#
sub _NewFingerptinsBitVectorFromString ($$;$) {
  my($Format, $String, $BitsOrder) = @_;

  my $Size = BitVector::_CalculateStringSizeInBits($Format, $String);

  # Explicit class method call instead of indirect object syntax ("new Class(...)"),
  # which Perl can misparse depending on what subs are in scope...
  my $FingerprintsBitVector = Fingerprints::FingerprintsBitVector->new($Size);
  $FingerprintsBitVector->_SetBitsAsString($Format, $String, $BitsOrder);

  return $FingerprintsBitVector;
}
|
|
248
|
|
# Get fingerprint bits as a hexadecimal string. The optional bits order is passed
# on to _GetFingerprintBitsAsString...
#
sub GetBitsAsHexadecimalString {
  my $This = shift;
  my($BitsOrder) = @_;

  return $This->_GetFingerprintBitsAsString('Hexadecimal', $BitsOrder);
}
|
|
256
|
|
# Retrieval of fingerprint bits as an octal string is not supported: this method
# always croaks...
#
sub GetBitsAsOctalString {
  my $This = shift;
  my($BitsOrder) = @_;

  my $Message = "Error: ${ClassName}->GetBitsAsOctalString: Retrieval of fingerprits bits as an octal string is not supported ...";

  croak $Message;
}
|
|
264
|
|
# Retrieval of fingerprint bits as a decimal string is not supported: this method
# always croaks...
#
sub GetBitsAsDecimalString {
  my($This, $BitsOrder) = @_;

  # The message names this method; it previously reported GetBitsAsOctalString...
  croak "Error: ${ClassName}->GetBitsAsDecimalString: Retrieval of fingerprits bits as a decimal string is not supported ...";
}
|
|
272
|
|
# Get fingerprint bits as a binary string containing 1s and 0s. The optional bits
# order is passed on to _GetFingerprintBitsAsString...
#
sub GetBitsAsBinaryString {
  my $This = shift;
  my($BitsOrder) = @_;

  return $This->_GetFingerprintBitsAsString('Binary', $BitsOrder);
}
|
|
280
|
|
# Get fingerprint bits as a raw binary string. No bits order is passed on, so
# _GetFingerprintBitsAsString applies its default...
#
sub GetBitsAsRawBinaryString {
  my $This = shift;

  return $This->_GetFingerprintBitsAsString('RawBinary');
}
|
|
288
|
|
# Return fingerprint bits as a string in the specified format. A missing or false
# bits order defaults to 'Ascending' before _GetBitsAsString is called...
#
sub _GetFingerprintBitsAsString {
  my($This, $Format, $BitsOrder) = @_;

  $BitsOrder = 'Ascending' unless $BitsOrder;

  return $This->_GetBitsAsString($Format, $BitsOrder);
}
|
|
298
|
|
# Is it a fingerprints bit vector object? Exported wrapper around
# _IsFingerprintsBitVector; returns 1 or 0...
sub IsFingerprintsBitVector ($) {
  my $Candidate = shift;

  return _IsFingerprintsBitVector($Candidate);
}
|
|
305
|
|
# Is it a fingerprints bit vector object? Returns 1 for a blessed reference that
# isa this class and 0 for anything else...
sub _IsFingerprintsBitVector {
  my($Object) = @_;

  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
|
|
312
|
|
# Return the names of the supported similarity coefficient functions (the number
# of names when called in scalar context)...
sub GetSupportedSimilarityCoefficients () {
  my(@SupportedCoefficients) = @SimilarityCoefficients;

  return @SupportedCoefficients;
}
|
|
318
|
|
# Get bit density for fingerprints bit vector corresponding to on bits: the value
# of GetDensityOfSetBits() passed through round() with 2 as second argument...
#
sub GetFingerprintsBitDensity {
  my $This = shift;

  my $DensityOfSetBits = $This->GetDensityOfSetBits();

  return round($DensityOfSetBits, 2);
}
|
|
329
|
|
# Fold fingerprints bit vector by recursively reducing its size by half until size is
# less than or equal to specified size...
#
# Croaks unless the specified size is > 0 and <= the current size. Returns this object
# unchanged when no folding is needed; otherwise returns the folded bit vector.
#
sub FoldFingerprintsBitVectorBySize {
  my($This, $Size) = @_;

  unless ($Size > 0 && $Size <= $This->GetSize()) {
    croak "Error: ${ClassName}->FoldFingerprintsBitVectorBySize: Specified size, $Size, is not valid:  It must be > 0 && <= ", $This->GetSize()," ...";
  }

  return ($This->GetSize() <= $Size) ? $This : $This->_FoldFingerprintsBitVector('BySize', $Size);
}
|
|
345
|
|
# Fold fingerprints bit vector by recursively reducing its size by half until bit
# density of set bits is greater than or equal to specified density...
#
# Croaks unless the specified density is > 0 and <= 1. Returns this object unchanged
# when its density is already high enough; otherwise returns the folded bit vector.
#
sub FoldFingerprintsBitVectorByDensity {
  my($This, $Density) = @_;

  unless ($Density > 0 && $Density <= 1) {
    croak "Error: ${ClassName}->FoldFingerprintsBitVectorByDensity: Specified bit density, $Density, is not valid:  It must be > 0 && <= 1 ...";
  }

  return ($This->GetDensityOfSetBits() >= $Density) ? $This : $This->_FoldFingerprintsBitVector('ByDensity', $Density);
}
|
|
361
|
|
# Fold fingerprints bit vector using size or density and return folded fingerprint
# bit vector...
#
# $Mode is 'BySize' or 'ByDensity' (case insensitive); $Value is the target size or
# density. Each step splits the binary string in two halves, ORs them together and
# recurses on the result. Folding stops at a size of 8 bits or less, as soon as the
# target is reached, or immediately for an unknown mode.
#
sub _FoldFingerprintsBitVector {
  my($This, $Mode, $Value) = @_;

  # Fold up to size of 8 bits...
  return $This if $This->GetSize() <= 8;

  # Stop when the requested size or density has been reached...
  if ($Mode =~ /^BySize$/i) {
    return $This if $This->GetSize() <= $Value;
  }
  elsif ($Mode =~ /^ByDensity$/i) {
    return $This if $This->GetDensityOfSetBits() >= $Value;
  }
  else {
    return $This;
  }

  # Reduce the size by half and fold again...
  my $BinaryString = $This->GetBitsAsBinaryString();
  my $HalfLength = length($BinaryString)/2;

  my $FirstHalfBits = substr($BinaryString, 0, $HalfLength);
  my $SecondHalfBits = substr($BinaryString, $HalfLength);

  my $FirstHalf = NewFromBinaryString($FirstHalfBits);
  my $SecondHalf = NewFromBinaryString($SecondHalfBits);

  my $Folded = $FirstHalf | $SecondHalf;

  return $Folded->_FoldFingerprintsBitVector($Mode, $Value);
}
|
|
403
|
|
# Is first bit vector subset of second bit vector?
#
# For a bit vector to be a subset of another bit vector, both vectors must be of
# the same size and the bit positions set in first vector must also be set in the
# second bit vector.
#
# This functionality can be either invoked as a class function or an object method.
# Returns 1 or 0.
#
sub IsSubSet ($$) {
  my($VectorA, $VectorB) = @_;

  return 0 if $VectorA->GetSize() != $VectorB->GetSize();

  # A is a subset of B when ANDing with B does not clear any bit of A...
  my $CommonBitsVector = $VectorA & $VectorB;
  my $IsSubSet = ($VectorA->GetNumOfSetBits() == $CommonBitsVector->GetNumOfSetBits());

  return $IsSubSet ? 1 : 0;
}
|
|
424
|
|
# Return a string containing vector values: size information, number of on bits,
# bit density and the bit vector itself. SpecifiedSize is listed only when it
# differs from the bit vector size...
sub StringifyFingerprintsBitVector {
  my $This = shift;

  # BitVector size information...
  my $Text = ($This->{SpecifiedSize} != $This->GetSize())
    ? "SpecifiedSize: " . $This->{SpecifiedSize} . "; BitVectorSize: " . $This->GetSize()
    : "BitVectorSize: " . $This->GetSize();

  my $NumOfSetBits = $This->GetNumOfSetBits();
  my $BitDensity = $This->GetFingerprintsBitDensity();
  $Text .= "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";

  # BitVector values...
  $Text .= "; BitVector: " . $This->StringifyBitVector();

  return $Text;
}
|
|
449
|
|
450 # For two fingerprints bit vectors A and B of same size, let:
|
|
451 #
|
|
452 # Na = Number of bits set to "1" in A
|
|
453 # Nb = Number of bits set to "1" in B
|
|
454 # Nc = Number of bits set to "1" in both A and B
|
|
455 # Nd = Number of bits set to "0" in both A and B
|
|
456 #
|
|
457 # Nt = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
|
|
458 #
|
|
459 # Na - Nc = Number of bits set to "1" in A but not in B
|
|
460 # Nb - Nc = Number of bits set to "1" in B but not in A
|
|
461 #
|
|
462 # Various similarity coefficients [ Ref 40 - 42 ] for a pair of bit vectors A and B are
|
|
463 # defined as follows:
|
|
464 #
|
|
465 # . BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as Buser )
|
|
466 #
|
|
467 # . Buser: ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as BaroniUrbani )
|
|
468 #
|
|
469 # . Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai)
|
|
470 #
|
|
471 # . Dice: (2 * Nc) / ( Na + Nb )
|
|
472 #
|
|
473 # . Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb)
|
|
474 #
|
|
475 # . Forbes: ( Nt * Nc ) / ( Na * Nb )
|
|
476 #
|
|
477 # . Fossum: ( Nt * ( Nc - 1/2 ) ** 2 ) / ( Na * Nb )
|
|
478 #
|
|
479 # . Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
|
|
480 #
|
|
481 # . Jaccard: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto)
|
|
482 #
|
|
483 # . Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc )
|
|
484 #
|
|
485 # . Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) ) = 0.5 * ( Nc / Na + Nc / Nb )
|
|
486 #
|
|
487 # . Matching: ( Nc + Nd ) / Nt
|
|
488 #
|
|
489 # . McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / ( Na * Nb )
|
|
490 #
|
|
491 # . Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine)
|
|
492 #
|
|
493 # . Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
|
|
494 #
|
|
495 # . RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc) + ( Nb - Nc) + Nt) = ( Nc + Nd ) / ( Na + Nb - 2Nc + Nt)
|
|
496 #
|
|
497 # . RussellRao: Nc / Nt
|
|
498 #
|
|
499 # . Simpson: Nc / MIN ( Na, Nb)
|
|
500 #
|
|
501 # . SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc) + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc )
|
|
502 #
|
|
503 # . SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
|
|
504 #
|
|
505 # . SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc )
|
|
506 #
|
|
507 # . Tanimoto: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard)
|
|
508 #
|
|
509 # . Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb ) + Nb)
|
|
510 #
|
|
511 # . Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) ) )
|
|
512 #
|
|
513 #
|
|
514 # Values of Tanimoto/Jaccard and Tversky coefficients are dependent on only those bits which
|
|
515 # are set to "1" in both A and B. In order to take into account all bit positions, modified versions
|
|
516 # of Tanimoto [ Ref. 42 ] and Tversky [ Ref. 43 ] have been developed.
|
|
517 #
|
|
518 # Let:
|
|
519 #
|
|
520 # Na' = Number of bits set to "0" in A
|
|
521 # Nb' = Number of bits set to "0" in B
|
|
522 # Nc' = Number of bits set to "0" in both A and B
|
|
523 #
|
|
524 # . Tanimoto': Nc' / ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' )
|
|
525 #
|
|
526 # . Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' ) + Nb')
|
|
527 #
|
|
528 # Then:
|
|
529 #
|
|
530 # . WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto'
|
|
531 #
|
|
532 # . WeightedTversky = beta * Tversky + (1 - beta) * Tversky'
|
|
533 #
|
|
534 #
|
|
535
|
|
# Calculate BaroniUrbani similarity coefficient for two same size bit vectors. It is
# the same as the Buser coefficient and is delegated to BuserSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub BaroniUrbaniSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return BuserSimilarityCoefficient($VectorA, $VectorB);
}
|
|
545
|
|
# Calculate Buser similarity coefficient for two same size bit vectors:
#
#   ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) )
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub BuserSimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  # Nt is not part of this formula, so it is not calculated; the square root term
  # shared by numerator and denominator is calculated once...
  my $SqrtNcNd = sqrt($Nc*$Nd);

  my $Numerator = $SqrtNcNd + $Nc;
  my $Denominator = $SqrtNcNd + ($Na - $Nc) + ($Nb - $Nc ) + $Nc;

  return $Denominator ? ($Numerator/$Denominator) : 0;
}
|
|
563
|
|
# Calculate Cosine similarity coefficient for two same size bit vectors:
#
#   Nc / SQRT ( Na * Nb )
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CosineSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = sqrt($Na*$Nb);
  return 0 unless $Divisor;

  return $Nc/$Divisor;
}
|
|
579
|
|
# Calculate Dice similarity coefficient for two same size bit vectors:
#
#   ( 2 * Nc ) / ( Na + Nb )
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $Na + $Nb;
  return 0 unless $Divisor;

  return (2*$Nc)/$Divisor;
}
|
|
595
|
|
# Calculate Dennis similarity coefficient for two same size bit vectors:
#
#   ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb )
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DennisSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Divisor = sqrt($Nt*$Na*$Nb);
  return 0 unless $Divisor;

  my $Dividend = $Nc*$Nd - (($Na - $Nc)*($Nb - $Nc));

  return $Dividend/$Divisor;
}
|
|
613
|
|
# Calculate Forbes similarity coefficient for two same size bit vectors:
#
#   ( Nt * Nc ) / ( Na * Nb )
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub ForbesSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Divisor = $Na*$Nb;
  return 0 unless $Divisor;

  return ($Nt*$Nc)/$Divisor;
}
|
|
631
|
|
# Calculate Fossum similarity coefficient for two same size bit vectors:
#
#   ( Nt * ( Nc - 1/2 ) ** 2 ) / ( Na * Nb )
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub FossumSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Divisor = $Na*$Nb;
  return 0 unless $Divisor;

  my $Dividend = $Nt*(($Nc - 0.5)** 2);

  return $Dividend/$Divisor;
}
|
|
649
|
|
# Calculate Hamann similarity coefficient for two same size bit vectors:
#
#   ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub HamannSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  return 0 unless $Nt;

  my $Dividend = ($Nc + $Nd ) - ($Na - $Nc) - ($Nb - $Nc);

  return $Dividend/$Nt;
}
|
|
667
|
|
# Calculate Jacard similarity coefficient for two same size bit vectors. It is the
# same as the Tanimoto coefficient and is delegated to TanimotoSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub JacardSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return TanimotoSimilarityCoefficient($VectorA, $VectorB);
}
|
|
677
|
|
# Calculate Kulczynski1 similarity coefficient for two same size bit vectors:
#
#   Nc / ( Na + Nb - 2Nc )
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub Kulczynski1SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $Na + $Nb - 2*$Nc;
  return 0 unless $Divisor;

  return $Nc/$Divisor;
}
|
|
693
|
|
# Calculate Kulczynski2 similarity coefficient for two same size bit vectors:
#
#   0.5 * ( Na * Nc + Nb * Nc ) / ( Na * Nb ) = 0.5 * ( Nc / Na + Nc / Nb )
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub Kulczynski2SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $Na*$Nb;
  return 0 unless $Divisor;

  my $Dividend = 0.5*($Na*$Nc + $Nb*$Nc);

  return $Dividend/$Divisor;
}
|
|
709
|
|
# Calculate Matching similarity coefficient for two same size bit vectors:
#
#   ( Nc + Nd ) / Nt
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub MatchingSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  return 0 unless $Nt;

  return ($Nc + $Nd)/$Nt;
}
|
|
727
|
|
# Calculate McConnaughey similarity coefficient for two same size bit vectors:
#
#   ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc ) ) / ( Na * Nb )
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub McConnaugheySimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $Na*$Nb;
  return 0 unless $Divisor;

  my $Dividend = $Nc**2 - (($Na - $Nc)*($Nb - $Nc));

  return $Dividend/$Divisor;
}
|
|
743
|
|
# Calculate Ochiai similarity coefficient for two same size bit vectors. It is the
# same as the Cosine coefficient and is delegated to CosineSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub OchiaiSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return CosineSimilarityCoefficient($VectorA, $VectorB);
}
|
|
753
|
|
# Calculate Pearson similarity coefficient for two same size bit vectors:
#
#   ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) /
#       SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub PearsonSimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  # Nt is not part of this formula, so it is not calculated...
  my $Numerator = ($Nc*$Nd ) - (($Na - $Nc)*($Nb - $Nc));
  my $Denominator = sqrt($Na*$Nb*($Na - $Nc + $Nd )*($Nb - $Nc + $Nd));

  return $Denominator ? ($Numerator/$Denominator) : 0;
}
|
|
771
|
|
# Calculate RogersTanimoto similarity coefficient for two same size bit vectors:
#
#   ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) + Nt )
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub RogersTanimotoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Divisor = ($Na - $Nc) + ($Nb - $Nc) + $Nt;
  return 0 unless $Divisor;

  return ($Nc + $Nd)/$Divisor;
}
|
|
789
|
|
# Calculate RussellRao similarity coefficient for two same size bit vectors:
#
#   Nc / Nt
#
# Returns 0 when the denominator is 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub RussellRaoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  return 0 unless $Nt;

  return $Nc/$Nt;
}
|
|
807
|
|
# Calculate Simpson similarity coefficient for two same size bit vectors:
#
#   Nc / MIN ( Na, Nb )
#
# The value is 0 whenever the smaller of the two set bit counts is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SimpsonSimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

  # min is not defined in this block; it must already be in scope in this package...
  my $SmallerSetCount = min($SetInA, $SetInB);
  if (!$SmallerSetCount) {
    return 0;
  }

  return $SetInBoth/$SmallerSetCount;
}
|
|
823
|
|
# Calculate SkoalSneath1 similarity coefficient for two same size bit vectors:
#
#   Nc / ( Nc + 2 * ( Na - Nc ) + 2 * ( Nb - Nc ) )
#
# The value is 0 whenever the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath1SimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

  # Bits set in only one of the two bit vectors are weighted twice...
  my $Divisor = $SetInBoth + 2*($SetInA - $SetInBoth) + 2*($SetInB - $SetInBoth);

  my $Coefficient = 0;
  if ($Divisor) {
    $Coefficient = $SetInBoth/$Divisor;
  }

  return $Coefficient;
}
|
|
839
|
|
# Calculate SkoalSneath2 similarity coefficient for two same size bit vectors:
#
#   ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )    with Nt = Na + Nb - Nc + Nd
#
# The value is 0 whenever the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath2SimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);
  my $ClearInBoth = _GetNumOfCommonClearBits($BitVectorA, $BitVectorB);
  my $TotalBits = $SetInA + $SetInB - $SetInBoth + $ClearInBoth;

  my $DoubledMatches = 2*$SetInBoth + 2*$ClearInBoth;
  my $Divisor = $SetInBoth + $ClearInBoth + $TotalBits;

  if (!$Divisor) {
    return 0;
  }

  return $DoubledMatches/$Divisor;
}
|
|
857
|
|
# Calculate SkoalSneath3 similarity coefficient for two same size bit vectors:
#
#   ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) )
#
# Na and Nb are the numbers of bits set in A and B, Nc is the number of bits set
# in both, and Nd is the number of bits clear in both. The value is 0 whenever
# the denominator is zero, i.e. when no bit is set in only one of the vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath3SimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  my $Numerator = $Nc + $Nd;
  my $Denominator = ($Na - $Nc) + ($Nb - $Nc);

  return $Denominator ? ($Numerator/$Denominator) : 0;
}
|
|
875
|
|
# Calculate Tanimoto similarity coefficient for two same size bit vectors:
#
#   Nc / ( Na + Nb - Nc )
#
# The value is 0 whenever the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TanimotoSimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

  # Number of bits set in at least one of the two bit vectors...
  my $SetInEither = $SetInA + $SetInB - $SetInBoth;
  if (!$SetInEither) {
    return 0;
  }

  return $SetInBoth/$SetInEither;
}
|
|
891
|
|
# Calculate Tversky similarity coefficient for two same size bit vectors:
#
#   Nc / ( Alpha * ( Na - Nb ) + Nb )
#
# Alpha must be defined and lie in the range 0 to 1; otherwise the call croaks.
# The value is 0 whenever the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TverskySimilarityCoefficient ($$$) {
  my($BitVectorA, $BitVectorB, $Alpha) = @_;

  # Validate Alpha before doing any bit counting...
  my $AlphaIsValid = defined($Alpha) && $Alpha >= 0 && $Alpha <= 1;
  if (!$AlphaIsValid) {
    croak "Error: ${ClassName}->TverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }

  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

  my $Divisor = $Alpha*($SetInA - $SetInB) + $SetInB;
  if (!$Divisor) {
    return 0;
  }

  return $SetInBoth/$Divisor;
}
|
|
911
|
|
# Calculate Yule similarity coefficient for two same size bit vectors:
#
#   ( Nc*Nd - (Na - Nc)*(Nb - Nc) ) / ( Nc*Nd + (Na - Nc)*(Nb - Nc) )
#
# Na and Nb are the numbers of bits set in A and B, Nc is the number of bits set
# in both, and Nd is the number of bits clear in both. The value is 0 whenever
# the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub YuleSimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  # Product of agreeing counts and product of disagreeing counts...
  my $AgreementProduct = $Nc*$Nd;
  my $DisagreementProduct = ($Na - $Nc)*($Nb - $Nc);

  my $Numerator = $AgreementProduct - $DisagreementProduct;
  my $Denominator = $AgreementProduct + $DisagreementProduct;

  return $Denominator ? ($Numerator/$Denominator) : 0;
}
|
|
929
|
|
# Calculate WeightedTanimoto similarity coefficient for two same size bit vectors:
#
#   WeightedTanimoto = Beta * Tanimoto + ( 1 - Beta ) * Tanimoto'
#
# Tanimoto is calculated from the set bits and Tanimoto' from the clear bits, each
# as Nc / ( Na + Nb - Nc ); either term is 0 whenever its denominator is zero.
#
# Beta must be defined and lie in the range 0 to 1; otherwise the call croaks.
#
# This functionality can be either invoked as a class function or an object method.
#
sub WeightedTanimotoSimilarityCoefficient ($$$) {
  my($BitVectorA, $BitVectorB, $Beta) = @_;

  # Validate Beta before doing any bit counting...
  my $BetaIsValid = defined($Beta) && $Beta >= 0 && $Beta <= 1;
  if (!$BetaIsValid) {
    croak "Error: ${ClassName}->WeightedTanimotoSimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  # Tanimoto for set bits...
  my($SetInA, $SetInB, $SetInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);
  my $SetDivisor = $SetInA + $SetInB - $SetInBoth;
  my $SetBitsTanimoto = 0;
  if ($SetDivisor) {
    $SetBitsTanimoto = $SetInBoth/$SetDivisor;
  }

  # Tanimoto for clear bits...
  my($ClearInA, $ClearInB, $ClearInBoth) = _GetNumOfIndividualAndCommonClearBits($BitVectorA, $BitVectorB);
  my $ClearDivisor = $ClearInA + $ClearInB - $ClearInBoth;
  my $ClearBitsTanimoto = 0;
  if ($ClearDivisor) {
    $ClearBitsTanimoto = $ClearInBoth/$ClearDivisor;
  }

  return $Beta*$SetBitsTanimoto + (1 - $Beta)*$ClearBitsTanimoto;
}
|
|
960
|
|
# Calculate WeightedTversky similarity coefficient for two same size bit vectors:
#
#   WeightedTversky = Beta * Tversky + ( 1 - Beta ) * Tversky'
#
# Tversky is calculated from the set bits and Tversky' from the clear bits, each
# as Nc / ( Alpha * ( Na - Nb ) + Nb ); either term is 0 whenever its denominator
# is zero.
#
# Both Alpha and Beta must be defined and lie in the range 0 to 1; otherwise the
# call croaks.
#
# This functionality can be either invoked as a class function or an object method.
# The prototype declares four scalars - two bit vectors, Alpha and Beta - so that
# the class function form accepts all the required arguments.
#
sub WeightedTverskySimilarityCoefficient ($$$$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Alpha, $Beta) = @_;
  my($Na, $Nb, $Nc, $TverskyForSetBits, $TverskyForClearBits, $Numerator, $Denominator, $WeightedTversky);

  if (!(defined($Alpha) && ($Alpha >= 0 && $Alpha <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }
  if (!(defined($Beta) && ($Beta >= 0 && $Beta <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  # Get Tversky for set bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Alpha*($Na - $Nb) + $Nb;
  $TverskyForSetBits = $Denominator ? ($Numerator/$Denominator) : 0;

  # Get Tversky for clear bits; Na, Nb and Nc now hold clear bit counts...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Alpha*($Na - $Nb) + $Nb;
  $TverskyForClearBits = $Denominator ? ($Numerator/$Denominator) : 0;

  # Beta weighs the set bits term against the clear bits term...
  $WeightedTversky = $Beta*$TverskyForSetBits + (1 - $Beta)*$TverskyForClearBits;

  return $WeightedTversky;
}
|
|
994
|
|
# Get number of set bits in bit vector A (Na), in bit vector B (Nb) and in both
# A and B (Nc) to be used for similarity coefficient calculations...
#
# Returns the list (Na, Nb, Nc).
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # Number of bits set to "1" in A
  my $Na = $FingerprintsBitVectorA->GetNumOfSetBits();

  # Number of bits set to "1" in B
  my $Nb = $FingerprintsBitVectorB->GetNumOfSetBits();

  # Number of bits set to "1" in both A and B: count set bits in A & B
  my $NcBitVector = $FingerprintsBitVectorA & $FingerprintsBitVectorB;
  my $Nc = $NcBitVector->GetNumOfSetBits();

  return ($Na, $Nb, $Nc);
}
|
|
1014
|
|
# Get number of bits clear in both bit vector A and B (Nd) to be used for
# similarity coefficient calculations...
#
# The count is taken from the set bits of ~A & ~B and then reduced by the clear
# bits correction of the first of A and B, if any, which requires one.
#
sub _GetNumOfCommonClearBits ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  # Bits set to "0" in both A and B show up as set bits in ~A & ~B
  my $ClearInBothBitVector = ~$BitVectorA & ~$BitVectorB;
  my $ClearInBoth = $ClearInBothBitVector->GetNumOfSetBits();

  # Correct for number of clear bits used for padding; applied at most once...
  for my $BitVector ($BitVectorA, $BitVectorB) {
    if (_IsNumOfClearBitsCorrectionRequired($BitVector)) {
      $ClearInBoth -= _GetNumOfClearBitsCorrection($BitVector);
      last;
    }
  }

  return $ClearInBoth;
}
|
|
1035
|
|
# Get number of clear bits in bit vector A (Na), in bit vector B (Nb) and in both
# A and B (Nc) to be used for similarity coefficient calculations involving clear
# bits...
#
# The individual counts are reduced by the clear bits correction for padding
# whenever a bit vector requires it. Returns the list (Na, Nb, Nc).
#
sub _GetNumOfIndividualAndCommonClearBits ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # Number of bits set to "0" in A, corrected for clear bits used for padding...
  my $Na = $FingerprintsBitVectorA->GetNumOfClearBits();
  if (_IsNumOfClearBitsCorrectionRequired($FingerprintsBitVectorA)) {
    $Na = $Na - _GetNumOfClearBitsCorrection($FingerprintsBitVectorA);
  }

  # Number of bits set to "0" in B, corrected for clear bits used for padding...
  my $Nb = $FingerprintsBitVectorB->GetNumOfClearBits();
  if (_IsNumOfClearBitsCorrectionRequired($FingerprintsBitVectorB)) {
    $Nb = $Nb - _GetNumOfClearBitsCorrection($FingerprintsBitVectorB);
  }

  # Number of bits set to "0" in both A and B
  my $Nc = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  return ($Na, $Nb, $Nc);
}
|
|
1063
|
|
# Get the number of extra bits, Size minus SpecifiedSize, to be removed from the
# number of clear bits.
#
# As noted for this class: irrespective of specified size, Perl functions used to
# handle bit data in BitVector class automatically set the size to the next
# nearest power of 2 and clear the extra bits.
#
# SpecifiedSize is used by this class to process any arbitrary size during
# similarity coefficient calculations.
#
# Assuming the FingerprintsBitVector class only manipulates bits up to specified
# size, a correction for the extra bits added by BitVector class needs to be
# applied to number of clear bits.
#
sub _GetNumOfClearBitsCorrection {
  my($BitVector) = @_;

  my $ActualSize = $BitVector->{Size};
  my $RequestedSize = $BitVector->{SpecifiedSize};

  return $ActualSize - $RequestedSize;
}
|
|
1080
|
|
# Is number of clear bits correction required?
#
# Returns 1 when Size of the bit vector is greater than its SpecifiedSize and 0
# otherwise.
#
sub _IsNumOfClearBitsCorrectionRequired {
  my($BitVector) = @_;

  if ($BitVector->{Size} > $BitVector->{SpecifiedSize}) {
    return 1;
  }

  return 0;
}
|
|
1088
|
|
1089
|
|
1090 1;
|
|
1091
|
|
1092 __END__
|
|
1093
|
|
1094 =head1 NAME
|
|
1095
|
|
1096 FingerprintsBitVector
|
|
1097
|
|
1098 =head1 SYNOPSIS
|
|
1099
|
|
1100 use Fingerprints::FingerprintsBitVector;
|
|
1101
|
|
1102 use Fingerprints::FingerprintsBitVector qw(:coefficients);
|
|
1103
|
|
1104 use Fingerprints::FingerprintsBitVector qw(:all);
|
|
1105
|
|
1106 =head1 DESCRIPTION
|
|
1107
|
|
1108 B<FingerprintsBitVector> class provides the following methods:
|
|
1109
|
|
1110 new, BaroniUrbaniSimilarityCoefficient, BuserSimilarityCoefficient,
|
|
1111 CosineSimilarityCoefficient, DennisSimilarityCoefficient,
|
|
1112 DiceSimilarityCoefficient, FoldFingerprintsBitVectorByDensity,
|
|
1113 FoldFingerprintsBitVectorBySize, ForbesSimilarityCoefficient,
|
|
1114 FossumSimilarityCoefficient, GetBitsAsBinaryString, GetBitsAsDecimalString,
|
|
1115 GetBitsAsHexadecimalString, GetBitsAsOctalString, GetBitsAsRawBinaryString,
|
|
1116 GetDescription, GetFingerprintsBitDensity, GetID, GetSpecifiedSize,
|
|
1117 GetSupportedSimilarityCoefficients, GetVectorType, HamannSimilarityCoefficient,
|
|
1118 IsFingerprintsBitVector, IsSubSet, JacardSimilarityCoefficient,
|
|
1119 Kulczynski1SimilarityCoefficient, Kulczynski2SimilarityCoefficient,
|
|
1120 MatchingSimilarityCoefficient, McConnaugheySimilarityCoefficient,
|
|
1121 NewFromBinaryString, NewFromDecimalString, NewFromHexadecimalString,
|
|
1122 NewFromOctalString, NewFromRawBinaryString, OchiaiSimilarityCoefficient,
|
|
1123 PearsonSimilarityCoefficient, RogersTanimotoSimilarityCoefficient,
|
|
1124 RussellRaoSimilarityCoefficient, SetDescription, SetID, SetSpecifiedSize,
|
|
1125 SetVectorType, SimpsonSimilarityCoefficient, SkoalSneath1SimilarityCoefficient,
|
|
1126 SkoalSneath2SimilarityCoefficient, SkoalSneath3SimilarityCoefficient,
|
|
1127 StringifyFingerprintsBitVector, TanimotoSimilarityCoefficient,
|
|
1128 TverskySimilarityCoefficient, WeightedTanimotoSimilarityCoefficient,
|
|
1129 WeightedTverskySimilarityCoefficient, YuleSimilarityCoefficient
|
|
1130
|
|
1131 The methods available to create fingerprints bit vector from strings and to calculate similarity
|
|
1132 coefficient between two bit vectors can also be invoked as class functions.
|
|
1133
|
|
1134 B<FingerprintsBitVector> class is derived from B<BitVector> class which provides the functionality
|
|
1135 to manipulate bits.
|
|
1136
|
|
1137 For two fingerprints bit vectors A and B of same size, let:
|
|
1138
|
|
1139 Na = Number of bits set to "1" in A
|
|
1140 Nb = Number of bits set to "1" in B
|
|
1141 Nc = Number of bits set to "1" in both A and B
|
|
1142 Nd = Number of bits set to "0" in both A and B
|
|
1143
|
|
1144 Nt = Number of bits set to "1" or "0" in A or B (Size of A or B)
|
|
1145 Nt = Na + Nb - Nc + Nd
|
|
1146
|
|
1147 Na - Nc = Number of bits set to "1" in A but not in B
|
|
1148 Nb - Nc = Number of bits set to "1" in B but not in A
|
|
1149
|
|
1150 Then, various similarity coefficients [ Ref. 40 - 42 ] for a pair of bit vectors A and B are
|
|
1151 defined as follows:
|
|
1152
|
|
1153 BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as Buser )
|
|
1154
|
|
1155 Buser: ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as BaroniUrbani )
|
|
1156
|
|
1157 Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai)
|
|
1158
|
|
1159 Dice: (2 * Nc) / ( Na + Nb )
|
|
1160
|
|
1161 Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb)
|
|
1162
|
|
1163 Forbes: ( Nt * Nc ) / ( Na * Nb )
|
|
1164
|
|
Fossum: ( Nt * ( ( Nc - 1/2 ) ** 2 ) ) / ( Na * Nb )
|
|
1166
|
|
1167 Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
|
|
1168
|
|
1169 Jaccard: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto)
|
|
1170
|
|
1171 Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc )
|
|
1172
|
|
1173 Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) )
|
|
1174 = 0.5 * ( Nc / Na + Nc / Nb )
|
|
1175
|
|
1176 Matching: ( Nc + Nd ) / Nt
|
|
1177
|
|
1178 McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / ( Na * Nb )
|
|
1179
|
|
1180 Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine)
|
|
1181
|
|
Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
|
|
1183
|
|
1184 RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc) + ( Nb - Nc) + Nt) = ( Nc + Nd ) / ( Na + Nb - 2Nc + Nt)
|
|
1185
|
|
1186 RussellRao: Nc / Nt
|
|
1187
|
|
1188 Simpson: Nc / MIN ( Na, Nb)
|
|
1189
|
|
1190 SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc) + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc )
|
|
1191
|
|
1192 SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
|
|
1193
|
|
1194 SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc )
|
|
1195
|
|
1196 Tanimoto: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard)
|
|
1197
|
|
1198 Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb ) + Nb)
|
|
1199
|
|
1200 Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) ) )
|
|
1201
|
|
The values of Tanimoto/Jaccard and Tversky coefficients are dependent on only those bits which
are set to "1" in both A and B. In order to take into account all bit positions, modified versions
of Tanimoto [ Ref. 42 ] and Tversky [ Ref. 43 ] have been developed.
|
|
1205
|
|
1206 Let:
|
|
1207
|
|
1208 Na' = Number of bits set to "0" in A
|
|
1209 Nb' = Number of bits set to "0" in B
|
|
1210 Nc' = Number of bits set to "0" in both A and B
|
|
1211
|
|
1212 Tanimoto': Nc' / ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' )
|
|
1213
|
|
1214 Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' ) + Nb')
|
|
1215
|
|
1216 Then:
|
|
1217
|
|
1218 WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto'
|
|
1219
|
|
1220 WeightedTversky = beta * Tversky + (1 - beta) * Tversky'
|
|
1221
|
|
1222 =head2 METHODS
|
|
1223
|
|
1224 =over 4
|
|
1225
|
|
1226 =item B<new>
|
|
1227
|
|
1228 $NewFPBitVector = new Fingerprints::FingerprintsBitVector($Size);
|
|
1229
|
|
1230 Creates a new I<FingerprintsBitVector> object of size I<Size> and returns newly created
|
|
1231 B<FingerprintsBitVector>. Bit numbers range from 0 to 1 less than I<Size>.
|
|
1232
|
|
1233 =item B<BaroniUrbaniSimilarityCoefficient>
|
|
1234
|
|
1235 $Value = $FingerprintsBitVector->BaroniUrbaniSimilarityCoefficient(
|
|
1236 $OtherFingerprintBitVector);
|
|
1237 $Value = Fingerprints::FingerprintsBitVector::
|
|
1238 BaroniUrbaniSimilarityCoefficient(
|
|
1239 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1240
|
|
1241 Returns value of I<BaroniUrbani> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1242
|
|
1243 =item B<BuserSimilarityCoefficient>
|
|
1244
|
|
1245 $Value = $FingerprintsBitVector->BuserSimilarityCoefficient(
|
|
1246 $OtherFingerprintBitVector);
|
|
1247 $Value = Fingerprints::FingerprintsBitVector::BuserSimilarityCoefficient(
|
|
1248 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1249
|
|
1250 Returns value of I<Buser> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1251
|
|
1252 =item B<CosineSimilarityCoefficient>
|
|
1253
|
|
1254 $Value = $FingerprintsBitVector->CosineSimilarityCoefficient(
|
|
1255 $OtherFingerprintBitVector);
|
|
1256 $Value = Fingerprints::FingerprintsBitVector::CosineSimilarityCoefficient(
|
|
1257 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1258
|
|
1259 Returns value of I<Cosine> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1260
|
|
1261 =item B<DennisSimilarityCoefficient>
|
|
1262
|
|
1263 $Value = $FingerprintsBitVector->DennisSimilarityCoefficient(
|
|
1264 $OtherFingerprintBitVector);
|
|
1265 $Value = Fingerprints::FingerprintsBitVector::DennisSimilarityCoefficient(
|
|
1266 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1267
|
|
1268 Returns value of I<Dennis> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1269
|
|
1270 =item B<DiceSimilarityCoefficient>
|
|
1271
|
|
1272 $Value = $FingerprintsBitVector->DiceSimilarityCoefficient(
|
|
1273 $OtherFingerprintBitVector);
|
|
1274 $Value = Fingerprints::FingerprintsBitVector::DiceSimilarityCoefficient(
|
|
1275 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1276
|
|
1277 Returns value of I<Dice> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1278
|
|
1279 =item B<FoldFingerprintsBitVectorByDensity>
|
|
1280
|
|
1281 $FingerprintsBitVector->FoldFingerprintsBitVectorByDensity($Density);
|
|
1282
|
|
1283 Folds I<FingerprintsBitVector> by recursively reducing its size by half until bit density of set bits is
|
|
1284 greater than or equal to specified I<Density> and returns folded I<FingerprintsBitVector>.
|
|
1285
|
|
1286 =item B<FoldFingerprintsBitVectorBySize>
|
|
1287
|
|
1288 $FingerprintsBitVector->FoldFingerprintsBitVectorBySize($Size);
|
|
1289
|
|
1290 Folds I<FingerprintsBitVector> by recursively reducing its size by half until size is less than or equal to
|
|
1291 specified I<Size> and returns folded I<FingerprintsBitVector>.
|
|
1292
|
|
1293 =item B<ForbesSimilarityCoefficient>
|
|
1294
|
|
1295 $Value = $FingerprintsBitVector->ForbesSimilarityCoefficient(
|
|
1296 $OtherFingerprintBitVector);
|
|
1297 $Value = Fingerprints::FingerprintsBitVector::ForbesSimilarityCoefficient(
|
|
1298 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1299
|
|
1300 Returns value of I<Forbes> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1301
|
|
1302 =item B<FossumSimilarityCoefficient>
|
|
1303
|
|
1304 $Value = $FingerprintsBitVector->FossumSimilarityCoefficient(
|
|
1305 $OtherFingerprintBitVector);
|
|
1306 $Value = Fingerprints::FingerprintsBitVector::FossumSimilarityCoefficient(
|
|
1307 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1308
|
|
1309 Returns value of I<Fossum> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1310
|
|
1311 =item B<GetBitsAsBinaryString>
|
|
1312
|
|
1313 $BinaryASCIIString = $FingerprintsBitVector->GetBitsAsBinaryString();
|
|
1314
|
|
1315 Returns fingerprints as a binary ASCII string containing 0s and 1s.
|
|
1316
|
|
1317 =item B<GetBitsAsHexadecimalString>
|
|
1318
|
|
1319 $HexadecimalString = $FingerprintsBitVector->GetBitsAsHexadecimalString();
|
|
1320
|
|
1321 Returns fingerprints as a hexadecimal string.
|
|
1322
|
|
1323 =item B<GetBitsAsRawBinaryString>
|
|
1324
|
|
1325 $RawBinaryString = $FingerprintsBitVector->GetBitsAsRawBinaryString();
|
|
1326
|
|
1327 Returns fingerprints as a raw binary string containing packed bit values for each byte.
|
|
1328
|
|
1329 =item B<GetDescription>
|
|
1330
|
|
1331 $Description = $FingerprintsBitVector->GetDescription();
|
|
1332
|
|
1333 Returns a string containing description of fingerprints bit vector.
|
|
1334
|
|
1335 =item B<GetFingerprintsBitDensity>
|
|
1336
|
|
1337 $BitDensity = $FingerprintsBitVector->GetFingerprintsBitDensity();
|
|
1338
|
|
1339 Returns I<BitDensity> of I<FingerprintsBitVector> corresponding to bits set to 1s.
|
|
1340
|
|
1341 =item B<GetID>
|
|
1342
|
|
1343 $ID = $FingerprintsBitVector->GetID();
|
|
1344
|
|
1345 Returns I<ID> of I<FingerprintsBitVector>.
|
|
1346
|
|
1347 =item B<GetVectorType>
|
|
1348
|
|
1349 $VectorType = $FingerprintsBitVector->GetVectorType();
|
|
1350
|
|
1351 Returns I<VectorType> of I<FingerprintsBitVector>.
|
|
1352
|
|
1353 =item B<GetSpecifiedSize>
|
|
1354
|
|
1355 $Size = $FingerprintsBitVector->GetSpecifiedSize();
|
|
1356
|
|
1357 Returns value of specified size for bit vector.
|
|
1358
|
|
1359 =item B<GetSupportedSimilarityCoefficients>
|
|
1360
|
|
1361 @SimilarityCoefficient =
|
|
1362 Fingerprints::FingerprintsBitVector::GetSupportedSimilarityCoefficients();
|
|
1363
|
|
1364 Returns an array containing names of supported similarity coefficients.
|
|
1365
|
|
1366 =item B<HamannSimilarityCoefficient>
|
|
1367
|
|
1368 $Value = $FingerprintsBitVector->HamannSimilarityCoefficient(
|
|
1369 $OtherFingerprintBitVector);
|
|
1370 $Value = Fingerprints::FingerprintsBitVector::HamannSimilarityCoefficient(
|
|
1371 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1372
|
|
1373 Returns value of I<Hamann> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1374
|
|
1375 =item B<IsFingerprintsBitVector>
|
|
1376
|
|
1377 $Status = Fingerprints::FingerprintsBitVector::
|
|
1378 IsFingerprintsBitVector($Object);
|
|
1379
|
|
1380 Returns 1 or 0 based on whether I<Object> is a B<FingerprintsBitVector> object.
|
|
1381
|
|
1382 =item B<IsSubSet>
|
|
1383
|
|
1384 $Status = $FingerprintsBitVector->IsSubSet($OtherFPBitVector);
|
|
1385 $Status = Fingerprints::FingerprintsBitVector::IsSubSet(
|
|
1386 $FPBitVectorA, $FPBitVectorB);
|
|
1387
|
|
Returns 1 or 0 based on whether the first fingerprints bit vector is a subset of the second
fingerprints bit vector.
|
|
1390
|
|
1391 For a bit vector to be a subset of another bit vector, both vectors must be of
|
|
1392 the same size and the bit positions set in first vector must also be set in the
|
|
1393 second bit vector.
|
|
1394
|
|
1395 =item B<JacardSimilarityCoefficient>
|
|
1396
|
|
1397 $Value = $FingerprintsBitVector->JacardSimilarityCoefficient(
|
|
1398 $OtherFingerprintBitVector);
|
|
1399 $Value = Fingerprints::FingerprintsBitVector::JacardSimilarityCoefficient(
|
|
1400 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1401
|
|
1402 Returns value of I<Jacard> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1403
|
|
1404 =item B<Kulczynski1SimilarityCoefficient>
|
|
1405
|
|
1406 $Value = $FingerprintsBitVector->Kulczynski1SimilarityCoefficient(
|
|
1407 $OtherFingerprintBitVector);
|
|
1408 $Value = Fingerprints::FingerprintsBitVector::
|
|
1409 Kulczynski1SimilarityCoefficient(
|
|
1410 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1411
|
|
1412 Returns value of I<Kulczynski1> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1413
|
|
1414 =item B<Kulczynski2SimilarityCoefficient>
|
|
1415
|
|
1416 $Value = $FingerprintsBitVector->Kulczynski2SimilarityCoefficient(
|
|
1417 $OtherFingerprintBitVector);
|
|
1418 $Value = Fingerprints::FingerprintsBitVector::
|
|
1419 Kulczynski2SimilarityCoefficient(
|
|
1420 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1421
|
|
1422 Returns value of I<Kulczynski2> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1423
|
|
1424 =item B<MatchingSimilarityCoefficient>
|
|
1425
|
|
1426 $Value = $FingerprintsBitVector->MatchingSimilarityCoefficient(
|
|
1427 $OtherFingerprintBitVector);
|
|
1428 $Value = Fingerprints::FingerprintsBitVector::
|
|
1429 MatchingSimilarityCoefficient(
|
|
1430 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1431
|
|
1432 Returns value of I<Matching> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1433
|
|
1434 =item B<McConnaugheySimilarityCoefficient>
|
|
1435
|
|
1436 $Value = $FingerprintsBitVector->McConnaugheySimilarityCoefficient(
|
|
1437 $OtherFingerprintBitVector);
|
|
1438 $Value = Fingerprints::FingerprintsBitVector::
|
|
1439 McConnaugheySimilarityCoefficient(
|
|
1440 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1441
|
|
1442 Returns value of I<McConnaughey> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1443
|
|
1444 =item B<NewFromBinaryString>
|
|
1445
|
|
1446 $NewFPBitVector = $FingerprintsBitVector->NewFromBinaryString(
|
|
1447 $BinaryString);
|
|
1448 $NewFPBitVector = Fingerprints::FingerprintsBitVector::NewFromBinaryString(
|
|
1449 $BinaryString);
|
|
1450
|
|
1451 Creates a new I<FingerprintsBitVector> using I<BinaryString> and returns new
|
|
1452 B<FingerprintsBitVector> object.
|
|
1453
|
|
1454 =item B<NewFromHexadecimalString>
|
|
1455
|
|
1456 $NewFPBitVector = $FingerprintsBitVector->NewFromHexadecimalString(
|
|
1457 $HexdecimalString);
|
|
1458 $NewFPBitVector = Fingerprints::FingerprintsBitVector::
|
|
1459 NewFromHexadecimalString(
|
|
1460 $HexdecimalString);
|
|
1461
|
|
1462 Creates a new I<FingerprintsBitVector> using I<HexdecimalString> and returns new
|
|
1463 B<FingerprintsBitVector> object.
|
|
1464
|
|
1465 =item B<NewFromRawBinaryString>
|
|
1466
|
|
1467 $NewFPBitVector = $FingerprintsBitVector->NewFromRawBinaryString(
|
|
1468 $RawBinaryString);
|
|
1469 $NewFPBitVector = Fingerprints::FingerprintsBitVector::
|
|
1470 NewFromRawBinaryString(
|
|
1471 $RawBinaryString);
|
|
1472
|
|
1473 Creates a new I<FingerprintsBitVector> using I<RawBinaryString> and returns new
|
|
1474 B<FingerprintsBitVector> object.
|
|
1475
|
|
1476 =item B<OchiaiSimilarityCoefficient>
|
|
1477
|
|
1478 $Value = $FingerprintsBitVector->OchiaiSimilarityCoefficient(
|
|
1479 $OtherFingerprintBitVector);
|
|
1480 $Value = Fingerprints::FingerprintsBitVector::OchiaiSimilarityCoefficient(
|
|
1481 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1482
|
|
1483 Returns value of I<Ochiai> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1484
|
|
1485 =item B<PearsonSimilarityCoefficient>
|
|
1486
|
|
1487 $Value = $FingerprintsBitVector->PearsonSimilarityCoefficient(
|
|
1488 $OtherFingerprintBitVector);
|
|
1489 $Value = Fingerprints::FingerprintsBitVector::PearsonSimilarityCoefficient(
|
|
1490 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1491
|
|
1492 Returns value of I<Pearson> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1493
|
|
1494 =item B<RogersTanimotoSimilarityCoefficient>
|
|
1495
|
|
1496 $Value = $FingerprintsBitVector->RogersTanimotoSimilarityCoefficient(
|
|
1497 $OtherFingerprintBitVector);
|
|
1498 $Value = Fingerprints::FingerprintsBitVector::
|
|
1499 RogersTanimotoSimilarityCoefficient(
|
|
1500 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1501
|
|
1502 Returns value of I<RogersTanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1503
|
|
1504 =item B<RussellRaoSimilarityCoefficient>
|
|
1505
|
|
1506 $Value = $FingerprintsBitVector->RussellRaoSimilarityCoefficient(
|
|
1507 $OtherFingerprintBitVector);
|
|
1508 $Value = Fingerprints::FingerprintsBitVector::
|
|
1509 RussellRaoSimilarityCoefficient(
|
|
1510 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1511
|
|
1512 Returns value of I<RussellRao> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1513
|
|
1514 =item B<SetSpecifiedSize>
|
|
1515
|
|
1516 $FingerprintsBitVector->SetSpecifiedSize($Size);
|
|
1517
|
|
1518 Sets specified size for fingerprints bit vector.
|
|
1519
|
|
1520 Irrespective of specified size, Perl functions used to handle bit data in B<BitVector> class
|
|
1521 automatically set the size to the next nearest power of 2. I<SpecifiedSize> is used by
|
|
1522 B<FingerprintsBitVector> class to process any arbitrary size during similarity coefficient calculations.
|
|
1523
|
|
1524 =item B<SetDescription>
|
|
1525
|
|
1526 $FingerprintsBitVector->SetDescription($Description);
|
|
1527
|
|
1528 Sets I<Description> of fingerprints bit vector and returns I<FingerprintsBitVector>.
|
|
1529
|
|
1530 =item B<SetID>
|
|
1531
|
|
1532 $FingerprintsBitVector->SetID($ID);
|
|
1533
|
|
1534 Sets I<ID> of fingerprints bit vector and returns I<FingerprintsBitVector>.
|
|
1535
|
|
1536 =item B<SetVectorType>
|
|
1537
|
|
1538 $FingerprintsBitVector->SetVectorType($VectorType);
|
|
1539
|
|
1540 Sets I<VectorType> of fingerprints bit vector and returns I<FingerprintsBitVector>.
|
|
1541
|
|
1542 =item B<SimpsonSimilarityCoefficient>
|
|
1543
|
|
1544 $Value = $FingerprintsBitVector->SimpsonSimilarityCoefficient(
|
|
1545 $OtherFingerprintBitVector);
|
|
1546 $Value = Fingerprints::FingerprintsBitVector::SimpsonSimilarityCoefficient(
|
|
1547 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1548
|
|
1549 Returns value of I<Simpson> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1550
|
|
1551 =item B<SkoalSneath1SimilarityCoefficient>
|
|
1552
|
|
1553 $Value = $FingerprintsBitVector->SkoalSneath1SimilarityCoefficient(
|
|
1554 $OtherFingerprintBitVector);
|
|
1555 $Value = Fingerprints::FingerprintsBitVector::
|
|
1556 SkoalSneath1SimilarityCoefficient(
|
|
1557 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1558
|
|
1559 Returns value of I<SkoalSneath1> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1560
|
|
1561 =item B<SkoalSneath2SimilarityCoefficient>
|
|
1562
|
|
1563 $Value = $FingerprintsBitVector->SkoalSneath2SimilarityCoefficient(
|
|
1564 $OtherFingerprintBitVector);
|
|
1565 $Value = Fingerprints::FingerprintsBitVector::
|
|
1566 SkoalSneath2SimilarityCoefficient(
|
|
1567 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1568
|
|
1569 Returns value of I<SkoalSneath2> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1570
|
|
1571 =item B<SkoalSneath3SimilarityCoefficient>
|
|
1572
|
|
1573 $Value = $FingerprintsBitVector->SkoalSneath3SimilarityCoefficient(
|
|
1574 $OtherFingerprintBitVector);
|
|
1575 $Value = Fingerprints::FingerprintsBitVector::
|
|
1576 SkoalSneath3SimilarityCoefficient(
|
|
1577 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1578
|
|
1579 Returns value of I<SkoalSneath3> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1580
|
|
1581 =item B<StringifyFingerprintsBitVector>
|
|
1582
|
|
1583 $String = $FingerprintsBitVector->StringifyFingerprintsBitVector();
|
|
1584
|
|
1585 Returns a string containing information about I<FingerprintsBitVector> object.
|
|
1586
|
|
1587 =item B<TanimotoSimilarityCoefficient>
|
|
1588
|
|
1589 $Value = $FingerprintsBitVector->TanimotoSimilarityCoefficient(
|
|
1590 $OtherFingerprintBitVector);
|
|
1591 $Value = Fingerprints::FingerprintsBitVector::
|
|
1592 TanimotoSimilarityCoefficient(
|
|
1593 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1594
|
|
1595 Returns value of I<Tanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1596
|
|
1597 =item B<TverskySimilarityCoefficient>
|
|
1598
|
|
1599 $Value = $FingerprintsBitVector->TverskySimilarityCoefficient(
|
|
1600 $OtherFingerprintBitVector, $Alpha);
|
|
1601 $Value = Fingerprints::FingerprintsBitVector::
|
|
1602 TverskySimilarityCoefficient(
|
|
1603 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Alpha);
|
|
1604
|
|
1605 Returns value of I<Tversky> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1606
|
|
1607 =item B<WeightedTanimotoSimilarityCoefficient>
|
|
1608
|
|
1609 $Value =
|
|
1610 $FingerprintsBitVector->WeightedTanimotoSimilarityCoefficient(
|
|
1611 $OtherFingerprintBitVector, $Beta);
|
|
1612 $Value =
|
|
1613 Fingerprints::FingerprintsBitVector::
|
|
1614 WeightedTanimotoSimilarityCoefficient(
|
|
1615 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Beta);
|
|
1616
|
|
1617 Returns value of I<WeightedTanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1618
|
|
1619 =item B<WeightedTverskySimilarityCoefficient>
|
|
1620
|
|
1621 $Value =
|
|
1622 $FingerprintsBitVector->WeightedTverskySimilarityCoefficient(
|
|
1623 $OtherFingerprintBitVector, $Alpha, $Beta);
|
|
1624 $Value =
|
|
1625 Fingerprints::FingerprintsBitVector::
|
|
1626 WeightedTverskySimilarityCoefficient(
|
|
1627 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Alpha, $Beta);
|
|
1628
|
|
1629 Returns value of I<WeightedTversky> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1630
|
|
1631 =item B<YuleSimilarityCoefficient>
|
|
1632
|
|
1633 $Value = $FingerprintsBitVector->YuleSimilarityCoefficient(
|
|
1634 $OtherFingerprintBitVector);
|
|
1635 $Value = Fingerprints::FingerprintsBitVector::YuleSimilarityCoefficient(
|
|
1636 $FingerprintsBitVectorA, $FingerprintBitVectorB);
|
|
1637
|
|
1638 Returns value of I<Yule> similarity coefficient between two same size I<FingerprintsBitVectors>.
|
|
1639
|
|
1640 =back
|
|
1641
|
|
1642 =head1 AUTHOR
|
|
1643
|
|
1644 Manish Sud <msud@san.rr.com>
|
|
1645
|
|
1646 =head1 SEE ALSO
|
|
1647
|
|
1648 BitVector.pm, FingerprintsStringUtil.pm, FingerprintsVector.pm, Vector.pm
|
|
1649
|
|
1650 =head1 COPYRIGHT
|
|
1651
|
|
1652 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
1653
|
|
1654 This file is part of MayaChemTools.
|
|
1655
|
|
1656 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
1657 the terms of the GNU Lesser General Public License as published by the Free
|
|
1658 Software Foundation; either version 3 of the License, or (at your option)
|
|
1659 any later version.
|
|
1660
|
|
1661 =cut
|