Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/Fingerprints/FingerprintsBitVector.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 package Fingerprints::FingerprintsBitVector; | |
2 # | |
3 # $RCSfile: FingerprintsBitVector.pm,v $ | |
4 # $Date: 2015/02/28 20:48:54 $ | |
5 # $Revision: 1.27 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability of fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use Scalar::Util (); | |
33 use BitVector; | |
34 use MathUtil; | |
35 use TextUtil (); | |
36 | |
37 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
38 | |
@ISA = qw(BitVector Exporter);

# Names of the similarity coefficient functions; exportable as a group through
# the ':coefficients' tag...
my(@SimilarityCoefficients) = qw(
  BaroniUrbaniSimilarityCoefficient BuserSimilarityCoefficient CosineSimilarityCoefficient
  DiceSimilarityCoefficient DennisSimilarityCoefficient ForbesSimilarityCoefficient
  FossumSimilarityCoefficient HamannSimilarityCoefficient JacardSimilarityCoefficient
  Kulczynski1SimilarityCoefficient Kulczynski2SimilarityCoefficient MatchingSimilarityCoefficient
  McConnaugheySimilarityCoefficient OchiaiSimilarityCoefficient PearsonSimilarityCoefficient
  RogersTanimotoSimilarityCoefficient RussellRaoSimilarityCoefficient SimpsonSimilarityCoefficient
  SkoalSneath1SimilarityCoefficient SkoalSneath2SimilarityCoefficient SkoalSneath3SimilarityCoefficient
  TanimotoSimilarityCoefficient TverskySimilarityCoefficient YuleSimilarityCoefficient
  WeightedTanimotoSimilarityCoefficient WeightedTverskySimilarityCoefficient
);

# Names of the "new from string" constructors; exportable as a group through
# the ':new' tag...
my(@NewFromString) = qw(NewFromBinaryString NewFromHexadecimalString NewFromRawBinaryString);

@EXPORT = qw(IsFingerprintsBitVector);

# qw() does not interpolate arrays, so the list is built explicitly. Exporter
# requires every name used in %EXPORT_TAGS to also be present in @EXPORT or
# @EXPORT_OK; listing the function names here is what makes the tags usable.
@EXPORT_OK = ('GetSupportedSimilarityCoefficients', @NewFromString, @SimilarityCoefficients);

%EXPORT_TAGS = (
  new => [@NewFromString],
  coefficients => [@SimilarityCoefficients],
  all => [@EXPORT, @EXPORT_OK]
);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Objects stringify through StringifyFingerprintsBitVector...
use overload '""' => 'StringifyFingerprintsBitVector';
61 | |
# Class constructor...
#
# Creates the object through the parent class constructor, blesses it into the
# invoking class (the invocant may be a class name or an existing object) and
# records the requested size through _InitializeFingerprintsBitVector.
#
sub new {
  my $Class = shift;
  my($Size) = @_;

  my $This = $Class->SUPER::new($Size);
  bless $This, (ref($Class) || $Class);

  # Size is validated here; a missing or non-positive size croaks...
  $This->_InitializeFingerprintsBitVector($Size);

  return $This;
}
73 | |
# Initialize object data...
#
# Validates Size and stores it under the SpecifiedSize key of the object; croaks
# when Size is undefined or is not greater than zero.
#
# Note (carried over from the original documentation of this class):
#   . The class, BitVector, used to derive this class provides all the functionality
#     to manipulate bits.
#   . Irrespective of specified size, Perl functions used to handle bit data in
#     BitVector class automatically set the size to the next nearest power of 2.
#     SpecifiedSize is used by this class to process any arbitrary size during
#     similarity coefficient calculations.
#
sub _InitializeFingerprintsBitVector {
  my($This, $Size) = @_;

  croak "Error: ${ClassName}->new: FingerprintsBitVector object instantiated without specifying its size ..."
    unless defined $Size;

  croak "Error: ${ClassName}->new: Fingerprints bit vector size, $Size, must be a positive integer..."
    if $Size <= 0;

  # Specified size of fingerprints...
  $This->{SpecifiedSize} = $Size;

}
98 | |
# Initialize class ...
#
# Stores the name of the current package in the file-scoped $ClassName, which is
# used in error messages and in the class membership check.
#
sub _InitializeClass {
  return $ClassName = __PACKAGE__;
}
104 | |
# Set specified size...
#
# Stores SpecifiedSize on the object after checking that it is greater than zero
# and does not exceed the value kept under the object's Size key; croaks otherwise.
#
# Notes (carried over from the original documentation of this class):
#   Irrespective of specified size, Perl functions used to handle bit data in
#   BitVector class automatically set the size to the next nearest power of 2.
#   SpecifiedSize is used by this class to process any arbitrary size during
#   similarity coefficient calculations.
#
sub SetSpecifiedSize {
  my($This, $SpecifiedSize) = @_;

  my $IsValidSize = ($SpecifiedSize > 0 && $SpecifiedSize <= $This->{Size});
  unless ($IsValidSize) {
    croak "Error: ${ClassName}->SetSpecifiedSize: Specified size, $SpecifiedSize, is not valid:  It must be > 0 && <= ", $This->GetSize()," ...";
  }

  $This->{SpecifiedSize} = $SpecifiedSize;
}
121 | |
# Get specified size...
#
# Returns the value stored under the SpecifiedSize key of the object.
#
sub GetSpecifiedSize {
  my $This = shift;

  return $This->{SpecifiedSize};
}
128 | |
# Set ID...
#
# Stores Value under the ID key and returns the object so calls can be chained.
#
sub SetID {
  my $This = shift;
  my $Value = shift;

  $This->{ID} = $Value;

  return $This;
}
137 | |
# Get ID...
#
# Returns the stored ID, or the string 'None' when no ID key exists on the object.
#
sub GetID {
  my $This = shift;

  return $This->{ID} if exists $This->{ID};

  return 'None';
}
144 | |
# Set description...
#
# Stores Value under the Description key and returns the object so calls can be
# chained.
#
sub SetDescription {
  my $This = shift;
  my $Value = shift;

  $This->{Description} = $Value;

  return $This;
}
153 | |
# Get description...
#
# Returns the stored description, or the string 'No description available' when
# no Description key exists on the object.
#
sub GetDescription {
  my $This = shift;

  return $This->{Description} if exists $This->{Description};

  return 'No description available';
}
160 | |
# Set vector type...
#
# Stores Value under the VectorType key and returns the object so calls can be
# chained.
#
sub SetVectorType {
  my $This = shift;
  my $Value = shift;

  $This->{VectorType} = $Value;

  return $This;
}
169 | |
# Get vector type...
#
# Returns the stored vector type, or the string 'FingerprintsBitVector' when no
# VectorType key exists on the object.
#
sub GetVectorType {
  my $This = shift;

  return $This->{VectorType} if exists $This->{VectorType};

  return 'FingerprintsBitVector';
}
176 | |
# Create a new fingerprints bit vector using binary string. This functionality can be
# either invoked as a class function or an object method.
#
# When the first argument is a fingerprints bit vector object (method call), it is
# skipped and the next two arguments are used as the string and the bits order;
# otherwise the first two arguments are used.
#
sub NewFromBinaryString ($;$) {
  my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;

  my($String, $BitsOrder) = _IsFingerprintsBitVector($FirstParameter)
    ? ($SecondParameter, $ThirdParameter)
    : ($FirstParameter, $SecondParameter);

  return _NewFingerptinsBitVectorFromString('Binary', $String, $BitsOrder);
}
190 | |
# Create a new fingerprints bit vector using hexadecimal string. This functionality can be
# either invoked as a class function or an object method.
#
# When the first argument is a fingerprints bit vector object (method call), it is
# skipped and the next two arguments are used as the string and the bits order;
# otherwise the first two arguments are used.
#
sub NewFromHexadecimalString ($;$) {
  my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;

  my($String, $BitsOrder) = _IsFingerprintsBitVector($FirstParameter)
    ? ($SecondParameter, $ThirdParameter)
    : ($FirstParameter, $SecondParameter);

  return _NewFingerptinsBitVectorFromString('Hexadecimal', $String, $BitsOrder);
}
204 | |
# Creation of a fingerprints bit vector from an octal string is not supported:
# any call croaks.
#
sub NewFromOctalString ($) {
  my $ErrorMessage = "Error: ${ClassName}->NewFromOctalString: Creation of fingerprits bit vector from an octal string is not supported ...";

  croak $ErrorMessage;
}
212 | |
# Creation of a fingerprints bit vector from a decimal string is not supported:
# any call croaks.
#
sub NewFromDecimalString ($;$) {
  my $ErrorMessage = "Error: ${ClassName}->NewFromDecimalString: Creation of fingerprits bit vector from a decimal string is not supported ...";

  croak $ErrorMessage;
}
219 | |
# Create a new fingerprints bit vector using raw binary string. This functionality can be
# either invoked as a class function or an object method.
#
# When the first argument is a fingerprints bit vector object (method call), it is
# skipped and the next two arguments are used as the string and the bits order;
# otherwise the first two arguments are used.
#
sub NewFromRawBinaryString ($;$) {
  my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;

  my($String, $BitsOrder) = _IsFingerprintsBitVector($FirstParameter)
    ? ($SecondParameter, $ThirdParameter)
    : ($FirstParameter, $SecondParameter);

  return _NewFingerptinsBitVectorFromString('RawBinary', $String, $BitsOrder);
}
233 | |
# Create a new fingerprints bit vector from a string...
#
# Format is one of the format names passed by the NewFrom*String functions
# ('Binary', 'Hexadecimal' or 'RawBinary'). The size of the new vector is obtained
# from BitVector::_CalculateStringSizeInBits and its bits are then set from String
# using the optional BitsOrder.
#
# Returns the newly created fingerprints bit vector object.
#
sub _NewFingerptinsBitVectorFromString ($$;$) {
  my($Format, $String, $BitsOrder) = @_;

  my $Size = BitVector::_CalculateStringSizeInBits($Format, $String);

  # Direct method call instead of indirect object syntax ("new Class(...)"), which
  # is ambiguous to parse...
  my $FingerprintsBitVector = Fingerprints::FingerprintsBitVector->new($Size);
  $FingerprintsBitVector->_SetBitsAsString($Format, $String, $BitsOrder);

  return $FingerprintsBitVector;
}
248 | |
# Get fingerprint bits as a hexadecimal string...
#
# BitsOrder is optional and is handed on to _GetFingerprintBitsAsString.
#
sub GetBitsAsHexadecimalString {
  my $This = shift;
  my $BitsOrder = shift;

  return $This->_GetFingerprintBitsAsString('Hexadecimal', $BitsOrder);
}
256 | |
# Retrieval of fingerprint bits as an octal string is not supported: any call
# croaks.
#
sub GetBitsAsOctalString {
  my $ErrorMessage = "Error: ${ClassName}->GetBitsAsOctalString: Retrieval of fingerprits bits as an octal string is not supported ...";

  croak $ErrorMessage;
}
264 | |
# Retrieval of fingerprint bits as a decimal string is not supported: any call
# croaks.
#
sub GetBitsAsDecimalString {
  my($This, $BitsOrder) = @_;

  # The message names this method; it previously reported GetBitsAsOctalString...
  croak "Error: ${ClassName}->GetBitsAsDecimalString: Retrieval of fingerprits bits as a decimal string is not supported ...";
}
272 | |
# Get fingerprint bits as a binary string containing 1s and 0s...
#
# BitsOrder is optional and is handed on to _GetFingerprintBitsAsString.
#
sub GetBitsAsBinaryString {
  my $This = shift;
  my $BitsOrder = shift;

  return $This->_GetFingerprintBitsAsString('Binary', $BitsOrder);
}
280 | |
# Get fingerprint bits as a raw binary string...
#
# No bits order is passed on for this format.
#
sub GetBitsAsRawBinaryString {
  my $This = shift;

  return $This->_GetFingerprintBitsAsString('RawBinary');
}
288 | |
# Return fingerprint bits as a string...
#
# An undefined or false BitsOrder is replaced by 'Ascending' before the request
# is handed to the object's _GetBitsAsString method.
#
sub _GetFingerprintBitsAsString {
  my($This, $Format, $BitsOrder) = @_;

  $BitsOrder ||= 'Ascending';

  return $This->_GetBitsAsString($Format, $BitsOrder);
}
298 | |
# Is it a fingerprints bit vector object?
#
# Exported wrapper around _IsFingerprintsBitVector; returns 1 or 0.
#
sub IsFingerprintsBitVector ($) {
  my $Object = shift;

  return _IsFingerprintsBitVector($Object);
}
305 | |
# Is it a fingerprints bit vector object?
#
# Returns 1 when Object is a blessed reference whose class is, or derives from,
# the class named in $ClassName; returns 0 otherwise.
#
sub _IsFingerprintsBitVector {
  my($Object) = @_;

  # Anything that is not a blessed reference can't be an object of this class...
  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
312 | |
# Return the names of supported similarity coefficient functions: the contents of
# the file-scoped @SimilarityCoefficients array.
#
sub GetSupportedSimilarityCoefficients () {
  return @SimilarityCoefficients;
}
318 | |
# Get bit density for fingerprints bit vector corresponding to on bits...
#
# Returns the value of the object's GetDensityOfSetBits method passed through
# round() with a second argument of 2.
#
sub GetFingerprintsBitDensity {
  my $This = shift;

  return round($This->GetDensityOfSetBits(), 2);
}
329 | |
# Fold fingerprints bit vector by recursively reducing its size by half until size
# is less than or equal to specified size...
#
# Croaks unless Size is greater than zero and does not exceed the current size of
# the bit vector. Returns the object itself when no folding is needed; otherwise
# returns the result of _FoldFingerprintsBitVector.
#
sub FoldFingerprintsBitVectorBySize {
  my($This, $Size) = @_;

  unless ($Size > 0 && $Size <= $This->GetSize()) {
    croak "Error: ${ClassName}->FoldFingerprintsBitVectorBySize: Specified size, $Size, is not valid:  It must be > 0 && <= ", $This->GetSize()," ...";
  }

  # Already small enough...
  return $This if $This->GetSize() <= $Size;

  return $This->_FoldFingerprintsBitVector('BySize', $Size);
}
345 | |
# Fold fingerprints bit vector by recursively reducing its size by half until bit
# density of set bits is greater than or equal to specified density...
#
# Croaks unless Density is greater than 0 and at most 1. Returns the object itself
# when its density of set bits already meets Density; otherwise returns the result
# of _FoldFingerprintsBitVector.
#
sub FoldFingerprintsBitVectorByDensity {
  my($This, $Density) = @_;

  unless ($Density > 0 && $Density <= 1) {
    croak "Error: ${ClassName}->FoldFingerprintsBitVectorByDensity: Specified bit density, $Density, is not valid:  It must be > 0 && <= 1 ...";
  }

  # Already dense enough...
  return $This if $This->GetDensityOfSetBits() >= $Density;

  return $This->_FoldFingerprintsBitVector('ByDensity', $Density);
}
361 | |
# Fold fingerprints bit vector using size or density and return folded fingerprint
# bit vector...
#
# Mode is matched case-insensitively against 'BySize' and 'ByDensity'; Value is
# the size or density target for that mode. Any other mode returns the object
# unchanged.
#
# Each step splits the binary string form of the vector into two halves, creates a
# bit vector from each half, combines the two with '|' and then calls this method
# on the combined vector. Folding stops once the target is met or the size is 8
# bits or less.
#
sub _FoldFingerprintsBitVector {
  my($This, $Mode, $Value) = @_;

  # Fold upto size of 8 bits...
  return $This if $This->GetSize() <= 8;

  # Check size or density....
  my $StopFolding;
  if ($Mode =~ /^BySize$/i) {
    $StopFolding = ($This->GetSize() <= $Value);
  }
  elsif ($Mode =~ /^ByDensity$/i) {
    $StopFolding = ($This->GetDensityOfSetBits() >= $Value);
  }
  else {
    # Unknown mode: nothing to do...
    $StopFolding = 1;
  }
  return $This if $StopFolding;

  # Reduce size by half and recurse on the folded vector...
  my $BinaryString = $This->GetBitsAsBinaryString();
  my $HalfLength = length($BinaryString)/2;

  my $FirstHalfBinaryString = substr($BinaryString, 0, $HalfLength);
  my $SecondHalfBinaryString = substr($BinaryString, $HalfLength);

  my $FirstHalfFingerprintsBitVector = NewFromBinaryString($FirstHalfBinaryString);
  my $SecondHalfFingerprintsBitVector = NewFromBinaryString($SecondHalfBinaryString);

  my $FoldedFingerprintsBitVector = $FirstHalfFingerprintsBitVector | $SecondHalfFingerprintsBitVector;

  return $FoldedFingerprintsBitVector->_FoldFingerprintsBitVector($Mode, $Value);
}
403 | |
# Is first bit vector subset of second bit vector?
#
# For a bit vector to be a subset of another bit vector, both vectors must be of
# the same size and the bit positions set in first vector must also be set in the
# second bit vector. The test compares the number of set bits in the first vector
# with the number of set bits in the '&' combination of the two vectors.
#
# Returns 1 or 0.
#
# This functionality can be either invoked as a class function or an object method.
#
sub IsSubSet ($$) {
  my($VectorA, $VectorB) = @_;

  # Vectors of different sizes are never subsets of each other...
  return 0 unless $VectorA->GetSize() == $VectorB->GetSize();

  my $CommonBitsVector = $VectorA & $VectorB;

  return ($VectorA->GetNumOfSetBits() == $CommonBitsVector->GetNumOfSetBits()) ? 1 : 0;
}
424 | |
# Return a string containing vector values...
#
# The string is a '; ' separated list of 'Name: value' fields: SpecifiedSize (only
# when it differs from the bit vector size), BitVectorSize, NumOfOnBits, BitDensity
# and BitVector. This is the method used for the overloaded '""' operator.
#
sub StringifyFingerprintsBitVector {
  my($This) = @_;
  my(@Fields);

  # BitVector size information...
  if ($This->{SpecifiedSize} != $This->GetSize()) {
    push @Fields, "SpecifiedSize: " . $This->{SpecifiedSize};
  }
  push @Fields, "BitVectorSize: " . $This->GetSize();

  # On bits information...
  push @Fields, "NumOfOnBits: " . $This->GetNumOfSetBits();
  push @Fields, "BitDensity: " . $This->GetFingerprintsBitDensity();

  # BitVector values...
  push @Fields, "BitVector: " . $This->StringifyBitVector();

  return join('; ', @Fields);
}
449 | |
450 # For two fingerprints bit vectors A and B of same size, let: | |
451 # | |
452 # Na = Number of bits set to "1" in A | |
453 # Nb = Number of bits set to "1" in B | |
454 # Nc = Number of bits set to "1" in both A and B | |
455 # Nd = Number of bits set to "0" in both A and B | |
456 # | |
457 # Nt = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd | |
458 # | |
459 # Na - Nc = Number of bits set to "1" in A but not in B | |
460 # Nb - Nc = Number of bits set to "1" in B but not in A | |
461 # | |
462 # Various similarity coefficients [ Ref 40 - 42 ] for a pair of bit vectors A and B are | |
463 # defined as follows: | |
464 # | |
465 # . BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as Buser ) | |
466 # | |
467 # . Buser: ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as BaroniUrbani ) | |
468 # | |
469 # . Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai) | |
470 # | |
471 # . Dice: (2 * Nc) / ( Na + Nb ) | |
472 # | |
473 # . Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb) | |
474 # | |
475 # . Forbes: ( Nt * Nc ) / ( Na * Nb ) | |
476 # | |
477 # . Fossum: ( Nt * ( ( Nc - 1/2 ) ** 2 ) ) / ( Na * Nb ) | |
478 # | |
479 # . Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt | |
480 # | |
481 # . Jaccard: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto) | |
482 # | |
483 # . Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc ) | |
484 # | |
485 # . Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) ) = 0.5 * ( Nc / Na + Nc / Nb ) | |
486 # | |
487 # . Matching: ( Nc + Nd ) / Nt | |
488 # | |
489 # . McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / ( Na * Nb ) | |
490 # | |
491 # . Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine) | |
492 # | |
493 # . Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) ) | |
494 # | |
495 # . RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc) + ( Nb - Nc) + Nt) = ( Nc + Nd ) / ( Na + Nb - 2Nc + Nt) | |
496 # | |
497 # . RussellRao: Nc / Nt | |
498 # | |
499 # . Simpson: Nc / MIN ( Na, Nb) | |
500 # | |
501 # . SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc) + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc ) | |
502 # | |
503 # . SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt ) | |
504 # | |
505 # . SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc ) | |
506 # | |
507 # . Tanimoto: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard) | |
508 # | |
509 # . Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb ) + Nb) | |
510 # | |
511 # . Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) ) ) | |
512 # | |
513 # | |
514 # Values of Tanimoto/Jaccard and Tversky coefficients depend only on those bits that | |
515 # are set to "1" in both A and B. In order to take all bit positions into account, modified versions | |
516 # of Tanimoto [ Ref. 42 ] and Tversky [ Ref. 43 ] have been developed. | |
517 # | |
518 # Let: | |
519 # | |
520 # Na' = Number of bits set to "0" in A | |
521 # Nb' = Number of bits set to "0" in B | |
522 # Nc' = Number of bits set to "0" in both A and B | |
523 # | |
524 # . Tanimoto': Nc' / ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' ) | |
525 # | |
526 # . Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' ) + Nb') | |
527 # | |
528 # Then: | |
529 # | |
530 # . WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto' | |
531 # | |
532 # . WeightedTversky = beta * Tversky + (1 - beta) * Tversky' | |
533 # | |
534 # | |
535 | |
# Calculate BaroniUrbani similarity coefficient for two same size bit vectors.
#
# Same value as the Buser coefficient; the calculation is delegated to
# BuserSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub BaroniUrbaniSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return BuserSimilarityCoefficient($VectorA, $VectorB);
}
545 | |
# Calculate Buser similarity coefficient for two same size bit vectors.
#
#   ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + ( Na - Nc ) + ( Nb - Nc ) + Nc )
#
# where Na and Nb are the numbers of set bits in A and B, Nc is the number of set
# bits common to both and Nd is the number of clear bits common to both.
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub BuserSimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  # The square root term appears in both numerator and denominator; compute it once...
  my $SqrtNcNd = sqrt($Nc*$Nd);

  my $Numerator = $SqrtNcNd + $Nc;
  my $Denominator = $SqrtNcNd + ($Na - $Nc) + ($Nb - $Nc ) + $Nc;

  return $Denominator ? ($Numerator/$Denominator) : 0;
}
563 | |
# Calculate Cosine similarity coefficient for two same size bit vectors.
#
#   Nc / SQRT ( Na * Nb )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CosineSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = sqrt($Na*$Nb);
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}
579 | |
# Calculate Dice similarity coefficient for two same size bit vectors.
#
#   ( 2 * Nc ) / ( Na + Nb )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Na + $Nb;
  return 0 unless $Denominator;

  return (2*$Nc)/$Denominator;
}
595 | |
# Calculate Dennis similarity coefficient for two same size bit vectors.
#
#   ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb )
#
# with Nt = Na + Nb - Nc + Nd. Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DennisSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Denominator = sqrt($Nt*$Na*$Nb);
  return 0 unless $Denominator;

  my $Numerator = $Nc*$Nd - (($Na - $Nc)*($Nb - $Nc));

  return $Numerator/$Denominator;
}
613 | |
# Calculate Forbes similarity coefficient for two same size bit vectors.
#
#   ( Nt * Nc ) / ( Na * Nb )
#
# with Nt = Na + Nb - Nc + Nd. Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub ForbesSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Denominator = $Na*$Nb;
  return 0 unless $Denominator;

  return ($Nt*$Nc)/$Denominator;
}
631 | |
# Calculate Fossum similarity coefficient for two same size bit vectors.
#
#   ( Nt * ( ( Nc - 1/2 ) ** 2 ) ) / ( Na * Nb )
#
# with Nt = Na + Nb - Nc + Nd. Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub FossumSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Denominator = $Na*$Nb;
  return 0 unless $Denominator;

  my $Numerator = $Nt*(($Nc - 0.5)** 2);

  return $Numerator/$Denominator;
}
649 | |
# Calculate Hamann similarity coefficient for two same size bit vectors.
#
#   ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
#
# with Nt = Na + Nb - Nc + Nd. Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub HamannSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  return 0 unless $Nt;

  my $Numerator = ($Nc + $Nd ) - ($Na - $Nc) - ($Nb - $Nc);

  return $Numerator/$Nt;
}
667 | |
# Calculate Jacard similarity coefficient for two same size bit vectors.
#
# Same value as the Tanimoto coefficient; the calculation is delegated to
# TanimotoSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub JacardSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return TanimotoSimilarityCoefficient($VectorA, $VectorB);
}
677 | |
# Calculate Kulczynski1 similarity coefficient for two same size bit vectors.
#
#   Nc / ( Na + Nb - 2 * Nc )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub Kulczynski1SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Na + $Nb - 2*$Nc;
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}
693 | |
# Calculate Kulczynski2 similarity coefficient for two same size bit vectors.
#
#   0.5 * ( Na * Nc + Nb * Nc ) / ( Na * Nb ) = 0.5 * ( Nc / Na + Nc / Nb )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub Kulczynski2SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Na*$Nb;
  return 0 unless $Denominator;

  my $Numerator = 0.5*($Na*$Nc + $Nb*$Nc);

  return $Numerator/$Denominator;
}
709 | |
# Calculate Matching similarity coefficient for two same size bit vectors.
#
#   ( Nc + Nd ) / Nt
#
# with Nt = Na + Nb - Nc + Nd. Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub MatchingSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  return 0 unless $Nt;

  return ($Nc + $Nd)/$Nt;
}
727 | |
# Calculate McConnaughey similarity coefficient for two same size bit vectors.
#
#   ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc ) ) / ( Na * Nb )
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub McConnaugheySimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Na*$Nb;
  return 0 unless $Denominator;

  my $Numerator = $Nc**2 - (($Na - $Nc)*($Nb - $Nc));

  return $Numerator/$Denominator;
}
743 | |
# Calculate Ochiai similarity coefficient for two same size bit vectors.
#
# Same value as the Cosine coefficient; the calculation is delegated to
# CosineSimilarityCoefficient.
#
# This functionality can be either invoked as a class function or an object method.
#
sub OchiaiSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return CosineSimilarityCoefficient($VectorA, $VectorB);
}
753 | |
# Calculate Pearson similarity coefficient for two same size bit vectors.
#
#   ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
#
# where Na and Nb are the numbers of set bits in A and B, Nc is the number of set
# bits common to both and Nd is the number of clear bits common to both.
#
# Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub PearsonSimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  # Nt is not part of this formula, so it is not computed here...
  my $Numerator = ($Nc*$Nd ) - (($Na - $Nc)*($Nb - $Nc));
  my $Denominator = sqrt($Na*$Nb*($Na - $Nc + $Nd )*($Nb - $Nc + $Nd));

  return $Denominator ? ($Numerator/$Denominator) : 0;
}
771 | |
# Calculate RogersTanimoto similarity coefficient for two same size bit vectors.
#
#   ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) + Nt )
#
# with Nt = Na + Nb - Nc + Nd. Returns 0 when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub RogersTanimotoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Denominator = ($Na - $Nc) + ($Nb - $Nc) + $Nt;
  return 0 unless $Denominator;

  return ($Nc + $Nd)/$Denominator;
}
789 | |
# Calculate RussellRao similarity coefficient for two same size bit vectors:
#
#   Nc / Nt
#
# A value of 0 is returned when Nt is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub RussellRaoSimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);
  my $Nd = _GetNumOfCommonClearBits($BitVectorA, $BitVectorB);

  # Total number of bit positions: set in A or B plus clear in both...
  my $Nt = $Na + $Nb - $Nc + $Nd;
  return 0 unless $Nt;

  return $Nc/$Nt;
}
807 | |
# Calculate Simpson similarity coefficient for two same size bit vectors:
#
#   Nc / MIN ( Na, Nb )
#
# A value of 0 is returned when the smaller of Na and Nb is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SimpsonSimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

  my $SmallerNumOfSetBits = min($Na, $Nb);
  if (!$SmallerNumOfSetBits) {
    return 0;
  }

  return $Nc/$SmallerNumOfSetBits;
}
823 | |
# Calculate SkoalSneath1 similarity coefficient for two same size bit vectors:
#
#   Nc / ( Nc + 2*(Na - Nc) + 2*(Nb - Nc) )
#
# A value of 0 is returned when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath1SimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

  # Bits set in only one of the two vectors count twice...
  my $Denominator = $Nc + 2*($Na - $Nc) + 2*($Nb - $Nc);
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}
839 | |
# Calculate SkoalSneath2 similarity coefficient for two same size bit vectors:
#
#   ( 2*Nc + 2*Nd ) / ( Nc + Nd + Nt )
#
# A value of 0 is returned when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath2SimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);
  my $Nd = _GetNumOfCommonClearBits($BitVectorA, $BitVectorB);

  # Total number of bit positions: set in A or B plus clear in both...
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Denominator = $Nc + $Nd + $Nt;
  if (!$Denominator) {
    return 0;
  }

  return (2*$Nc + 2*$Nd)/$Denominator;
}
857 | |
# Calculate SkoalSneath3 similarity coefficient for two same size bit vectors:
#
#   ( Nc + Nd ) / ( (Na - Nc) + (Nb - Nc) )
#
# A value of 0 is returned when the denominator is zero, i.e. when no bit is
# set in only one of the two vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath3SimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);
  my $Nd = _GetNumOfCommonClearBits($BitVectorA, $BitVectorB);

  my $Denominator = ($Na - $Nc) + ($Nb - $Nc);
  return 0 unless $Denominator;

  return ($Nc + $Nd)/$Denominator;
}
875 | |
# Calculate Tanimoto similarity coefficient for two same size bit vectors:
#
#   Nc / ( Na + Nb - Nc )
#
# A value of 0 is returned when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TanimotoSimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

  # Number of bits set to "1" in A or B...
  my $NumSetInEither = $Na + $Nb - $Nc;
  if (!$NumSetInEither) {
    return 0;
  }

  return $Nc/$NumSetInEither;
}
891 | |
# Calculate Tversky similarity coefficient for two same size bit vectors:
#
#   Nc / ( Alpha*(Na - Nb) + Nb )
#
# Alpha must be defined and lie in the range 0 to 1 inclusive; otherwise the
# call croaks. A value of 0 is returned when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TverskySimilarityCoefficient ($$$) {
  my($BitVectorA, $BitVectorB, $Alpha) = @_;

  unless (defined($Alpha) && $Alpha >= 0 && $Alpha <= 1) {
    croak "Error: ${ClassName}->TverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

  my $Denominator = $Alpha*($Na - $Nb) + $Nb;
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}
911 | |
# Calculate Yule similarity coefficient for two same size bit vectors:
#
#   ( Nc*Nd - (Na - Nc)*(Nb - Nc) ) / ( Nc*Nd + (Na - Nc)*(Nb - Nc) )
#
# A value of 0 is returned when the denominator is zero.
#
# This functionality can be either invoked as a class function or an object method.
#
sub YuleSimilarityCoefficient ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);
  my $Nd = _GetNumOfCommonClearBits($BitVectorA, $BitVectorB);

  # Product of matching counts and product of mismatching counts...
  my $MatchProduct = $Nc*$Nd;
  my $MismatchProduct = ($Na - $Nc)*($Nb - $Nc);

  my $Denominator = $MatchProduct + $MismatchProduct;
  if (!$Denominator) {
    return 0;
  }

  return ($MatchProduct - $MismatchProduct)/$Denominator;
}
929 | |
# Calculate WeightedTanimoto similarity coefficient for two same size bit vectors:
#
#   Beta*Tanimoto + (1 - Beta)*Tanimoto'
#
# where Tanimoto is calculated from counts of set bits and Tanimoto' from counts
# of clear bits. Beta must be defined and lie in the range 0 to 1 inclusive;
# otherwise the call croaks.
#
# This functionality can be either invoked as a class function or an object method.
#
sub WeightedTanimotoSimilarityCoefficient ($$$) {
  my($BitVectorA, $BitVectorB, $Beta) = @_;

  unless (defined($Beta) && $Beta >= 0 && $Beta <= 1) {
    croak "Error: ${ClassName}->WeightedTanimotoSimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  # The same Tanimoto formula is applied twice: first to the set bits counts
  # and then to the clear bits counts...
  my(@TanimotoValues);
  for my $CountBitsRef (\&_GetNumOfIndividualAndCommonSetBits, \&_GetNumOfIndividualAndCommonClearBits) {
    my($Na, $Nb, $Nc) = $CountBitsRef->($BitVectorA, $BitVectorB);
    my $Denominator = $Na + $Nb - $Nc;

    push @TanimotoValues, ($Denominator ? ($Nc/$Denominator) : 0);
  }
  my($TanimotoForSetBits, $TanimotoForClearBits) = @TanimotoValues;

  return $Beta*$TanimotoForSetBits + (1 - $Beta)*$TanimotoForClearBits;
}
960 | |
# Calculate WeightedTversky similarity coefficient for two same size bit vectors:
#
#   Beta*Tversky + (1 - Beta)*Tversky'
#
# where Tversky = Nc / ( Alpha*(Na - Nb) + Nb ) is calculated from counts of set
# bits and Tversky' by the same formula from counts of clear bits.
#
# Parameters:
#   FingerprintsBitVectorA, FingerprintsBitVectorB - bit vectors to compare
#   Alpha - Tversky weight; must be defined and >= 0 and <= 1
#   Beta  - weight of set bits term; must be defined and >= 0 and <= 1
#
# Croaks when Alpha or Beta is undefined or out of range. A term whose
# denominator is zero contributes 0.
#
# The prototype declares all four arguments. A three slot prototype makes Perl
# reject, at compile time, any function style call which supplies both Alpha
# and Beta.
#
# This functionality can be either invoked as a class function or an object method.
#
sub WeightedTverskySimilarityCoefficient ($$$$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Alpha, $Beta) = @_;
  my($Na, $Nb, $Nc, $TverskyForSetBits, $TverskyForClearBits, $Numerator, $Denominator, $WeightedTversky);

  if (!(defined($Alpha) && ($Alpha >= 0 && $Alpha <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }
  if (!(defined($Beta) && ($Beta >= 0 && $Beta <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  # Get Tversky for set bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Alpha*($Na - $Nb ) + $Nb;
  $TverskyForSetBits = $Denominator ? ($Numerator/$Denominator) : 0;

  # Get Tversky for clear bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Numerator = $Nc;
  $Denominator = $Alpha*($Na - $Nb ) + $Nb;
  $TverskyForClearBits = $Denominator ? ($Numerator/$Denominator) : 0;

  $WeightedTversky = $Beta*$TverskyForSetBits + (1 - $Beta)*$TverskyForClearBits;

  return $WeightedTversky;
}
994 | |
# Get Na, Nb and Nc set bits counts for bit vectors A and B to be used for
# similarity coefficient calculations.
#
# Returns a list containing:
#   Na - Number of bits set to "1" in A
#   Nb - Number of bits set to "1" in B
#   Nc - Number of bits set to "1" in both A and B
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  my $Na = $BitVectorA->GetNumOfSetBits();
  my $Nb = $BitVectorB->GetNumOfSetBits();

  # Bits set in both vectors are the set bits of A & B...
  my $Nc = ($BitVectorA & $BitVectorB)->GetNumOfSetBits();

  return ($Na, $Nb, $Nc);
}
1014 | |
# Get Nd, the number of bits set to "0" in both bit vectors A and B, to be used
# for similarity coefficient calculations.
#
# The count is reduced by the number of clear bits used for padding. The
# correction is applied only once, taken from the first of A and B which
# requires it.
#
sub _GetNumOfCommonClearBits ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  # A bit set in ~A & ~B is clear in both A and B...
  my $Nd = (~$BitVectorA & ~$BitVectorB)->GetNumOfSetBits();

  # Correct for number of clear bits used for padding...
  for my $BitVector ($BitVectorA, $BitVectorB) {
    next unless _IsNumOfClearBitsCorrectionRequired($BitVector);

    $Nd -= _GetNumOfClearBitsCorrection($BitVector);
    last;
  }

  return $Nd;
}
1035 | |
# Get Na, Nb and Nc clear bits counts for bit vectors A and B to be used for
# similarity coefficient calculations.
#
# Returns a list containing:
#   Na - Number of bits set to "0" in A
#   Nb - Number of bits set to "0" in B
#   Nc - Number of bits set to "0" in both A and B
#
# All three counts are corrected for clear bits used for padding.
#
sub _GetNumOfIndividualAndCommonClearBits ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  # Count clear bits in A and then in B, correcting each for padding...
  my(@NumOfClearBits);
  for my $BitVector ($BitVectorA, $BitVectorB) {
    my $Count = $BitVector->GetNumOfClearBits();
    if (_IsNumOfClearBitsCorrectionRequired($BitVector)) {
      $Count -= _GetNumOfClearBitsCorrection($BitVector);
    }
    push @NumOfClearBits, $Count;
  }
  my($Na, $Nb) = @NumOfClearBits;

  # Number of bits set to "0" in both A and B
  my $Nc = _GetNumOfCommonClearBits($BitVectorA, $BitVectorB);

  return ($Na, $Nb, $Nc);
}
1063 | |
# Irrespective of specified size, Perl functions used to handle bit data in
# BitVector class automatically set the size to the next nearest power of 2
# and clear the extra bits.
#
# SpecifiedSize is used by this class to process any arbitrary size during
# similarity coefficient calculations.
#
# Assuming the FingerprintsBitVector class only manipulates bits up to the
# specified size, a correction for the extra bits added by BitVector class
# needs to be applied to number of clear bits.
#
# Returns the correction: Size minus SpecifiedSize of the bit vector.
#
sub _GetNumOfClearBitsCorrection {
  my($FingerprintsBitVector) = @_;
  my($Size, $SpecifiedSize) = @{$FingerprintsBitVector}{qw(Size SpecifiedSize)};

  return ($Size - $SpecifiedSize);
}
1080 | |
# Is number of clear bits correction required?
#
# Returns 1 when Size of the bit vector is larger than its SpecifiedSize and
# 0 otherwise.
#
sub _IsNumOfClearBitsCorrectionRequired {
  my($FingerprintsBitVector) = @_;

  if ($FingerprintsBitVector->{Size} > $FingerprintsBitVector->{SpecifiedSize}) {
    return 1;
  }

  return 0;
}
1088 | |
1089 | |
1090 1; | |
1091 | |
1092 __END__ | |
1093 | |
1094 =head1 NAME | |
1095 | |
1096 FingerprintsBitVector | |
1097 | |
1098 =head1 SYNOPSIS | |
1099 | |
1100 use Fingerprints::FingerprintsBitVector; | |
1101 | |
1102 use Fingerprints::FingerprintsBitVector qw(:coefficients); | |
1103 | |
1104 use Fingerprints::FingerprintsBitVector qw(:all); | |
1105 | |
1106 =head1 DESCRIPTION | |
1107 | |
1108 B<FingerprintsBitVector> class provides the following methods: | |
1109 | |
1110 new, BaroniUrbaniSimilarityCoefficient, BuserSimilarityCoefficient, | |
1111 CosineSimilarityCoefficient, DennisSimilarityCoefficient, | |
1112 DiceSimilarityCoefficient, FoldFingerprintsBitVectorByDensity, | |
1113 FoldFingerprintsBitVectorBySize, ForbesSimilarityCoefficient, | |
1114 FossumSimilarityCoefficient, GetBitsAsBinaryString, GetBitsAsDecimalString, | |
1115 GetBitsAsHexadecimalString, GetBitsAsOctalString, GetBitsAsRawBinaryString, | |
1116 GetDescription, GetFingerprintsBitDensity, GetID, GetSpecifiedSize, | |
1117 GetSupportedSimilarityCoefficients, GetVectorType, HamannSimilarityCoefficient, | |
1118 IsFingerprintsBitVector, IsSubSet, JacardSimilarityCoefficient, | |
1119 Kulczynski1SimilarityCoefficient, Kulczynski2SimilarityCoefficient, | |
1120 MatchingSimilarityCoefficient, McConnaugheySimilarityCoefficient, | |
1121 NewFromBinaryString, NewFromDecimalString, NewFromHexadecimalString, | |
1122 NewFromOctalString, NewFromRawBinaryString, OchiaiSimilarityCoefficient, | |
1123 PearsonSimilarityCoefficient, RogersTanimotoSimilarityCoefficient, | |
1124 RussellRaoSimilarityCoefficient, SetDescription, SetID, SetSpecifiedSize, | |
1125 SetVectorType, SimpsonSimilarityCoefficient, SkoalSneath1SimilarityCoefficient, | |
1126 SkoalSneath2SimilarityCoefficient, SkoalSneath3SimilarityCoefficient, | |
1127 StringifyFingerprintsBitVector, TanimotoSimilarityCoefficient, | |
1128 TverskySimilarityCoefficient, WeightedTanimotoSimilarityCoefficient, | |
1129 WeightedTverskySimilarityCoefficient, YuleSimilarityCoefficient | |
1130 | |
1131 The methods available to create fingerprints bit vector from strings and to calculate similarity | |
1132 coefficient between two bit vectors can also be invoked as class functions. | |
1133 | |
1134 B<FingerprintsBitVector> class is derived from B<BitVector> class which provides the functionality | |
1135 to manipulate bits. | |
1136 | |
1137 For two fingerprints bit vectors A and B of same size, let: | |
1138 | |
1139 Na = Number of bits set to "1" in A | |
1140 Nb = Number of bits set to "1" in B | |
1141 Nc = Number of bits set to "1" in both A and B | |
1142 Nd = Number of bits set to "0" in both A and B | |
1143 | |
1144 Nt = Number of bits set to "1" or "0" in A or B (Size of A or B) | |
1145 Nt = Na + Nb - Nc + Nd | |
1146 | |
1147 Na - Nc = Number of bits set to "1" in A but not in B | |
1148 Nb - Nc = Number of bits set to "1" in B but not in A | |
1149 | |
1150 Then, various similarity coefficients [ Ref. 40 - 42 ] for a pair of bit vectors A and B are | |
1151 defined as follows: | |
1152 | |
1153 BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as Buser ) | |
1154 | |
1155 Buser: ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as BaroniUrbani ) | |
1156 | |
1157 Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai) | |
1158 | |
1159 Dice: (2 * Nc) / ( Na + Nb ) | |
1160 | |
1161 Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb) | |
1162 | |
1163 Forbes: ( Nt * Nc ) / ( Na * Nb ) | |
1164 | |
1165 Fossum: ( Nt * ( ( Nc - 1/2 ) ** 2 ) ) / ( Na * Nb ) | |
1166 | |
1167 Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt | |
1168 | |
1169 Jaccard: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto) | |
1170 | |
1171 Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc ) | |
1172 | |
1173 Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) ) | |
1174 = 0.5 * ( Nc / Na + Nc / Nb ) | |
1175 | |
1176 Matching: ( Nc + Nd ) / Nt | |
1177 | |
1178 McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / ( Na * Nb ) | |
1179 | |
1180 Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine) | |
1181 | |
1182 Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) ) | |
1183 | |
1184 RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc) + ( Nb - Nc) + Nt) = ( Nc + Nd ) / ( Na + Nb - 2Nc + Nt) | |
1185 | |
1186 RussellRao: Nc / Nt | |
1187 | |
1188 Simpson: Nc / MIN ( Na, Nb) | |
1189 | |
1190 SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc) + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc ) | |
1191 | |
1192 SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt ) | |
1193 | |
1194 SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc ) | |
1195 | |
1196 Tanimoto: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard) | |
1197 | |
1198 Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb ) + Nb) | |
1199 | |
1200 Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) ) ) | |
1201 | |
1202 The values of Tanimoto/Jaccard and Tversky coefficients depend only on those bits which | |
1203 are set to "1" in both A and B. In order to take into account all bit positions, modified versions | |
1204 of Tanimoto [ Ref. 42 ] and Tversky [ Ref. 43 ] have been developed. | |
1205 | |
1206 Let: | |
1207 | |
1208 Na' = Number of bits set to "0" in A | |
1209 Nb' = Number of bits set to "0" in B | |
1210 Nc' = Number of bits set to "0" in both A and B | |
1211 | |
1212 Tanimoto': Nc' / ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' ) | |
1213 | |
1214 Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' ) + Nb') | |
1215 | |
1216 Then: | |
1217 | |
1218 WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto' | |
1219 | |
1220 WeightedTversky = beta * Tversky + (1 - beta) * Tversky' | |
1221 | |
1222 =head2 METHODS | |
1223 | |
1224 =over 4 | |
1225 | |
1226 =item B<new> | |
1227 | |
1228 $NewFPBitVector = new Fingerprints::FingerprintsBitVector($Size); | |
1229 | |
1230 Creates a new I<FingerprintsBitVector> object of size I<Size> and returns newly created | |
1231 B<FingerprintsBitVector>. Bit numbers range from 0 to 1 less than I<Size>. | |
1232 | |
1233 =item B<BaroniUrbaniSimilarityCoefficient> | |
1234 | |
1235 $Value = $FingerprintsBitVector->BaroniUrbaniSimilarityCoefficient( | |
1236 $OtherFingerprintBitVector); | |
1237 $Value = Fingerprints::FingerprintsBitVector:: | |
1238 BaroniUrbaniSimilarityCoefficient( | |
1239 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1240 | |
1241 Returns value of I<BaroniUrbani> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1242 | |
1243 =item B<BuserSimilarityCoefficient> | |
1244 | |
1245 $Value = $FingerprintsBitVector->BuserSimilarityCoefficient( | |
1246 $OtherFingerprintBitVector); | |
1247 $Value = Fingerprints::FingerprintsBitVector::BuserSimilarityCoefficient( | |
1248 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1249 | |
1250 Returns value of I<Buser> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1251 | |
1252 =item B<CosineSimilarityCoefficient> | |
1253 | |
1254 $Value = $FingerprintsBitVector->CosineSimilarityCoefficient( | |
1255 $OtherFingerprintBitVector); | |
1256 $Value = Fingerprints::FingerprintsBitVector::CosineSimilarityCoefficient( | |
1257 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1258 | |
1259 Returns value of I<Cosine> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1260 | |
1261 =item B<DennisSimilarityCoefficient> | |
1262 | |
1263 $Value = $FingerprintsBitVector->DennisSimilarityCoefficient( | |
1264 $OtherFingerprintBitVector); | |
1265 $Value = Fingerprints::FingerprintsBitVector::DennisSimilarityCoefficient( | |
1266 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1267 | |
1268 Returns value of I<Dennis> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1269 | |
1270 =item B<DiceSimilarityCoefficient> | |
1271 | |
1272 $Value = $FingerprintsBitVector->DiceSimilarityCoefficient( | |
1273 $OtherFingerprintBitVector); | |
1274 $Value = Fingerprints::FingerprintsBitVector::DiceSimilarityCoefficient( | |
1275 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1276 | |
1277 Returns value of I<Dice> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1278 | |
1279 =item B<FoldFingerprintsBitVectorByDensity> | |
1280 | |
1281 $FingerprintsBitVector->FoldFingerprintsBitVectorByDensity($Density); | |
1282 | |
1283 Folds I<FingerprintsBitVector> by recursively reducing its size by half until bit density of set bits is | |
1284 greater than or equal to specified I<Density> and returns folded I<FingerprintsBitVector>. | |
1285 | |
1286 =item B<FoldFingerprintsBitVectorBySize> | |
1287 | |
1288 $FingerprintsBitVector->FoldFingerprintsBitVectorBySize($Size); | |
1289 | |
1290 Folds I<FingerprintsBitVector> by recursively reducing its size by half until size is less than or equal to | |
1291 specified I<Size> and returns folded I<FingerprintsBitVector>. | |
1292 | |
1293 =item B<ForbesSimilarityCoefficient> | |
1294 | |
1295 $Value = $FingerprintsBitVector->ForbesSimilarityCoefficient( | |
1296 $OtherFingerprintBitVector); | |
1297 $Value = Fingerprints::FingerprintsBitVector::ForbesSimilarityCoefficient( | |
1298 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1299 | |
1300 Returns value of I<Forbes> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1301 | |
1302 =item B<FossumSimilarityCoefficient> | |
1303 | |
1304 $Value = $FingerprintsBitVector->FossumSimilarityCoefficient( | |
1305 $OtherFingerprintBitVector); | |
1306 $Value = Fingerprints::FingerprintsBitVector::FossumSimilarityCoefficient( | |
1307 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1308 | |
1309 Returns value of I<Fossum> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1310 | |
1311 =item B<GetBitsAsBinaryString> | |
1312 | |
1313 $BinaryASCIIString = $FingerprintsBitVector->GetBitsAsBinaryString(); | |
1314 | |
1315 Returns fingerprints as a binary ASCII string containing 0s and 1s. | |
1316 | |
1317 =item B<GetBitsAsHexadecimalString> | |
1318 | |
1319 $HexadecimalString = $FingerprintsBitVector->GetBitsAsHexadecimalString(); | |
1320 | |
1321 Returns fingerprints as a hexadecimal string. | |
1322 | |
1323 =item B<GetBitsAsRawBinaryString> | |
1324 | |
1325 $RawBinaryString = $FingerprintsBitVector->GetBitsAsRawBinaryString(); | |
1326 | |
1327 Returns fingerprints as a raw binary string containing packed bit values for each byte. | |
1328 | |
1329 =item B<GetDescription> | |
1330 | |
1331 $Description = $FingerprintsBitVector->GetDescription(); | |
1332 | |
1333 Returns a string containing description of fingerprints bit vector. | |
1334 | |
1335 =item B<GetFingerprintsBitDensity> | |
1336 | |
1337 $BitDensity = $FingerprintsBitVector->GetFingerprintsBitDensity(); | |
1338 | |
1339 Returns I<BitDensity> of I<FingerprintsBitVector> corresponding to bits set to 1s. | |
1340 | |
1341 =item B<GetID> | |
1342 | |
1343 $ID = $FingerprintsBitVector->GetID(); | |
1344 | |
1345 Returns I<ID> of I<FingerprintsBitVector>. | |
1346 | |
1347 =item B<GetVectorType> | |
1348 | |
1349 $VectorType = $FingerprintsBitVector->GetVectorType(); | |
1350 | |
1351 Returns I<VectorType> of I<FingerprintsBitVector>. | |
1352 | |
1353 =item B<GetSpecifiedSize> | |
1354 | |
1355 $Size = $FingerprintsBitVector->GetSpecifiedSize(); | |
1356 | |
1357 Returns value of specified size for bit vector. | |
1358 | |
1359 =item B<GetSupportedSimilarityCoefficients> | |
1360 | |
1361 @SimilarityCoefficient = | |
1362 Fingerprints::FingerprintsBitVector::GetSupportedSimilarityCoefficients(); | |
1363 | |
1364 Returns an array containing names of supported similarity coefficients. | |
1365 | |
1366 =item B<HamannSimilarityCoefficient> | |
1367 | |
1368 $Value = $FingerprintsBitVector->HamannSimilarityCoefficient( | |
1369 $OtherFingerprintBitVector); | |
1370 $Value = Fingerprints::FingerprintsBitVector::HamannSimilarityCoefficient( | |
1371 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1372 | |
1373 Returns value of I<Hamann> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1374 | |
1375 =item B<IsFingerprintsBitVector> | |
1376 | |
1377 $Status = Fingerprints::FingerprintsBitVector:: | |
1378 IsFingerprintsBitVector($Object); | |
1379 | |
1380 Returns 1 or 0 based on whether I<Object> is a B<FingerprintsBitVector> object. | |
1381 | |
1382 =item B<IsSubSet> | |
1383 | |
1384 $Status = $FingerprintsBitVector->IsSubSet($OtherFPBitVector); | |
1385 $Status = Fingerprints::FingerprintsBitVector::IsSubSet( | |
1386 $FPBitVectorA, $FPBitVectorB); | |
1387 | |
1388 Returns 1 or 0 based on whether first fingerprints bit vector is a subset of second | |
1389 fingerprints bit vector. | |
1390 | |
1391 For a bit vector to be a subset of another bit vector, both vectors must be of | |
1392 the same size and the bit positions set in first vector must also be set in the | |
1393 second bit vector. | |
1394 | |
1395 =item B<JacardSimilarityCoefficient> | |
1396 | |
1397 $Value = $FingerprintsBitVector->JacardSimilarityCoefficient( | |
1398 $OtherFingerprintBitVector); | |
1399 $Value = Fingerprints::FingerprintsBitVector::JacardSimilarityCoefficient( | |
1400 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1401 | |
1402 Returns value of I<Jacard> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1403 | |
1404 =item B<Kulczynski1SimilarityCoefficient> | |
1405 | |
1406 $Value = $FingerprintsBitVector->Kulczynski1SimilarityCoefficient( | |
1407 $OtherFingerprintBitVector); | |
1408 $Value = Fingerprints::FingerprintsBitVector:: | |
1409 Kulczynski1SimilarityCoefficient( | |
1410 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1411 | |
1412 Returns value of I<Kulczynski1> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1413 | |
1414 =item B<Kulczynski2SimilarityCoefficient> | |
1415 | |
1416 $Value = $FingerprintsBitVector->Kulczynski2SimilarityCoefficient( | |
1417 $OtherFingerprintBitVector); | |
1418 $Value = Fingerprints::FingerprintsBitVector:: | |
1419 Kulczynski2SimilarityCoefficient( | |
1420 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1421 | |
1422 Returns value of I<Kulczynski2> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1423 | |
1424 =item B<MatchingSimilarityCoefficient> | |
1425 | |
1426 $Value = $FingerprintsBitVector->MatchingSimilarityCoefficient( | |
1427 $OtherFingerprintBitVector); | |
1428 $Value = Fingerprints::FingerprintsBitVector:: | |
1429 MatchingSimilarityCoefficient( | |
1430 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1431 | |
1432 Returns value of I<Matching> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1433 | |
1434 =item B<McConnaugheySimilarityCoefficient> | |
1435 | |
1436 $Value = $FingerprintsBitVector->McConnaugheySimilarityCoefficient( | |
1437 $OtherFingerprintBitVector); | |
1438 $Value = Fingerprints::FingerprintsBitVector:: | |
1439 McConnaugheySimilarityCoefficient( | |
1440 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1441 | |
1442 Returns value of I<McConnaughey> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1443 | |
1444 =item B<NewFromBinaryString> | |
1445 | |
1446 $NewFPBitVector = $FingerprintsBitVector->NewFromBinaryString( | |
1447 $BinaryString); | |
1448 $NewFPBitVector = Fingerprints::FingerprintsBitVector::NewFromBinaryString( | |
1449 $BinaryString); | |
1450 | |
1451 Creates a new I<FingerprintsBitVector> using I<BinaryString> and returns new | |
1452 B<FingerprintsBitVector> object. | |
1453 | |
1454 =item B<NewFromHexadecimalString> | |
1455 | |
1456 $NewFPBitVector = $FingerprintsBitVector->NewFromHexadecimalString( | |
1457 $HexadecimalString); | |
1458 $NewFPBitVector = Fingerprints::FingerprintsBitVector:: | |
1459 NewFromHexadecimalString( | |
1460 $HexadecimalString); | |
1461 | |
1462 Creates a new I<FingerprintsBitVector> using I<HexadecimalString> and returns new | |
1463 B<FingerprintsBitVector> object. | |
1464 | |
1465 =item B<NewFromRawBinaryString> | |
1466 | |
1467 $NewFPBitVector = $FingerprintsBitVector->NewFromRawBinaryString( | |
1468 $RawBinaryString); | |
1469 $NewFPBitVector = Fingerprints::FingerprintsBitVector:: | |
1470 NewFromRawBinaryString( | |
1471 $RawBinaryString); | |
1472 | |
1473 Creates a new I<FingerprintsBitVector> using I<RawBinaryString> and returns new | |
1474 B<FingerprintsBitVector> object. | |
1475 | |
1476 =item B<OchiaiSimilarityCoefficient> | |
1477 | |
1478 $Value = $FingerprintsBitVector->OchiaiSimilarityCoefficient( | |
1479 $OtherFingerprintBitVector); | |
1480 $Value = Fingerprints::FingerprintsBitVector::OchiaiSimilarityCoefficient( | |
1481 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1482 | |
1483 Returns value of I<Ochiai> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1484 | |
1485 =item B<PearsonSimilarityCoefficient> | |
1486 | |
1487 $Value = $FingerprintsBitVector->PearsonSimilarityCoefficient( | |
1488 $OtherFingerprintBitVector); | |
1489 $Value = Fingerprints::FingerprintsBitVector::PearsonSimilarityCoefficient( | |
1490 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1491 | |
1492 Returns value of I<Pearson> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1493 | |
1494 =item B<RogersTanimotoSimilarityCoefficient> | |
1495 | |
1496 $Value = $FingerprintsBitVector->RogersTanimotoSimilarityCoefficient( | |
1497 $OtherFingerprintBitVector); | |
1498 $Value = Fingerprints::FingerprintsBitVector:: | |
1499 RogersTanimotoSimilarityCoefficient( | |
1500 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1501 | |
1502 Returns value of I<RogersTanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1503 | |
1504 =item B<RussellRaoSimilarityCoefficient> | |
1505 | |
1506 $Value = $FingerprintsBitVector->RussellRaoSimilarityCoefficient( | |
1507 $OtherFingerprintBitVector); | |
1508 $Value = Fingerprints::FingerprintsBitVector:: | |
1509 RussellRaoSimilarityCoefficient( | |
1510 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1511 | |
1512 Returns value of I<RussellRao> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1513 | |
1514 =item B<SetSpecifiedSize> | |
1515 | |
1516 $FingerprintsBitVector->SetSpecifiedSize($Size); | |
1517 | |
1518 Sets specified size for fingerprints bit vector. | |
1519 | |
1520 Irrespective of specified size, Perl functions used to handle bit data in B<BitVector> class | |
1521 automatically set the size to the next nearest power of 2. I<SpecifiedSize> is used by | |
1522 B<FingerprintsBitVector> class to process any arbitrary size during similarity coefficient calculations. | |
1523 | |
1524 =item B<SetDescription> | |
1525 | |
1526 $FingerprintsBitVector->SetDescription($Description); | |
1527 | |
1528 Sets I<Description> of fingerprints bit vector and returns I<FingerprintsBitVector>. | |
1529 | |
1530 =item B<SetID> | |
1531 | |
1532 $FingerprintsBitVector->SetID($ID); | |
1533 | |
1534 Sets I<ID> of fingerprints bit vector and returns I<FingerprintsBitVector>. | |
1535 | |
1536 =item B<SetVectorType> | |
1537 | |
1538 $FingerprintsBitVector->SetVectorType($VectorType); | |
1539 | |
1540 Sets I<VectorType> of fingerprints bit vector and returns I<FingerprintsBitVector>. | |
1541 | |
1542 =item B<SimpsonSimilarityCoefficient> | |
1543 | |
1544 $Value = $FingerprintsBitVector->SimpsonSimilarityCoefficient( | |
1545 $OtherFingerprintBitVector); | |
1546 $Value = Fingerprints::FingerprintsBitVector::SimpsonSimilarityCoefficient( | |
1547 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1548 | |
1549 Returns value of I<Simpson> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1550 | |
1551 =item B<SkoalSneath1SimilarityCoefficient> | |
1552 | |
1553 $Value = $FingerprintsBitVector->SkoalSneath1SimilarityCoefficient( | |
1554 $OtherFingerprintBitVector); | |
1555 $Value = Fingerprints::FingerprintsBitVector:: | |
1556 SkoalSneath1SimilarityCoefficient( | |
1557 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1558 | |
1559 Returns value of I<SkoalSneath1> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1560 | |
1561 =item B<SkoalSneath2SimilarityCoefficient> | |
1562 | |
1563 $Value = $FingerprintsBitVector->SkoalSneath2SimilarityCoefficient( | |
1564 $OtherFingerprintBitVector); | |
1565 $Value = Fingerprints::FingerprintsBitVector:: | |
1566 SkoalSneath2SimilarityCoefficient( | |
1567 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1568 | |
1569 Returns value of I<SkoalSneath2> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1570 | |
1571 =item B<SkoalSneath3SimilarityCoefficient> | |
1572 | |
1573 $Value = $FingerprintsBitVector->SkoalSneath3SimilarityCoefficient( | |
1574 $OtherFingerprintBitVector); | |
1575 $Value = Fingerprints::FingerprintsBitVector:: | |
1576 SkoalSneath3SimilarityCoefficient( | |
1577 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1578 | |
1579 Returns value of I<SkoalSneath3> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1580 | |
1581 =item B<StringifyFingerprintsBitVector> | |
1582 | |
1583 $String = $FingerprintsBitVector->StringifyFingerprintsBitVector(); | |
1584 | |
1585 Returns a string containing information about I<FingerprintsBitVector> object. | |
1586 | |
1587 =item B<TanimotoSimilarityCoefficient> | |
1588 | |
1589 $Value = $FingerprintsBitVector->TanimotoSimilarityCoefficient( | |
1590 $OtherFingerprintBitVector); | |
1591 $Value = Fingerprints::FingerprintsBitVector:: | |
1592 TanimotoSimilarityCoefficient( | |
1593 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1594 | |
1595 Returns value of I<Tanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1596 | |
1597 =item B<TverskySimilarityCoefficient> | |
1598 | |
1599 $Value = $FingerprintsBitVector->TverskySimilarityCoefficient( | |
1600 $OtherFingerprintBitVector, $Alpha); | |
1601 $Value = Fingerprints::FingerprintsBitVector:: | |
1602 TverskySimilarityCoefficient( | |
1603 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Alpha); | |
1604 | |
1605 Returns value of I<Tversky> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1606 | |
1607 =item B<WeightedTanimotoSimilarityCoefficient> | |
1608 | |
1609 $Value = | |
1610 $FingerprintsBitVector->WeightedTanimotoSimilarityCoefficient( | |
1611 $OtherFingerprintBitVector, $Beta); | |
1612 $Value = | |
1613 Fingerprints::FingerprintsBitVector:: | |
1614 WeightedTanimotoSimilarityCoefficient( | |
1615 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Beta); | |
1616 | |
1617 Returns value of I<WeightedTanimoto> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1618 | |
1619 =item B<WeightedTverskySimilarityCoefficient> | |
1620 | |
1621 $Value = | |
1622 $FingerprintsBitVector->WeightedTverskySimilarityCoefficient( | |
1623 $OtherFingerprintBitVector, $Alpha, $Beta); | |
1624 $Value = | |
1625 Fingerprints::FingerprintsBitVector:: | |
1626 WeightedTverskySimilarityCoefficient( | |
1627 $FingerprintsBitVectorA, $FingerprintBitVectorB, $Alpha, $Beta); | |
1628 | |
1629 Returns value of I<WeightedTversky> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1630 | |
1631 =item B<YuleSimilarityCoefficient> | |
1632 | |
1633 $Value = $FingerprintsBitVector->YuleSimilarityCoefficient( | |
1634 $OtherFingerprintBitVector); | |
1635 $Value = Fingerprints::FingerprintsBitVector::YuleSimilarityCoefficient( | |
1636 $FingerprintsBitVectorA, $FingerprintBitVectorB); | |
1637 | |
1638 Returns value of I<Yule> similarity coefficient between two same size I<FingerprintsBitVectors>. | |
1639 | |
1640 =back | |
1641 | |
1642 =head1 AUTHOR | |
1643 | |
1644 Manish Sud <msud@san.rr.com> | |
1645 | |
1646 =head1 SEE ALSO | |
1647 | |
1648 BitVector.pm, FingerprintsStringUtil.pm, FingerprintsVector.pm, Vector.pm | |
1649 | |
1650 =head1 COPYRIGHT | |
1651 | |
1652 Copyright (C) 2015 Manish Sud. All rights reserved. | |
1653 | |
1654 This file is part of MayaChemTools. | |
1655 | |
1656 MayaChemTools is free software; you can redistribute it and/or modify it under | |
1657 the terms of the GNU Lesser General Public License as published by the Free | |
1658 Software Foundation; either version 3 of the License, or (at your option) | |
1659 any later version. | |
1660 | |
1661 =cut |