Mercurial > repos > deepakjadmin > mayatool3_test1
comparison lib/Fingerprints/FingerprintsVector.pm @ 1:2abf0d43254d draft
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:10:43 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:1791cb0984a7 | 1:2abf0d43254d |
|---|---|
| 1 package Fingerprints::FingerprintsVector; | |
| 2 # | |
| 3 # $RCSfile: FingerprintsVector.pm,v $ | |
| 4 # $Date: 2015/02/28 20:48:54 $ | |
| 5 # $Revision: 1.31 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability of fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use Carp; | |
| 31 use Exporter; | |
| 32 use Scalar::Util (); | |
| 33 use MathUtil (); | |
| 34 use TextUtil (); | |
| 35 use StatisticsUtil (); | |
| 36 use BitVector; | |
| 37 use Vector; | |
| 38 | |
| 39 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
| 40 | |
| 41 @ISA = qw(Exporter); | |
| 42 | |
# Names of the distance coefficient functions. Declared with "our" so that the
# '@DistanceCoefficients' entry in @EXPORT_OK refers to this populated list...
our @DistanceCoefficients = qw(CityBlockDistanceCoefficient EuclideanDistanceCoefficient HammingDistanceCoefficient ManhattanDistanceCoefficient SoergelDistanceCoefficient);

# Names of the similarity coefficient functions (see note above about "our")...
our @SimilarityCoefficients = qw(CosineSimilarityCoefficient CzekanowskiSimilarityCoefficient DiceSimilarityCoefficient OchiaiSimilarityCoefficient JaccardSimilarityCoefficient SorensonSimilarityCoefficient TanimotoSimilarityCoefficient);

# Names of the functions creating a vector from a string...
my @NewFromString = qw(NewFromValuesString NewFromValuesAndIDsString NewFromIDsAndValuesString NewFromValuesAndIDsPairsString NewFromIDsAndValuesPairsString);

@EXPORT = qw(IsFingerprintsVector);

# qw() doesn't interpolate arrays: the function names have to be appended as real
# lists, otherwise none of them is exportable and the tags below can't be imported.
# The two array names stay available for import as before...
@EXPORT_OK = (
    qw(GetSupportedDistanceCoefficients GetSupportedSimilarityCoefficients GetSupportedDistanceAndSimilarityCoefficients),
    qw(@DistanceCoefficients @SimilarityCoefficients),
    @DistanceCoefficients,
    @SimilarityCoefficients,
    @NewFromString,
);

# Every name listed in a tag must also be present in @EXPORT or @EXPORT_OK...
%EXPORT_TAGS = (
    new => [@NewFromString],
    distancecoefficients => [@DistanceCoefficients],
    similaritycoefficients => [@SimilarityCoefficients],
    all => [@EXPORT, @EXPORT_OK]
);
| 61 | |
# Class-level state: the package name, filled in by _InitializeClass() and used
# as a prefix in error and warning messages...
my $ClassName;
_InitializeClass();

# Using an object in string context invokes StringifyFingerprintsVector...
use overload '""' => 'StringifyFingerprintsVector';
| 68 | |
# Class constructor: creates an empty fingerprints vector and applies the specified
# name and value pairs using the corresponding Set<Name> methods...
#
sub new {
    my $Invocant = shift;
    my %NamesAndValues = @_;

    # Invocant may be a class name or an existing object...
    my $This = bless {}, (ref($Invocant) || $Invocant);

    $This->_InitializeFingerprintsVector();
    $This->_InitializeFingerprintsVectorProperties(%NamesAndValues);

    return $This;
}
| 83 | |
# Initialize object data: empty type, no values and no value IDs. Returns the object...
#
sub _InitializeFingerprintsVector {
    my $This = shift;

    # Type stays empty until SetType is called...
    $This->{Type} = '';

    # Empty out the lists of values and value IDs...
    @{ $This->{$_} } = () for qw(Values ValueIDs);

    return $This;
}
| 100 | |
# Initialize class: remember the package name for use in messages...
sub _InitializeClass {
    return $ClassName = __PACKAGE__;
}
| 106 | |
# Initialize object properties: each specified name is set using the method
# Set<Name>. Croaks when no Type is among the specified names...
sub _InitializeFingerprintsVectorProperties {
    my($This, %NamesAndValues) = @_;

    for my $PropertyName (keys %NamesAndValues) {
        my $SetterName = "Set${PropertyName}";
        $This->$SetterName($NamesAndValues{$PropertyName});
    }

    # Type is mandatory...
    exists $NamesAndValues{Type}
        or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying type...";

    return $This;
}
| 122 | |
# Create a new fingerprints vector using space delimited values string. A missing,
# empty or 'None' values string creates a vector without any values.
#
# This functionality can be either invoked as a class function (Type, ValuesString)
# or an object method (Invocant, Type, ValuesString); the invocant itself is not used.
#
sub NewFromValuesString ($$;$) {
    my @Arguments = @_;

    # Three arguments imply a leading invocant: drop it...
    shift @Arguments if @Arguments == 3;
    my($Type, $ValuesString) = @Arguments;

    my @Values = ();
    if (defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i) {
        @Values = split(' ', $ValuesString);
    }

    # Explicit method call instead of indirect object syntax, which is ambiguous
    # in a package defining its own "new"...
    my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values);

    return $FingerprintsVector;
}
| 147 | |
# Create a new fingerprints vector using values and IDs string containing semicolon
# delimited values string and value IDs string. The values within values and value IDs
# strings are delimited by spaces; an empty or 'None' part yields an empty list.
#
# Warns and returns undef when number of values and value IDs differ.
#
# This functionality can be either invoked as a class function (Type, String) or an
# object method (Invocant, Type, String); the invocant itself is not used.
#
sub NewFromValuesAndIDsString ($$;$) {
    my @Arguments = @_;

    # Three arguments imply a leading invocant: drop it...
    shift @Arguments if @Arguments == 3;
    my($Type, $ValuesAndIDsString) = @Arguments;

    my($ValuesString, $ValueIDsString) = split(';', $ValuesAndIDsString);

    my @Values = ();
    if (defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i) {
        @Values = split(' ', $ValuesString);
    }
    my @ValueIDs = ();
    if (defined($ValueIDsString) && length($ValueIDsString) && $ValueIDsString !~ /^None$/i) {
        @ValueIDs = split(' ', $ValueIDsString);
    }

    if (@Values != @ValueIDs) {
        carp "Warning: ${ClassName}->NewFromValuesAndIDsString: Object can't be instantiated: Number specified values, " . scalar @Values . ", must be equal to number of specified value IDs, " . scalar @ValueIDs . "...";
        # Explicit undef kept for callers relying on a single returned value...
        return undef;
    }

    # Explicit method call instead of indirect object syntax, which is ambiguous
    # in a package defining its own "new"...
    my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

    return $FingerprintsVector;
}
| 186 | |
# Create a new fingerprints vector using IDs and values string containing semicolon
# delimited value IDs string and values string. The values within value IDs and values
# strings are delimited by spaces; an empty or 'None' part yields an empty list.
#
# Warns and returns undef when number of values and value IDs differ.
#
# This functionality can be either invoked as a class function (Type, String) or an
# object method (Invocant, Type, String); the invocant itself is not used.
#
sub NewFromIDsAndValuesString ($$;$) {
    my @Arguments = @_;

    # Three arguments imply a leading invocant: drop it...
    shift @Arguments if @Arguments == 3;
    my($Type, $IDsAndValuesString) = @Arguments;

    # Value IDs come first in this format...
    my($ValueIDsString, $ValuesString) = split(';', $IDsAndValuesString);

    my @Values = ();
    if (defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i) {
        @Values = split(' ', $ValuesString);
    }
    my @ValueIDs = ();
    if (defined($ValueIDsString) && length($ValueIDsString) && $ValueIDsString !~ /^None$/i) {
        @ValueIDs = split(' ', $ValueIDsString);
    }

    if (@Values != @ValueIDs) {
        carp "Warning: ${ClassName}->NewFromIDsAndValuesString: Object can't be instantiated: Number specified values, " . scalar @Values . ", must be equal to number of specified value IDs, " . scalar @ValueIDs . "...";
        # Explicit undef kept for callers relying on a single returned value...
        return undef;
    }

    # Explicit method call instead of indirect object syntax, which is ambiguous
    # in a package defining its own "new"...
    my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

    return $FingerprintsVector;
}
| 225 | |
# Create a new fingerprints vector using values and IDs pairs string containing space
# delimited value and value ID pairs. A single 'None None' pair creates a vector
# without any values or value IDs.
#
# Warns and returns undef when the string contains an odd number of items.
#
# This functionality can be either invoked as a class function (Type, String) or an
# object method (Invocant, Type, String); the invocant itself is not used.
#
sub NewFromValuesAndIDsPairsString ($$;$) {
    my @Arguments = @_;

    # Three arguments imply a leading invocant: drop it...
    shift @Arguments if @Arguments == 3;
    my($Type, $ValuesAndIDsPairsString) = @Arguments;

    my @ValuesAndIDsPairs = split(' ', $ValuesAndIDsPairsString);
    if (@ValuesAndIDsPairs % 2) {
        carp "Warning: ${ClassName}->NewFromValuesAndIDsPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
        # Explicit undef kept for callers relying on a single returned value...
        return undef;
    }

    my $IsNonePair = (@ValuesAndIDsPairs == 2 && $ValuesAndIDsPairs[0] =~ /^None$/i && $ValuesAndIDsPairs[1] =~ /^None$/i);

    my(@Values, @ValueIDs);
    if (!$IsNonePair) {
        # Each pair is a value followed by its ID...
        while (my($Value, $ValueID) = splice(@ValuesAndIDsPairs, 0, 2)) {
            push @Values, $Value;
            push @ValueIDs, $ValueID;
        }
    }

    # Explicit method call instead of indirect object syntax, which is ambiguous
    # in a package defining its own "new"...
    my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

    return $FingerprintsVector;
}
| 260 | |
# Create a new fingerprints vector using IDs and values pairs string containing space
# delimited value ID and value pairs. A single 'None None' pair creates a vector
# without any values or value IDs.
#
# Croaks when the string contains an odd number of items.
#
# This functionality can be either invoked as a class function (Type, String) or an
# object method (Invocant, Type, String); the invocant itself is not used.
#
sub NewFromIDsAndValuesPairsString ($$;$) {
    my @Arguments = @_;

    # Three arguments imply a leading invocant: drop it...
    shift @Arguments if @Arguments == 3;
    my($Type, $IDsAndValuesPairsString) = @Arguments;

    my @IDsAndValuesPairs = split(' ', $IDsAndValuesPairsString);
    if (@IDsAndValuesPairs % 2) {
        # croak doesn't return: the former "return undef" after it was dead code...
        croak "Error: ${ClassName}->NewFromIDsAndValuesPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
    }

    my $IsNonePair = (@IDsAndValuesPairs == 2 && $IDsAndValuesPairs[0] =~ /^None$/i && $IDsAndValuesPairs[1] =~ /^None$/i);

    my(@Values, @ValueIDs);
    if (!$IsNonePair) {
        # Each pair is an ID followed by its value...
        while (my($ValueID, $Value) = splice(@IDsAndValuesPairs, 0, 2)) {
            push @ValueIDs, $ValueID;
            push @Values, $Value;
        }
    }

    # Explicit method call instead of indirect object syntax, which is ambiguous
    # in a package defining its own "new"...
    my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

    return $FingerprintsVector;
}
| 295 | |
# Set type of fingerprints vector. Supported types, matched case insensitively, are:
# OrderedNumericalValues, NumericalValues, and AlphaNumericalValues
#
# . For OrderedNumericalValues type, both vectors must be of the same size and contain similar
# types of numerical values in the same order.
#
# . For NumericalValues type, vector value IDs for both vectors must be specified; however, their
# size and order of IDs and numerical values may be different. For each vector, value IDs must
# correspond to vector values.
#
# . For AlphaNumericalValues type, vectors may contain both numerical and alphanumerical values
# and their sizes may be different.
#
# Croaks for an unsupported type or when a type has already been set. Returns the object.
#
sub SetType {
    my($This, $Type) = @_;

    unless ($Type =~ /^(OrderedNumericalValues|NumericalValues|AlphaNumericalValues)$/i) {
        croak "Error: ${ClassName}->SetType: Specified value, $Type, for Type is not vaild. Supported types in current release of MayaChemTools: OrderedNumericalValues, NumericalValues or AlphaNumericalValues";
    }

    # Type can be set only once...
    croak "Error: ${ClassName}->SetType: Can't change intial fingerprints vector type: It's already set..."
        if $This->{Type};

    $This->{Type} = $Type;

    return $This;
}
| 323 | |
# Get fingerprints vector type...
#
sub GetType {
    my $This = shift;

    my $Type = $This->{Type};

    return $Type;
}
| 331 | |
# Set ID of the fingerprints vector. Returns the object...
sub SetID {
    my $This = shift;
    my($NewID) = @_;

    $This->{ID} = $NewID;

    return $This;
}
| 340 | |
# Get ID of the fingerprints vector, or 'None' when no ID has been set...
sub GetID {
    my $This = shift;

    return $This->{ID} if exists $This->{ID};

    return 'None';
}
| 347 | |
# Set description of the fingerprints vector. Returns the object...
sub SetDescription {
    my $This = shift;
    my($NewDescription) = @_;

    $This->{Description} = $NewDescription;

    return $This;
}
| 356 | |
# Get description of the fingerprints vector, or a default text when none has been set...
sub GetDescription {
    my $This = shift;

    return $This->{Description} if exists $This->{Description};

    return 'No description available';
}
| 363 | |
# Set vector type. Any value is stored as is. Returns the object...
sub SetVectorType {
    my $This = shift;
    my($NewVectorType) = @_;

    $This->{VectorType} = $NewVectorType;

    return $This;
}
| 372 | |
# Get vector type, or 'FingerprintsVector' when none has been set...
sub GetVectorType {
    my $This = shift;

    return $This->{VectorType} if exists $This->{VectorType};

    return 'FingerprintsVector';
}
| 379 | |
# Replace values of a fingerprints vector using a vector, reference to an array or a
# list of values. Returns the object...
#
sub SetValues {
    my($This, @NewValues) = @_;

    # Input handling is shared with the other set and add methods...
    $This->_SetOrAddValuesOrValueIDs("SetValues", @NewValues);

    return $This;
}
| 389 | |
# Replace value IDs of a fingerprints vector using a vector, reference to an array or a
# list of value IDs. Returns the object...
#
sub SetValueIDs {
    my($This, @NewValueIDs) = @_;

    # Input handling is shared with the other set and add methods...
    $This->_SetOrAddValuesOrValueIDs("SetValueIDs", @NewValueIDs);

    return $This;
}
| 399 | |
# Append values to a fingerprints vector using a vector, reference to an array or a
# list of values. Returns the object...
#
sub AddValues {
    my($This, @NewValues) = @_;

    # Input handling is shared with the other set and add methods...
    $This->_SetOrAddValuesOrValueIDs("AddValues", @NewValues);

    return $This;
}
| 409 | |
# Append value IDs to a fingerprints vector using a vector, reference to an array or a
# list of value IDs. Returns the object...
#
sub AddValueIDs {
    my($This, @NewValueIDs) = @_;

    # Input handling is shared with the other set and add methods...
    $This->_SetOrAddValuesOrValueIDs("AddValueIDs", @NewValueIDs);

    return $This;
}
| 419 | |
# Set or add values or value IDs. Mode is one of SetValues, SetValueIDs, AddValues
# or AddValueIDs; the data may be specified as:
#
# o List of values or ValueIDs
# o Reference to a list of values or ValueIDs
# o A vector containing values or ValueIDs
#
# Only the first specified item decides which of the three forms is used. Nothing is
# done, and nothing is returned, when no data is specified; otherwise the object is
# returned. Croaks for unsupported reference types and unknown modes.
#
sub _SetOrAddValuesOrValueIDs {
    my($This, $Mode, @Values) = @_;

    return unless @Values;

    # Figure out where the specified values or value IDs live...
    my $FirstValue = $Values[0];
    my $FirstValueType = ref $FirstValue;

    croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Trying to add values to vector object with a reference to unsupported value format..."
        if $FirstValueType =~ /^(SCALAR|HASH|CODE|REF|GLOB)/;

    my $SourceRef;
    if (Vector::IsVector($FirstValue)) {
        # A vector object: use its values...
        $SourceRef = $FirstValue->GetValues();
    }
    elsif ($FirstValueType =~ /^ARRAY/) {
        # A reference to an array...
        $SourceRef = $FirstValue;
    }
    else {
        # A plain list...
        $SourceRef = \@Values;
    }

    # Set modes empty the target before appending; add modes just append. Emptying
    # and appending are kept as two separate steps on purpose...
    if ($Mode =~ /^SetValues$/i) {
        @{$This->{Values}} = ();
        push @{$This->{Values}}, @{$SourceRef};
    }
    elsif ($Mode =~ /^SetValueIDs$/i) {
        @{$This->{ValueIDs}} = ();
        push @{$This->{ValueIDs}}, @{$SourceRef};
    }
    elsif ($Mode =~ /^AddValues$/i) {
        push @{$This->{Values}}, @{$SourceRef};
    }
    elsif ($Mode =~ /^AddValueIDs$/i) {
        push @{$This->{ValueIDs}}, @{$SourceRef};
    }
    else {
        croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Unknown mode $Mode...";
    }

    return $This;
}
| 465 | |
# Set a specific value in fingerprints vector with indices starting from 0. A true
# SkipCheck disables index validation; otherwise croaks for an index which is negative
# or not less than number of values...
#
sub SetValue {
    my($This, $Index, $Value, $SkipCheck) = @_;

    unless ($SkipCheck) {
        croak "Error: ${ClassName}->SetValue: Index value must be a positive number..."
            if $Index < 0;
        croak "Error: ${ClassName}->SetValue: Index vaue must be less than number of values..."
            if $Index >= $This->GetNumOfValues();
    }

    return $This->_SetValue($Index, $Value);
}
| 486 | |
# Set a fingerprints vector value at a specified index without any checks. Returns
# the object...
#
sub _SetValue {
    my $This = shift;
    my($Index, $Value) = @_;

    $This->{Values}->[$Index] = $Value;

    return $This;
}
| 496 | |
# Get a specific value from fingerprints vector with indices starting from 0. Croaks
# for an index which is negative or not less than number of values...
#
sub GetValue {
    my($This, $Index) = @_;

    croak "Error: ${ClassName}->GetValue: Index value must be a positive number..."
        if $Index < 0;
    croak "Error: ${ClassName}->GetValue: Index value must be less than number of values..."
        if $Index >= $This->GetNumOfValues();

    return $This->_GetValue($Index);
}
| 510 | |
# Get a fingerprints vector value at a specified index without any checks...
sub _GetValue {
    my $This = shift;
    my $Index = shift;

    return $This->{Values}->[$Index];
}
| 517 | |
# Return vector values as a list in list context, or as a reference to the
# object's own values array otherwise...
#
sub GetValues {
    my $This = shift;

    if (wantarray) {
        return @{$This->{Values}};
    }

    return \@{$This->{Values}};
}
| 525 | |
# Set a specific value ID in fingerprints vector with indices starting from 0. A true
# SkipCheck disables index validation; otherwise croaks for an index which is negative
# or not less than number of value IDs...
#
sub SetValueID {
    my($This, $Index, $Value, $SkipCheck) = @_;

    unless ($SkipCheck) {
        croak "Error: ${ClassName}->SetValueID: Index value must be a positive number..."
            if $Index < 0;
        croak "Error: ${ClassName}->SetValueID: Index vaue must be less than number of value IDs..."
            if $Index >= $This->GetNumOfValueIDs();
    }

    return $This->_SetValueID($Index, $Value);
}
| 546 | |
# Set a fingerprints vector value ID at a specified index without any checks.
# Returns the object...
#
sub _SetValueID {
    my $This = shift;
    my($Index, $Value) = @_;

    $This->{ValueIDs}->[$Index] = $Value;

    return $This;
}
| 556 | |
# Get a specific value ID from fingerprints vector with indices starting from 0. Croaks
# for an index which is negative or not less than number of value IDs...
#
sub GetValueID {
    my($This, $Index) = @_;

    croak "Error: ${ClassName}->GetValueID: Index value must be a positive number..."
        if $Index < 0;
    croak "Error: ${ClassName}->GetValueID: Index value must be less than number of value IDs..."
        if $Index >= $This->GetNumOfValueIDs();

    return $This->_GetValueID($Index);
}
| 570 | |
# Get a fingerprints vector value ID at a specified index without any checks...
#
sub _GetValueID {
    my $This = shift;
    my $Index = shift;

    return $This->{ValueIDs}->[$Index];
}
| 578 | |
# Return vector value IDs as a list in list context, or as a reference to the
# object's own value IDs array otherwise...
#
sub GetValueIDs {
    my $This = shift;

    if (wantarray) {
        return @{$This->{ValueIDs}};
    }

    return \@{$This->{ValueIDs}};
}
| 586 | |
# Get fingerprints vector string containing values and/or IDs in a specified format.
#
# Format is matched case insensitively against: IDsAndValuesString, IDsAndValues,
# IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs,
# ValuesAndIDsPairsString, ValuesAndIDsPairs, ValueIDsString, ValueIDs, ValuesString
# and Values. The string is generated by the corresponding Get...String method.
# Croaks for any other format.
#
sub GetFingerprintsVectorString {
    my($This, $Format) = @_;

    # Format patterns and names of the methods generating the strings, tried in order...
    my @FormatsAndMethods = (
        [qr/^(IDsAndValuesString|IDsAndValues)$/i, 'GetIDsAndValuesString'],
        [qr/^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i, 'GetIDsAndValuesPairsString'],
        [qr/^(ValuesAndIDsString|ValuesAndIDs)$/i, 'GetValuesAndIDsString'],
        [qr/^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i, 'GetValuesAndIDsPairsString'],
        [qr/^(ValueIDsString|ValueIDs)$/i, 'GetValueIDsString'],
        [qr/^(ValuesString|Values)$/i, 'GetValuesString'],
    );

    for my $FormatAndMethod (@FormatsAndMethods) {
        my($FormatPattern, $MethodName) = @{$FormatAndMethod};
        return $This->$MethodName() if $Format =~ $FormatPattern;
    }

    # No statement is needed after this: croak doesn't return...
    croak "Error: ${ClassName}->GetFingerprintsVectorString: Specified vector string format, $Format, is not supported. Value values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValueIDsString, ValueIDs, ValuesString, Values...";
}
# Get vector value IDs and values as two space delimited ASCII strings separated
# by semicolon. 'None' stands for unavailable value IDs; 'None;None' is returned
# when no values are available...
#
sub GetIDsAndValuesString {
    my $This = shift;

    my $ValueIDsRef = $This->{ValueIDs};
    my $ValuesRef = $This->{Values};

    # Value IDs without values are not reported...
    return "None;None" unless @{$ValuesRef};

    my $ValueIDsString = @{$ValueIDsRef} ? join(' ', @{$ValueIDsRef}) : 'None';

    return $ValueIDsString . ";" . join(' ', @{$ValuesRef});
}
| 622 | |
# Get vector value ID and value pairs as a space delimited ASCII string. 'None' is
# used for each ID when no value IDs are available; 'None None' is returned when no
# values are available...
#
sub GetIDsAndValuesPairsString {
    my $This = shift;

    return "None None" unless @{$This->{Values}};

    my $ValueIDsPresent = @{$This->{ValueIDs}} ? 1 : 0;

    # One ID and value pair for each value...
    my @IDsAndValuesPairs = map {
        my $ValueID = $ValueIDsPresent ? $This->{ValueIDs}->[$_] : 'None';
        ($ValueID, $This->{Values}->[$_]);
    } 0 .. $#{$This->{Values}};

    return join(' ', @IDsAndValuesPairs);
}
| 647 | |
# Get vector values and value IDs as two space delimited ASCII strings separated
# by semicolon. 'None' stands for unavailable value IDs; 'None;None' is returned
# when no values are available...
#
sub GetValuesAndIDsString {
    my $This = shift;

    my $ValueIDsRef = $This->{ValueIDs};
    my $ValuesRef = $This->{Values};

    # Value IDs without values are not reported...
    return "None;None" unless @{$ValuesRef};

    my $ValueIDsString = @{$ValueIDsRef} ? join(' ', @{$ValueIDsRef}) : 'None';

    return join(' ', @{$ValuesRef}) . ";" . $ValueIDsString;
}
| 667 | |
# Get vector value and value ID pairs as a space delimited ASCII string. 'None' is
# used for each ID when no value IDs are available; 'None None' is returned when no
# values are available...
#
sub GetValuesAndIDsPairsString {
    my $This = shift;

    return "None None" unless @{$This->{Values}};

    my $ValueIDsPresent = @{$This->{ValueIDs}} ? 1 : 0;

    # One value and ID pair for each value...
    my @ValuesAndIDsPairs = map {
        my $ValueID = $ValueIDsPresent ? $This->{ValueIDs}->[$_] : 'None';
        ($This->{Values}->[$_], $ValueID);
    } 0 .. $#{$This->{Values}};

    return join(' ', @ValuesAndIDsPairs);
}
| 692 | |
# Get vector value IDs as a space delimited ASCII string, or 'None' when no value
# IDs are available...
#
sub GetValueIDsString {
    my $This = shift;

    return 'None' unless @{$This->{ValueIDs}};

    return join(' ', @{$This->{ValueIDs}});
}
| 700 | |
# Get vector values as a space delimited ASCII string, or 'None' when no values
# are available...
#
sub GetValuesString {
    my $This = shift;

    return 'None' unless @{$This->{Values}};

    return join(' ', @{$This->{Values}});
}
| 708 | |
# Get number of values in the fingerprints vector...
sub GetNumOfValues {
    my $This = shift;

    my $NumOfValues = @{$This->{Values}};

    return $NumOfValues;
}
| 715 | |
# Get number of values which are numerically different from zero...
sub GetNumOfNonZeroValues {
    my $This = shift;

    my $LastIndex = $This->GetNumOfValues() - 1;

    # grep in scalar context counts the matching indices...
    my $Count = grep { $This->{Values}[$_] != 0 } 0 .. $LastIndex;

    return $Count;
}
| 731 | |
# Get number of value IDs in the fingerprints vector...
sub GetNumOfValueIDs {
    my $This = shift;

    my $NumOfValueIDs = @{$This->{ValueIDs}};

    return $NumOfValueIDs;
}
| 738 | |
| 739 # FingerprintsVector class provides methods to calculate similarity between vectors | |
| 740 # containing three different types of values: | |
| 741 # | |
| 742 # Type I: OrderedNumericalValues | |
| 743 # | |
| 744 # . Size of two vectors are same | |
| 745 # . Vectors contain real values in a specific order. For example: MACCS keys count, Topological | |
| 746 # pharmacophore atom pairs and so on. | |
| 747 # . Option to calculate similarity value using continuous values or binary values | |
| 748 # | |
| 749 # Type II: UnorderedNumericalValues | |
| 750 # | |
| 751 # . Size of two vectors might not be same | |
| 752 # . Vectors contain unordered real values identified by value IDs. For example: Topological atom pairs, | |
| 753 # Topological atom torsions and so on | |
| 754 # . Option to calculate similarity value using continuous values or binary values | |
| 755 # | |
| 756 # Type III: AlphaNumericalValues | |
| 757 # | |
| 758 # . Size of two vectors might not be same | |
| 759 # . Vectors contain unordered alphanumerical values. For example: Extended connectivity fingerprints, | |
| 760 # atom neighborhood fingerprints. | |
| 761 # . The vector values are treated as keys or bit indices and similarity value is calculated accordingly. | |
| 762 # | |
| 763 # Before performing similarity or distance calculations between vectors containing UnorderedNumericalValues | |
| 764 # or AlphaNumericalValues, the vectors are transformed into vectors containing unique OrderedNumericalValues | |
| 765 # using value IDs for UnorderedNumericalValues and values itself for AlphaNumericalValues. | |
| 766 # | |
| 767 # Three forms of similarity or distance calculation between two vectors are supported: AlgebraicForm, BinaryForm or | |
| 768 # SetTheoreticForm. | |
| 769 # | |
| 770 # The value of an extra parameter, CalculationMode, passed to each similarity or distance function | |
| 771 # controls the calculation. Supported values for CalculationMode: AlgebraicForm, BinaryForm and | |
| 772 # SetTheoreticForm. Default: AlgebraicForm. | |
| 773 # | |
| 774 # For BinaryForm CalculationMode, the ordered list of processed final vector values containing the value or | |
| 775 # count of each unique value type is simply converted into a binary vector containing 1s and 0s | |
| 776 # corresponding to presence or absence of values before calculating similarity or distance between | |
| 777 # two vectors. | |
| 778 # | |
| 779 # For two fingerprint vectors A and B of same size containing OrderedNumericalValues, let: | |
| 780 # | |
| 781 # N = Number values in A or B | |
| 782 # | |
| 783 # Xa = Values of vector A | |
| 784 # Xb = Values of vector B | |
| 785 # | |
| 786 # Xai = Value of ith element in A | |
| 787 # Xbi = Value of ith element in B | |
| 788 # | |
| 789 # SUM = Sum of i over N values | |
| 790 # | |
| 791 # For SetTheoreticForm of calculation between two vectors, let: | |
| 792 # | |
| 793 # SetIntersectionXaXb = SUM ( MIN ( Xai, Xbi ) ) | |
| 794 # SetDifferenceXaXb = SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) | |
| 795 # | |
| 796 # For BinaryForm of calculation between two vectors, let: | |
| 797 # | |
| 798 # Na = Number of bits set to "1" in A = SUM ( Xai ) | |
| 799 # Nb = Number of bits set to "1" in B = SUM ( Xbi ) | |
| 800 # Nc = Number of bits set to "1" in both A and B = SUM ( Xai * Xbi ) | |
| 801 # Nd = Number of bits set to "0" in both A and B = SUM ( 1 - Xai - Xbi + Xai * Xbi) | |
| 802 # | |
| 803 # N = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd | |
| 804 # | |
| 805 # Additionally, for BinaryForm various values also correspond to: | |
| 806 # | |
| 807 # Na = | Xa | | |
| 808 # Nb = | Xb | | |
| 809 # Nc = | SetIntersectionXaXb | | |
| 810 # Nd = N - | SetDifferenceXaXb | | |
| 811 # | |
| 812 # | SetDifferenceXaXb | = N - Nd = Na + Nb - Nc + Nd - Nd = Na + Nb - Nc | |
| 813 # = | Xa | + | Xb | - | SetIntersectionXaXb | | |
| 814 # | |
| 815 # Various distance coefficients and similarity coefficients [ Ref 40, Ref 62, Ref 64 ] for a pair of vectors A and B | |
| 816 # in AlgebraicForm, BinaryForm and SetTheoreticForm are defined as follows: | |
| 817 # | |
| 818 # . CityBlockDistanceCoefficient: ( same as HammingDistanceCoefficient and ManhattanDistanceCoefficient) | |
| 819 # | |
| 820 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) ) | |
| 821 # | |
| 822 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
| 823 # | |
| 824 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb | | |
| 825 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
| 826 # | |
| 827 # . CosineSimilarityCoefficient: ( same as OchiaiSimilarityCoefficient) | |
| 828 # | |
| 829 # . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) ) | |
| 830 # | |
| 831 # . BinaryForm: Nc / SQRT ( Na * Nb) | |
| 832 # | |
| 833 # . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) | |
| 834 # = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) ) | |
| 835 # | |
| 836 # . CzekanowskiSimilarityCoefficient: ( same as DiceSimilarityCoefficient and SorensonSimilarityCoefficient) | |
| 837 # | |
| 838 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
| 839 # | |
| 840 # . BinaryForm: 2 * Nc / ( Na + Nb ) | |
| 841 # | |
| 842 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) | |
| 843 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
| 844 # | |
| 845 # . DiceSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and SorensonSimilarityCoefficient) | |
| 846 # | |
| 847 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
| 848 # | |
| 849 # . BinaryForm: 2 * Nc / ( Na + Nb ) | |
| 850 # | |
| 851 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) | |
| 852 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
| 853 # | |
| 854 # . EuclideanDistanceCoefficient: | |
| 855 # | |
| 856 # . AlgebraicForm: SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) ) | |
| 857 # | |
| 858 # . BinaryForm: SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc ) | |
| 859 # | |
| 860 # . SetTheoreticForm: SQRT ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) | |
| 861 # = SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) | |
| 862 # | |
| 863 # . HammingDistanceCoefficient: ( same as CityBlockDistanceCoefficient and ManhattanDistanceCoefficient) | |
| 864 # | |
| 865 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) ) | |
| 866 # | |
| 867 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
| 868 # | |
| 869 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb | | |
| 870 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
| 871 # | |
| 872 # . JaccardSimilarityCoefficient: ( same as TanimotoSimilarityCoefficient) | |
| 873 # | |
| 874 # . AlgebraicForm: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) ) | |
| 875 # | |
| 876 # . BinaryForm: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) | |
| 877 # | |
| 878 # . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb | | |
| 879 # = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
| 880 # | |
| 881 # . ManhattanDistanceCoefficient: ( same as CityBlockDistanceCoefficient and HammingDistanceCoefficient) | |
| 882 # | |
| 883 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) ) | |
| 884 # | |
| 885 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
| 886 # | |
| 887 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb | | |
| 888 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
| 889 # | |
| 890 # . OchiaiSimilarityCoefficient: ( same as CosineSimilarityCoefficient) | |
| 891 # | |
| 892 # . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) ) | |
| 893 # | |
| 894 # . BinaryForm: Nc / SQRT ( Na * Nb) | |
| 895 # | |
| 896 # . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) | |
| 897 # = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) ) | |
| 898 # | |
| 899 # . SorensonSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and DiceSimilarityCoefficient) | |
| 900 # | |
| 901 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
| 902 # | |
| 903 # . BinaryForm: 2 * Nc / ( Na + Nb ) | |
| 904 # | |
| 905 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) | |
| 906 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
| 907 # | |
| 908 # . SoergelDistanceCoefficient: | |
| 909 # | |
| 910 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) ) | |
| 911 # | |
| 912 # . BinaryForm: 1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc ) | |
| 913 # | |
| 914 # . SetTheoreticForm: ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) / | SetDifferenceXaXb | | |
| 915 # = ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
| 916 # | |
| 917 # . TanimotoSimilarityCoefficient: ( same as JaccardSimilarityCoefficient) | |
| 918 # | |
| 919 # . AlgebraicForm: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) ) | |
| 920 # | |
| 921 # . BinaryForm: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) | |
| 922 # | |
| 923 # . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb | | |
| 924 # = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
| 925 # | |
| 926 # | |
| 927 | |
# Calculate Hamming distance coefficient between two fingerprint vectors.
#
# Hamming distance is computed exactly like CityBlock distance: all four
# arguments are forwarded unchanged to CityBlockDistanceCoefficient and its
# result is returned as is.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - optional; passed through (undef when omitted).
#   SkipValuesCheck - optional; passed through (undef when omitted).
#
# This functionality can be either invoked as a class function or an object method.
#
sub HammingDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return CityBlockDistanceCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
| 937 | |
# Calculate Manhattan distance coefficient between two fingerprint vectors.
#
# Manhattan distance is computed exactly like CityBlock distance: all four
# arguments are forwarded unchanged to CityBlockDistanceCoefficient and its
# result is returned as is.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - optional; passed through (undef when omitted).
#   SkipValuesCheck - optional; passed through (undef when omitted).
#
# This functionality can be either invoked as a class function or an object method.
#
sub ManhattanDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return CityBlockDistanceCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
| 947 | |
# Calculate CityBlock distance coefficient between two fingerprint vectors.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     case-insensitively. Default: AlgebraicForm.
#   SkipValuesCheck - true value skips validation of the vectors. Default: 0.
#
# Returns the value calculated by the helper for the selected form, or undef
# when CalculationMode matches none of the three supported forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CityBlockDistanceCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before dispatching...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation(
    "CityBlockDistanceCoefficient: Calculation failed",
    $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck
  );

  # Hand off to the helper matching the calculation mode...
  return _CityBlockDistanceCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _CityBlockDistanceCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _CityBlockDistanceCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  return undef;
}
| 976 | |
# Calculate CityBlock distance coefficient using algebraic form...
#
# The whole calculation is delegated to
# _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues; per the
# coefficient definitions in this file the algebraic form is:
#
#   SUM ( ABS ( Xai - Xbi ) )
#
sub _CityBlockDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $CityBlockDistance = _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);

  return $CityBlockDistance;
}
| 987 | |
# Calculate CityBlock distance coefficient using binary form...
#
# Returns Na + Nb - 2 * Nc, where Na, Nb and Nc are the three values returned
# by _GetNumOfIndividualAndCommonSetBits (see the BinaryForm definitions of
# Na, Nb and Nc documented in this file).
#
sub _CityBlockDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $CityBlockDistance = $Na + $Nb - 2 * $Nc;

  return $CityBlockDistance;
}
| 998 | |
# Calculate CityBlock distance coefficient using set theoretic form...
#
# Returns:
#
#   SUM ( Xai ) + SUM ( Xbi ) - 2 * SUM ( MIN ( Xai, Xbi ) )
#
# with the three sums supplied by _GetSumOfFingerprintsOrderedValues and
# _GetSumOfMinimumOfFingerprintsOrderdedValues.
#
sub _CityBlockDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $CityBlockDistance = $TotalA + $TotalB - 2 * $TotalOfMinimums;

  return $CityBlockDistance;
}
| 1011 | |
# Calculate Ochiai similarity coefficient between two fingerprint vectors.
#
# Ochiai similarity is computed exactly like Cosine similarity: all four
# arguments are forwarded unchanged to CosineSimilarityCoefficient and its
# result is returned as is.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - optional; passed through (undef when omitted).
#   SkipValuesCheck - optional; passed through (undef when omitted).
#
# This functionality can be either invoked as a class function or an object method.
#
sub OchiaiSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return CosineSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
| 1021 | |
# Calculate Cosine similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     case-insensitively. Default: AlgebraicForm.
#   SkipValuesCheck - true value skips validation of the vectors. Default: 0.
#
# Returns the value calculated by the helper for the selected form, or undef
# when CalculationMode matches none of the three supported forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CosineSimilarityCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before dispatching...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation(
    "CosineSimilarityCoefficient: Calculation failed",
    $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck
  );

  # Hand off to the helper matching the calculation mode...
  return _CosineSimilarityCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _CosineSimilarityCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _CosineSimilarityCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  return undef;
}
| 1050 | |
# Calculate Cosine similarity coefficient using algebraic form...
#
#   SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2 ) * SUM ( Xbi ** 2 ) )
#
# The sums come from _GetSumOfSquaresOfFingerprintsOrderedValues and
# _GetSumOfProductOfFingerprintsOrderedValues. A zero denominator yields 0
# instead of a division error.
#
sub _CosineSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumOfSquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SumOfSquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $SumOfProducts = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Denominator = sqrt($SumOfSquaresA * $SumOfSquaresB);
  return 0 unless $Denominator;

  return $SumOfProducts/$Denominator;
}
| 1066 | |
# Calculate Cosine similarity coefficient using binary form...
#
#   Nc / SQRT ( Na * Nb )
#
# Na, Nb and Nc are the three values returned by
# _GetNumOfIndividualAndCommonSetBits. A zero denominator yields 0 instead of
# a division error.
#
sub _CosineSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = sqrt($Na * $Nb);
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}
| 1080 | |
# Calculate Cosine similarity coefficient using set theoretic form...
#
#   SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
#
# The sums come from _GetSumOfFingerprintsOrderedValues and
# _GetSumOfMinimumOfFingerprintsOrderdedValues. A zero denominator yields 0
# instead of a division error.
#
sub _CosineSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Denominator = sqrt($TotalA * $TotalB);
  return 0 unless $Denominator;

  return $TotalOfMinimums/$Denominator;
}
| 1096 | |
# Calculate Czekanowski similarity coefficient between two fingerprint vectors.
#
# Czekanowski similarity is computed exactly like Dice similarity: all four
# arguments are forwarded unchanged to DiceSimilarityCoefficient and its
# result is returned as is.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - optional; passed through (undef when omitted).
#   SkipValuesCheck - optional; passed through (undef when omitted).
#
# This functionality can be either invoked as a class function or an object method.
#
sub CzekanowskiSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
| 1106 | |
# Calculate Sorenson similarity coefficient between two fingerprint vectors.
#
# Sorenson similarity is computed exactly like Dice similarity: all four
# arguments are forwarded unchanged to DiceSimilarityCoefficient and its
# result is returned as is.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - optional; passed through (undef when omitted).
#   SkipValuesCheck - optional; passed through (undef when omitted).
#
# This functionality can be either invoked as a class function or an object method.
#
sub SorensonSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
| 1116 | |
# Calculate Dice similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     case-insensitively. Default: AlgebraicForm.
#   SkipValuesCheck - true value skips validation of the vectors. Default: 0.
#
# Returns the value calculated by the helper for the selected form, or undef
# when CalculationMode matches none of the three supported forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before dispatching...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation(
    "DiceSimilarityCoefficient: Calculation failed",
    $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck
  );

  # Hand off to the helper matching the calculation mode...
  return _DiceSimilarityCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _DiceSimilarityCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _DiceSimilarityCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  return undef;
}
| 1145 | |
# Calculate Dice similarity coefficient using algebraic form...
#
#   ( 2 * SUM ( Xai * Xbi ) ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) )
#
# The sums come from _GetSumOfSquaresOfFingerprintsOrderedValues and
# _GetSumOfProductOfFingerprintsOrderedValues. A zero denominator yields 0
# instead of a division error.
#
sub _DiceSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumOfSquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SumOfSquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $SumOfProducts = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Denominator = $SumOfSquaresA + $SumOfSquaresB;
  return 0 unless $Denominator;

  my $Numerator = 2 * $SumOfProducts;

  return $Numerator/$Denominator;
}
| 1161 | |
# Calculate Dice similarity coefficient using binary form...
#
#   2 * Nc / ( Na + Nb )
#
# Na, Nb and Nc are the three values returned by
# _GetNumOfIndividualAndCommonSetBits. A zero denominator yields 0 instead of
# a division error.
#
sub _DiceSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Na + $Nb;
  return 0 unless $Denominator;

  my $Numerator = 2 * $Nc;

  return $Numerator/$Denominator;
}
| 1175 | |
# Calculate Dice similarity coefficient using set theoretic form...
#
#   2 * SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
#
# The sums come from _GetSumOfFingerprintsOrderedValues and
# _GetSumOfMinimumOfFingerprintsOrderdedValues. A zero denominator yields 0
# instead of a division error.
#
sub _DiceSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Denominator = $TotalA + $TotalB;
  return 0 unless $Denominator;

  my $Numerator = 2 * $TotalOfMinimums;

  return $Numerator/$Denominator;
}
| 1191 | |
| 1192 | |
# Calculate Euclidean distance coefficient between two fingerprint vectors.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     case-insensitively. Default: AlgebraicForm.
#   SkipValuesCheck - true value skips validation of the vectors. Default: 0.
#
# Returns the value calculated by the helper for the selected form, or undef
# when CalculationMode matches none of the three supported forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub EuclideanDistanceCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before dispatching...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation(
    "EuclideanDistanceCoefficient: Calculation failed",
    $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck
  );

  # Hand off to the helper matching the calculation mode...
  return _EuclideanDistanceCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _EuclideanDistanceCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _EuclideanDistanceCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  return undef;
}
| 1221 | |
# Calculate Euclidean distance coefficient using algebraic form...
#
#   SQRT ( SUM ( ( Xai - Xbi ) ** 2 ) )
#
# The sum under the square root is supplied by
# _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues.
#
sub _EuclideanDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumOfSquaredDifferences = _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);
  my $EuclideanDistance = sqrt($SumOfSquaredDifferences);

  return $EuclideanDistance;
}
| 1232 | |
# Calculate Euclidean distance coefficient using binary form...
#
#   SQRT ( Na + Nb - 2 * Nc )
#
# Na, Nb and Nc are the three values returned by
# _GetNumOfIndividualAndCommonSetBits.
#
sub _EuclideanDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $EuclideanDistance = sqrt($Na + $Nb - 2 * $Nc);

  return $EuclideanDistance;
}
| 1243 | |
# Calculate Euclidean distance coefficient using set theoretic form...
#
#   SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * SUM ( MIN ( Xai, Xbi ) ) )
#
# The sums come from _GetSumOfFingerprintsOrderedValues and
# _GetSumOfMinimumOfFingerprintsOrderdedValues.
#
sub _EuclideanDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $EuclideanDistance = sqrt($TotalA + $TotalB - 2 * $TotalOfMinimums);

  return $EuclideanDistance;
}
| 1256 | |
# Calculate Jaccard similarity coefficient between two fingerprint vectors.
#
# Jaccard similarity is computed exactly like Tanimoto similarity: all four
# arguments are forwarded unchanged to TanimotoSimilarityCoefficient and its
# result is returned as is.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - optional; passed through (undef when omitted).
#   SkipValuesCheck - optional; passed through (undef when omitted).
#
# This functionality can be either invoked as a class function or an object method.
#
sub JaccardSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return TanimotoSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
| 1266 | |
# Calculate Tanimoto similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     case-insensitively. Default: AlgebraicForm.
#   SkipValuesCheck - true value skips validation of the vectors. Default: 0.
#
# Returns the value calculated by the helper for the selected form, or undef
# when CalculationMode matches none of the three supported forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TanimotoSimilarityCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before dispatching...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation(
    "TanimotoSimilarityCoefficient: Calculation failed",
    $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck
  );

  # Hand off to the helper matching the calculation mode...
  return _TanimotoSimilarityCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _TanimotoSimilarityCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _TanimotoSimilarityCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  return undef;
}
| 1295 | |
# Calculate Tanimoto similarity coefficient using algebraic form...
#
#   SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
#
# The sums come from _GetSumOfSquaresOfFingerprintsOrderedValues and
# _GetSumOfProductOfFingerprintsOrderedValues. A zero denominator yields 0
# instead of a division error.
#
sub _TanimotoSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumOfSquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SumOfSquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $SumOfProducts = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Denominator = $SumOfSquaresA + $SumOfSquaresB - $SumOfProducts;
  return 0 unless $Denominator;

  return $SumOfProducts/$Denominator;
}
| 1311 | |
# Calculate Tanimoto similarity coefficient using binary form...
#
#   Nc / ( Na + Nb - Nc )
#
# Na, Nb and Nc are the three values returned by
# _GetNumOfIndividualAndCommonSetBits. A zero denominator yields 0 instead of
# a division error.
#
sub _TanimotoSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Na + $Nb - $Nc;
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}
| 1325 | |
# Calculate Tanimoto similarity coefficient using set theoretic form...
#
#   SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# The sums come from _GetSumOfFingerprintsOrderedValues and
# _GetSumOfMinimumOfFingerprintsOrderdedValues. A zero denominator yields 0
# instead of a division error.
#
sub _TanimotoSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Denominator = $TotalA + $TotalB - $TotalOfMinimums;
  return 0 unless $Denominator;

  return $TotalOfMinimums/$Denominator;
}
| 1341 | |
| 1342 | |
# Calculate Soergel distance coefficient between two fingerprint vectors.
#
# Arguments:
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to compare.
#   CalculationMode - AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     case-insensitively. Default: AlgebraicForm.
#   SkipValuesCheck - true value skips validation of the vectors. Default: 0.
#
# Returns the value calculated by the helper for the selected form, or undef
# when CalculationMode matches none of the three supported forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SoergelDistanceCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before dispatching...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation(
    "SoergelDistanceCoefficient: Calculation failed",
    $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck
  );

  # Hand off to the helper matching the calculation mode...
  return _SoergelDistanceCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _SoergelDistanceCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _SoergelDistanceCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  return undef;
}
| 1371 | |
# Calculate Soergel distance coefficient using algebraic form...
#
#   SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
#
# The sums come from
# _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues and
# _GetSumOfMaximumOfFingerprintsOrderdedValues. A zero denominator yields 0
# instead of a division error.
#
sub _SoergelDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumOfAbsDifferences = _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);
  my $SumOfMaximums = _GetSumOfMaximumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  return 0 unless $SumOfMaximums;

  return $SumOfAbsDifferences/$SumOfMaximums;
}
| 1386 | |
# Calculate Soergel distance coefficient using binary form...
#
#   ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
#
# Na, Nb and Nc are the three values returned by
# _GetNumOfIndividualAndCommonSetBits. A zero denominator yields 0 instead of
# a division error.
#
sub _SoergelDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Na + $Nb - $Nc;
  return 0 unless $Denominator;

  my $Numerator = $Na + $Nb - 2 * $Nc;

  return $Numerator/$Denominator;
}
| 1400 | |
# Calculate Soergel distance coefficient using set theoretic form...
#
#   ( SUM ( Xai ) + SUM ( Xbi ) - 2 * SUM ( MIN ( Xai, Xbi ) ) )
#     / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# The sums come from _GetSumOfFingerprintsOrderedValues and
# _GetSumOfMinimumOfFingerprintsOrderdedValues. A zero denominator yields 0
# instead of a division error.
#
sub _SoergelDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Denominator = $TotalA + $TotalB - $TotalOfMinimums;
  return 0 unless $Denominator;

  my $Numerator = $TotalA + $TotalB - 2 * $TotalOfMinimums;

  return $Numerator/$Denominator;
}
| 1416 | |
# Validate and process fingerprints vectors for similarity calculations...
#
# Arguments:
#   ErrorMsg - message prefix handed to the validation and processing steps.
#   FingerprintsVectorA, FingerprintsVectorB - fingerprint vectors to check.
#   CalculationMode - calculation form. Default: AlgebraicForm.
#   SkipValuesCheck - true value bypasses the validation step. Default: 0.
#
# Validation is performed by _ValidateFingerprintsVectorsForSimilarityCalculation
# unless SkipValuesCheck is set; _ProcessFingerprintsVectorsForSimilarityCalculation
# is always called afterwards and, being the last statement, supplies the return
# value of this function.
#
sub _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  _ValidateFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode)
    unless $SkipValuesCheck;

  _ProcessFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode);
}
| 1430 | |
# Make sure fingerprint vectors are good for performing similarity/distance calculation.
#
# Croaks, using $ErrorMsg as the label of the calling method, when:
#   o either object is not a fingerprints vector
#   o the two vectors have different types
#   o $CalculationMode is not AlgebraicForm, BinaryForm or SetTheoreticForm
#   o either vector contains no values
#   o OrderedNumericalValues: the number of values differ
#   o NumericalValues: a vector has no value IDs, or its number of value IDs
#     differs from its number of values
#   o AlphaNumericalValues: either vector has value IDs
#   o the vector type is none of the above
#
sub _ValidateFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode) = @_;

  # Make sure both are fingerprint vectors..
  if (!(IsFingerprintsVector($FingerprintsVectorA) && IsFingerprintsVector($FingerprintsVectorB))) {
    croak "Error: ${ClassName}->${ErrorMsg}: Both objects must be fingerprint vectors...";
  }

  # Check types...
  if ($FingerprintsVectorA->{Type} ne $FingerprintsVectorB->{Type}) {
    croak "Error: ${ClassName}->${ErrorMsg}: Type of first fingerprint vector, $FingerprintsVectorA->{Type}, must be same as type of second fingerprint vector, $FingerprintsVectorB->{Type}...";
  }

  # Check calculation mode...
  if ($CalculationMode !~ /^(AlgebraicForm|BinaryForm|SetTheoreticForm)$/i) {
    croak "Error: ${ClassName}->${ErrorMsg}: Specified similarity calculation mode, $CalculationMode, is not valid. Supported values: AlgebraicForm, BinaryForm, and SetTheoreticForm...";
  }

  # Check values and value IDs...
  my($Na, $Nb, $NIDa, $NIDb);
  $Na = $FingerprintsVectorA->GetNumOfValues(); $Nb = $FingerprintsVectorB->GetNumOfValues();
  $NIDa = $FingerprintsVectorA->GetNumOfValueIDs(); $NIDb = $FingerprintsVectorB->GetNumOfValueIDs();

  if ($Na == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
  }
  if ($Nb == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in second fingerprint vector, $Nb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
  }

  if ($FingerprintsVectorA->{Type} =~ /^OrderedNumericalValues$/i) {
    if ($Na != $Nb) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be equal to number of values, $Nb, in second fingerprint vector for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^NumericalValues$/i) {
    if ($NIDa == 0) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb == 0) {
      # This check is about the second vector; the message used to blame the first one...
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }

    if ($NIDa != $Na) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be equal to its number of values, $Na, for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb != $Nb) {
      # Types are known to be equal here; vector B is named to match the message...
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be equal to its number of values, $Nb, for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^AlphaNumericalValues$/i) {
    if ($NIDa || $NIDb) {
      croak "Error: ${ClassName}->${ErrorMsg}: ValueIDs cann't be specified for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  else {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $FingerprintsVectorA->{Type} is not valid...";
  }
}
| 1492 | |
# Prepare two fingerprints vectors for a similarity calculation: reset any
# previously generated OrderedValuesRef and BitVector data, generate ordered
# lists of values using the handler matching the type of the first vector and,
# for BinaryForm calculation mode, turn those ordered values into bit vectors.
#
# Croaks, using $ErrorMsg as the label of the calling method, for an
# unrecognized vector type.
#
sub _ProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode) = @_;

  # Discard results left over from any earlier calculation...
  for my $FingerprintsVector ($FingerprintsVectorA, $FingerprintsVectorB) {
    $FingerprintsVector->{OrderedValuesRef} = undef;
    $FingerprintsVector->{BitVector} = undef;
  }

  # Type patterns are tried in order; first match wins...
  my(@TypeHandlers) = (
    [qr/^OrderedNumericalValues$/i, \&_ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation],
    [qr/^NumericalValues$/i, \&_ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation],
    [qr/^AlphaNumericalValues$/i, \&_ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation],
  );

  my($Handler);
  for my $TypeHandler (@TypeHandlers) {
    my($TypePattern, $TypeHandlerRef) = @{$TypeHandler};
    if ($FingerprintsVectorA->{Type} =~ $TypePattern) {
      $Handler = $TypeHandlerRef;
      last;
    }
  }
  if (!defined $Handler) {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $FingerprintsVectorA->{Type} is not valid...";
  }
  $Handler->($FingerprintsVectorA, $FingerprintsVectorB);

  if ($CalculationMode =~ /^BinaryForm$/i) {
    _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation($FingerprintsVectorA, $FingerprintsVectorB);
  }
}
| 1518 | |
# Process fingerprints vectors with ordered numerical values for similarity
# calculations. The values are already ordered, so OrderedValuesRef of each
# vector is simply pointed at its own Values array; no copy is made.
#
sub _ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  for my $FingerprintsVector ($FingerprintsVectorA, $FingerprintsVectorB) {
    $FingerprintsVector->{OrderedValuesRef} = \@{$FingerprintsVector->{Values}};
  }

  # Same value the sub has always handed back: the reference set up for the second vector...
  return $FingerprintsVectorB->{OrderedValuesRef};
}
| 1527 | |
# Process fingerprints vectors with numerical values for similarity calculations.
#
# Values sharing a value ID within a vector are summed. The union of value IDs
# from both vectors is then sorted (default string sort) and used to lay out two
# equally sized lists of values, with 0 for any ID missing from a vector. The
# lists are stored as OrderedValuesRef in the respective vectors.
#
sub _ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  # Per vector totals keyed by value ID, along with IDs seen in either vector...
  my(%ValueIDTotalsA, %ValueIDTotalsB, %AllValueIDs);

  for my $VectorAndTotals ([$FingerprintsVectorA, \%ValueIDTotalsA], [$FingerprintsVectorB, \%ValueIDTotalsB]) {
    my($FingerprintsVector, $TotalsRef) = @{$VectorAndTotals};

    for my $Index (0 .. $#{$FingerprintsVector->{ValueIDs}}) {
      my($ValueID) = $FingerprintsVector->{ValueIDs}[$Index];
      my($Value) = $FingerprintsVector->{Values}[$Index];

      # First occurrence of an ID keeps the value as is; repeats are added to it...
      $TotalsRef->{$ValueID} = exists $TotalsRef->{$ValueID} ? $TotalsRef->{$ValueID} + $Value : $Value;
      $AllValueIDs{$ValueID} = 1;
    }
  }

  # Lay out values of both vectors against the same sorted list of IDs...
  my(@SortedValueIDs) = sort keys %AllValueIDs;

  my(@OrderedValuesA) = map { exists $ValueIDTotalsA{$_} ? $ValueIDTotalsA{$_} : 0 } @SortedValueIDs;
  my(@OrderedValuesB) = map { exists $ValueIDTotalsB{$_} ? $ValueIDTotalsB{$_} : 0 } @SortedValueIDs;

  $FingerprintsVectorA->{OrderedValuesRef} = \@OrderedValuesA;
  $FingerprintsVectorB->{OrderedValuesRef} = \@OrderedValuesB;
}
| 1585 | |
# Process fingerprints vectors with alphanumerical values for similarity calculations.
#
# Occurrences of each distinct value are counted per vector. The union of
# distinct values from both vectors is then sorted (default string sort) and
# used to lay out two equally sized lists of counts, with 0 for any value
# missing from a vector. The lists are stored as OrderedValuesRef in the
# respective vectors.
#
sub _ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  # Per vector occurrence counts, along with values seen in either vector...
  my(%ValueCountsA, %ValueCountsB, %AllValues);

  for my $VectorAndCounts ([$FingerprintsVectorA, \%ValueCountsA], [$FingerprintsVectorB, \%ValueCountsB]) {
    my($FingerprintsVector, $CountsRef) = @{$VectorAndCounts};

    for my $Value (@{$FingerprintsVector->{Values}}) {
      $CountsRef->{$Value}++;
      $AllValues{$Value} = 1;
    }
  }

  # Lay out counts of both vectors against the same sorted list of values...
  my(@SortedValues) = sort keys %AllValues;

  my(@OrderedValuesA) = map { exists $ValueCountsA{$_} ? $ValueCountsA{$_} : 0 } @SortedValues;
  my(@OrderedValuesB) = map { exists $ValueCountsB{$_} ? $ValueCountsB{$_} : 0 } @SortedValues;

  $FingerprintsVectorA->{OrderedValuesRef} = \@OrderedValuesA;
  $FingerprintsVectorB->{OrderedValuesRef} = \@OrderedValuesB;
}
| 1640 | |
# Transform final ordered values of both vectors into BitVector objects for
# BinaryForm similarity calculation: a bit is set wherever the corresponding
# ordered value is true (non-zero). Both bit vectors are sized using the number
# of ordered values in the first vector.
#
sub _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my($NumOfBits) = scalar @{$FingerprintsVectorA->{OrderedValuesRef}};

  $FingerprintsVectorA->{BitVector} = new BitVector($NumOfBits);
  $FingerprintsVectorB->{BitVector} = new BitVector($NumOfBits);

  # Second argument to SetBit is its skip check flag...
  my($SkipCheck) = 1;
  for my $BitNum (0 .. ($NumOfBits - 1)) {
    for my $FingerprintsVector ($FingerprintsVectorA, $FingerprintsVectorB) {
      next unless $FingerprintsVector->{OrderedValuesRef}[$BitNum];
      $FingerprintsVector->{BitVector}->SetBit($BitNum, $SkipCheck);
    }
  }
}
| 1664 | |
# Return whatever StatisticsUtil::Sum calculates for the ordered values of a
# fingerprints vector, i.e. the array referenced by its OrderedValuesRef...
#
sub _GetSumOfFingerprintsOrderedValues {
  my($FingerprintVector) = @_;
  my($OrderedValuesRef);

  $OrderedValuesRef = $FingerprintVector->{OrderedValuesRef};

  return StatisticsUtil::Sum($OrderedValuesRef);
}
| 1672 | |
# Return whatever StatisticsUtil::SumOfSquares calculates for the ordered values
# of a fingerprints vector, i.e. the array referenced by its OrderedValuesRef...
#
sub _GetSumOfSquaresOfFingerprintsOrderedValues {
  my($FingerprintVector) = @_;
  my($OrderedValuesRef);

  $OrderedValuesRef = $FingerprintVector->{OrderedValuesRef};

  return StatisticsUtil::SumOfSquares($OrderedValuesRef);
}
| 1680 | |
# Return sum of products of corresponding ordered values in two fingerprints
# vectors. The number of ordered values in the first vector decides how many
# pairs are used.
#
sub _GetSumOfProductOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($Total) = 0;

  $Total += $FingerprintsVectorA->{OrderedValuesRef}[$_] * $FingerprintsVectorB->{OrderedValuesRef}[$_]
    for 0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}};

  return $Total;
}
| 1693 | |
# Return sum of absolute differences between corresponding ordered values in
# two fingerprints vectors. The number of ordered values in the first vector
# decides how many pairs are used.
#
sub _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($Total) = 0;

  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    my($Difference) = $FingerprintsVectorA->{OrderedValuesRef}[$Position] - $FingerprintsVectorB->{OrderedValuesRef}[$Position];
    $Total += abs($Difference);
  }

  return $Total;
}
| 1706 | |
# Return sum of squared differences between corresponding ordered values in
# two fingerprints vectors. The number of ordered values in the first vector
# decides how many pairs are used.
#
sub _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($Total) = 0;

  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    my($Difference) = $FingerprintsVectorA->{OrderedValuesRef}[$Position] - $FingerprintsVectorB->{OrderedValuesRef}[$Position];
    $Total += $Difference**2;
  }

  return $Total;
}
| 1719 | |
# Return sum of MathUtil::min applied to each pair of corresponding ordered
# values in two fingerprints vectors. The number of ordered values in the first
# vector decides how many pairs are used.
#
sub _GetSumOfMinimumOfFingerprintsOrderdedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($Total) = 0;

  $Total += MathUtil::min($FingerprintsVectorA->{OrderedValuesRef}[$_], $FingerprintsVectorB->{OrderedValuesRef}[$_])
    for 0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}};

  return $Total;
}
| 1732 | |
# Return sum of MathUtil::max applied to each pair of corresponding ordered
# values in two fingerprints vectors. The number of ordered values in the first
# vector decides how many pairs are used.
#
sub _GetSumOfMaximumOfFingerprintsOrderdedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($Total) = 0;

  $Total += MathUtil::max($FingerprintsVectorA->{OrderedValuesRef}[$_], $FingerprintsVectorB->{OrderedValuesRef}[$_])
    for 0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}};

  return $Total;
}
| 1745 | |
# Get number of bits for BinaryForm calculation using BitVector objects stored
# in two fingerprints vectors. Returns a list containing:
#
#   Na - number of set bits reported by bit vector of A
#   Nb - number of set bits reported by bit vector of B
#   Nc - number of set bits reported by the result of A & B
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($Na, $Nb, $Nc, $CommonBitVector);

  $Na = $FingerprintsVectorA->{BitVector}->GetNumOfSetBits();
  $Nb = $FingerprintsVectorB->{BitVector}->GetNumOfSetBits();

  # Combine the two bit vectors using & and count what is left set...
  $CommonBitVector = $FingerprintsVectorA->{BitVector} & $FingerprintsVectorB->{BitVector};
  $Nc = $CommonBitVector->GetNumOfSetBits();

  return ($Na, $Nb, $Nc);
}
| 1768 | |
# Return names of supported distance coefficients as held in the file level
# @DistanceCoefficients list. Takes no arguments...
#
sub GetSupportedDistanceCoefficients () {
  return (@DistanceCoefficients);
}
| 1775 | |
# Return names of supported similarity coefficients as held in the file level
# @SimilarityCoefficients list. Takes no arguments...
#
sub GetSupportedSimilarityCoefficients () {
  return (@SimilarityCoefficients);
}
| 1782 | |
# Return a sorted (default string sort) list containing names of all supported
# distance coefficients followed by all supported similarity coefficients.
# Takes no arguments...
#
sub GetSupportedDistanceAndSimilarityCoefficients () {
  my(@CoefficientNames) = (@DistanceCoefficients, @SimilarityCoefficients);

  return sort @CoefficientNames;
}
| 1794 | |
# Is it a fingerprints vector object? Hands the specified object over to
# _IsFingerprintsVector and returns its answer...
#
sub IsFingerprintsVector ($) {
  my($Candidate) = @_;

  return _IsFingerprintsVector($Candidate);
}
| 1801 | |
# Is it a fingerprints vector object? Returns 1 for a blessed reference whose
# isa($ClassName) check is true; otherwise, returns 0...
#
sub _IsFingerprintsVector {
  my($Candidate) = @_;

  # Anything not blessed can't be an object of this class...
  return 0 unless Scalar::Util::blessed($Candidate);

  return $Candidate->isa($ClassName) ? 1 : 0;
}
| 1808 | |
# Return a string describing the fingerprints vector: its type, number of
# values, number of non-zero values (OrderedNumericalValues and NumericalValues
# types only), the values, number of value IDs and the value IDs.
#
# Values and value IDs are listed space delimited, or as 'None' when there are
# none. A list with 500 or more entries is cut down to its first 500 entries,
# gets " ..." appended and is labeled "(Truncated after 500)". The label for
# each list depends only on the size of that list. Earlier, the label for value
# IDs followed the number of values, which produced output such as
# "ValueIDs (Truncated after 500): <None>" for a large vector without value IDs.
#
sub StringifyFingerprintsVector {
  my($This) = @_;
  my($MaxValuesToStringify, $StringifyList);

  $MaxValuesToStringify = 500;

  # Turn a list into its display string and the label suffix that goes with it...
  $StringifyList = sub {
    my($ListRef, $NumOfEntries) = @_;

    if ($NumOfEntries < $MaxValuesToStringify) {
      return ($NumOfEntries ? join(' ', @{$ListRef}) : 'None', '');
    }
    return (join(' ', @{$ListRef}[0 .. ($MaxValuesToStringify - 1)]) . " ...", " (Truncated after $MaxValuesToStringify)");
  };

  # Set type, values and value IDs...
  my($NumOfValues, $ValuesString, $ValuesLabelSuffix, $NumOfValueIDs, $ValueIDsString, $ValueIDsLabelSuffix);

  $NumOfValues = $This->GetNumOfValues();
  ($ValuesString, $ValuesLabelSuffix) = $StringifyList->($This->{Values}, $NumOfValues);

  $NumOfValueIDs = $This->GetNumOfValueIDs();
  ($ValueIDsString, $ValueIDsLabelSuffix) = $StringifyList->($This->{ValueIDs}, $NumOfValueIDs);

  my($FingerprintsVectorString);

  $FingerprintsVectorString = "Type: $This->{Type}; NumOfValues: $NumOfValues";
  if ($This->{Type} =~ /^(OrderedNumericalValues|NumericalValues)$/i) {
    my($NumOfNonZeroValues);
    $NumOfNonZeroValues = $This->GetNumOfNonZeroValues();
    $FingerprintsVectorString .= "; NumOfNonZeroValues: $NumOfNonZeroValues";
  }

  # Append all the values and value IDs...
  $FingerprintsVectorString .= "; Values${ValuesLabelSuffix}: <$ValuesString>; NumOfValueIDs: $NumOfValueIDs; ValueIDs${ValueIDsLabelSuffix}: <$ValueIDsString>";

  return $FingerprintsVectorString;
}
| 1865 | |
| 1866 1; | |
| 1867 | |
| 1868 __END__ | |
| 1869 | |
| 1870 =head1 NAME | |
| 1871 | |
| 1872 FingerprintsVector | |
| 1873 | |
| 1874 =head1 SYNOPSIS | |
| 1875 | |
| 1876 use Fingerprints::FingerprintsVector; | |
| 1877 | |
| 1878 use Fingerprints::FingerprintsVector qw(:all); | |
| 1879 | |
| 1880 =head1 DESCRIPTION | |
| 1881 | |
| 1882 B<FingerprintsVector> class provides the following methods: | |
| 1883 | |
| 1884 new, AddValueIDs, AddValues, CityBlockDistanceCoefficient, | |
| 1885 CosineSimilarityCoefficient, CzekanowskiSimilarityCoefficient, | |
| 1886 DiceSimilarityCoefficient, EuclideanDistanceCoefficient, GetDescription, | |
| 1887 GetFingerprintsVectorString, GetID, GetIDsAndValuesPairsString, | |
| 1888 GetIDsAndValuesString, GetNumOfNonZeroValues, GetNumOfValueIDs, GetNumOfValues, | |
| 1889 GetSupportedDistanceAndSimilarityCoefficients, GetSupportedDistanceCoefficients, | |
| 1890 GetSupportedSimilarityCoefficients, GetType, GetValue, GetValueID, GetValueIDs, | |
| 1891 GetValueIDsString, GetValues, GetValuesAndIDsPairsString, GetValuesAndIDsString, | |
| 1892 GetValuesString, GetVectorType, HammingDistanceCoefficient, IsFingerprintsVector, | |
| 1893 JaccardSimilarityCoefficient, ManhattanDistanceCoefficient, | |
| 1894 NewFromIDsAndValuesPairsString, NewFromIDsAndValuesString, | |
| 1895 NewFromValuesAndIDsPairsString, NewFromValuesAndIDsString, NewFromValuesString, | |
| 1896 OchiaiSimilarityCoefficient, SetDescription, SetID, SetType, SetValue, SetValueID, | |
| 1897 SetValueIDs, SetValues, SetVectorType, SoergelDistanceCoefficient, | |
| 1898 SorensonSimilarityCoefficient, StringifyFingerprintsVector, | |
| 1899 TanimotoSimilarityCoefficient | |
| 1900 | |
| 1901 The methods available to create fingerprints vector from strings and to calculate similarity | |
| 1902 and distance coefficients between two vectors can also be invoked as class functions. | |
| 1903 | |
| 1904 B<FingerprintsVector> class provides support to perform comparison between vectors | |
| 1905 containing three different types of values: | |
| 1906 | |
| 1907 Type I: OrderedNumericalValues | |
| 1908 | |
| 1909 o Size of two vectors are same | |
| 1910 o Vectors contain real values in a specific order. For example: MACCS keys | |
| 1911 count, Topological pharmacophore atom pairs and so on. | |
| 1912 | |
| 1913 Type II: UnorderedNumericalValues | |
| 1914 | |
| 1915 o Size of two vectors might not be same | |
| 1916 o Vectors contain unordered real value identified by value IDs. For example: | |
| 1917 Topological atom pairs, Topological atom torsions and so on | |
| 1918 | |
| 1919 Type III: AlphaNumericalValues | |
| 1920 | |
| 1921 o Size of two vectors might not be same | |
| 1922 o Vectors contain unordered alphanumerical values. For example: Extended | |
| 1923 connectivity fingerprints, atom neighborhood fingerprints. | |
| 1924 | |
| 1925 Before performing similarity or distance calculations between vectors containing UnorderedNumericalValues | |
| 1926 or AlphaNumericalValues, the vectors are transformed into vectors containing unique OrderedNumericalValues | |
| 1927 using value IDs for UnorderedNumericalValues and values themselves for AlphaNumericalValues. | |
| 1928 | |
| 1929 Three forms of similarity and distance calculation between two vectors, specified using B<CalculationMode> | |
| 1930 option, are supported: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. | |
| 1931 | |
| 1932 For I<BinaryForm>, the ordered list of processed final vector values containing the value or | |
| 1933 count of each unique value type is simply converted into a binary vector containing 1s and 0s | |
| 1934 corresponding to presence or absence of values before calculating similarity or distance between | |
| 1935 two vectors. | |
| 1936 | |
| 1937 For two fingerprint vectors A and B of same size containing OrderedNumericalValues, let: | |
| 1938 | |
| 1939 N = Number of values in A or B | |
| 1940 | |
| 1941 Xa = Values of vector A | |
| 1942 Xb = Values of vector B | |
| 1943 | |
| 1944 Xai = Value of ith element in A | |
| 1945 Xbi = Value of ith element in B | |
| 1946 | |
| 1947 SUM = Sum of i over N values | |
| 1948 | |
| 1949 For SetTheoreticForm of calculation between two vectors, let: | |
| 1950 | |
| 1951 SetIntersectionXaXb = SUM ( MIN ( Xai, Xbi ) ) | |
| 1952 SetDifferenceXaXb = SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) | |
| 1953 | |
| 1954 For BinaryForm of calculation between two vectors, let: | |
| 1955 | |
| 1956 Na = Number of bits set to "1" in A = SUM ( Xai ) | |
| 1957 Nb = Number of bits set to "1" in B = SUM ( Xbi ) | |
| 1958 Nc = Number of bits set to "1" in both A and B = SUM ( Xai * Xbi ) | |
| 1959 Nd = Number of bits set to "0" in both A and B | |
| 1960 = SUM ( 1 - Xai - Xbi + Xai * Xbi) | |
| 1961 | |
| 1962 N = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd | |
| 1963 | |
| 1964 Additionally, for BinaryForm various values also correspond to: | |
| 1965 | |
| 1966 Na = | Xa | | |
| 1967 Nb = | Xb | | |
| 1968 Nc = | SetIntersectionXaXb | | |
| 1969 Nd = N - | SetDifferenceXaXb | | |
| 1970 | |
| 1971 | SetDifferenceXaXb | = N - Nd = Na + Nb - Nc + Nd - Nd = Na + Nb - Nc | |
| 1972 = | Xa | + | Xb | - | SetIntersectionXaXb | | |
| 1973 | |
| 1974 Various similarity and distance coefficients [ Ref 40, Ref 62, Ref 64 ] for a pair of vectors A and B | |
| 1975 in I<AlgebraicForm, BinaryForm and SetTheoreticForm> are defined as follows: | |
| 1976 | |
| 1977 B<CityBlockDistance>: ( same as HammingDistance and ManhattanDistance) | |
| 1978 | |
| 1979 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) ) | |
| 1980 | |
| 1981 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
| 1982 | |
| 1983 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
| 1984 | |
| 1985 B<CosineSimilarity>: ( same as OchiaiSimilarityCoefficient) | |
| 1986 | |
| 1987 I<AlgebraicForm>: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) ) | |
| 1988 | |
| 1989 I<BinaryForm>: Nc / SQRT ( Na * Nb) | |
| 1990 | |
| 1991 I<SetTheoreticForm>: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) ) | |
| 1992 | |
| 1993 B<CzekanowskiSimilarity>: ( same as DiceSimilarity and SorensonSimilarity) | |
| 1994 | |
| 1995 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
| 1996 | |
| 1997 I<BinaryForm>: 2 * Nc / ( Na + Nb ) | |
| 1998 | |
| 1999 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
| 2000 | |
| 2001 B<DiceSimilarity>: ( same as CzekanowskiSimilarity and SorensonSimilarity) | |
| 2002 | |
| 2003 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
| 2004 | |
| 2005 I<BinaryForm>: 2 * Nc / ( Na + Nb ) | |
| 2006 | |
| 2007 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
| 2008 | |
| 2009 B<EuclideanDistance>: | |
| 2010 | |
| 2011 I<AlgebraicForm>: SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) ) | |
| 2012 | |
| 2013 I<BinaryForm>: SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc ) | |
| 2014 | |
| 2015 I<SetTheoreticForm>: SQRT ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) = SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) | |
| 2016 | |
| 2017 B<HammingDistance>: ( same as CityBlockDistance and ManhattanDistance) | |
| 2018 | |
| 2019 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) ) | |
| 2020 | |
| 2021 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
| 2022 | |
| 2023 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
| 2024 | |
| 2025 B<JaccardSimilarity>: ( same as TanimotoSimilarity) | |
| 2026 | |
| 2027 I<AlgebraicForm>: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) ) | |
| 2028 | |
| 2029 I<BinaryForm>: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) | |
| 2030 | |
| 2031 I<SetTheoreticForm>: | SetIntersectionXaXb | / | SetDifferenceXaXb | = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
| 2032 | |
| 2033 B<ManhattanDistance>: ( same as CityBlockDistance and HammingDistance) | |
| 2034 | |
| 2035 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) ) | |
| 2036 | |
| 2037 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
| 2038 | |
| 2039 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
| 2040 | |
| 2041 B<OchiaiSimilarity>: ( same as CosineSimilarity) | |
| 2042 | |
| 2043 I<AlgebraicForm>: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) ) | |
| 2044 | |
| 2045 I<BinaryForm>: Nc / SQRT ( Na * Nb) | |
| 2046 | |
| 2047 I<SetTheoreticForm>: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) ) | |
| 2048 | |
| 2049 B<SorensonSimilarity>: ( same as CzekanowskiSimilarity and DiceSimilarity) | |
| 2050 | |
| 2051 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
| 2052 | |
| 2053 I<BinaryForm>: 2 * Nc / ( Na + Nb ) | |
| 2054 | |
| 2055 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
| 2056 | |
| 2057 B<SoergelDistance>: | |
| 2058 | |
| 2059 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) ) | |
| 2060 | |
| 2061 I<BinaryForm>: 1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc ) | |
| 2062 | |
| 2063 I<SetTheoreticForm>: ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) / | SetDifferenceXaXb | = ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
| 2064 | |
| 2065 B<TanimotoSimilarity>: ( same as JaccardSimilarity) | |
| 2066 | |
| 2067 I<AlgebraicForm>: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) ) | |
| 2068 | |
| 2069 I<BinaryForm>: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) | |
| 2070 | |
| 2071 I<SetTheoreticForm>: | SetIntersectionXaXb | / | SetDifferenceXaXb | = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
| 2072 | |
| 2073 =head2 METHODS | |
| 2074 | |
| 2075 =over 4 | |
| 2076 | |
| 2077 =item B<new> | |
| 2078 | |
| 2079 $FPVector = new Fingerprints::FingerprintsVector(%NamesAndValues); | |
| 2080 | |
| 2081 Using specified I<FingerprintsVector> property names and values hash, B<new> method creates | |
| 2082 a new object and returns a reference to newly created B<FingerprintsVector> | |
| 2083 object. By default, the following properties are initialized: | |
| 2084 | |
| 2085 Type = '' | |
| 2086 @{Values} = () | |
| 2087 @{ValueIDs} = () | |
| 2088 | |
| 2089 Examples: | |
| 2090 | |
| 2091 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'OrderedNumericalValues', | |
| 2092 'Values' => [1, 2, 3, 4]); | |
| 2093 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'NumericalValues', | |
| 2094 'Values' => [10, 22, 33, 44], | |
| 2095 'ValueIDs' => ['ID1', 'ID2', 'ID3', 'ID4']); | |
| 2096 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'AlphaNumericalValues', | |
| 2097 'Values' => ['a1', 2, 'a3', 4]); | |
| 2098 | |
| 2099 =item B<AddValueIDs> | |
| 2100 | |
| 2101 $FingerprintsVector->AddValueIDs($ValueIDsRef); | |
| 2102 $FingerprintsVector->AddValueIDs(@ValueIDs); | |
| 2103 | |
| 2104 Adds specified I<ValueIDs> to I<FingerprintsVector> and returns I<FingerprintsVector>. | |
| 2105 | |
| 2106 =item B<AddValues> | |
| 2107 | |
| 2108 $FingerprintsVector->AddValues($ValuesRef); | |
| 2109 $FingerprintsVector->AddValues(@Values); | |
| 2110 $FingerprintsVector->AddValues($Vector); | |
| 2111 | |
| 2112 Adds specified I<Values> to I<FingerprintsVector> and returns I<FingerprintsVector>. | |
| 2113 | |
| 2114 =item B<CityBlockDistanceCoefficient> | |
| 2115 | |
| 2116 $Value = $FingerprintsVector->CityBlockDistanceCoefficient( | |
| 2117 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2118 $Value = Fingerprints::FingerprintsVector::CityBlockDistanceCoefficient( | |
| 2119 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2120 [$CalculationMode, $SkipValuesCheck]); | |
| 2121 | |
| 2122 Returns value of I<CityBlock> distance coefficient between two I<FingerprintsVectors> using | |
| 2123 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2124 | |
| 2125 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2126 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2127 | |
| 2128 =item B<CosineSimilarityCoefficient> | |
| 2129 | |
| 2130 $Value = $FingerprintsVector->CosineSimilarityCoefficient( | |
| 2131 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2132 $Value = Fingerprints::FingerprintsVector::CosineSimilarityCoefficient( | |
| 2133 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2134 [$CalculationMode, $SkipValuesCheck]); | |
| 2135 | |
| 2136 Returns value of I<Cosine> similarity coefficient between two I<FingerprintsVectors> using | |
| 2137 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2138 | |
| 2139 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2140 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2141 | |
| 2142 =item B<CzekanowskiSimilarityCoefficient> | |
| 2143 | |
| 2144 $Value = $FingerprintsVector->CzekanowskiSimilarityCoefficient( | |
| 2145 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2146 $Value = Fingerprints::FingerprintsVector::CzekanowskiSimilarityCoefficient( | |
| 2147 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2148 [$CalculationMode, $SkipValuesCheck]); | |
| 2149 | |
| 2150 Returns value of I<Czekanowski> similarity coefficient between two I<FingerprintsVectors> using | |
| 2151 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2152 | |
| 2153 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2154 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2155 | |
| 2156 =item B<DiceSimilarityCoefficient> | |
| 2157 | |
| 2158 $Value = $FingerprintsVector->DiceSimilarityCoefficient( | |
| 2159 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2160 $Value = Fingerprints::FingerprintsVector::DiceSimilarityCoefficient( | |
| 2161 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2162 [$CalculationMode, $SkipValuesCheck]); | |
| 2163 | |
| 2164 Returns value of I<Dice> similarity coefficient between two I<FingerprintsVectors> using | |
| 2165 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2166 | |
| 2167 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2168 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2169 | |
| 2170 =item B<EuclideanDistanceCoefficient> | |
| 2171 | |
| 2172 $Value = $FingerprintsVector->EuclideanDistanceCoefficient( | |
| 2173 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2174 $Value = Fingerprints::FingerprintsVector::EuclideanDistanceCoefficient( | |
| 2175 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2176 [$CalculationMode, $SkipValuesCheck]); | |
| 2177 | |
| 2178 Returns value of I<Euclidean> distance coefficient between two I<FingerprintsVectors> using | |
| 2179 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2180 | |
| 2181 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2182 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2183 | |
| 2184 =item B<GetDescription> | |
| 2185 | |
| 2186 $Description = $FingerprintsVector->GetDescription(); | |
| 2187 | |
| 2188 Returns a string containing description of fingerprints vector. | |
| 2189 | |
| 2190 =item B<GetFingerprintsVectorString> | |
| 2191 | |
| 2192 $FPString = $FingerprintsVector->GetFingerprintsVectorString($Format); | |
| 2193 | |
| 2194 Returns a B<FingerprintsString> containing vector values and/or IDs in I<FingerprintsVector> | |
| 2195 corresponding to specified I<Format>. | |
| 2196 | |
| 2197 Possible I<Format> values: I<IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, | |
| 2198 IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, | |
| 2199 ValueIDsString, ValueIDs, ValuesString, or Values>. | |
| 2200 | |
| 2201 =item B<GetID> | |
| 2202 | |
| 2203 $ID = $FingerprintsVector->GetID(); | |
| 2204 | |
| 2205 Returns I<ID> of I<FingerprintsVector>. | |
| 2206 | |
| 2207 =item B<GetVectorType> | |
| 2208 | |
| 2209 $VectorType = $FingerprintsVector->GetVectorType(); | |
| 2210 | |
| 2211 Returns I<VectorType> of I<FingerprintsVector>. | |
| 2212 | |
| 2213 =item B<GetIDsAndValuesPairsString> | |
| 2214 | |
| 2215 $IDsValuesPairsString = $FingerprintsVector->GetIDsAndValuesPairsString(); | |
| 2216 | |
| 2217 Returns I<FingerprintsVector> value IDs and values as space delimited ID/value pair | |
| 2218 string. | |
| 2219 | |
| 2220 =item B<GetIDsAndValuesString> | |
| 2221 | |
| 2222 $IDsValuesString = $FingerprintsVector->GetIDsAndValuesString(); | |
| 2223 | |
| 2224 Returns I<FingerprintsVector> value IDs and values as string containing space delimited IDs followed by | |
| 2225 values with semicolon as IDs and values delimiter. | |
| 2226 | |
| 2227 =item B<GetNumOfNonZeroValues> | |
| 2228 | |
| 2229 $NumOfNonZeroValues = $FingerprintsVector->GetNumOfNonZeroValues(); | |
| 2230 | |
| 2231 Returns number of non-zero values in I<FingerprintsVector>. | |
| 2232 | |
| 2233 =item B<GetNumOfValueIDs> | |
| 2234 | |
| 2235 $NumOfValueIDs = $FingerprintsVector->GetNumOfValueIDs(); | |
| 2236 | |
| 2237 Returns number of value IDs in I<FingerprintsVector>. | |
| 2238 | |
| 2239 =item B<GetNumOfValues> | |
| 2240 | |
| 2241 $NumOfValues = $FingerprintsVector->GetNumOfValues(); | |
| 2242 | |
| 2243 Returns number of values in I<FingerprintsVector>. | |
| 2244 | |
| 2245 =item B<GetSupportedDistanceAndSimilarityCoefficients> | |
| 2246 | |
| 2247 @SupportedDistanceAndSimilarityCoefficientsReturn = | |
| 2248 Fingerprints::FingerprintsVector::GetSupportedDistanceAndSimilarityCoefficients(); | |
| 2249 | |
| 2250 Returns an array containing names of supported distance and similarity coefficients. | |
| 2251 | |
| 2252 =item B<GetSupportedDistanceCoefficients> | |
| 2253 | |
| 2254 @SupportedDistanceCoefficientsReturn = | |
| 2255 Fingerprints::FingerprintsVector::GetSupportedDistanceCoefficients(); | |
| 2256 | |
| 2257 Returns an array containing names of supported distance coefficients. | |
| 2258 | |
| 2259 =item B<GetSupportedSimilarityCoefficients> | |
| 2260 | |
| 2261 @SupportedSimilarityCoefficientsReturn = | |
| 2262 Fingerprints::FingerprintsVector::GetSupportedSimilarityCoefficients(); | |
| 2263 | |
| 2264 Returns an array containing names of supported similarity coefficients. | |
| 2265 | |
| 2266 =item B<GetType> | |
| 2267 | |
| 2268 $VectorType = $FingerprintsVector->GetType(); | |
| 2269 | |
| 2270 Returns I<FingerprintsVector> vector type. | |
| 2271 | |
| 2272 =item B<GetValue> | |
| 2273 | |
| 2274 $Value = $FingerprintsVector->GetValue($Index); | |
| 2275 | |
| 2276 Returns fingerprints vector B<Value> specified using I<Index> starting at 0. | |
| 2277 | |
| 2278 =item B<GetValueID> | |
| 2279 | |
| 2280 $ValueID = $FingerprintsVector->GetValueID($Index); | |
| 2281 | |
| 2282 Returns fingerprints vector B<ValueID> specified using I<Index> starting at 0. | |
| 2283 | |
| 2284 =item B<GetValueIDs> | |
| 2285 | |
| 2286 $ValueIDs = $FingerprintsVector->GetValueIDs(); | |
| 2287 @ValueIDs = $FingerprintsVector->GetValueIDs(); | |
| 2288 | |
| 2289 Returns fingerprints vector B<ValueIDs> as an array or reference to an array. | |
| 2290 | |
| 2291 =item B<GetValueIDsString> | |
| 2292 | |
| 2293 $ValueIDsString = $FingerprintsVector->GetValueIDsString(); | |
| 2294 | |
| 2295 Returns fingerprints vector B<ValueIDsString> with value IDs delimited by space. | |
| 2296 | |
| 2297 =item B<GetValues> | |
| 2298 | |
| 2299 $ValuesRef = $FingerprintsVector->GetValues(); | |
| 2300 @Values = $FingerprintsVector->GetValues(); | |
| 2301 | |
| 2302 Returns fingerprints vector B<Values> as an array or reference to an array. | |
| 2303 | |
| 2304 =item B<GetValuesAndIDsPairsString> | |
| 2305 | |
| 2306 $ValuesIDsPairsString = $FingerprintsVector->GetValuesAndIDsPairsString(); | |
| 2307 | |
| 2308 Returns I<FingerprintsVector> values and value IDs as space delimited value/ID pair | |
| 2309 string. | |
| 2310 | |
| 2311 =item B<GetValuesAndIDsString> | |
| 2312 | |
| 2313 $ValuesIDsString = $FingerprintsVector->GetValuesAndIDsString(); | |
| 2314 | |
| 2315 Returns I<FingerprintsVector> values and value IDs as string containing space delimited values followed by | |
| 2316 value IDs with semicolon as values and IDs delimiter. | |
| 2317 | |
| 2318 =item B<GetValuesString> | |
| 2319 | |
| 2320 $Return = $FingerprintsVector->GetValuesString(); | |
| 2321 | |
| 2322 Returns I<FingerprintsVector> values as space delimited string. | |
| 2323 | |
| 2324 =item B<HammingDistanceCoefficient> | |
| 2325 | |
| 2326 $Value = $FingerprintsVector->HammingDistanceCoefficient( | |
| 2327 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2328 $Value = Fingerprints::FingerprintsVector::HammingDistanceCoefficient( | |
| 2329 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2330 [$CalculationMode, $SkipValuesCheck]); | |
| 2331 | |
| 2332 Returns value of I<Hamming> distance coefficient between two I<FingerprintsVectors> using | |
| 2333 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2334 | |
| 2335 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2336 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2337 | |
| 2338 =item B<IsFingerprintsVector> | |
| 2339 | |
| 2340 $Status = Fingerprints::FingerprintsVector::IsFingerprintsVector($Object); | |
| 2341 | |
| 2342 Returns 1 or 0 based on whether I<Object> is a I<FingerprintsVector>. | |
| 2343 | |
| 2344 =item B<JaccardSimilarityCoefficient> | |
| 2345 | |
| 2346 $Value = $FingerprintsVector->JaccardSimilarityCoefficient( | |
| 2347 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2348 $Value = Fingerprints::FingerprintsVector::JaccardSimilarityCoefficient( | |
| 2349 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2350 [$CalculationMode, $SkipValuesCheck]); | |
| 2351 | |
| 2352 Returns value of I<Jaccard> similarity coefficient between two I<FingerprintsVectors> using | |
| 2353 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2354 | |
| 2355 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2356 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2357 | |
| 2358 =item B<ManhattanDistanceCoefficient> | |
| 2359 | |
| 2360 $Value = $FingerprintsVector->ManhattanDistanceCoefficient( | |
| 2361 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2362 $Value = Fingerprints::FingerprintsVector::ManhattanDistanceCoefficient( | |
| 2363 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2364 [$CalculationMode, $SkipValuesCheck]); | |
| 2365 | |
| 2366 Returns value of I<Manhattan> distance coefficient between two I<FingerprintsVectors> using | |
| 2367 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2368 | |
| 2369 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2370 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2371 | |
| 2372 =item B<NewFromIDsAndValuesPairsString> | |
| 2373 | |
| 2374 $FingerprintsVector = $FingerprintsVector->NewFromIDsAndValuesPairsString( | |
| 2375 $ValuesType, $IDsAndValuesPairsString); | |
| 2376 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesPairsString( | |
| 2377 $ValuesType, $IDsAndValuesPairsString); | |
| 2378 | |
| 2379 Creates a new I<FingerprintsVector> of I<ValuesType> using I<IDsAndValuesPairsString> containing | |
| 2380 space delimited value IDs and values pairs and returns new B<FingerprintsVector> object. | |
| 2381 Possible I<ValuesType> values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>. | |
| 2382 | |
| 2383 =item B<NewFromIDsAndValuesString> | |
| 2384 | |
| 2385 $FingerprintsVector = $FingerprintsVector->NewFromIDsAndValuesString( | |
| 2386 $ValuesType, $IDsAndValuesString); | |
| 2387 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesString( | |
| 2388 $ValuesType, $IDsAndValuesString); | |
| 2389 | |
| 2390 Creates a new I<FingerprintsVector> of I<ValuesType> using I<IDsAndValuesString> containing | |
| 2391 semicolon delimited value IDs string followed by values strings and returns new B<FingerprintsVector> | |
| 2392 object. The values within value and value IDs string are delimited by spaces. Possible I<ValuesType> | |
| 2393 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>. | |
| 2394 | |
| 2395 =item B<NewFromValuesAndIDsPairsString> | |
| 2396 | |
| 2397 $FingerprintsVector = $FingerprintsVector->NewFromValuesAndIDsPairsString( | |
| 2398 $ValuesType, $ValuesAndIDsPairsString); | |
| 2399 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsPairsString( | |
| 2400 $ValuesType, $ValuesAndIDsPairsString); | |
| 2401 | |
| 2402 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesAndIDsPairsString> containing | |
| 2403 space delimited value and value IDs pairs and returns new B<FingerprintsVector> object. | |
| 2404 Possible I<ValuesType> values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>. | |
| 2405 | |
| 2406 =item B<NewFromValuesAndIDsString> | |
| 2407 | |
| 2408 $FingerprintsVector = $FingerprintsVector->NewFromValuesAndIDsString( | |
| 2409 $ValuesType, $ValuesAndIDsString); | |
| 2410 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsString( | |
| 2411 $ValuesType, $ValuesAndIDsString); | |
| 2412 | |
| 2413 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesAndIDsString> containing | |
| 2414 semicolon delimited values string followed by value IDs strings and returns new B<FingerprintsVector> | |
| 2415 object. The values within values and value IDs string are delimited by spaces. Possible I<ValuesType> | |
| 2416 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>. | |
| 2417 | |
| 2418 =item B<NewFromValuesString> | |
| 2419 | |
| 2420 $FingerprintsVector = $FingerprintsVector->NewFromValuesString( | |
| 2421 $ValuesType, $ValuesString); | |
| 2422 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesString( | |
| 2423 $ValuesType, $ValuesString); | |
| 2424 | |
| 2425 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesString> containing space | |
| 2426 delimited values string and returns new B<FingerprintsVector> object. The values within values | |
| 2427 and value IDs string are delimited by spaces. Possible I<ValuesType> values: I<OrderedNumericalValues, | |
| 2428 NumericalValues, or AlphaNumericalValues>. | |
| 2429 | |
| 2430 =item B<OchiaiSimilarityCoefficient> | |
| 2431 | |
| 2432 $Value = $FingerprintsVector->OchiaiSimilarityCoefficient( | |
| 2433 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2434 $Value = Fingerprints::FingerprintsVector::OchiaiSimilarityCoefficient( | |
| 2435 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2436 [$CalculationMode, $SkipValuesCheck]); | |
| 2437 | |
| 2438 Returns value of I<Ochiai> similarity coefficient between two I<FingerprintsVectors> using | |
| 2439 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2440 | |
| 2441 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2442 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2443 | |
| 2444 =item B<SetDescription> | |
| 2445 | |
| 2446 $FingerprintsVector->SetDescription($Description); | |
| 2447 | |
| 2448 Sets I<Description> of fingerprints vector and returns I<FingerprintsVector>. | |
| 2449 | |
| 2450 =item B<SetID> | |
| 2451 | |
| 2452 $FingerprintsVector->SetID($ID); | |
| 2453 | |
| 2454 Sets I<ID> of fingerprints vector and returns I<FingerprintsVector>. | |
| 2455 | |
| 2456 =item B<SetVectorType> | |
| 2457 | |
| 2458 $FingerprintsVector->SetVectorType($VectorType); | |
| 2459 | |
| 2460 Sets I<VectorType> of fingerprints vector and returns I<FingerprintsVector>. | |
| 2461 | |
| 2462 =item B<SetType> | |
| 2463 | |
| 2464 $FingerprintsVector->SetType($Type); | |
| 2465 | |
| 2466 Sets I<FingerprintsVector> values I<Type> and returns I<FingerprintsVector>. Possible I<Type> | |
| 2467 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>. | |
| 2468 | |
| 2469 During calculation of similarity and distance coefficients between two I<FingerprintsVectors>, the | |
| 2470 following conditions apply to vector type, size, value and value IDs: | |
| 2471 | |
| 2472 o For OrderedNumericalValues type, both vectors must be of the same size | |
| 2473 and contain similar types of numerical values in the same order. | |
| 2474 | |
| 2475 o For NumericalValues type, vector value IDs for both vectors must be | |
| 2476 specified; however, their size and order of IDs and numerical values may | |
| 2477 be different. For each vector, value IDs must correspond to vector values. | |
| 2478 | |
| 2479 o For AlphaNumericalValues type, vectors may contain both numerical and | |
| 2480 alphanumerical values and their sizes may be different. | |
| 2481 | |
| 2482 =item B<SetValue> | |
| 2483 | |
| 2484 $FingerprintsVector->SetValue($Index, $Value, [$SkipIndexCheck]); | |
| 2485 | |
| 2486 Sets a I<FingerprintsVector> value specified by I<Index> starting at 0 to I<Value> along with | |
| 2487 optional index range check and returns I<FingerprintsVector>. | |
| 2488 | |
| 2489 =item B<SetValueID> | |
| 2490 | |
| 2491 $FingerprintsVector->SetValueID($Index, $ValueID, [$SkipIndexCheck]); | |
| 2492 | |
| 2493 Sets a I<FingerprintsVector> value ID specified by I<Index> starting at 0 to I<ValueID> along with | |
| 2494 optional index range check and returns I<FingerprintsVector>. | |
| 2495 | |
| 2496 =item B<SetValueIDs> | |
| 2497 | |
| 2498 $FingerprintsVector->SetValueIDs($ValueIDsRef); | |
| 2499 $FingerprintsVector->SetValueIDs(@ValueIDs); | |
| 2500 | |
| 2501 Sets I<FingerprintsVector> value IDs to specified I<ValueIDs> and returns I<FingerprintsVector>. | |
| 2502 | |
| 2503 =item B<SetValues> | |
| 2504 | |
| 2505 $FingerprintsVector->SetValues($ValuesRef); | |
| 2506 $FingerprintsVector->SetValues(@Values); | |
| 2507 | |
| 2508 Sets I<FingerprintsVector> values to specified I<Values> and returns I<FingerprintsVector>. | |
| 2509 | |
| 2510 =item B<SoergelDistanceCoefficient> | |
| 2511 | |
| 2512 $Value = $FingerprintsVector->SoergelDistanceCoefficient( | |
| 2513 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2514 $Value = Fingerprints::FingerprintsVector::SoergelDistanceCoefficient( | |
| 2515 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2516 [$CalculationMode, $SkipValuesCheck]); | |
| 2517 | |
| 2518 Returns value of I<Soergel> distance coefficient between two I<FingerprintsVectors> using | |
| 2519 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2520 | |
| 2521 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2522 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2523 | |
| 2524 =item B<SorensonSimilarityCoefficient> | |
| 2525 | |
| 2526 $Value = $FingerprintsVector->SorensonSimilarityCoefficient( | |
| 2527 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2528 $Value = Fingerprints::FingerprintsVector::SorensonSimilarityCoefficient( | |
| 2529 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2530 [$CalculationMode, $SkipValuesCheck]); | |
| 2531 | |
| 2532 Returns value of I<Sorenson> similarity coefficient between two I<FingerprintsVectors> using | |
| 2533 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2534 | |
| 2535 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2536 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2537 | |
| 2538 =item B<TanimotoSimilarityCoefficient> | |
| 2539 | |
| 2540 $Value = $FingerprintsVector->TanimotoSimilarityCoefficient( | |
| 2541 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
| 2542 $Value = Fingerprints::FingerprintsVector::TanimotoSimilarityCoefficient( | |
| 2543 $FingerprintsVectorA, $FingerprintVectorB, | |
| 2544 [$CalculationMode, $SkipValuesCheck]); | |
| 2545 | |
| 2546 Returns value of I<Tanimoto> similarity coefficient between two I<FingerprintsVectors> using | |
| 2547 optionally specified I<CalculationMode> and optional checking of vector values. | |
| 2548 | |
| 2549 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
| 2550 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
| 2551 | |
| 2552 =item B<StringifyFingerprintsVector> | |
| 2553 | |
| 2554 $String = $FingerprintsVector->StringifyFingerprintsVector(); | |
| 2555 | |
| 2556 Returns a string containing information about I<FingerprintsVector> object. | |
| 2557 | |
| 2558 =back | |
| 2559 | |
| 2560 =head1 AUTHOR | |
| 2561 | |
| 2562 Manish Sud <msud@san.rr.com> | |
| 2563 | |
| 2564 =head1 SEE ALSO | |
| 2565 | |
| 2566 BitVector.pm, FingerprintsStringUtil.pm, FingerprintsBitVector.pm, Vector.pm | |
| 2567 | |
| 2568 =head1 COPYRIGHT | |
| 2569 | |
| 2570 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 2571 | |
| 2572 This file is part of MayaChemTools. | |
| 2573 | |
| 2574 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 2575 the terms of the GNU Lesser General Public License as published by the Free | |
| 2576 Software Foundation; either version 3 of the License, or (at your option) | |
| 2577 any later version. | |
| 2578 | |
| 2579 =cut |
