Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/Fingerprints/FingerprintsVector.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 package Fingerprints::FingerprintsVector; | |
2 # | |
3 # $RCSfile: FingerprintsVector.pm,v $ | |
4 # $Date: 2015/02/28 20:48:54 $ | |
5 # $Revision: 1.31 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability of fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use Scalar::Util (); | |
33 use MathUtil (); | |
34 use TextUtil (); | |
35 use StatisticsUtil (); | |
36 use BitVector; | |
37 use Vector; | |
38 | |
# Package variables consulted by Exporter...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Exporter);

# Distance coefficients. Declared with 'our' instead of 'my' because the array itself is
# listed in @EXPORT_OK: Exporter hands out package variables only, so a lexical array would
# reach the importer as an empty array...
our(@DistanceCoefficients) = qw(CityBlockDistanceCoefficient EuclideanDistanceCoefficient HammingDistanceCoefficient ManhattanDistanceCoefficient SoergelDistanceCoefficient);

# Similarity coefficients; 'our' for the same reason as above...
our(@SimilarityCoefficients) = qw(CosineSimilarityCoefficient CzekanowskiSimilarityCoefficient DiceSimilarityCoefficient OchiaiSimilarityCoefficient JaccardSimilarityCoefficient SorensonSimilarityCoefficient TanimotoSimilarityCoefficient);

# New from string...
my(@NewFromString) = qw(NewFromValuesString NewFromValuesAndIDsString NewFromIDsAndValuesString NewFromValuesAndIDsPairsString NewFromIDsAndValuesPairsString);

# Names which have always been available for explicit import. The ':all' tag is kept at
# exactly this set so existing 'use ... qw(:all)' callers don't start importing more names...
my(@CoreExportOK) = qw(GetSupportedDistanceCoefficients GetSupportedSimilarityCoefficients GetSupportedDistanceAndSimilarityCoefficients @DistanceCoefficients @SimilarityCoefficients);

@EXPORT = qw(IsFingerprintsVector);

# Every name used in %EXPORT_TAGS must also appear in @EXPORT or @EXPORT_OK; otherwise
# Exporter refuses the import of that tag with a "is not exported by" error...
@EXPORT_OK = (@CoreExportOK, @NewFromString, @DistanceCoefficients, @SimilarityCoefficients);

%EXPORT_TAGS = (
  new => [@NewFromString],
  distancecoefficients => [@DistanceCoefficients],
  similaritycoefficients => [@SimilarityCoefficients],
  all => [@EXPORT, @CoreExportOK]
);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions...
use overload '""' => 'StringifyFingerprintsVector';
# Class constructor: bless an empty hash, set up the default object data and then apply
# the specified names and values pairs to the new object...
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Works for both a class name and an existing object used as invocant...
  my $This = bless {}, (ref($Class) || $Class);

  $This->_InitializeFingerprintsVector();
  $This->_InitializeFingerprintsVectorProperties(%NamesAndValues);

  return $This;
}
83 | |
# Initialize object data: an empty type string along with empty lists of values and
# value IDs. Returns the object...
#
sub _InitializeFingerprintsVector {
  my($This) = @_;

  # Type of fingerprint vector...
  $This->{Type} = '';

  # Fingerprint vector values and value IDs; existing lists are emptied in place...
  for my $ListName (qw(Values ValueIDs)) {
    @{$This->{$ListName}} = ();
  }

  return $This;
}
100 | |
# Initialize class: record the package name in the file level variable used as prefix
# in error and warning messages...
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;

  return $ClassName;
}
106 | |
# Initialize object properties: each specified name is turned into a call to the
# corresponding Set<Name> method with its value as the only argument. Croaks when no
# Type has been specified. Returns the object...
#
sub _InitializeFingerprintsVectorProperties {
  my($This, %NamesAndValues) = @_;

  for my $PropertyName (keys %NamesAndValues) {
    my $SetterName = "Set${PropertyName}";
    $This->$SetterName($NamesAndValues{$PropertyName});
  }

  # Type is the only mandatory property...
  unless (exists $NamesAndValues{Type}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying type...";
  }

  return $This;
}
122 | |
# Create a new fingerprints vector using space delimited values string. This functionality can be
# either invoked as a class function or an object method.
#
# Parameters: [invocant,] type, values string. An undefined or empty values string, or the
# string None in any case, corresponds to a vector without any values.
#
# Returns a new Fingerprints::FingerprintsVector object.
#
sub NewFromValuesString ($$;$) {
  # Three arguments imply a method call; the invocant takes no part in the construction...
  shift if @_ == 3;
  my($Type, $ValuesString) = @_;

  my(@Values) = ();
  if (defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i) {
    @Values = split(' ', $ValuesString);
  }

  # Explicit class method call instead of the ambiguous indirect object syntax...
  return Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values);
}
147 | |
# Create a new fingerprints vector using values and IDs string containing semicolon
# delimited values string and value IDs string. The values within values and value IDs
# strings are delimited by spaces.
#
# This functionality can be either invoked as a class function or an object method.
#
# Parameters: [invocant,] type, "values;value IDs" string. An empty part or the string None
# in any case corresponds to an empty list.
#
# Returns a new Fingerprints::FingerprintsVector object; warns and returns undef when number
# of values and number of value IDs are different.
#
sub NewFromValuesAndIDsString ($$;$) {
  # Three arguments imply a method call; the invocant takes no part in the construction...
  shift if @_ == 3;
  my($Type, $ValuesAndIDsString) = @_;

  my($ValuesString, $ValueIDsString) = split(';', $ValuesAndIDsString);

  my(@Values) = ();
  if (defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i) {
    @Values = split(' ', $ValuesString);
  }

  my(@ValueIDs) = ();
  if (defined($ValueIDsString) && length($ValueIDsString) && $ValueIDsString !~ /^None$/i) {
    @ValueIDs = split(' ', $ValueIDsString);
  }

  if (@Values != @ValueIDs) {
    carp "Warning: ${ClassName}->NewFromValuesAndIDsString: Object can't be instantiated: Number specified values, " . scalar(@Values) . ", must be equal to number of specified value IDs, " . scalar(@ValueIDs) . "...";
    # Explicit undef is retained: it's the established return value for this failure...
    return undef;
  }

  # Explicit class method call instead of the ambiguous indirect object syntax...
  return Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);
}
186 | |
# Create a new fingerprints vector using IDs and values string containing semicolon
# delimited value IDs string and values string. The values within value IDs and values
# strings are delimited by spaces.
#
# This functionality can be either invoked as a class function or an object method.
#
# Parameters: [invocant,] type, "value IDs;values" string. An empty part or the string None
# in any case corresponds to an empty list.
#
# Returns a new Fingerprints::FingerprintsVector object; warns and returns undef when number
# of values and number of value IDs are different.
#
sub NewFromIDsAndValuesString ($$;$) {
  # Three arguments imply a method call; the invocant takes no part in the construction...
  shift if @_ == 3;
  my($Type, $IDsAndValuesString) = @_;

  # IDs come first in this format...
  my($ValueIDsString, $ValuesString) = split(';', $IDsAndValuesString);

  my(@Values) = ();
  if (defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i) {
    @Values = split(' ', $ValuesString);
  }

  my(@ValueIDs) = ();
  if (defined($ValueIDsString) && length($ValueIDsString) && $ValueIDsString !~ /^None$/i) {
    @ValueIDs = split(' ', $ValueIDsString);
  }

  if (@Values != @ValueIDs) {
    carp "Warning: ${ClassName}->NewFromIDsAndValuesString: Object can't be instantiated: Number specified values, " . scalar(@Values) . ", must be equal to number of specified value IDs, " . scalar(@ValueIDs) . "...";
    # Explicit undef is retained: it's the established return value for this failure...
    return undef;
  }

  # Explicit class method call instead of the ambiguous indirect object syntax...
  return Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);
}
225 | |
# Create a new fingerprints vector using values and IDs pairs string containing space
# delimited value and value ID pairs.
#
# This functionality can be either invoked as a class function or an object method.
#
# Parameters: [invocant,] type, "value ID value ID ..." string. The single pair None None,
# in any case, corresponds to a vector without any values and value IDs.
#
# Returns a new Fingerprints::FingerprintsVector object; warns and returns undef when the
# string contains an odd number of items.
#
sub NewFromValuesAndIDsPairsString ($$;$) {
  # Three arguments imply a method call; the invocant takes no part in the construction...
  shift if @_ == 3;
  my($Type, $ValuesAndIDsPairsString) = @_;

  my(@ValuesAndIDsPairs) = split(' ', $ValuesAndIDsPairsString);
  if (@ValuesAndIDsPairs % 2) {
    carp "Warning: ${ClassName}->NewFromValuesAndIDsPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
    # Explicit undef is retained: it's the established return value for this failure...
    return undef;
  }

  my(@Values) = ();
  my(@ValueIDs) = ();
  if (!(@ValuesAndIDsPairs == 2 && $ValuesAndIDsPairs[0] =~ /^None$/i && $ValuesAndIDsPairs[1] =~ /^None$/i)) {
    # Value at even position followed by its ID at odd position...
    for (my $Index = 0; $Index < $#ValuesAndIDsPairs; $Index += 2) {
      push @Values, $ValuesAndIDsPairs[$Index];
      push @ValueIDs, $ValuesAndIDsPairs[$Index + 1];
    }
  }

  # Explicit class method call instead of the ambiguous indirect object syntax...
  return Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);
}
260 | |
# Create a new fingerprints vector using IDs and values pairs string containing space
# delimited value ID and value pairs.
#
# This functionality can be either invoked as a class function or an object method.
#
# Parameters: [invocant,] type, "ID value ID value ..." string. The single pair None None,
# in any case, corresponds to a vector without any values and value IDs.
#
# Returns a new Fingerprints::FingerprintsVector object. Croaks when the string contains an
# odd number of items.
#
sub NewFromIDsAndValuesPairsString ($$;$) {
  # Three arguments imply a method call; the invocant takes no part in the construction...
  shift if @_ == 3;
  my($Type, $IDsAndValuesPairsString) = @_;

  my(@IDsAndValuesPairs) = split(' ', $IDsAndValuesPairsString);
  if (@IDsAndValuesPairs % 2) {
    # NOTE(review): NewFromValuesAndIDsPairsString warns and returns undef for the same
    # condition; croak is kept here to preserve existing behavior. The unreachable return
    # which used to follow croak has been dropped...
    croak "Error: ${ClassName}->NewFromIDsAndValuesPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
  }

  my(@Values) = ();
  my(@ValueIDs) = ();
  if (!(@IDsAndValuesPairs == 2 && $IDsAndValuesPairs[0] =~ /^None$/i && $IDsAndValuesPairs[1] =~ /^None$/i)) {
    # ID at even position followed by its value at odd position...
    for (my $Index = 0; $Index < $#IDsAndValuesPairs; $Index += 2) {
      push @ValueIDs, $IDsAndValuesPairs[$Index];
      push @Values, $IDsAndValuesPairs[$Index + 1];
    }
  }

  # Explicit class method call instead of the ambiguous indirect object syntax...
  return Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);
}
295 | |
# Set type of fingerprint vector. Supported types are: OrderedNumericalValues, NumericalValues, and
# AlphaNumericalValues
#
#   . For OrderedNumericalValues type, both vectors must be of the same size and contain similar
#     types of numerical values in the same order.
#
#   . For NumericalValues type, vector value IDs for both vectors must be specified; however, their
#     size and order of IDs and numerical values may be different. For each vector, value IDs must
#     correspond to vector values.
#
#   . For AlphaNumericalValues type, vectors may contain both numerical and alphanumerical values
#     and their sizes may be different.
#
# The type name is matched irrespective of case and stored as specified. It can be set only
# once: croaks for an unsupported type or when a type is already set. Returns the object.
#
sub SetType {
  my($This, $Type) = @_;

  unless ($Type =~ /^(OrderedNumericalValues|NumericalValues|AlphaNumericalValues)$/i) {
    croak "Error: ${ClassName}->SetType: Specified value, $Type, for Type is not vaild. Supported types in current release of MayaChemTools: OrderedNumericalValues, NumericalValues or AlphaNumericalValues";
  }

  croak "Error: ${ClassName}->SetType: Can't change intial fingerprints vector type: It's already set..."
    if $This->{Type};

  $This->{Type} = $Type;

  return $This;
}
323 | |
# Get fingerprints vector type as stored in the object...
#
sub GetType {
  my $This = shift;

  return $This->{Type};
}
331 | |
# Set ID of fingerprints vector and return the object...
#
sub SetID {
  my $This = shift;
  my($NewID) = @_;

  $This->{ID} = $NewID;

  return $This;
}
340 | |
# Get ID of fingerprints vector; the string None is returned when no ID has been set...
#
sub GetID {
  my $This = shift;

  if (exists $This->{ID}) {
    return $This->{ID};
  }
  return 'None';
}
347 | |
# Set description of fingerprints vector and return the object...
#
sub SetDescription {
  my $This = shift;
  my($NewDescription) = @_;

  $This->{Description} = $NewDescription;

  return $This;
}
356 | |
# Get description of fingerprints vector; a fixed placeholder string is returned when no
# description has been set...
#
sub GetDescription {
  my $This = shift;

  if (exists $This->{Description}) {
    return $This->{Description};
  }
  return 'No description available';
}
363 | |
# Set vector type and return the object. The value is stored as specified without any
# validation...
#
sub SetVectorType {
  my $This = shift;
  my($NewVectorType) = @_;

  $This->{VectorType} = $NewVectorType;

  return $This;
}
372 | |
# Get vector type; the string FingerprintsVector is returned when no vector type has
# been set...
#
sub GetVectorType {
  my $This = shift;

  if (exists $This->{VectorType}) {
    return $This->{VectorType};
  }
  return 'FingerprintsVector';
}
379 | |
# Set values of a fingerprint vector using a vector, reference to an array or an array.
# Existing values are replaced. Returns the object...
#
sub SetValues {
  my $This = shift;

  $This->_SetOrAddValuesOrValueIDs("SetValues", @_);

  return $This;
}
389 | |
# Set value IDs of a fingerprint vector using a vector, reference to an array or an array.
# Existing value IDs are replaced. Returns the object...
#
sub SetValueIDs {
  my $This = shift;

  $This->_SetOrAddValuesOrValueIDs("SetValueIDs", @_);

  return $This;
}
399 | |
# Add values to a fingerprint vector using a vector, reference to an array or an array.
# New values are appended to existing values. Returns the object...
#
sub AddValues {
  my $This = shift;

  $This->_SetOrAddValuesOrValueIDs("AddValues", @_);

  return $This;
}
409 | |
# Add value IDs to a fingerprint vector using a vector, reference to an array or an array.
# New value IDs are appended to existing value IDs. Returns the object...
#
sub AddValueIDs {
  my $This = shift;

  $This->_SetOrAddValuesOrValueIDs("AddValueIDs", @_);

  return $This;
}
419 | |
# Set or add values or value IDs using:
#
#    o List of values or ValueIDs
#    o Reference to a list of values or ValueIDs
#    o A vector containing values or ValueIDs
#
# Mode is one of SetValues, SetValueIDs, AddValues or AddValueIDs and is matched
# irrespective of case. Only the first specified value is inspected to decide the input
# format. Returns nothing for an empty list; otherwise, returns the object. Croaks for an
# unsupported reference type or an unknown mode.
#
sub _SetOrAddValuesOrValueIDs {
  my($This, $Mode, @Values) = @_;

  return unless @Values;

  # Input format is decided by the first value...
  my $FirstValue = $Values[0];
  my $FirstValueRefType = ref $FirstValue;

  if ($FirstValueRefType =~ /^(SCALAR|HASH|CODE|REF|GLOB)/) {
    croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Trying to add values to vector object with a reference to unsupported value format...";
  }

  my $ValuesRef;
  if (Vector::IsVector($FirstValue)) {
    # It's a vector...
    $ValuesRef = $FirstValue->GetValues();
  }
  elsif ($FirstValueRefType =~ /^ARRAY/) {
    # It's an array reference...
    $ValuesRef = $FirstValue;
  }
  else {
    # It's a list of values...
    $ValuesRef = \@Values;
  }

  # Map mode to the target list and whether it's emptied before appending...
  my($TargetList, $ClearFirst);
  if ($Mode =~ /^SetValues$/i) {
    ($TargetList, $ClearFirst) = ('Values', 1);
  }
  elsif ($Mode =~ /^SetValueIDs$/i) {
    ($TargetList, $ClearFirst) = ('ValueIDs', 1);
  }
  elsif ($Mode =~ /^AddValues$/i) {
    ($TargetList, $ClearFirst) = ('Values', 0);
  }
  elsif ($Mode =~ /^AddValueIDs$/i) {
    ($TargetList, $ClearFirst) = ('ValueIDs', 0);
  }
  else {
    croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Unknown mode $Mode...";
  }

  # Empty and append as two separate steps, in that order...
  if ($ClearFirst) {
    @{$This->{$TargetList}} = ();
  }
  push @{$This->{$TargetList}}, @{$ValuesRef};

  return $This;
}
465 | |
# Set a specific value in fingerprint vector with indices starting from 0. Unless
# SkipCheck is true, croaks for a negative index or an index which is not less than the
# number of values. Returns the object...
#
sub SetValue {
  my($This, $Index, $Value, $SkipCheck) = @_;

  if (!$SkipCheck) {
    # Check before setting...
    croak "Error: ${ClassName}->SetValue: Index value must be a positive number..."
      if $Index < 0;
    croak "Error: ${ClassName}->SetValue: Index vaue must be less than number of values..."
      if $Index >= $This->GetNumOfValues();
  }

  return $This->_SetValue($Index, $Value);
}
486 | |
# Set a fingerprint vector value at specified index without any checks and return the
# object...
#
sub _SetValue {
  my $This = shift;
  my($Index, $NewValue) = @_;

  $This->{Values}[$Index] = $NewValue;

  return $This;
}
496 | |
# Get a specific value from fingerprint vector with indices starting from 0. Croaks for
# a negative index or an index which is not less than the number of values...
#
sub GetValue {
  my($This, $Index) = @_;

  croak "Error: ${ClassName}->GetValue: Index value must be a positive number..."
    if $Index < 0;
  croak "Error: ${ClassName}->GetValue: Index value must be less than number of values..."
    if $Index >= $This->GetNumOfValues();

  return $This->_GetValue($Index);
}
510 | |
# Get a fingerprint vector value at specified index without any checks...
#
sub _GetValue {
  my $This = shift;
  my($Index) = @_;

  return $This->{Values}[$Index];
}
517 | |
# Return vector values as an array in list context or as a reference to the array held
# by the object in scalar context...
#
sub GetValues {
  my $This = shift;

  if (wantarray) {
    return @{$This->{Values}};
  }
  return \@{$This->{Values}};
}
525 | |
# Set a specific value ID in fingerprint vector with indices starting from 0. Unless
# SkipCheck is true, croaks for a negative index or an index which is not less than the
# number of value IDs. Returns the object...
#
sub SetValueID {
  my($This, $Index, $Value, $SkipCheck) = @_;

  if (!$SkipCheck) {
    # Check before setting...
    croak "Error: ${ClassName}->SetValueID: Index value must be a positive number..."
      if $Index < 0;
    croak "Error: ${ClassName}->SetValueID: Index vaue must be less than number of value IDs..."
      if $Index >= $This->GetNumOfValueIDs();
  }

  return $This->_SetValueID($Index, $Value);
}
546 | |
# Set a fingerprint vector value ID at specified index without any checks and return the
# object...
#
sub _SetValueID {
  my $This = shift;
  my($Index, $NewValueID) = @_;

  $This->{ValueIDs}[$Index] = $NewValueID;

  return $This;
}
556 | |
# Get a specific value ID from fingerprint vector with indices starting from 0. Croaks
# for a negative index or an index which is not less than the number of value IDs...
#
sub GetValueID {
  my($This, $Index) = @_;

  croak "Error: ${ClassName}->GetValueID: Index value must be a positive number..."
    if $Index < 0;
  croak "Error: ${ClassName}->GetValueID: Index value must be less than number of value IDs..."
    if $Index >= $This->GetNumOfValueIDs();

  return $This->_GetValueID($Index);
}
570 | |
# Get a fingerprint vector value ID at specified index without any checks...
#
sub _GetValueID {
  my $This = shift;
  my($Index) = @_;

  return $This->{ValueIDs}[$Index];
}
578 | |
# Return vector value IDs as an array in list context or as a reference to the array held
# by the object in scalar context...
#
sub GetValueIDs {
  my $This = shift;

  if (wantarray) {
    return @{$This->{ValueIDs}};
  }
  return \@{$This->{ValueIDs}};
}
586 | |
# Get fingerprints vector string containing values and/or IDs string in a specified format.
#
# Supported formats, matched irrespective of case: IDsAndValuesString, IDsAndValues,
# IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs,
# ValuesAndIDsPairsString, ValuesAndIDsPairs, ValueIDsString, ValueIDs, ValuesString, Values.
#
# Returns the string generated by the corresponding Get...String method; croaks for any
# other format.
#
sub GetFingerprintsVectorString {
  my($This, $Format) = @_;

  # Each supported format returns directly. The statements which followed every return, and
  # the final return of an empty string, could never be reached and have been dropped...
  return $This->GetIDsAndValuesString() if $Format =~ /^(IDsAndValuesString|IDsAndValues)$/i;
  return $This->GetIDsAndValuesPairsString() if $Format =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i;
  return $This->GetValuesAndIDsString() if $Format =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i;
  return $This->GetValuesAndIDsPairsString() if $Format =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i;
  return $This->GetValueIDsString() if $Format =~ /^(ValueIDsString|ValueIDs)$/i;
  return $This->GetValuesString() if $Format =~ /^(ValuesString|Values)$/i;

  croak "Error: ${ClassName}->GetFingerprintsVectorString: Specified vector string format, $Format, is not supported. Value values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValueIDsString, ValueIDs, ValuesString, Values...";
}
# Get vector value IDs and values string as space delimited ASCII string separated
# by semicolon. The string None takes the place of missing value IDs; None;None is
# returned when no values are available...
#
sub GetIDsAndValuesString {
  my($This) = @_;

  my $NumOfValueIDs = scalar @{$This->{ValueIDs}};
  my $NumOfValues = scalar @{$This->{Values}};

  # Values are not available...
  return "None;None" unless $NumOfValues;

  # Values are available with or without IDs...
  my $ValueIDsString = $NumOfValueIDs ? join(' ', @{$This->{ValueIDs}}) : "None";

  return $ValueIDsString . ";" . join(' ', @{$This->{Values}});
}
622 | |
# Get vector value IDs and value pairs string as space delimited ASCII string. The string
# None takes the place of each ID when no value IDs are available; None None is returned
# when no values are available...
#
sub GetIDsAndValuesPairsString {
  my($This) = @_;

  # Values are unavailable...
  return "None None" unless @{$This->{Values}};

  my $ValueIDsPresent = @{$This->{ValueIDs}} ? 1 : 0;

  # One ID and value pair for each value...
  my @IDsAndValuesPairs = map {
    (($ValueIDsPresent ? $This->{ValueIDs}->[$_] : 'None'), $This->{Values}->[$_])
  } (0 .. $#{$This->{Values}});

  return join(' ', @IDsAndValuesPairs);
}
647 | |
# Get vector values and value IDs string as space delimited ASCII string separated
# by semicolon. The string None takes the place of missing value IDs; None;None is
# returned when no values are available...
#
sub GetValuesAndIDsString {
  my($This) = @_;

  my $NumOfValueIDs = scalar @{$This->{ValueIDs}};
  my $NumOfValues = scalar @{$This->{Values}};

  # Values are not available...
  return "None;None" unless $NumOfValues;

  # Values are available with or without IDs...
  my $ValueIDsString = $NumOfValueIDs ? join(' ', @{$This->{ValueIDs}}) : "None";

  return join(' ', @{$This->{Values}}) . ";" . $ValueIDsString;
}
667 | |
# Get vector value and value ID pairs string as space delimited ASCII string. The string
# None takes the place of each ID when no value IDs are available; None None is returned
# when no values are available...
#
sub GetValuesAndIDsPairsString {
  my($This) = @_;

  # Values are unavailable...
  return "None None" unless @{$This->{Values}};

  my $ValueIDsPresent = @{$This->{ValueIDs}} ? 1 : 0;

  # One value and ID pair for each value...
  my @ValuesAndIDsPairs = map {
    ($This->{Values}->[$_], ($ValueIDsPresent ? $This->{ValueIDs}->[$_] : 'None'))
  } (0 .. $#{$This->{Values}});

  return join(' ', @ValuesAndIDsPairs);
}
692 | |
# Get vector value IDs string as space delimited ASCII string; the string None is
# returned when no value IDs are available...
#
sub GetValueIDsString {
  my($This) = @_;

  if (@{$This->{ValueIDs}}) {
    return join(' ', @{$This->{ValueIDs}});
  }
  return 'None';
}
700 | |
# Get vector values string as space delimited ASCII string; the string None is returned
# when no values are available...
#
sub GetValuesString {
  my($This) = @_;

  if (@{$This->{Values}}) {
    return join(' ', @{$This->{Values}});
  }
  return 'None';
}
708 | |
# Get number of values in the vector...
#
sub GetNumOfValues {
  my $This = shift;

  my $ValuesRef = $This->{Values};

  return scalar @{$ValuesRef};
}
715 | |
# Get number of non-zero values: count of values which are numerically different
# from 0...
#
sub GetNumOfNonZeroValues {
  my($This) = @_;

  my $LastIndex = $This->GetNumOfValues() - 1;

  return scalar grep { $This->{Values}[$_] != 0 } (0 .. $LastIndex);
}
731 | |
# Get number of value IDs in the vector...
#
sub GetNumOfValueIDs {
  my $This = shift;

  my $ValueIDsRef = $This->{ValueIDs};

  return scalar @{$ValueIDsRef};
}
738 | |
739 # FingerprintsVector class provides methods to calculate similarity between vectors | |
740 # containing three different types of values: | |
741 # | |
742 # Type I: OrderedNumericalValues | |
743 # | |
744 # . Size of two vectors are same | |
745 # . Vectors contain real values in a specific order. For example: MACCS keys count, Topological | |
746 # pharmacophore atom pairs and so on. | |
747 # . Option to calculate similarity value using continuous values or binary values | |
748 # | |
749 # Type II: UnorderedNumericalValues | |
750 # | |
751 # . Size of two vectors might not be same | |
752 # . Vectors contain unordered real value identified by value IDs. For example: Topological atom pairs, | |
753 # Topological atom torsions and so on | |
754 # . Option to calculate similarity value using continuous values or binary values | |
755 # | |
756 # Type III: AlphaNumericalValues | |
757 # | |
758 # . Size of two vectors might not be same | |
759 # . Vectors contain unordered alphanumerical values. For example: Extended connectivity fingerprints, | |
760 # atom neighborhood fingerprints. | |
761 # . The vector values are treated as keys or bit indices and similarity value is calculated accordingly. | |
762 # | |
763 # Before performing similarity or distance calculations between vectors containing UnorderedNumericalValues | |
764 # or AlphaNumericalValues, the vectors are transformed into vectors containing unique OrderedNumericalValues | |
765 # using value IDs for UnorderedNumericalValues and values itself for AlphaNumericalValues. | |
766 # | |
767 # Three forms of similarity or distance calculation between two vectors are supported: AlgebraicForm, | |
768 # BinaryForm or SetTheoreticForm. | |
769 # | |
770 # The value of an extra parameter, CalculationMode, passed to each similarity or distance function | |
771 # controls the calculation. Supported values for CalculationMode: AlgebraicForm, BinaryForm and | |
772 # SetTheoreticForm. Default: AlgebraicForm. | |
773 # | |
774 # For BinaryForm CalculationMode, the ordered list of processed final vector values containing the value or | |
775 # count of each unique value type is simply converted into a binary vector containing 1s and 0s | |
776 # corresponding to presence or absence of values before calculating similarity or distance between | |
777 # two vectors. | |
778 # | |
779 # For two fingerprint vectors A and B of same size containing OrderedNumericalValues, let: | |
780 # | |
781 # N = Number of values in A or B | |
782 # | |
783 # Xa = Values of vector A | |
784 # Xb = Values of vector B | |
785 # | |
786 # Xai = Value of ith element in A | |
787 # Xbi = Value of ith element in B | |
788 # | |
789 # SUM = Sum of i over N values | |
790 # | |
791 # For SetTheoreticForm of calculation between two vectors, let: | |
792 # | |
793 # SetIntersectionXaXb = SUM ( MIN ( Xai, Xbi ) ) | |
794 # SetDifferenceXaXb = SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) | |
795 # | |
796 # For BinaryForm of calculation between two vectors, let: | |
797 # | |
798 # Na = Number of bits set to "1" in A = SUM ( Xai ) | |
799 # Nb = Number of bits set to "1" in B = SUM ( Xbi ) | |
800 # Nc = Number of bits set to "1" in both A and B = SUM ( Xai * Xbi ) | |
801 # Nd = Number of bits set to "0" in both A and B = SUM ( 1 - Xai - Xbi + Xai * Xbi) | |
802 # | |
803 # N = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd | |
804 # | |
805 # Additionally, for BinaryForm various values also correspond to: | |
806 # | |
807 # Na = | Xa | | |
808 # Nb = | Xb | | |
809 # Nc = | SetIntersectionXaXb | | |
810 # Nd = N - | SetDifferenceXaXb | | |
811 # | |
812 # | SetDifferenceXaXb | = N - Nd = Na + Nb - Nc + Nd - Nd = Na + Nb - Nc | |
813 # = | Xa | + | Xb | - | SetIntersectionXaXb | | |
814 # | |
815 # Various distance coefficients and similarity coefficients [ Ref 40, Ref 62, Ref 64 ] for a pair of vectors A and B | |
816 # in AlgebraicForm and BinaryForm are defined as follows: | |
817 # | |
818 # . CityBlockDistanceCoefficient: ( same as HammingDistanceCoefficient and ManhattanDistanceCoefficient) | |
819 # | |
820 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) ) | |
821 # | |
822 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
823 # | |
824 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb | | |
825 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
826 # | |
827 # . CosineSimilarityCoefficient: ( same as OchiaiSimilarityCoefficient) | |
828 # | |
829 # . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) ) | |
830 # | |
831 # . BinaryForm: Nc / SQRT ( Na * Nb) | |
832 # | |
833 # . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) | |
834 # = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) ) | |
835 # | |
836 # . CzekanowskiSimilarityCoefficient: ( same as DiceSimilarityCoefficient and SorensonSimilarityCoefficient) | |
837 # | |
838 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
839 # | |
840 # . BinaryForm: 2 * Nc / ( Na + Nb ) | |
841 # | |
842 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) | |
843 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
844 # | |
845 # . DiceSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and SorensonSimilarityCoefficient) | |
846 # | |
847 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
848 # | |
849 # . BinaryForm: 2 * Nc / ( Na + Nb ) | |
850 # | |
851 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) | |
852 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
853 # | |
854 # . EuclideanDistanceCoefficient: | |
855 # | |
856 # . AlgebraicForm: SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) ) | |
857 # | |
858 # . BinaryForm: SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc ) | |
859 # | |
860 # . SetTheoreticForm: SQRT ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) | |
861 # = SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) | |
862 # | |
863 # . HammingDistanceCoefficient: ( same as CityBlockDistanceCoefficient and ManhattanDistanceCoefficient) | |
864 # | |
865 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) ) | |
866 # | |
867 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
868 # | |
869 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb | | |
870 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
871 # | |
872 # . JaccardSimilarityCoefficient: ( same as TanimotoSimilarityCoefficient) | |
873 # | |
874 # . AlgebraicForm: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) ) | |
875 # | |
876 # . BinaryForm: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) | |
877 # | |
878 # . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb | | |
879 # = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
880 # | |
881 # . ManhattanDistanceCoefficient: ( same as CityBlockDistanceCoefficient and HammingDistanceCoefficient) | |
882 # | |
883 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) ) | |
884 # | |
885 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
886 # | |
887 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb | | |
888 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
889 # | |
890 # . OchiaiSimilarityCoefficient: ( same as CosineSimilarityCoefficient) | |
891 # | |
892 # . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) ) | |
893 # | |
894 # . BinaryForm: Nc / SQRT ( Na * Nb) | |
895 # | |
896 # . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) | |
897 # = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) ) | |
898 # | |
899 # . SorensonSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and DiceSimilarityCoefficient) | |
900 # | |
901 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
902 # | |
903 # . BinaryForm: 2 * Nc / ( Na + Nb ) | |
904 # | |
905 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) | |
906 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
907 # | |
908 # . SoergelDistanceCoefficient: | |
909 # | |
910 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) ) | |
911 # | |
912 # . BinaryForm: 1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc ) | |
913 # | |
914 # . SetTheoreticForm: ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) / | SetDifferenceXaXb | | |
915 # = ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
916 # | |
917 # . TanimotoSimilarityCoefficient: ( same as JaccardSimilarityCoefficient) | |
918 # | |
919 # . AlgebraicForm: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) ) | |
920 # | |
921 # . BinaryForm: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) | |
922 # | |
923 # . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb | | |
924 # = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
925 # | |
926 # | |
927 | |
# Calculate Hamming distance coefficient between two fingerprint vectors.
#
# Hamming distance is obtained by forwarding the two vectors, the optional
# calculation mode and the optional skip-values-check flag to
# CityBlockDistanceCoefficient and returning whatever it returns. Optional
# arguments that were not supplied are forwarded as undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub HammingDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return CityBlockDistanceCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
937 | |
# Calculate Manhattan distance coefficient between two fingerprint vectors.
#
# Manhattan distance is obtained by forwarding the two vectors, the optional
# calculation mode and the optional skip-values-check flag to
# CityBlockDistanceCoefficient and returning whatever it returns. Optional
# arguments that were not supplied are forwarded as undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub ManhattanDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return CityBlockDistanceCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
947 | |
# Calculate CityBlock distance coefficient between two fingerprint vectors.
#
# CalculationMode defaults to AlgebraicForm and SkipValuesCheck defaults to 0
# when they are not defined. The vectors are first handed over to
# _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation; the actual
# calculation is then delegated to the helper matching CalculationMode
# (AlgebraicForm, BinaryForm or SetTheoreticForm; matched case insensitively).
# A mode matching none of these yields undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CityBlockDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate and process fingerprints vectors before any calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("CityBlockDistanceCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Delegate to the form specific implementation...
  return _CityBlockDistanceCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _CityBlockDistanceCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _CityBlockDistanceCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  # No matching calculation mode...
  return undef;
}
976 | |
# Calculate CityBlock distance coefficient using algebraic form...
#
# AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
#
# Returns the value obtained from
# _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues for the two vectors.
#
sub _CityBlockDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $Distance = _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);

  return $Distance;
}
987 | |
# Calculate CityBlock distance coefficient using binary form...
#
# BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
#
# Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits for the two vectors.
#
sub _CityBlockDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($BitsInA, $BitsInB, $BitsInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  return $BitsInA + $BitsInB - 2 * $BitsInBoth;
}
998 | |
# Calculate CityBlock distance coefficient using set theoretic form...
#
# SetTheoreticForm: SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
#
# The individual sums come from _GetSumOfFingerprintsOrderedValues and the sum
# of minimums from _GetSumOfMinimumOfFingerprintsOrderdedValues.
#
sub _CityBlockDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  return $TotalA + $TotalB - 2 * $TotalOfMinimums;
}
1011 | |
# Calculate Ochiai similarity coefficient between two fingerprint vectors.
#
# Ochiai similarity is obtained by forwarding the two vectors, the optional
# calculation mode and the optional skip-values-check flag to
# CosineSimilarityCoefficient and returning whatever it returns. Optional
# arguments that were not supplied are forwarded as undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub OchiaiSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return CosineSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1021 | |
# Calculate Cosine similarity coefficient between two fingerprint vectors.
#
# CalculationMode defaults to AlgebraicForm and SkipValuesCheck defaults to 0
# when they are not defined. The vectors are first handed over to
# _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation; the actual
# calculation is then delegated to the helper matching CalculationMode
# (AlgebraicForm, BinaryForm or SetTheoreticForm; matched case insensitively).
# A mode matching none of these yields undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CosineSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate and process fingerprints vectors before any calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("CosineSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Delegate to the form specific implementation...
  return _CosineSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _CosineSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _CosineSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  # No matching calculation mode...
  return undef;
}
1050 | |
# Calculate Cosine similarity coefficient using algebraic form...
#
# AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _CosineSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $Products = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Divisor = sqrt($SquaresA * $SquaresB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Products / $Divisor;
}
1066 | |
# Calculate Cosine similarity coefficient using binary form...
#
# BinaryForm: Nc / SQRT ( Na * Nb)
#
# Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits for the two vectors. Returns 0 when the
# denominator evaluates to zero.
#
sub _CosineSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($BitsInA, $BitsInB, $BitsInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = sqrt($BitsInA * $BitsInB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $BitsInBoth / $Divisor;
}
1080 | |
# Calculate Cosine similarity coefficient using set theoretic form...
#
# SetTheoreticForm: SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _CosineSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Divisor = sqrt($TotalA * $TotalB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $TotalOfMinimums / $Divisor;
}
1096 | |
# Calculate Czekanowski similarity coefficient between two fingerprint vectors.
#
# Czekanowski similarity is obtained by forwarding the two vectors, the optional
# calculation mode and the optional skip-values-check flag to
# DiceSimilarityCoefficient and returning whatever it returns. Optional
# arguments that were not supplied are forwarded as undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CzekanowskiSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1106 | |
# Calculate Sorenson similarity coefficient between two fingerprint vectors.
#
# Sorenson similarity is obtained by forwarding the two vectors, the optional
# calculation mode and the optional skip-values-check flag to
# DiceSimilarityCoefficient and returning whatever it returns. Optional
# arguments that were not supplied are forwarded as undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SorensonSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1116 | |
# Calculate Dice similarity coefficient between two fingerprint vectors.
#
# CalculationMode defaults to AlgebraicForm and SkipValuesCheck defaults to 0
# when they are not defined. The vectors are first handed over to
# _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation; the actual
# calculation is then delegated to the helper matching CalculationMode
# (AlgebraicForm, BinaryForm or SetTheoreticForm; matched case insensitively).
# A mode matching none of these yields undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate and process fingerprints vectors before any calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("DiceSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Delegate to the form specific implementation...
  return _DiceSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _DiceSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _DiceSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  # No matching calculation mode...
  return undef;
}
1145 | |
# Calculate Dice similarity coefficient using algebraic form...
#
# AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _DiceSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $Products = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Dividend = 2 * $Products;
  my $Divisor = $SquaresA + $SquaresB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1161 | |
# Calculate Dice similarity coefficient using binary form...
#
# BinaryForm: 2 * Nc / ( Na + Nb )
#
# Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits for the two vectors. Returns 0 when the
# denominator evaluates to zero.
#
sub _DiceSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($BitsInA, $BitsInB, $BitsInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Dividend = 2 * $BitsInBoth;
  my $Divisor = $BitsInA + $BitsInB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1175 | |
# Calculate Dice similarity coefficient using set theoretic form...
#
# SetTheoreticForm: 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _DiceSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Dividend = 2 * $TotalOfMinimums;
  my $Divisor = $TotalA + $TotalB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1191 | |
1192 | |
# Calculate Euclidean distance coefficient between two fingerprint vectors.
#
# CalculationMode defaults to AlgebraicForm and SkipValuesCheck defaults to 0
# when they are not defined. The vectors are first handed over to
# _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation; the actual
# calculation is then delegated to the helper matching CalculationMode
# (AlgebraicForm, BinaryForm or SetTheoreticForm; matched case insensitively).
# A mode matching none of these yields undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub EuclideanDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate and process fingerprints vectors before any calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("EuclideanDistanceCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Delegate to the form specific implementation...
  return _EuclideanDistanceCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _EuclideanDistanceCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _EuclideanDistanceCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  # No matching calculation mode...
  return undef;
}
1221 | |
# Calculate Euclidean distance coefficient using algebraic form...
#
# AlgebraicForm: SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) )
#
# The sum of squared differences comes from
# _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues.
#
sub _EuclideanDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SquaredDistance = _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);

  return sqrt($SquaredDistance);
}
1232 | |
# Calculate Euclidean distance coefficient using binary form...
#
# BinaryForm: SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc )
#
# Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits for the two vectors.
#
sub _EuclideanDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($BitsInA, $BitsInB, $BitsInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  return sqrt($BitsInA + $BitsInB - 2 * $BitsInBoth);
}
1243 | |
# Calculate Euclidean distance coefficient using set theoretic form...
#
# SetTheoreticForm: SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
#
# The individual sums come from _GetSumOfFingerprintsOrderedValues and the sum
# of minimums from _GetSumOfMinimumOfFingerprintsOrderdedValues.
#
sub _EuclideanDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  return sqrt($TotalA + $TotalB - 2 * $TotalOfMinimums);
}
1256 | |
# Calculate Jaccard similarity coefficient between two fingerprint vectors.
#
# Jaccard similarity is obtained by forwarding the two vectors, the optional
# calculation mode and the optional skip-values-check flag to
# TanimotoSimilarityCoefficient and returning whatever it returns. Optional
# arguments that were not supplied are forwarded as undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub JaccardSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return TanimotoSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1266 | |
# Calculate Tanimoto similarity coefficient between two fingerprint vectors.
#
# CalculationMode defaults to AlgebraicForm and SkipValuesCheck defaults to 0
# when they are not defined. The vectors are first handed over to
# _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation; the actual
# calculation is then delegated to the helper matching CalculationMode
# (AlgebraicForm, BinaryForm or SetTheoreticForm; matched case insensitively).
# A mode matching none of these yields undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TanimotoSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate and process fingerprints vectors before any calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("TanimotoSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Delegate to the form specific implementation...
  return _TanimotoSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _TanimotoSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _TanimotoSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  # No matching calculation mode...
  return undef;
}
1295 | |
# Calculate Tanimoto similarity coefficient using algebraic form...
#
# AlgebraicForm: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _TanimotoSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $Products = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Divisor = $SquaresA + $SquaresB - $Products;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Products / $Divisor;
}
1311 | |
# Calculate Tanimoto similarity coefficient using binary form...
#
# BinaryForm: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
#
# Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits for the two vectors. Returns 0 when the
# denominator evaluates to zero.
#
sub _TanimotoSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($BitsInA, $BitsInB, $BitsInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $BitsInA + $BitsInB - $BitsInBoth;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $BitsInBoth / $Divisor;
}
1325 | |
# Calculate Tanimoto similarity coefficient using set theoretic form...
#
# SetTheoreticForm: SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _TanimotoSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Divisor = $TotalA + $TotalB - $TotalOfMinimums;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $TotalOfMinimums / $Divisor;
}
1341 | |
1342 | |
# Calculate Soergel distance coefficient between two fingerprint vectors.
#
# CalculationMode defaults to AlgebraicForm and SkipValuesCheck defaults to 0
# when they are not defined. The vectors are first handed over to
# _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation; the actual
# calculation is then delegated to the helper matching CalculationMode
# (AlgebraicForm, BinaryForm or SetTheoreticForm; matched case insensitively).
# A mode matching none of these yields undef.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SoergelDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate and process fingerprints vectors before any calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("SoergelDistanceCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Delegate to the form specific implementation...
  return _SoergelDistanceCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _SoergelDistanceCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _SoergelDistanceCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  # No matching calculation mode...
  return undef;
}
1371 | |
# Calculate Soergel distance coefficient using algebraic form...
#
# AlgebraicForm: SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _SoergelDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $TotalOfAbsDifferences = _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);
  my $TotalOfMaximums = _GetSumOfMaximumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  # Avoid division by zero...
  return 0 unless $TotalOfMaximums;

  return $TotalOfAbsDifferences / $TotalOfMaximums;
}
1386 | |
# Calculate Soergel distance coefficient using binary form...
#
# BinaryForm: 1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
#
# Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits for the two vectors. Returns 0 when the
# denominator evaluates to zero.
#
sub _SoergelDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($BitsInA, $BitsInB, $BitsInBoth) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Dividend = $BitsInA + $BitsInB - 2 * $BitsInBoth;
  my $Divisor = $BitsInA + $BitsInB - $BitsInBoth;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1400 | |
# Calculate Soergel distance coefficient using set theoretic form...
#
# SetTheoreticForm: ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) /
#                   ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _SoergelDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Dividend = $TotalA + $TotalB - 2 * $TotalOfMinimums;
  my $Divisor = $TotalA + $TotalB - $TotalOfMinimums;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1416 | |
# Validate and process fingerprints vectors for similarity calculations...
#
# CalculationMode defaults to AlgebraicForm and SkipValuesCheck defaults to 0
# when they are not defined. Validation is performed only when SkipValuesCheck
# is false; processing is always performed.
#
sub _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  _ValidateFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $VectorA, $VectorB, $Mode) unless $SkipCheck;

  return _ProcessFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $VectorA, $VectorB, $Mode);
}
1430 | |
# Make sure fingerprint vectors are good for performing similarity/distance calculation...
#
# Croaks, with ErrorMsg embedded in the message, when any of the following holds:
#
#   . Either object is not a fingerprints vector
#   . Types of the two vectors are different
#   . CalculationMode is not AlgebraicForm, BinaryForm or SetTheoreticForm
#   . Either vector contains no values
#   . OrderedNumericalValues: number of values in the two vectors is different
#   . NumericalValues: a vector has no value IDs, or its number of value IDs is
#     different from its number of values
#   . AlphaNumericalValues: either vector has value IDs
#   . Type of the vectors is none of the three types listed above
#
sub _ValidateFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode) = @_;

  # Make sure both are fingerprint vectors..
  if (!(IsFingerprintsVector($FingerprintsVectorA) && IsFingerprintsVector($FingerprintsVectorB))) {
    croak "Error: ${ClassName}->${ErrorMsg}: Both objects must be fingerprint vectors...";
  }

  # Check types...
  if ($FingerprintsVectorA->{Type} ne $FingerprintsVectorB->{Type}) {
    croak "Error: ${ClassName}->${ErrorMsg}: Type of first fingerprint vector, $FingerprintsVectorA->{Type}, must be same as type of second fingerprint vector, $FingerprintsVectorB->{Type}...";
  }

  # Check calculation mode...
  if ($CalculationMode !~ /^(AlgebraicForm|BinaryForm|SetTheoreticForm)$/i) {
    croak "Error: ${ClassName}->${ErrorMsg}: Specified similarity calculation mode, $CalculationMode, is not valid. Supported values: AlgebraicForm, BinaryForm, and SetTheoreticForm...";
  }

  # Check values and value IDs...
  my($Na, $Nb, $NIDa, $NIDb);
  $Na = $FingerprintsVectorA->GetNumOfValues();
  $Nb = $FingerprintsVectorB->GetNumOfValues();
  $NIDa = $FingerprintsVectorA->GetNumOfValueIDs();
  $NIDb = $FingerprintsVectorB->GetNumOfValueIDs();

  if ($Na == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
  }
  if ($Nb == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in second fingerprint vector, $Nb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
  }

  # Both types are identical strings at this point; the type of the first vector
  # drives the type specific checks...
  if ($FingerprintsVectorA->{Type} =~ /^OrderedNumericalValues$/i) {
    if ($Na != $Nb) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be equal to number of values, $Nb, in second fingerprint vector for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^NumericalValues$/i) {
    if ($NIDa == 0) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb == 0) {
      # This check is about the second vector; report it as such...
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }

    if ($NIDa != $Na) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be equal to its number of values, $Na, for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb != $Nb) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be equal to its number of values, $Nb, for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^AlphaNumericalValues$/i) {
    if ($NIDa || $NIDb) {
      croak "Error: ${ClassName}->${ErrorMsg}: ValueIDs cann't be specified for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  else {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $FingerprintsVectorA->{Type} is not valid...";
  }
}
1492 | |
# Prepare a pair of fingerprints vectors for similarity calculation. Any cached
# OrderedValuesRef and BitVector entries are reset, ordered lists of values are
# generated by the helper matching the type of the first vector and, for BinaryForm
# calculation mode, the ordered values are further transformed into bit vectors...
#
# ErrorMsg is only used to label the message of the exception raised for an
# unrecognized vector type...
#
sub _ProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode) = @_;

  # Clear out results from any earlier processing...
  for my $CachedKey (qw(OrderedValuesRef BitVector)) {
    $FingerprintsVectorA->{$CachedKey} = undef;
    $FingerprintsVectorB->{$CachedKey} = undef;
  }

  # Pick the processing function using type of the first vector...
  my($VectorType, $ProcessingFunctionRef);

  $VectorType = $FingerprintsVectorA->{Type};
  $ProcessingFunctionRef =
      ($VectorType =~ /^OrderedNumericalValues$/i) ? \&_ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation
    : ($VectorType =~ /^NumericalValues$/i) ? \&_ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation
    : ($VectorType =~ /^AlphaNumericalValues$/i) ? \&_ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation
    : undef;

  if (!defined $ProcessingFunctionRef) {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $FingerprintsVectorA->{Type} is not valid...";
  }
  $ProcessingFunctionRef->($FingerprintsVectorA, $FingerprintsVectorB);

  # Binary form works off bit vectors derived from the ordered values...
  if ($CalculationMode =~ /^BinaryForm$/i) {
    _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation($FingerprintsVectorA, $FingerprintsVectorB);
  }
}
1518 | |
# Process fingerprints vectors with ordered numerical values for similarity
# calculations. The values are already ordered, so OrderedValuesRef of each vector
# is simply pointed at its own Values array; no copy is made...
#
# Returns the reference stored for the second vector...
#
sub _ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  for my $FingerprintsVector ($FingerprintsVectorA, $FingerprintsVectorB) {
    $FingerprintsVector->{OrderedValuesRef} = \@{$FingerprintsVector->{Values}};
  }

  return $FingerprintsVectorB->{OrderedValuesRef};
}
1527 | |
# Process fingerprints vectors with numerical values for similarity calculations.
# Within each vector, values sharing a value ID are added up. Both vectors are then
# laid out over the sorted union of value IDs, using 0 for any value ID missing
# from a vector, and the resulting lists are stored as OrderedValuesRef...
#
sub _ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  # Value ID to summed value map for each vector along with all value IDs seen...
  my(%ValueByIDInA, %ValueByIDInB, %SeenValueIDs);

  for my $VectorAndMap ([$FingerprintsVectorA, \%ValueByIDInA], [$FingerprintsVectorB, \%ValueByIDInB]) {
    my($FingerprintsVector, $ValueByIDRef) = @{$VectorAndMap};

    for my $Position (0 .. $#{$FingerprintsVector->{ValueIDs}}) {
      my $ID = $FingerprintsVector->{ValueIDs}[$Position];
      my $CurrentValue = $FingerprintsVector->{Values}[$Position];

      if (exists $ValueByIDRef->{$ID}) {
        $ValueByIDRef->{$ID} += $CurrentValue;
      }
      else {
        # First occurrence is stored as is...
        $ValueByIDRef->{$ID} = $CurrentValue;
      }
      $SeenValueIDs{$ID} = 1;
    }
  }

  # Line up values of both vectors against sorted value IDs...
  my(@OrderedValuesA, @OrderedValuesB);

  for my $ID (sort keys %SeenValueIDs) {
    push @OrderedValuesA, (exists $ValueByIDInA{$ID} ? $ValueByIDInA{$ID} : 0);
    push @OrderedValuesB, (exists $ValueByIDInB{$ID} ? $ValueByIDInB{$ID} : 0);
  }

  $FingerprintsVectorA->{OrderedValuesRef} = \@OrderedValuesA;
  $FingerprintsVectorB->{OrderedValuesRef} = \@OrderedValuesB;
}
1585 | |
# Process fingerprints vectors with alphanumerical values for similarity calculations.
# Occurrences of each distinct value are counted within each vector. Both vectors
# are then laid out over the sorted union of distinct values, using a count of 0 for
# any value missing from a vector, and the counts are stored as OrderedValuesRef...
#
sub _ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  # Occurrence count of values for each vector along with all values seen...
  my(%CountByValueInA, %CountByValueInB, %SeenValues);

  for my $VectorAndMap ([$FingerprintsVectorA, \%CountByValueInA], [$FingerprintsVectorB, \%CountByValueInB]) {
    my($FingerprintsVector, $CountByValueRef) = @{$VectorAndMap};

    for my $Item (@{$FingerprintsVector->{Values}}) {
      $CountByValueRef->{$Item}++;
      $SeenValues{$Item} = 1;
    }
  }

  # Line up counts of both vectors against sorted distinct values. A stored count
  # is always at least 1, so a false lookup means the value is absent...
  my(@SortedValues, @OrderedValuesA, @OrderedValuesB);

  @SortedValues = sort keys %SeenValues;
  @OrderedValuesA = map { $CountByValueInA{$_} || 0 } @SortedValues;
  @OrderedValuesB = map { $CountByValueInB{$_} || 0 } @SortedValues;

  $FingerprintsVectorA->{OrderedValuesRef} = \@OrderedValuesA;
  $FingerprintsVectorB->{OrderedValuesRef} = \@OrderedValuesB;
}
1640 | |
# Transform final ordered values arrays into BitVectors for similarity calculation.
# A bit vector, sized by the number of ordered values in the first vector, is created
# for each fingerprints vector and stored under its BitVector key; a bit is set at
# every position where the corresponding ordered value is true (non-zero)...
#
sub _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($Index, $Size, $SkipCheck);

  # Create bit vectors...
  $Size = scalar @{$FingerprintsVectorA->{OrderedValuesRef}};

  # Explicit class method calls: indirect object syntax is resolved heuristically
  # by the parser and is best avoided in a package defining its own new method...
  $FingerprintsVectorA->{BitVector} = BitVector->new($Size);
  $FingerprintsVectorB->{BitVector} = BitVector->new($Size);

  # Set bits...
  # NOTE(review): second argument to SetBit is presumably a flag to bypass index
  # validation; confirm against BitVector.pm.
  $SkipCheck = 1;
  for $Index (0 .. ($Size - 1)) {
    if ($FingerprintsVectorA->{OrderedValuesRef}[$Index]) {
      $FingerprintsVectorA->{BitVector}->SetBit($Index, $SkipCheck);
    }
    if ($FingerprintsVectorB->{OrderedValuesRef}[$Index]) {
      $FingerprintsVectorB->{BitVector}->SetBit($Index, $SkipCheck);
    }
  }
}
1664 | |
# Return sum of ordered vector values by handing the OrderedValuesRef array
# reference of the vector over to StatisticsUtil::Sum...
#
sub _GetSumOfFingerprintsOrderedValues {
  my($FingerprintVector) = @_;
  my($OrderedValuesRef);

  $OrderedValuesRef = $FingerprintVector->{OrderedValuesRef};

  return StatisticsUtil::Sum($OrderedValuesRef);
}
1672 | |
# Return sum of squared ordered vector values by handing the OrderedValuesRef array
# reference of the vector over to StatisticsUtil::SumOfSquares...
#
sub _GetSumOfSquaresOfFingerprintsOrderedValues {
  my($FingerprintVector) = @_;
  my($OrderedValuesRef);

  $OrderedValuesRef = $FingerprintVector->{OrderedValuesRef};

  return StatisticsUtil::SumOfSquares($OrderedValuesRef);
}
1680 | |
# Return sum of products of corresponding ordered vector values. The number of
# positions visited is set by the ordered values of the first vector...
#
sub _GetSumOfProductOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $LastPosition = $#{$FingerprintsVectorA->{OrderedValuesRef}};
  my $Total = 0;

  for my $Position (0 .. $LastPosition) {
    my $ValueA = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $ValueB = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $Total += $ValueA * $ValueB;
  }

  return $Total;
}
1693 | |
# Return sum of absolute differences between corresponding ordered vector values.
# The number of positions visited is set by the ordered values of the first vector...
#
sub _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $LastPosition = $#{$FingerprintsVectorA->{OrderedValuesRef}};
  my $Total = 0;

  for my $Position (0 .. $LastPosition) {
    my $ValueA = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $ValueB = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $Total += abs($ValueA - $ValueB);
  }

  return $Total;
}
1706 | |
# Return sum of squared differences between corresponding ordered vector values.
# The number of positions visited is set by the ordered values of the first vector...
#
sub _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $LastPosition = $#{$FingerprintsVectorA->{OrderedValuesRef}};
  my $Total = 0;

  for my $Position (0 .. $LastPosition) {
    my $ValueA = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $ValueB = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $Total += ($ValueA - $ValueB)**2;
  }

  return $Total;
}
1719 | |
# Return sum of MathUtil::min applied to corresponding ordered vector values. The
# number of positions visited is set by the ordered values of the first vector...
#
sub _GetSumOfMinimumOfFingerprintsOrderdedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $LastPosition = $#{$FingerprintsVectorA->{OrderedValuesRef}};
  my $Total = 0;

  for my $Position (0 .. $LastPosition) {
    my $ValueA = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $ValueB = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $Total += MathUtil::min($ValueA, $ValueB);
  }

  return $Total;
}
1732 | |
# Return sum of MathUtil::max applied to corresponding ordered vector values. The
# number of positions visited is set by the ordered values of the first vector...
#
sub _GetSumOfMaximumOfFingerprintsOrderdedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $LastPosition = $#{$FingerprintsVectorA->{OrderedValuesRef}};
  my $Total = 0;

  for my $Position (0 .. $LastPosition) {
    my $ValueA = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $ValueB = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $Total += MathUtil::max($ValueA, $ValueB);
  }

  return $Total;
}
1745 | |
# Get number of set bits for BinaryForm calculation using the bit vectors stored
# under BitVector key of vectors A and B. Returns a list of three counts: bits set
# in A (Na), bits set in B (Nb) and bits set in both A and B (Nc)...
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $BitVectorA = $FingerprintsVectorA->{BitVector};
  my $BitVectorB = $FingerprintsVectorB->{BitVector};

  # Bits set to "1" in A and in B individually...
  my $NumOfSetBitsInA = $BitVectorA->GetNumOfSetBits();
  my $NumOfSetBitsInB = $BitVectorB->GetNumOfSetBits();

  # Bits set to "1" in both A and B: count set bits in result of bitwise AND...
  my $CommonBitVector = $BitVectorA & $BitVectorB;
  my $NumOfCommonSetBits = $CommonBitVector->GetNumOfSetBits();

  return ($NumOfSetBitsInA, $NumOfSetBitsInB, $NumOfCommonSetBits);
}
1768 | |
# Return a list of names of supported distance coefficients as held in the
# file-level DistanceCoefficients list...
#
sub GetSupportedDistanceCoefficients () {
  my(@SupportedCoefficients);

  @SupportedCoefficients = @DistanceCoefficients;

  return @SupportedCoefficients;
}
1775 | |
# Return a list of names of supported similarity coefficients as held in the
# file-level SimilarityCoefficients list...
#
sub GetSupportedSimilarityCoefficients () {
  my(@SupportedCoefficients);

  @SupportedCoefficients = @SimilarityCoefficients;

  return @SupportedCoefficients;
}
1782 | |
# Return a sorted list of names of supported distance and similarity coefficients.
# In scalar context, the number of supported coefficients is returned...
#
sub GetSupportedDistanceAndSimilarityCoefficients () {
  my(@DistanceAndSimilarityCoefficients);

  # Sort into an array before returning it: behavior of sort in scalar context is
  # undefined, whereas an array gives its size...
  @DistanceAndSimilarityCoefficients = sort (@DistanceCoefficients, @SimilarityCoefficients);

  return @DistanceAndSimilarityCoefficients;
}
1794 | |
# Is it a fingerprints vector object? Public entry point which hands the specified
# object over to _IsFingerprintsVector and returns its result...
#
sub IsFingerprintsVector ($) {
  my($Candidate) = @_;
  my($Status);

  $Status = _IsFingerprintsVector($Candidate);

  return $Status;
}
1801 | |
# Is it a fingerprints vector object? Returns 1 for a blessed reference which is an
# instance of the class named by ClassName or of a class derived from it; otherwise,
# returns 0...
#
sub _IsFingerprintsVector {
  my($Candidate) = @_;

  # Anything not blessed can't be an object of any class...
  if (!Scalar::Util::blessed($Candidate)) {
    return 0;
  }

  return $Candidate->isa($ClassName) ? 1 : 0;
}
1808 | |
# Return a string containing vector type, number of values, values and value IDs in
# the following format:
#
#   Type: <Type>; NumOfValues: <N>[; NumOfNonZeroValues: <N>]; Values: <...>;
#   NumOfValueIDs: <N>; ValueIDs: <...>
#
# NumOfNonZeroValues is only reported for OrderedNumericalValues and NumericalValues
# types. An empty list is shown as None. A list containing more than 500 items is cut
# down to its first 500 items followed by " ..." and its label is changed to indicate
# truncation. Values and value IDs are truncated and labeled independently, as the
# number of values and value IDs may differ...
#
sub StringifyFingerprintsVector {
  my($This) = @_;
  my($FingerprintsVectorString, $MaxValuesToStringify, $StringifyListRef);

  $MaxValuesToStringify = 500;

  # Format a labeled list, truncating it only when it doesn't fit within the limit...
  $StringifyListRef = sub {
    my($Label, $ListRef, $NumOfItems) = @_;

    if ($NumOfItems <= $MaxValuesToStringify) {
      my($ItemsString);
      $ItemsString = $NumOfItems ? join(' ', @{$ListRef}) : 'None';
      return "$Label: <$ItemsString>";
    }

    my(@Items);
    @Items = @{$ListRef}[0 .. ($MaxValuesToStringify - 1)];
    return "$Label (Truncated after $MaxValuesToStringify): <" . join(' ', @Items) . " ...>";
  };

  # Set type and number of values...
  my($NumOfValues, $NumOfValueIDs);

  $NumOfValues = $This->GetNumOfValues();
  $NumOfValueIDs = $This->GetNumOfValueIDs();

  $FingerprintsVectorString = "Type: $This->{Type}; NumOfValues: $NumOfValues";
  if ($This->{Type} =~ /^(OrderedNumericalValues|NumericalValues)$/i) {
    my($NumOfNonZeroValues);
    $NumOfNonZeroValues = $This->GetNumOfNonZeroValues();
    $FingerprintsVectorString .= "; NumOfNonZeroValues: $NumOfNonZeroValues";
  }

  # Append values and value IDs...
  $FingerprintsVectorString .= "; " . $StringifyListRef->('Values', $This->{Values}, $NumOfValues);
  $FingerprintsVectorString .= "; NumOfValueIDs: $NumOfValueIDs";
  $FingerprintsVectorString .= "; " . $StringifyListRef->('ValueIDs', $This->{ValueIDs}, $NumOfValueIDs);

  return $FingerprintsVectorString;
}
1865 | |
1866 1; | |
1867 | |
1868 __END__ | |
1869 | |
1870 =head1 NAME | |
1871 | |
1872 FingerprintsVector | |
1873 | |
1874 =head1 SYNOPSIS | |
1875 | |
1876 use Fingerprints::FingerprintsVector; | |
1877 | |
1878 use Fingerprints::FingerprintsVector qw(:all); | |
1879 | |
1880 =head1 DESCRIPTION | |
1881 | |
1882 B<FingerprintsVector> class provides the following methods: | |
1883 | |
1884 new, AddValueIDs, AddValues, CityBlockDistanceCoefficient, | |
1885 CosineSimilarityCoefficient, CzekanowskiSimilarityCoefficient, | |
1886 DiceSimilarityCoefficient, EuclideanDistanceCoefficient, GetDescription, | |
1887 GetFingerprintsVectorString, GetID, GetIDsAndValuesPairsString, | |
1888 GetIDsAndValuesString, GetNumOfNonZeroValues, GetNumOfValueIDs, GetNumOfValues, | |
1889 GetSupportedDistanceAndSimilarityCoefficients, GetSupportedDistanceCoefficients, | |
1890 GetSupportedSimilarityCoefficients, GetType, GetValue, GetValueID, GetValueIDs, | |
1891 GetValueIDsString, GetValues, GetValuesAndIDsPairsString, GetValuesAndIDsString, | |
1892 GetValuesString, GetVectorType, HammingDistanceCoefficient, IsFingerprintsVector, | |
1893 JaccardSimilarityCoefficient, ManhattanDistanceCoefficient, | |
1894 NewFromIDsAndValuesPairsString, NewFromIDsAndValuesString, | |
1895 NewFromValuesAndIDsPairsString, NewFromValuesAndIDsString, NewFromValuesString, | |
1896 OchiaiSimilarityCoefficient, SetDescription, SetID, SetType, SetValue, SetValueID, | |
1897 SetValueIDs, SetValues, SetVectorType, SoergelDistanceCoefficient, | |
1898 SorensonSimilarityCoefficient, StringifyFingerprintsVector, | |
1899 TanimotoSimilarityCoefficient | |
1900 | |
1901 The methods available to create fingerprints vector from strings and to calculate similarity | |
1902 and distance coefficients between two vectors can also be invoked as class functions. | |
1903 | |
1904 B<FingerprintsVector> class provides support to perform comparison between vectors | |
1905 containing three different types of values: | |
1906 | |
1907 Type I: OrderedNumericalValues | |
1908 | |
1909 o Size of two vectors are same | |
1910 o Vectors contain real values in a specific order. For example: MACCS keys | |
1911 count, Topological pharmacophore atom pairs and so on. | |
1912 | |
1913 Type II: UnorderedNumericalValues (specified using NumericalValues as vector type) | |
1914 | |
1915 o Size of two vectors might not be same | |
1916 o Vectors contain unordered real value identified by value IDs. For example: | |
1917 Topological atom pairs, Topological atom torsions and so on | |
1918 | |
1919 Type III: AlphaNumericalValues | |
1920 | |
1921 o Size of two vectors might not be same | |
1922 o Vectors contain unordered alphanumerical values. For example: Extended | |
1923 connectivity fingerprints, atom neighborhood fingerprints. | |
1924 | |
1925 Before performing similarity or distance calculations between vectors containing UnorderedNumericalValues | |
1926 or AlphaNumericalValues, the vectors are transformed into vectors containing unique OrderedNumericalValues | |
1927 using value IDs for UnorderedNumericalValues and values itself for AlphaNumericalValues. | |
1928 | |
1929 Three forms of similarity and distance calculation between two vectors, specified using B<CalculationMode> | |
1930 option, are supported: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. | |
1931 | |
1932 For I<BinaryForm>, the ordered list of processed final vector values containing the value or | |
1933 count of each unique value type is simply converted into a binary vector containing 1s and 0s | |
1934 corresponding to presence or absence of values before calculating similarity or distance between | |
1935 two vectors. | |
1936 | |
1937 For two fingerprint vectors A and B of same size containing OrderedNumericalValues, let: | |
1938 | |
1939 N = Number of values in A or B | |
1940 | |
1941 Xa = Values of vector A | |
1942 Xb = Values of vector B | |
1943 | |
1944 Xai = Value of ith element in A | |
1945 Xbi = Value of ith element in B | |
1946 | |
1947 SUM = Sum of i over N values | |
1948 | |
1949 For SetTheoreticForm of calculation between two vectors, let: | |
1950 | |
1951 SetIntersectionXaXb = SUM ( MIN ( Xai, Xbi ) ) | |
1952 SetDifferenceXaXb = SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) | |
1953 | |
1954 For BinaryForm of calculation between two vectors, let: | |
1955 | |
1956 Na = Number of bits set to "1" in A = SUM ( Xai ) | |
1957 Nb = Number of bits set to "1" in B = SUM ( Xbi ) | |
1958 Nc = Number of bits set to "1" in both A and B = SUM ( Xai * Xbi ) | |
1959 Nd = Number of bits set to "0" in both A and B | |
1960 = SUM ( 1 - Xai - Xbi + Xai * Xbi) | |
1961 | |
1962 N = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd | |
1963 | |
1964 Additionally, for BinaryForm various values also correspond to: | |
1965 | |
1966 Na = | Xa | | |
1967 Nb = | Xb | | |
1968 Nc = | SetIntersectionXaXb | | |
1969 Nd = N - | SetDifferenceXaXb | | |
1970 | |
1971 | SetDifferenceXaXb | = N - Nd = Na + Nb - Nc + Nd - Nd = Na + Nb - Nc | |
1972 = | Xa | + | Xb | - | SetIntersectionXaXb | | |
1973 | |
1974 Various similarity and distance coefficients [ Ref 40, Ref 62, Ref 64 ] for a pair of vectors A and B | |
1975 in I<AlgebraicForm, BinaryForm and SetTheoreticForm> are defined as follows: | |
1976 | |
1977 B<CityBlockDistance>: ( same as HammingDistance and ManhattanDistance) | |
1978 | |
1979 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) ) | |
1980 | |
1981 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
1982 | |
1983 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
1984 | |
1985 B<CosineSimilarity>: ( same as OchiaiSimilarityCoefficient) | |
1986 | |
1987 I<AlgebraicForm>: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) ) | |
1988 | |
1989 I<BinaryForm>: Nc / SQRT ( Na * Nb) | |
1990 | |
1991 I<SetTheoreticForm>: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) ) | |
1992 | |
1993 B<CzekanowskiSimilarity>: ( same as DiceSimilarity and SorensonSimilarity) | |
1994 | |
1995 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
1996 | |
1997 I<BinaryForm>: 2 * Nc / ( Na + Nb ) | |
1998 | |
1999 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
2000 | |
2001 B<DiceSimilarity>: ( same as CzekanowskiSimilarity and SorensonSimilarity) | |
2002 | |
2003 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
2004 | |
2005 I<BinaryForm>: 2 * Nc / ( Na + Nb ) | |
2006 | |
2007 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
2008 | |
2009 B<EuclideanDistance>: | |
2010 | |
2011 I<AlgebraicForm>: SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) ) | |
2012 | |
2013 I<BinaryForm>: SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc ) | |
2014 | |
2015 I<SetTheoreticForm>: SQRT ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) = SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) | |
2016 | |
2017 B<HammingDistance>: ( same as CityBlockDistance and ManhattanDistance) | |
2018 | |
2019 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) ) | |
2020 | |
2021 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
2022 | |
2023 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
2024 | |
2025 B<JaccardSimilarity>: ( same as TanimotoSimilarity) | |
2026 | |
2027 I<AlgebraicForm>: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) ) | |
2028 | |
2029 I<BinaryForm>: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) | |
2030 | |
2031 I<SetTheoreticForm>: | SetIntersectionXaXb | / | SetDifferenceXaXb | = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
2032 | |
2033 B<ManhattanDistance>: ( same as CityBlockDistance and HammingDistance) | |
2034 | |
2035 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) ) | |
2036 | |
2037 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc | |
2038 | |
2039 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) | |
2040 | |
2041 B<OchiaiSimilarity>: ( same as CosineSimilarity) | |
2042 | |
2043 I<AlgebraicForm>: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) ) | |
2044 | |
2045 I<BinaryForm>: Nc / SQRT ( Na * Nb) | |
2046 | |
2047 I<SetTheoreticForm>: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) ) | |
2048 | |
2049 B<SorensonSimilarity>: ( same as CzekanowskiSimilarity and DiceSimilarity) | |
2050 | |
2051 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) ) | |
2052 | |
2053 I<BinaryForm>: 2 * Nc / ( Na + Nb ) | |
2054 | |
2055 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) ) | |
2056 | |
2057 B<SoergelDistance>: | |
2058 | |
2059 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) ) | |
2060 | |
2061 I<BinaryForm>: 1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc ) | |
2062 | |
2063 I<SetTheoreticForm>: ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) / | SetDifferenceXaXb | = ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
2064 | |
2065 B<TanimotoSimilarity>: ( same as JaccardSimilarity) | |
2066 | |
2067 I<AlgebraicForm>: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) ) | |
2068 | |
2069 I<BinaryForm>: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) | |
2070 | |
2071 I<SetTheoreticForm>: | SetIntersectionXaXb | / | SetDifferenceXaXb | = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) ) | |
2072 | |
2073 =head2 METHODS | |
2074 | |
2075 =over 4 | |
2076 | |
2077 =item B<new> | |
2078 | |
2079 $FPVector = new Fingerprints::FingerprintsVector(%NamesAndValues); | |
2080 | |
2081 Using specified I<FingerprintsVector> property names and values hash, B<new> method creates | |
2082 a new object and returns a reference to newly created B<FingerprintsVector> | |
2083 object. By default, the following properties are initialized: | |
2084 | |
2085 Type = '' | |
2086 @{Values} = () | |
2087 @{ValueIDs} = () | |
2088 | |
2089 Examples: | |
2090 | |
2091 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'OrderedNumericalValues', | |
2092 'Values' => [1, 2, 3, 4]); | |
2093 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'NumericalValues', | |
2094 'Values' => [10, 22, 33, 44], | |
2095 'ValueIDs' => ['ID1', 'ID2', 'ID3', 'ID4']); | |
2096 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'AlphaNumericalValues', | |
2097 'Values' => ['a1', 2, 'a3', 4]); | |
2098 | |
2099 =item B<AddValueIDs> | |
2100 | |
2101 $FingerprintsVector->AddValueIDs($ValueIDsRef); | |
2102 $FingerprintsVector->AddValueIDs(@ValueIDs); | |
2103 | |
2104 Adds specified I<ValueIDs> to I<FingerprintsVector> and returns I<FingerprintsVector>. | |
2105 | |
2106 =item B<AddValues> | |
2107 | |
2108 $FingerprintsVector->AddValues($ValuesRef); | |
2109 $FingerprintsVector->AddValues(@Values); | |
2110 $FingerprintsVector->AddValues($Vector); | |
2111 | |
2112 Adds specified I<Values> to I<FingerprintsVector> and returns I<FingerprintsVector>. | |
2113 | |
2114 =item B<CityBlockDistanceCoefficient> | |
2115 | |
2116 $Value = $FingerprintsVector->CityBlockDistanceCoefficient( | |
2117 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2118 $Value = Fingerprints::FingerprintsVector::CityBlockDistanceCoefficient( | |
2119 $FingerprintsVectorA, $FingerprintVectorB, | |
2120 [$CalculationMode, $SkipValuesCheck]); | |
2121 | |
2122 Returns value of I<CityBlock> distance coefficient between two I<FingerprintsVectors> using | |
2123 optionally specified I<CalculationMode> and optional checking of vector values. | |
2124 | |
2125 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2126 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2127 | |
2128 =item B<CosineSimilarityCoefficient> | |
2129 | |
2130 $Value = $FingerprintsVector->CosineSimilarityCoefficient( | |
2131 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2132 $Value = Fingerprints::FingerprintsVector::CosineSimilarityCoefficient( | |
2133 $FingerprintsVectorA, $FingerprintVectorB, | |
2134 [$CalculationMode, $SkipValuesCheck]); | |
2135 | |
2136 Returns value of I<Cosine> similarity coefficient between two I<FingerprintsVectors> using | |
2137 optionally specified I<CalculationMode> and optional checking of vector values. | |
2138 | |
2139 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2140 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2141 | |
2142 =item B<CzekanowskiSimilarityCoefficient> | |
2143 | |
2144 $Value = $FingerprintsVector->CzekanowskiSimilarityCoefficient( | |
2145 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2146 $Value = Fingerprints::FingerprintsVector::CzekanowskiSimilarityCoefficient( | |
2147 $FingerprintsVectorA, $FingerprintVectorB, | |
2148 [$CalculationMode, $SkipValuesCheck]); | |
2149 | |
2150 Returns value of I<Czekanowski> similarity coefficient between two I<FingerprintsVectors> using | |
2151 optionally specified I<CalculationMode> and optional checking of vector values. | |
2152 | |
2153 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2154 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2155 | |
2156 =item B<DiceSimilarityCoefficient> | |
2157 | |
2158 $Value = $FingerprintsVector->DiceSimilarityCoefficient( | |
2159 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2160 $Value = Fingerprints::FingerprintsVector::DiceSimilarityCoefficient( | |
2161 $FingerprintsVectorA, $FingerprintVectorB, | |
2162 [$CalculationMode, $SkipValuesCheck]); | |
2163 | |
2164 Returns value of I<Dice> similarity coefficient between two I<FingerprintsVectors> using | |
2165 optionally specified I<CalculationMode> and optional checking of vector values. | |
2166 | |
2167 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2168 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2169 | |
2170 =item B<EuclideanDistanceCoefficient> | |
2171 | |
2172 $Value = $FingerprintsVector->EuclideanDistanceCoefficient( | |
2173 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2174 $Value = Fingerprints::FingerprintsVector::EuclideanDistanceCoefficient( | |
2175 $FingerprintsVectorA, $FingerprintVectorB, | |
2176 [$CalculationMode, $SkipValuesCheck]); | |
2177 | |
2178 Returns value of I<Euclidean> distance coefficient between two I<FingerprintsVectors> using | |
2179 optionally specified I<CalculationMode> and optional checking of vector values. | |
2180 | |
2181 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2182 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2183 | |
2184 =item B<GetDescription> | |
2185 | |
2186 $Description = $FingerprintsVector->GetDescription(); | |
2187 | |
2188 Returns a string containing description of fingerprints vector. | |
2189 | |
2190 =item B<GetFingerprintsVectorString> | |
2191 | |
2192 $FPString = $FingerprintsVector->GetFingerprintsVectorString($Format); | |
2193 | |
2194 Returns a B<FingerprintsString> containing vector values and/or IDs in I<FingerprintsVector> | |
2195 corresponding to specified I<Format>. | |
2196 | |
2197 Possible I<Format> values: I<IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, | |
2198 IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, | |
2199 ValueIDsString, ValueIDs, ValuesString, or Values>. | |
2200 | |
2201 =item B<GetID> | |
2202 | |
2203 $ID = $FingerprintsVector->GetID(); | |
2204 | |
2205 Returns I<ID> of I<FingerprintsVector>. | |
2206 | |
2207 =item B<GetVectorType> | |
2208 | |
2209 $VectorType = $FingerprintsVector->GetVectorType(); | |
2210 | |
2211 Returns I<VectorType> of I<FingerprintsVector>. | |
2212 | |
2213 =item B<GetIDsAndValuesPairsString> | |
2214 | |
2215 $IDsValuesPairsString = $FingerprintsVector->GetIDsAndValuesPairsString(); | |
2216 | |
2217 Returns I<FingerprintsVector> value IDs and values as space delimited ID/value pair | |
2218 string. | |
2219 | |
2220 =item B<GetIDsAndValuesString> | |
2221 | |
2222 $IDsValuesString = $FingerprintsVector->GetIDsAndValuesString(); | |
2223 | |
2224 Returns I<FingerprintsVector> value IDs and values as string containing space delimited IDs followed by | |
2225 values with semicolon as IDs and values delimiter. | |
2226 | |
2227 =item B<GetNumOfNonZeroValues> | |
2228 | |
2229 $NumOfNonZeroValues = $FingerprintsVector->GetNumOfNonZeroValues(); | |
2230 | |
2231 Returns number of non-zero values in I<FingerprintsVector>. | |
2232 | |
2233 =item B<GetNumOfValueIDs> | |
2234 | |
2235 $NumOfValueIDs = $FingerprintsVector->GetNumOfValueIDs(); | |
2236 | |
2237 Returns number of value IDs in I<FingerprintsVector>. | |
2238 | |
2239 =item B<GetNumOfValues> | |
2240 | |
2241 $NumOfValues = $FingerprintsVector->GetNumOfValues(); | |
2242 | |
2243 Returns number of values in I<FingerprintsVector>. | |
2244 | |
2245 =item B<GetSupportedDistanceAndSimilarityCoefficients> | |
2246 | |
2247 @SupportedDistanceAndSimilarityCoefficientsReturn = | |
2248 Fingerprints::FingerprintsVector::GetSupportedDistanceAndSimilarityCoefficients(); | |
2249 | |
2250 Returns an array containing names of supported distance and similarity coefficients. | |
2251 | |
2252 =item B<GetSupportedDistanceCoefficients> | |
2253 | |
2254 @SupportedDistanceCoefficientsReturn = | |
2255 Fingerprints::FingerprintsVector::GetSupportedDistanceCoefficients(); | |
2256 | |
2257 Returns an array containing names of supported distance coefficients. | |
2258 | |
2259 =item B<GetSupportedSimilarityCoefficients> | |
2260 | |
2261 @SupportedSimilarityCoefficientsReturn = | |
2262 Fingerprints::FingerprintsVector::GetSupportedSimilarityCoefficients(); | |
2263 | |
2264 Returns an array containing names of supported similarity coefficients. | |
2265 | |
2266 =item B<GetType> | |
2267 | |
2268 $VectorType = $FingerprintsVector->GetType(); | |
2269 | |
2270 Returns I<FingerprintsVector> vector type. | |
2271 | |
2272 =item B<GetValue> | |
2273 | |
2274 $Value = $FingerprintsVector->GetValue($Index); | |
2275 | |
2276 Returns fingerprints vector B<Value> specified using I<Index> starting at 0. | |
2277 | |
2278 =item B<GetValueID> | |
2279 | |
2280 $ValueID = $FingerprintsVector->GetValueID($Index); | |
2281 | |
2282 Returns fingerprints vector B<ValueID> specified using I<Index> starting at 0. | |
2283 | |
2284 =item B<GetValueIDs> | |
2285 | |
2286 $ValueIDs = $FingerprintsVector->GetValueIDs(); | |
2287 @ValueIDs = $FingerprintsVector->GetValueIDs(); | |
2288 | |
2289 Returns fingerprints vector B<ValueIDs> as an array or reference to an array. | |
2290 | |
2291 =item B<GetValueIDsString> | |
2292 | |
2293 $ValueIDsString = $FingerprintsVector->GetValueIDsString(); | |
2294 | |
2295 Returns fingerprints vector B<ValueIDsString> with value IDs delimited by space. | |
2296 | |
2297 =item B<GetValues> | |
2298 | |
2299 $ValuesRef = $FingerprintsVector->GetValues(); | |
2300 @Values = $FingerprintsVector->GetValues(); | |
2301 | |
2302 Returns fingerprints vector B<Values> as an array or reference to an array. | |
2303 | |
2304 =item B<GetValuesAndIDsPairsString> | |
2305 | |
2306 $ValuesIDsPairsString = $FingerprintsVector->GetValuesAndIDsPairsString(); | |
2307 | |
2308 Returns I<FingerprintsVector> values and value IDs as space delimited value/ID pair | |
2309 string. | |
2310 | |
2311 =item B<GetValuesAndIDsString> | |
2312 | |
2313 $ValuesIDsString = $FingerprintsVector->GetValuesAndIDsString(); | |
2314 | |
2315 Returns I<FingerprintsVector> values and value IDs as string containing space delimited values followed by | |
2316 IDs with semicolon as values and IDs delimiter. | |
2317 | |
2318 =item B<GetValuesString> | |
2319 | |
2320 $Return = $FingerprintsVector->GetValuesString(); | |
2321 | |
2322 Returns I<FingerprintsVector> values as space delimited string. | |
2323 | |
2324 =item B<HammingDistanceCoefficient> | |
2325 | |
2326 $Value = $FingerprintsVector->HammingDistanceCoefficient( | |
2327 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2328 $Value = Fingerprints::FingerprintsVector::HammingDistanceCoefficient( | |
2329 $FingerprintsVectorA, $FingerprintVectorB, | |
2330 [$CalculationMode, $SkipValuesCheck]); | |
2331 | |
2332 Returns value of I<Hamming> distance coefficient between two I<FingerprintsVectors> using | |
2333 optionally specified I<CalculationMode> and optional checking of vector values. | |
2334 | |
2335 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2336 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2337 | |
2338 =item B<IsFingerprintsVector> | |
2339 | |
2340 $Status = Fingerprints::FingerprintsVector::IsFingerprintsVector($Object); | |
2341 | |
2342 Returns 1 or 0 based on whether I<Object> is a I<FingerprintsVector>. | |
2343 | |
2344 =item B<JaccardSimilarityCoefficient> | |
2345 | |
2346 $Value = $FingerprintsVector->JaccardSimilarityCoefficient( | |
2347 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2348 $Value = Fingerprints::FingerprintsVector::JaccardSimilarityCoefficient( | |
2349 $FingerprintsVectorA, $FingerprintVectorB, | |
2350 [$CalculationMode, $SkipValuesCheck]); | |
2351 | |
2352 Returns value of I<Jaccard> similarity coefficient between two I<FingerprintsVectors> using | |
2353 optionally specified I<CalculationMode> and optional checking of vector values. | |
2354 | |
2355 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2356 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2357 | |
2358 =item B<ManhattanDistanceCoefficient> | |
2359 | |
2360 $Value = $FingerprintsVector->ManhattanDistanceCoefficient( | |
2361 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2362 $Value = Fingerprints::FingerprintsVector::ManhattanDistanceCoefficient( | |
2363 $FingerprintsVectorA, $FingerprintVectorB, | |
2364 [$CalculationMode, $SkipValuesCheck]); | |
2365 | |
2366 Returns value of I<Manhattan> distance coefficient between two I<FingerprintsVectors> using | |
2367 optionally specified I<CalculationMode> and optional checking of vector values. | |
2368 | |
2369 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2370 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2371 | |
2372 =item B<NewFromIDsAndValuesPairsString> | |
2373 | |
2374 $FingerprintsVector = $FingerprintsVector->NewFromIDsAndValuesPairsString( | |
2375 $ValuesType, $IDsAndValuesPairsString); | |
2376 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesPairsString( | |
2377 $ValuesType, $IDsAndValuesPairsString); | |
2378 | |
2379 Creates a new I<FingerprintsVector> of I<ValuesType> using I<IDsAndValuesPairsString> containing | |
2380 space delimited value IDs and values pairs and returns new B<FingerprintsVector> object. | |
2381 Possible I<ValuesType> values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>. | |
2382 | |
2383 =item B<NewFromIDsAndValuesString> | |
2384 | |
2385 $FingerprintsVector = $FingerprintsVector->NewFromIDsAndValuesString( | |
2386 $ValuesType, $IDsAndValuesString); | |
2387 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesString( | |
2388 $ValuesType, $IDsAndValuesString); | |
2389 | |
2390 Creates a new I<FingerprintsVector> of I<ValuesType> using I<IDsAndValuesString> containing | |
2391 semicolon delimited value IDs string followed by values strings and returns new B<FingerprintsVector> | |
2392 object. The values within value IDs and values strings are delimited by spaces. Possible I<ValuesType> | |
2393 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>. | |
2394 | |
2395 =item B<NewFromValuesAndIDsPairsString> | |
2396 | |
2397 $FingerprintsVector = $FingerprintsVector->NewFromValuesAndIDsPairsString( | |
2398 $ValuesType, $ValuesAndIDsPairsString); | |
2399 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsPairsString( | |
2400 $ValuesType, $ValuesAndIDsPairsString); | |
2401 | |
2402 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesAndIDsPairsString> containing | |
2403 space delimited value and value IDs pairs and returns new B<FingerprintsVector> object. | |
2404 Possible I<ValuesType> values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>. | |
2405 | |
2406 =item B<NewFromValuesAndIDsString> | |
2407 | |
2408 $FingerprintsVector = $FingerprintsVector->NewFromValuesAndIDsString( | |
2409 $ValuesType, $ValuesAndIDsString); | |
2410 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsString( | |
2411 $ValuesType, $ValuesAndIDsString); | |
2412 | |
2413 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesAndIDsString> containing | |
2414 semicolon delimited values string followed by value IDs strings and returns new B<FingerprintsVector> | |
2415 object. The values within values and value IDs strings are delimited by spaces. Possible I<ValuesType> | |
2416 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>. | |
2417 | |
2418 =item B<NewFromValuesString> | |
2419 | |
2420 $FingerprintsVector = $FingerprintsVector->NewFromValuesString( | |
2421 $ValuesType, $ValuesString); | |
2422 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesString( | |
2423 $ValuesType, $ValuesString); | |
2424 | |
2425 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesString> containing space | |
2426 delimited values string and returns new B<FingerprintsVector> object. The values within values | |
2427 string are delimited by spaces. Possible I<ValuesType> values: I<OrderedNumericalValues, | |
2428 NumericalValues, or AlphaNumericalValues>. | |
2429 | |
2430 =item B<OchiaiSimilarityCoefficient> | |
2431 | |
2432 $Value = $FingerprintsVector->OchiaiSimilarityCoefficient( | |
2433 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2434 $Value = Fingerprints::FingerprintsVector::OchiaiSimilarityCoefficient( | |
2435 $FingerprintsVectorA, $FingerprintVectorB, | |
2436 [$CalculationMode, $SkipValuesCheck]); | |
2437 | |
2438 Returns value of I<Ochiai> similarity coefficient between two I<FingerprintsVectors> using | |
2439 optionally specified I<CalculationMode> and optional checking of vector values. | |
2440 | |
2441 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2442 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2443 | |
2444 =item B<SetDescription> | |
2445 | |
2446 $FingerprintsVector->SetDescription($Description); | |
2447 | |
2448 Sets I<Description> of fingerprints vector and returns I<FingerprintsVector>. | |
2449 | |
2450 =item B<SetID> | |
2451 | |
2452 $FingerprintsVector->SetID($ID); | |
2453 | |
2454 Sets I<ID> of fingerprints vector and returns I<FingerprintsVector>. | |
2455 | |
2456 =item B<SetVectorType> | |
2457 | |
2458 $FingerprintsVector->SetVectorType($VectorType); | |
2459 | |
2460 Sets I<VectorType> of fingerprints vector and returns I<FingerprintsVector>. | |
2461 | |
2462 =item B<SetType> | |
2463 | |
2464 $FingerprintsVector->SetType($Type); | |
2465 | |
2466 Sets I<FingerprintsVector> values I<Type> and returns I<FingerprintsVector>. Possible I<Type> | |
2467 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>. | |
2468 | |
2469 During calculation of similarity and distance coefficients between two I<FingerprintsVectors>, the | |
2470 following conditions apply to vector type, size, value and value IDs: | |
2471 | |
2472 o For OrderedNumericalValues type, both vectors must be of the same size | |
2473 and contain similar types of numerical values in the same order. | |
2474 | |
2475 o For NumericalValues type, vector value IDs for both vectors must be | |
2476 specified; however, their size and order of IDs and numerical values may | |
2477 be different. For each vector, value IDs must correspond to vector values. | |
2478 | |
2479 o For AlphaNumericalValues type, vectors may contain both numerical and | |
2480 alphanumerical values and their sizes may be different. | |
2481 | |
2482 =item B<SetValue> | |
2483 | |
2484 $FingerprintsVector->SetValue($Index, $Value, [$SkipIndexCheck]); | |
2485 | |
2486 Sets a I<FingerprintsVector> value specified by I<Index> starting at 0 to I<Value> along with | |
2487 optional index range check and returns I<FingerprintsVector>. | |
2488 | |
2489 =item B<SetValueID> | |
2490 | |
2491 $FingerprintsVector->SetValueID($Index, $ValueID, [$SkipIndexCheck]); | |
2492 | |
2493 Sets a I<FingerprintsVector> value ID specified by I<Index> starting at 0 to I<ValueID> along with | |
2494 optional index range check and returns I<FingerprintsVector>. | |
2495 | |
2496 =item B<SetValueIDs> | |
2497 | |
2498 $FingerprintsVector->SetValueIDs($ValueIDsRef); | |
2499 $FingerprintsVector->SetValueIDs(@ValueIDs); | |
2500 | |
2501 Sets I<FingerprintsVector> value IDs to specified I<ValueIDs> and returns I<FingerprintsVector>. | |
2502 | |
2503 =item B<SetValues> | |
2504 | |
2505 $FingerprintsVector->SetValues($ValuesRef); | |
2506 $FingerprintsVector->SetValues(@Values); | |
2507 | |
2508 Sets I<FingerprintsVector> values to specified I<Values> and returns I<FingerprintsVector>. | |
2509 | |
2510 =item B<SoergelDistanceCoefficient> | |
2511 | |
2512 $Value = $FingerprintsVector->SoergelDistanceCoefficient( | |
2513 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2514 $Value = Fingerprints::FingerprintsVector::SoergelDistanceCoefficient( | |
2515 $FingerprintsVectorA, $FingerprintVectorB, | |
2516 [$CalculationMode, $SkipValuesCheck]); | |
2517 | |
2518 Returns value of I<Soergel> distance coefficient between two I<FingerprintsVectors> using | |
2519 optionally specified I<CalculationMode> and optional checking of vector values. | |
2520 | |
2521 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2522 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2523 | |
2524 =item B<SorensonSimilarityCoefficient> | |
2525 | |
2526 $Value = $FingerprintsVector->SorensonSimilarityCoefficient( | |
2527 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2528 $Value = Fingerprints::FingerprintsVector::SorensonSimilarityCoefficient( | |
2529 $FingerprintsVectorA, $FingerprintVectorB, | |
2530 [$CalculationMode, $SkipValuesCheck]); | |
2531 | |
2532 Returns value of I<Sorenson> similarity coefficient between two I<FingerprintsVectors> using | |
2533 optionally specified I<CalculationMode> and optional checking of vector values. | |
2534 | |
2535 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2536 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2537 | |
2538 =item B<TanimotoSimilarityCoefficient> | |
2539 | |
2540 $Value = $FingerprintsVector->TanimotoSimilarityCoefficient( | |
2541 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]); | |
2542 $Value = Fingerprints::FingerprintsVector::TanimotoSimilarityCoefficient( | |
2543 $FingerprintsVectorA, $FingerprintVectorB, | |
2544 [$CalculationMode, $SkipValuesCheck]); | |
2545 | |
2546 Returns value of I<Tanimoto> similarity coefficient between two I<FingerprintsVectors> using | |
2547 optionally specified I<CalculationMode> and optional checking of vector values. | |
2548 | |
2549 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default | |
2550 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>. | |
2551 | |
2552 =item B<StringifyFingerprintsVector> | |
2553 | |
2554 $String = $FingerprintsVector->StringifyFingerprintsVector(); | |
2555 | |
2556 Returns a string containing information about I<FingerprintsVector> object. | |
2557 | |
2558 =back | |
2559 | |
2560 =head1 AUTHOR | |
2561 | |
2562 Manish Sud <msud@san.rr.com> | |
2563 | |
2564 =head1 SEE ALSO | |
2565 | |
2566 BitVector.pm, FingerprintsStringUtil.pm, FingerprintsBitVector.pm, Vector.pm | |
2567 | |
2568 =head1 COPYRIGHT | |
2569 | |
2570 Copyright (C) 2015 Manish Sud. All rights reserved. | |
2571 | |
2572 This file is part of MayaChemTools. | |
2573 | |
2574 MayaChemTools is free software; you can redistribute it and/or modify it under | |
2575 the terms of the GNU Lesser General Public License as published by the Free | |
2576 Software Foundation; either version 3 of the License, or (at your option) | |
2577 any later version. | |
2578 | |
2579 =cut |