comparison lib/Fingerprints/FingerprintsVector.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 package Fingerprints::FingerprintsVector;
2 #
3 # $RCSfile: FingerprintsVector.pm,v $
4 # $Date: 2015/02/28 20:48:54 $
5 # $Revision: 1.31 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Scalar::Util ();
33 use MathUtil ();
34 use TextUtil ();
35 use StatisticsUtil ();
36 use BitVector;
37 use Vector;
38
39 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
40
41 @ISA = qw(Exporter);
42
# Names of the distance coefficient functions offered through export tags.
my @DistanceCoefficients = qw(
    CityBlockDistanceCoefficient
    EuclideanDistanceCoefficient
    HammingDistanceCoefficient
    ManhattanDistanceCoefficient
    SoergelDistanceCoefficient
);

# Names of the similarity coefficient functions offered through export tags.
my @SimilarityCoefficients = qw(
    CosineSimilarityCoefficient
    CzekanowskiSimilarityCoefficient
    DiceSimilarityCoefficient
    OchiaiSimilarityCoefficient
    JaccardSimilarityCoefficient
    SorensonSimilarityCoefficient
    TanimotoSimilarityCoefficient
);

# Names of the "new from string" constructors offered through export tags.
my @NewFromString = qw(
    NewFromValuesString
    NewFromValuesAndIDsString
    NewFromIDsAndValuesString
    NewFromValuesAndIDsPairsString
    NewFromIDsAndValuesPairsString
);

@EXPORT = qw(IsFingerprintsVector);

# NOTE(review): the two '@...' entries below are literal strings naming
# package arrays, while the arrays declared above are lexicals - confirm
# whether exporting them is actually intended.
@EXPORT_OK = qw(
    GetSupportedDistanceCoefficients
    GetSupportedSimilarityCoefficients
    GetSupportedDistanceAndSimilarityCoefficients
    @DistanceCoefficients
    @SimilarityCoefficients
);

# The 'all' tag is built from the lists as they stand here, so what it
# imports is unchanged by the export_ok_tags() call below.
%EXPORT_TAGS = (
    new                    => [@NewFromString],
    distancecoefficients   => [@DistanceCoefficients],
    similaritycoefficients => [@SimilarityCoefficients],
    all                    => [@EXPORT, @EXPORT_OK],
);

# Exporter only accepts tag members that also appear in @EXPORT or
# @EXPORT_OK; register the members of the function tags so that imports
# such as qw(:new) are accepted instead of being rejected as not exported.
Exporter::export_ok_tags(qw(new distancecoefficients similaritycoefficients));

# Setup class variables...
my($ClassName);
_InitializeClass();

# Stringification of an object is delegated to StringifyFingerprintsVector.
use overload '""' => 'StringifyFingerprintsVector';
68
# Class constructor.
#
# Takes a list of name/value pairs; each pair is applied through the
# matching Set<Name> method by _InitializeFingerprintsVectorProperties.
# May be invoked on a class name or on an existing object.
#
sub new {
    my $Class = shift;
    my %Properties = @_;

    # Bless into the invocant's class when called on an object.
    my $Self = bless {}, ref($Class) || $Class;

    $Self->_InitializeFingerprintsVector();
    $Self->_InitializeFingerprintsVectorProperties(%Properties);

    return $Self;
}
83
# Reset the object's data: an empty type string plus empty lists of
# values and value IDs. Returns the object.
#
sub _InitializeFingerprintsVector {
    my $Self = shift;

    # No type is known until SetType is called.
    $Self->{Type} = '';

    # Start with no values and no value IDs.
    for my $ListName (qw(Values ValueIDs)) {
        @{ $Self->{$ListName} } = ();
    }

    return $Self;
}
100
# Initialize class-level data: remember the package name, which is used as
# a prefix in error and warning messages.
sub _InitializeClass {
    $ClassName = __PACKAGE__;
}
106
# Apply constructor name/value pairs to the object.
#
# For every name a method called Set<Name> is invoked with the value.
# After all pairs are applied, croaks unless a Type was among them.
# Returns the object.
#
sub _InitializeFingerprintsVectorProperties {
    my $This = shift;
    my %NamesAndValues = @_;

    for my $Name (keys %NamesAndValues) {
        my $Setter = "Set${Name}";
        $This->$Setter($NamesAndValues{$Name});
    }

    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying type..."
        unless exists $NamesAndValues{Type};

    return $This;
}
122
# Build a new fingerprints vector from a type and a space delimited string
# of values. An undefined or empty string, or the word None in any case,
# yields a vector without values.
#
# Usable as a plain function (Type, ValuesString) or as a method, in which
# case the leading invocant is ignored.
#
sub NewFromValuesString ($$;$) {
    my @Arguments = @_;

    # With three arguments the first one is the invocant; drop it.
    shift @Arguments if @Arguments == 3;
    my($Type, $ValuesString) = @Arguments;

    my $HasValues = defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i;
    my @Values = $HasValues ? split(' ', $ValuesString) : ();

    my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values);

    return $FingerprintsVector;
}
147
# Build a new fingerprints vector from a "values;value IDs" string. The
# two halves are separated by a semicolon and each half is space delimited.
# A missing or empty half, or the word None, counts as an empty list.
#
# Warns and returns undef when the number of values differs from the
# number of value IDs.
#
# Usable as a plain function (Type, String) or as a method, in which case
# the leading invocant is ignored.
#
sub NewFromValuesAndIDsString ($$;$) {
    my @Arguments = @_;

    # With three arguments the first one is the invocant; drop it.
    shift @Arguments if @Arguments == 3;
    my($Type, $ValuesAndIDsString) = @Arguments;

    my($ValuesPart, $ValueIDsPart) = split(';', $ValuesAndIDsString);

    my @Values = (defined($ValuesPart) && length($ValuesPart) && $ValuesPart !~ /^None$/i)
        ? split(' ', $ValuesPart)
        : ();
    my @ValueIDs = (defined($ValueIDsPart) && length($ValueIDsPart) && $ValueIDsPart !~ /^None$/i)
        ? split(' ', $ValueIDsPart)
        : ();

    my $NumOfValues = scalar @Values;
    my $NumOfValueIDs = scalar @ValueIDs;
    if ($NumOfValues != $NumOfValueIDs) {
        carp "Warning: ${ClassName}->NewFromValuesAndIDsString: Object can't be instantiated: Number specified values, " . $NumOfValues . ", must be equal to number of specified value IDs, " . $NumOfValueIDs . "...";
        return undef;
    }

    my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

    return $FingerprintsVector;
}
186
# Build a new fingerprints vector from a "value IDs;values" string. The
# two halves are separated by a semicolon and each half is space delimited.
# A missing or empty half, or the word None, counts as an empty list.
#
# Warns and returns undef when the number of values differs from the
# number of value IDs.
#
# Usable as a plain function (Type, String) or as a method, in which case
# the leading invocant is ignored.
#
sub NewFromIDsAndValuesString ($$;$) {
    my @Arguments = @_;

    # With three arguments the first one is the invocant; drop it.
    shift @Arguments if @Arguments == 3;
    my($Type, $IDsAndValuesString) = @Arguments;

    my($ValueIDsPart, $ValuesPart) = split(';', $IDsAndValuesString);

    my @Values = (defined($ValuesPart) && length($ValuesPart) && $ValuesPart !~ /^None$/i)
        ? split(' ', $ValuesPart)
        : ();
    my @ValueIDs = (defined($ValueIDsPart) && length($ValueIDsPart) && $ValueIDsPart !~ /^None$/i)
        ? split(' ', $ValueIDsPart)
        : ();

    my $NumOfValues = scalar @Values;
    my $NumOfValueIDs = scalar @ValueIDs;
    if ($NumOfValues != $NumOfValueIDs) {
        carp "Warning: ${ClassName}->NewFromIDsAndValuesString: Object can't be instantiated: Number specified values, " . $NumOfValues . ", must be equal to number of specified value IDs, " . $NumOfValueIDs . "...";
        return undef;
    }

    my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

    return $FingerprintsVector;
}
225
# Build a new fingerprints vector from a space delimited string of
# "value valueID" pairs. The single pair "None None" (any case) yields a
# vector without values or value IDs.
#
# Warns and returns undef when the string holds an odd number of tokens.
#
# Usable as a plain function (Type, String) or as a method, in which case
# the leading invocant is ignored.
#
sub NewFromValuesAndIDsPairsString ($$;$) {
    my @Arguments = @_;

    # With three arguments the first one is the invocant; drop it.
    shift @Arguments if @Arguments == 3;
    my($Type, $ValuesAndIDsPairsString) = @Arguments;

    my @Tokens = split(' ', $ValuesAndIDsPairsString);
    if (@Tokens % 2) {
        carp "Warning: ${ClassName}->NewFromValuesAndIDsPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
        return undef;
    }

    my(@Values, @ValueIDs);
    my $IsNonePair = (@Tokens == 2 && $Tokens[0] =~ /^None$/i && $Tokens[1] =~ /^None$/i);
    if (!$IsNonePair) {
        # Consume the tokens two at a time: value first, then its ID.
        while (@Tokens) {
            my($Value, $ValueID) = splice(@Tokens, 0, 2);
            push @Values, $Value;
            push @ValueIDs, $ValueID;
        }
    }

    my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

    return $FingerprintsVector;
}
260
# Build a new fingerprints vector from a space delimited string of
# "valueID value" pairs. The single pair "None None" (any case) yields a
# vector without values or value IDs.
#
# Croaks when the string holds an odd number of tokens.
# NOTE(review): the sibling NewFromValuesAndIDsPairsString only warns and
# returns undef in that situation - confirm which behavior is intended.
#
# Usable as a plain function (Type, String) or as a method, in which case
# the leading invocant is ignored.
#
sub NewFromIDsAndValuesPairsString ($$;$) {
    my @Arguments = @_;

    # With three arguments the first one is the invocant; drop it.
    shift @Arguments if @Arguments == 3;
    my($Type, $IDsAndValuesPairsString) = @Arguments;

    my @IDsAndValuesPairs = split(' ', $IDsAndValuesPairsString);
    if (@IDsAndValuesPairs % 2) {
        # croak never returns, so no return statement follows it.
        croak "Error: ${ClassName}->NewFromIDsAndValuesPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
    }

    my(@Values, @ValueIDs);
    if (!(@IDsAndValuesPairs == 2 && $IDsAndValuesPairs[0] =~ /^None$/i && $IDsAndValuesPairs[1] =~ /^None$/i)) {
        # Even positions hold value IDs, odd positions hold the values.
        for (my $Index = 0; $Index < $#IDsAndValuesPairs; $Index += 2) {
            push @ValueIDs, $IDsAndValuesPairs[$Index];
            push @Values, $IDsAndValuesPairs[$Index + 1];
        }
    }

    my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

    return $FingerprintsVector;
}
295
# Set the type of the fingerprints vector. Accepted types, matched without
# regard to case: OrderedNumericalValues, NumericalValues and
# AlphaNumericalValues.
#
# . OrderedNumericalValues: both vectors must be of the same size and
#   contain similar types of numerical values in the same order.
#
# . NumericalValues: value IDs must be specified for both vectors; their
#   sizes and the order of IDs and values may differ. For each vector, the
#   value IDs must correspond to the vector values.
#
# . AlphaNumericalValues: vectors may contain both numerical and
#   alphanumerical values and their sizes may differ.
#
# Croaks on an unsupported type, or when a type has already been set.
# Returns the object.
#
sub SetType {
    my($This, $Type) = @_;

    croak "Error: ${ClassName}->SetType: Specified value, $Type, for Type is not vaild. Supported types in current release of MayaChemTools: OrderedNumericalValues, NumericalValues or AlphaNumericalValues"
        unless $Type =~ /^(OrderedNumericalValues|NumericalValues|AlphaNumericalValues)$/i;

    # The type may be assigned only once.
    croak "Error: ${ClassName}->SetType: Can't change intial fingerprints vector type: It's already set..."
        if $This->{Type};

    $This->{Type} = $Type;

    return $This;
}
323
# Return the fingerprints vector type stored in the object.
#
sub GetType {
    my $Self = shift;

    return $Self->{Type};
}
331
# Store an ID on the object and return the object.
sub SetID {
    my $Self = shift;

    $Self->{ID} = shift;

    return $Self;
}
340
# Return the stored ID, or the string 'None' when no ID key is present.
sub GetID {
    my $Self = shift;

    return $Self->{ID} if exists $Self->{ID};
    return 'None';
}
347
# Store a description on the object and return the object.
sub SetDescription {
    my $Self = shift;

    $Self->{Description} = shift;

    return $Self;
}
356
# Return the stored description, or 'No description available' when no
# Description key is present.
sub GetDescription {
    my $Self = shift;

    return $Self->{Description} if exists $Self->{Description};
    return 'No description available';
}
363
# Store a vector type on the object and return the object.
sub SetVectorType {
    my $Self = shift;

    $Self->{VectorType} = shift;

    return $Self;
}
372
# Return the stored vector type, or 'FingerprintsVector' when no
# VectorType key is present.
sub GetVectorType {
    my $Self = shift;

    return $Self->{VectorType} if exists $Self->{VectorType};
    return 'FingerprintsVector';
}
379
# Replace the vector values. The values may be given as a vector object, a
# reference to an array, or a plain list. Returns the object.
#
sub SetValues {
    my $This = shift;

    $This->_SetOrAddValuesOrValueIDs("SetValues", @_);

    return $This;
}
389
# Replace the vector value IDs. The IDs may be given as a vector object, a
# reference to an array, or a plain list. Returns the object.
#
sub SetValueIDs {
    my $This = shift;

    $This->_SetOrAddValuesOrValueIDs("SetValueIDs", @_);

    return $This;
}
399
# Append to the vector values. The values may be given as a vector object,
# a reference to an array, or a plain list. Returns the object.
#
sub AddValues {
    my $This = shift;

    $This->_SetOrAddValuesOrValueIDs("AddValues", @_);

    return $This;
}
409
# Append to the vector value IDs. The IDs may be given as a vector object,
# a reference to an array, or a plain list. Returns the object.
#
sub AddValueIDs {
    my $This = shift;

    $This->_SetOrAddValuesOrValueIDs("AddValueIDs", @_);

    return $This;
}
419
# Set or add values or value IDs. The mode is one of SetValues,
# SetValueIDs, AddValues or AddValueIDs (matched without regard to case);
# the data may be supplied as:
#
#   o A list of values or value IDs
#   o A reference to an array of values or value IDs
#   o A vector object containing values or value IDs
#
# Returns nothing when no data is supplied; otherwise returns the object.
# Croaks on an unsupported reference type or an unknown mode.
#
sub _SetOrAddValuesOrValueIDs {
    my($This, $Mode, @Values) = @_;

    return unless @Values;

    # The form of the first item decides how the input is interpreted.
    my $FirstValue = $Values[0];
    my $TypeOfFirstValue = ref $FirstValue;

    croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Trying to add values to vector object with a reference to unsupported value format..."
        if $TypeOfFirstValue =~ /^(SCALAR|HASH|CODE|REF|GLOB)/;

    my $ValuesRef;
    if (Vector::IsVector($FirstValue)) {
        # A vector object: take the values it holds.
        $ValuesRef = $FirstValue->GetValues();
    }
    elsif ($TypeOfFirstValue =~ /^ARRAY/) {
        # An array reference: use it directly.
        $ValuesRef = $FirstValue;
    }
    else {
        # A plain list of values.
        $ValuesRef = \@Values;
    }

    # Work out which list is targeted and whether it is emptied first.
    my($TargetKey, $ClearFirst);
    if ($Mode =~ /^SetValues$/i) {
        ($TargetKey, $ClearFirst) = ('Values', 1);
    }
    elsif ($Mode =~ /^SetValueIDs$/i) {
        ($TargetKey, $ClearFirst) = ('ValueIDs', 1);
    }
    elsif ($Mode =~ /^AddValues$/i) {
        ($TargetKey, $ClearFirst) = ('Values', 0);
    }
    elsif ($Mode =~ /^AddValueIDs$/i) {
        ($TargetKey, $ClearFirst) = ('ValueIDs', 0);
    }
    else {
        croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Unknown mode $Mode...";
    }

    # Clearing happens in place and before the push, as two separate steps.
    if ($ClearFirst) {
        @{$This->{$TargetKey}} = ();
    }
    push @{$This->{$TargetKey}}, @{$ValuesRef};

    return $This;
}
465
# Set the value at a zero-based index. Unless SkipCheck is true, the index
# is validated first: croaks when it is negative or not less than the
# number of values. Returns whatever _SetValue returns.
#
sub SetValue {
    my($This, $Index, $Value, $SkipCheck) = @_;

    unless ($SkipCheck) {
        croak "Error: ${ClassName}->SetValue: Index value must be a positive number..."
            if $Index < 0;
        croak "Error: ${ClassName}->SetValue: Index vaue must be less than number of values..."
            if $Index >= $This->GetNumOfValues();
    }

    return $This->_SetValue($Index, $Value);
}
486
# Store a value at the given index without any checking. Returns the object.
#
sub _SetValue {
    my $Self = shift;
    my($Position, $NewValue) = @_;

    $Self->{Values}[$Position] = $NewValue;

    return $Self;
}
496
# Return the value at a zero-based index. Croaks when the index is
# negative or not less than the number of values.
#
sub GetValue {
    my($This, $Index) = @_;

    croak "Error: ${ClassName}->GetValue: Index value must be a positive number..."
        if $Index < 0;
    croak "Error: ${ClassName}->GetValue: Index value must be less than number of values..."
        if $Index >= $This->GetNumOfValues();

    return $This->_GetValue($Index);
}
510
# Return the value stored at the given index without any checking.
sub _GetValue {
    my $Self = shift;
    my $Position = shift;

    return $Self->{Values}[$Position];
}
517
# Return the vector values: a list in list context, otherwise a reference
# to the object's own values array.
#
sub GetValues {
    my $Self = shift;

    if (wantarray) {
        return @{$Self->{Values}};
    }
    return \@{$Self->{Values}};
}
525
# Set the value ID at a zero-based index. Unless SkipCheck is true, the
# index is validated first: croaks when it is negative or not less than
# the number of value IDs. Returns whatever _SetValueID returns.
#
sub SetValueID {
    my($This, $Index, $Value, $SkipCheck) = @_;

    unless ($SkipCheck) {
        croak "Error: ${ClassName}->SetValueID: Index value must be a positive number..."
            if $Index < 0;
        croak "Error: ${ClassName}->SetValueID: Index vaue must be less than number of value IDs..."
            if $Index >= $This->GetNumOfValueIDs();
    }

    return $This->_SetValueID($Index, $Value);
}
546
# Store a value ID at the given index without any checking. Returns the
# object.
#
sub _SetValueID {
    my $Self = shift;
    my($Position, $NewValueID) = @_;

    $Self->{ValueIDs}[$Position] = $NewValueID;

    return $Self;
}
556
# Return the value ID at a zero-based index. Croaks when the index is
# negative or not less than the number of value IDs.
#
sub GetValueID {
    my($This, $Index) = @_;

    croak "Error: ${ClassName}->GetValueID: Index value must be a positive number..."
        if $Index < 0;
    croak "Error: ${ClassName}->GetValueID: Index value must be less than number of value IDs..."
        if $Index >= $This->GetNumOfValueIDs();

    return $This->_GetValueID($Index);
}
570
# Return the value ID stored at the given index without any checking.
#
sub _GetValueID {
    my $Self = shift;
    my $Position = shift;

    return $Self->{ValueIDs}[$Position];
}
578
# Return the vector value IDs: a list in list context, otherwise a
# reference to the object's own value IDs array.
#
sub GetValueIDs {
    my $Self = shift;

    if (wantarray) {
        return @{$Self->{ValueIDs}};
    }
    return \@{$Self->{ValueIDs}};
}
586
# Return the fingerprints vector as a string in the requested format.
#
# Format names are matched without regard to case; each has a long and a
# short spelling: IDsAndValuesString / IDsAndValues,
# IDsAndValuesPairsString / IDsAndValuesPairs, ValuesAndIDsString /
# ValuesAndIDs, ValuesAndIDsPairsString / ValuesAndIDsPairs,
# ValueIDsString / ValueIDs, ValuesString / Values.
#
# The work is delegated to the matching Get...String method. Croaks when
# the format is not one of the names above.
#
sub GetFingerprintsVectorString {
    my($This, $Format) = @_;

    # Each branch returns directly, so nothing is needed after a match.
    if ($Format =~ /^(IDsAndValuesString|IDsAndValues)$/i) {
        return $This->GetIDsAndValuesString();
    }
    if ($Format =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) {
        return $This->GetIDsAndValuesPairsString();
    }
    if ($Format =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) {
        return $This->GetValuesAndIDsString();
    }
    if ($Format =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) {
        return $This->GetValuesAndIDsPairsString();
    }
    if ($Format =~ /^(ValueIDsString|ValueIDs)$/i) {
        return $This->GetValueIDsString();
    }
    if ($Format =~ /^(ValuesString|Values)$/i) {
        return $This->GetValuesString();
    }

    croak "Error: ${ClassName}->GetFingerprintsVectorString: Specified vector string format, $Format, is not supported. Value values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValueIDsString, ValueIDs, ValuesString, Values...";
}
# Return "value IDs;values": two space delimited lists joined by a
# semicolon. The IDs half is "None" when there are no value IDs, and the
# whole result is "None;None" when there are no values.
#
sub GetIDsAndValuesString {
    my $Self = shift;

    my $HasValueIDs = @{$Self->{ValueIDs}} ? 1 : 0;
    my $HasValues = @{$Self->{Values}} ? 1 : 0;

    # Without values nothing is reported, even if IDs exist.
    return "None;None" unless $HasValues;

    my $IDsPart = $HasValueIDs ? join(' ', @{$Self->{ValueIDs}}) : "None";

    return $IDsPart . ";" . join(' ', @{$Self->{Values}});
}
622
# Return a space delimited string of "valueID value" pairs, one pair per
# value. The ID of every pair is "None" when the vector has no value IDs,
# and the result is "None None" when there are no values.
#
sub GetIDsAndValuesPairsString {
    my $Self = shift;

    return "None None" unless @{$Self->{Values}};

    my $HasValueIDs = @{$Self->{ValueIDs}} ? 1 : 0;

    my @Tokens = ();
    my $Position = 0;
    for my $Value (@{$Self->{Values}}) {
        my $ValueID = $HasValueIDs ? $Self->{ValueIDs}->[$Position] : 'None';
        push @Tokens, $ValueID, $Value;
        $Position++;
    }

    return join(' ', @Tokens);
}
647
# Return "values;value IDs": two space delimited lists joined by a
# semicolon. The IDs half is "None" when there are no value IDs, and the
# whole result is "None;None" when there are no values.
#
sub GetValuesAndIDsString {
    my $Self = shift;

    my $HasValueIDs = @{$Self->{ValueIDs}} ? 1 : 0;
    my $HasValues = @{$Self->{Values}} ? 1 : 0;

    # Without values nothing is reported, even if IDs exist.
    return "None;None" unless $HasValues;

    my $IDsPart = $HasValueIDs ? join(' ', @{$Self->{ValueIDs}}) : "None";

    return join(' ', @{$Self->{Values}}) . ";" . $IDsPart;
}
667
# Return a space delimited string of "value valueID" pairs, one pair per
# value. The ID of every pair is "None" when the vector has no value IDs,
# and the result is "None None" when there are no values.
#
sub GetValuesAndIDsPairsString {
    my $Self = shift;

    return "None None" unless @{$Self->{Values}};

    my $HasValueIDs = @{$Self->{ValueIDs}} ? 1 : 0;

    my @Tokens = ();
    my $Position = 0;
    for my $Value (@{$Self->{Values}}) {
        my $ValueID = $HasValueIDs ? $Self->{ValueIDs}->[$Position] : 'None';
        push @Tokens, $Value, $ValueID;
        $Position++;
    }

    return join(' ', @Tokens);
}
692
# Return the value IDs as a space delimited string, or 'None' when the
# vector has no value IDs.
#
sub GetValueIDsString {
    my $Self = shift;

    if (@{$Self->{ValueIDs}}) {
        return join(' ', @{$Self->{ValueIDs}});
    }
    return 'None';
}
700
# Return the values as a space delimited string, or 'None' when the vector
# has no values.
#
sub GetValuesString {
    my $Self = shift;

    if (@{$Self->{Values}}) {
        return join(' ', @{$Self->{Values}});
    }
    return 'None';
}
708
# Return how many values the vector holds.
sub GetNumOfValues {
    my $Self = shift;

    my $NumOfValues = @{$Self->{Values}};

    return $NumOfValues;
}
715
# Return how many values compare numerically unequal to zero. The number
# of values examined is obtained from GetNumOfValues.
sub GetNumOfNonZeroValues {
    my($This) = @_;

    my $LastIndex = $This->GetNumOfValues() - 1;

    # grep in scalar context yields the number of matching indices.
    my $NonZeroCount = grep { $This->{Values}[$_] != 0 } 0 .. $LastIndex;

    return $NonZeroCount;
}
731
# Return how many value IDs the vector holds.
sub GetNumOfValueIDs {
    my $Self = shift;

    my $NumOfValueIDs = @{$Self->{ValueIDs}};

    return $NumOfValueIDs;
}
738
739 # FingerprintsVector class provides methods to calculate similarity between vectors
740 # containing three different types of values:
741 #
742 # Type I: OrderedNumericalValues
743 #
744 # . Size of two vectors are same
745 # . Vectors contain real values in a specific order. For example: MACCS keys count, Topological
746 # pharmacophore atom pairs and so on.
747 # . Option to calculate similarity value using continuous values or binary values
748 #
749 # Type II: UnorderedNumericalValues
750 #
751 # . Size of two vectors might not be same
752 # . Vectors contain unordered real value identified by value IDs. For example: Topological atom pairs,
753 # Topological atom torsions and so on
754 # . Option to calculate similarity value using continuous values or binary values
755 #
756 # Type III: AlphaNumericalValues
757 #
758 # . Size of two vectors might not be same
759 # . Vectors contain unordered alphanumerical values. For example: Extended connectivity fingerprints,
760 # atom neighborhood fingerprints.
761 # . The vector values are treated as keys or bit indices and similarity value is calculated accordingly.
762 #
763 # Before performing similarity or distance calculations between vectors containing UnorderedNumericalValues
764 # or AlphaNumericalValues, the vectors are transformed into vectors containing unique OrderedNumericalValues
765 # using value IDs for UnorderedNumericalValues and values itself for AlphaNumericalValues.
766 #
767 # Three forms of similarity or distance calculation are available between two vectors: AlgebraicForm, BinaryForm or
768 # SetTheoreticForm.
769 #
770 # The value of an extra parameter, CalculationMode, passed to each similarity or distance function
771 # controls the calculation. Supported values for CalculationMode: AlgebraicForm, BinaryForm and
772 # SetTheoreticForm. Default: AlgebraicForm.
773 #
774 # For BinaryForm CalculationMode, the ordered list of processed final vector values containing the value or
775 # count of each unique value type is simply converted into a binary vector containing 1s and 0s
776 # corresponding to presence or absence of values before calculating similarity or distance between
777 # two vectors.
778 #
779 # For two fingerprint vectors A and B of same size containing OrderedNumericalValues, let:
780 #
781 # N = Number values in A or B
782 #
783 # Xa = Values of vector A
784 # Xb = Values of vector B
785 #
786 # Xai = Value of ith element in A
787 # Xbi = Value of ith element in B
788 #
789 # SUM = Sum of i over N values
790 #
791 # For SetTheoreticForm of calculation between two vectors, let:
792 #
793 # SetIntersectionXaXb = SUM ( MIN ( Xai, Xbi ) )
794 # SetDifferenceXaXb = SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) )
795 #
796 # For BinaryForm of calculation between two vectors, let:
797 #
798 # Na = Number of bits set to "1" in A = SUM ( Xai )
799 # Nb = Number of bits set to "1" in B = SUM ( Xbi )
800 # Nc = Number of bits set to "1" in both A and B = SUM ( Xai * Xbi )
801 # Nd = Number of bits set to "0" in both A and B = SUM ( 1 - Xai - Xbi + Xai * Xbi)
802 #
803 # N = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
804 #
805 # Additionally, for BinaryForm various values also correspond to:
806 #
807 # Na = | Xa |
808 # Nb = | Xb |
809 # Nc = | SetIntersectionXaXb |
810 # Nd = N - | SetDifferenceXaXb |
811 #
812 # | SetDifferenceXaXb | = N - Nd = Na + Nb - Nc + Nd - Nd = Na + Nb - Nc
813 # = | Xa | + | Xb | - | SetIntersectionXaXb |
814 #
815 # Various distance coefficients and similarity coefficients [ Ref 40, Ref 62, Ref 64 ] for a pair vectors A and B
816 # in AlgebraicForm and BinaryForm are defined as follows:
817 #
818 # . CityBlockDistanceCoefficient: ( same as HammingDistanceCoefficient and ManhattanDistanceCoefficient)
819 #
820 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
821 #
822 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
823 #
824 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
825 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
826 #
827 # . CosineSimilarityCoefficient: ( same as OchiaiSimilarityCoefficient)
828 #
829 # . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
830 #
831 # . BinaryForm: Nc / SQRT ( Na * Nb)
832 #
833 # . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| )
834 # = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
835 #
836 # . CzekanowskiSimilarityCoefficient: ( same as DiceSimilarityCoefficient and SorensonSimilarityCoefficient)
837 #
838 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
839 #
840 # . BinaryForm: 2 * Nc / ( Na + Nb )
841 #
842 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
843 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
844 #
845 # . DiceSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and SorensonSimilarityCoefficient)
846 #
847 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
848 #
849 # . BinaryForm: 2 * Nc / ( Na + Nb )
850 #
851 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
852 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
853 #
854 # . EuclideanDistanceCoefficient:
855 #
856 # . AlgebraicForm: SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) )
857 #
858 # . BinaryForm: SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc )
859 #
860 # . SetTheoreticForm: SQRT ( | SetDifferenceXaXb | - | SetIntersectionXaXb | )
861 # = SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
862 #
863 # . HammingDistanceCoefficient: ( same as CityBlockDistanceCoefficient and ManhattanDistanceCoefficient)
864 #
865 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
866 #
867 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
868 #
869 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
870 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
871 #
872 # . JaccardSimilarityCoefficient: ( same as TanimotoSimilarityCoefficient)
873 #
874 # . AlgebraicForm: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
875 #
876 # . BinaryForm: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
877 #
878 # . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb |
879 # = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
880 #
881 # . ManhattanDistanceCoefficient: ( same as CityBlockDistanceCoefficient and HammingDistanceCoefficient)
882 #
883 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
884 #
885 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
886 #
887 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
888 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
889 #
890 # . OchiaiSimilarityCoefficient: ( same as CosineSimilarityCoefficient)
891 #
892 # . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
893 #
894 # . BinaryForm: Nc / SQRT ( Na * Nb)
895 #
896 # . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| )
897 # = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
898 #
899 # . SorensonSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and DiceSimilarityCoefficient)
900 #
901 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
902 #
903 # . BinaryForm: 2 * Nc / ( Na + Nb )
904 #
905 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
906 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
907 #
908 # . SoergelDistanceCoefficient:
909 #
910 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
911 #
912 # . BinaryForm: 1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
913 #
914 # . SetTheoreticForm: ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) / | SetDifferenceXaXb |
915 # = ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
916 #
917 # . TanimotoSimilarityCoefficient: ( same as JaccardSimilarityCoefficient)
918 #
919 # . AlgebraicForm: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
920 #
921 # . BinaryForm: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
922 #
923 # . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb |
924 # = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
925 #
926 #
927
# Calculate Hamming distance coefficient between two fingerprint vectors.
#
# The calculation is delegated to CityBlockDistanceCoefficient; all four arguments
# (both vectors, optional calculation mode, optional skip-values-check flag) are
# passed through unchanged and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub HammingDistanceCoefficient ($$;$$) {
  my $VectorA = shift;
  my $VectorB = shift;
  my $Mode = shift;
  my $SkipCheck = shift;

  return CityBlockDistanceCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
937
# Calculate Manhattan distance coefficient between two fingerprint vectors.
#
# The calculation is delegated to CityBlockDistanceCoefficient; all four arguments
# (both vectors, optional calculation mode, optional skip-values-check flag) are
# passed through unchanged and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub ManhattanDistanceCoefficient ($$;$$) {
  my $VectorA = shift;
  my $VectorB = shift;
  my $Mode = shift;
  my $SkipCheck = shift;

  return CityBlockDistanceCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
947
# Calculate CityBlock distance coefficient between two fingerprint vectors.
#
# Arguments: two fingerprints vectors, an optional calculation mode (AlgebraicForm,
# BinaryForm or SetTheoreticForm, matched case-insensitively; default AlgebraicForm)
# and an optional flag to skip validation of the vectors (default 0).
#
# Returns the coefficient computed by the form-specific helper, or undef for a
# mode that matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CityBlockDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  # Fall back to defaults for arguments that were omitted or passed as undef...
  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate (unless skipped) and set up ordered values for the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("CityBlockDistanceCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the implementation for the requested form...
  return _CityBlockDistanceCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _CityBlockDistanceCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _CityBlockDistanceCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  return undef;
}
976
# Calculate CityBlock distance coefficient using algebraic form:
#
#   SUM ( ABS ( Xai - Xbi ) )
#
# The sum is obtained, in scalar context, from
# _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues.
#
sub _CityBlockDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  return scalar _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);
}
987
# Calculate CityBlock distance coefficient using binary form:
#
#   Na + Nb - 2 * Nc
#
# where Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits for the two vectors.
#
sub _CityBlockDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  return ($CountA + $CountB - 2 * $CountCommon);
}
998
# Calculate CityBlock distance coefficient using set theoretic form:
#
#   SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
#
sub _CityBlockDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  return ($TotalA + $TotalB - 2 * $TotalOfMinimums);
}
1011
# Calculate Ochiai similarity coefficient between two fingerprint vectors.
#
# The calculation is delegated to CosineSimilarityCoefficient; all four arguments
# (both vectors, optional calculation mode, optional skip-values-check flag) are
# passed through unchanged and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub OchiaiSimilarityCoefficient ($$;$$) {
  my $VectorA = shift;
  my $VectorB = shift;
  my $Mode = shift;
  my $SkipCheck = shift;

  return CosineSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1021
# Calculate Cosine similarity coefficient between two fingerprint vectors.
#
# Arguments: two fingerprints vectors, an optional calculation mode (AlgebraicForm,
# BinaryForm or SetTheoreticForm, matched case-insensitively; default AlgebraicForm)
# and an optional flag to skip validation of the vectors (default 0).
#
# Returns the coefficient computed by the form-specific helper, or undef for a
# mode that matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CosineSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  # Fall back to defaults for arguments that were omitted or passed as undef...
  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate (unless skipped) and set up ordered values for the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("CosineSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the implementation for the requested form...
  return _CosineSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _CosineSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _CosineSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  return undef;
}
1050
# Calculate Cosine similarity coefficient using algebraic form:
#
#   SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2 ) * SUM ( Xbi ** 2 ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _CosineSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SquaresTotalA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SquaresTotalB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $ProductsTotal = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Divisor = sqrt($SquaresTotalA * $SquaresTotalB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($ProductsTotal/$Divisor);
}
1066
# Calculate Cosine similarity coefficient using binary form:
#
#   Nc / SQRT ( Na * Nb )
#
# where Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator evaluates
# to zero.
#
sub _CosineSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = sqrt($CountA * $CountB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($CountCommon/$Divisor);
}
1080
# Calculate Cosine similarity coefficient using set theoretic form:
#
#   SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _CosineSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Divisor = sqrt($TotalA * $TotalB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($TotalOfMinimums/$Divisor);
}
1096
# Calculate Czekanowski similarity coefficient between two fingerprint vectors.
#
# The calculation is delegated to DiceSimilarityCoefficient; all four arguments
# (both vectors, optional calculation mode, optional skip-values-check flag) are
# passed through unchanged and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CzekanowskiSimilarityCoefficient ($$;$$) {
  my $VectorA = shift;
  my $VectorB = shift;
  my $Mode = shift;
  my $SkipCheck = shift;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1106
# Calculate Sorenson similarity coefficient between two fingerprint vectors.
#
# The calculation is delegated to DiceSimilarityCoefficient; all four arguments
# (both vectors, optional calculation mode, optional skip-values-check flag) are
# passed through unchanged and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SorensonSimilarityCoefficient ($$;$$) {
  my $VectorA = shift;
  my $VectorB = shift;
  my $Mode = shift;
  my $SkipCheck = shift;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1116
# Calculate Dice similarity coefficient between two fingerprint vectors.
#
# Arguments: two fingerprints vectors, an optional calculation mode (AlgebraicForm,
# BinaryForm or SetTheoreticForm, matched case-insensitively; default AlgebraicForm)
# and an optional flag to skip validation of the vectors (default 0).
#
# Returns the coefficient computed by the form-specific helper, or undef for a
# mode that matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  # Fall back to defaults for arguments that were omitted or passed as undef...
  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate (unless skipped) and set up ordered values for the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("DiceSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the implementation for the requested form...
  return _DiceSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _DiceSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _DiceSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  return undef;
}
1145
# Calculate Dice similarity coefficient using algebraic form:
#
#   ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _DiceSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SquaresTotalA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SquaresTotalB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $ProductsTotal = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Dividend = 2 * $ProductsTotal;
  my $Divisor = $SquaresTotalA + $SquaresTotalB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($Dividend/$Divisor);
}
1161
# Calculate Dice similarity coefficient using binary form:
#
#   2 * Nc / ( Na + Nb )
#
# where Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator evaluates
# to zero.
#
sub _DiceSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Dividend = 2 * $CountCommon;
  my $Divisor = $CountA + $CountB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($Dividend/$Divisor);
}
1175
# Calculate Dice similarity coefficient using set theoretic form:
#
#   2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _DiceSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Dividend = 2 * $TotalOfMinimums;
  my $Divisor = $TotalA + $TotalB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($Dividend/$Divisor);
}
1191
1192
# Calculate Euclidean distance coefficient between two fingerprint vectors.
#
# Arguments: two fingerprints vectors, an optional calculation mode (AlgebraicForm,
# BinaryForm or SetTheoreticForm, matched case-insensitively; default AlgebraicForm)
# and an optional flag to skip validation of the vectors (default 0).
#
# Returns the coefficient computed by the form-specific helper, or undef for a
# mode that matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub EuclideanDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  # Fall back to defaults for arguments that were omitted or passed as undef...
  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate (unless skipped) and set up ordered values for the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("EuclideanDistanceCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the implementation for the requested form...
  return _EuclideanDistanceCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _EuclideanDistanceCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _EuclideanDistanceCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  return undef;
}
1221
# Calculate Euclidean distance coefficient using algebraic form: the square root
# of the value returned by
# _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues for the two vectors.
#
sub _EuclideanDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SquaredDistance = _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);

  return sqrt($SquaredDistance);
}
1232
# Calculate Euclidean distance coefficient using binary form:
#
#   SQRT ( Na + Nb - 2 * Nc )
#
# where Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits for the two vectors.
#
sub _EuclideanDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $SquaredDistance = $CountA + $CountB - 2 * $CountCommon;

  return sqrt($SquaredDistance);
}
1243
# Calculate Euclidean distance coefficient using set theoretic form:
#
#   SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
#
sub _EuclideanDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $SquaredDistance = $TotalA + $TotalB - 2 * $TotalOfMinimums;

  return sqrt($SquaredDistance);
}
1256
# Calculate Jaccard similarity coefficient between two fingerprint vectors.
#
# The calculation is delegated to TanimotoSimilarityCoefficient; all four arguments
# (both vectors, optional calculation mode, optional skip-values-check flag) are
# passed through unchanged and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub JaccardSimilarityCoefficient ($$;$$) {
  my $VectorA = shift;
  my $VectorB = shift;
  my $Mode = shift;
  my $SkipCheck = shift;

  return TanimotoSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1266
# Calculate Tanimoto similarity coefficient between two fingerprint vectors.
#
# Arguments: two fingerprints vectors, an optional calculation mode (AlgebraicForm,
# BinaryForm or SetTheoreticForm, matched case-insensitively; default AlgebraicForm)
# and an optional flag to skip validation of the vectors (default 0).
#
# Returns the coefficient computed by the form-specific helper, or undef for a
# mode that matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TanimotoSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  # Fall back to defaults for arguments that were omitted or passed as undef...
  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate (unless skipped) and set up ordered values for the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("TanimotoSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the implementation for the requested form...
  return _TanimotoSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _TanimotoSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _TanimotoSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  return undef;
}
1295
# Calculate Tanimoto similarity coefficient using algebraic form:
#
#   SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _TanimotoSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SquaresTotalA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SquaresTotalB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $ProductsTotal = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Divisor = $SquaresTotalA + $SquaresTotalB - $ProductsTotal;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($ProductsTotal/$Divisor);
}
1311
# Calculate Tanimoto similarity coefficient using binary form:
#
#   Nc / ( Na + Nb - Nc )
#
# where Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator evaluates
# to zero.
#
sub _TanimotoSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $CountA + $CountB - $CountCommon;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($CountCommon/$Divisor);
}
1325
# Calculate Tanimoto similarity coefficient using set theoretic form:
#
#   SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _TanimotoSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Divisor = $TotalA + $TotalB - $TotalOfMinimums;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($TotalOfMinimums/$Divisor);
}
1341
1342
# Calculate Soergel distance coefficient between two fingerprint vectors.
#
# Arguments: two fingerprints vectors, an optional calculation mode (AlgebraicForm,
# BinaryForm or SetTheoreticForm, matched case-insensitively; default AlgebraicForm)
# and an optional flag to skip validation of the vectors (default 0).
#
# Returns the coefficient computed by the form-specific helper, or undef for a
# mode that matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SoergelDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  # Fall back to defaults for arguments that were omitted or passed as undef...
  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate (unless skipped) and set up ordered values for the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("SoergelDistanceCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the implementation for the requested form...
  return _SoergelDistanceCoefficientUsingAlgebraicForm($VectorA, $VectorB) if $Mode =~ /^AlgebraicForm$/i;
  return _SoergelDistanceCoefficientUsingBinaryForm($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
  return _SoergelDistanceCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if $Mode =~ /^SetTheoreticForm$/i;

  return undef;
}
1371
# Calculate Soergel distance coefficient using algebraic form:
#
#   SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _SoergelDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $TotalOfAbsDifferences = _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);
  my $TotalOfMaximums = _GetSumOfMaximumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  # Avoid division by zero...
  return 0 unless $TotalOfMaximums;

  return ($TotalOfAbsDifferences/$TotalOfMaximums);
}
1386
# Calculate Soergel distance coefficient using binary form:
#
#   ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
#
# where Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator evaluates
# to zero.
#
sub _SoergelDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Dividend = $CountA + $CountB - 2 * $CountCommon;
  my $Divisor = $CountA + $CountB - $CountCommon;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($Dividend/$Divisor);
}
1400
# Calculate Soergel distance coefficient using set theoretic form:
#
#   ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
#     / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _SoergelDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Dividend = $TotalA + $TotalB - 2 * $TotalOfMinimums;
  my $Divisor = $TotalA + $TotalB - $TotalOfMinimums;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($Dividend/$Divisor);
}
1416
# Validate and process fingerprints vectors for similarity calculations...
#
# Arguments: error message prefix used in any croak message, the two fingerprints
# vectors, calculation mode (default AlgebraicForm) and skip-values-check flag
# (default 0). Validation is bypassed when the flag is true; processing of the
# vectors is always performed.
#
sub _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  unless ($SkipCheck) {
    _ValidateFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $VectorA, $VectorB, $Mode);
  }
  _ProcessFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $VectorA, $VectorB, $Mode);
}
1430
# Make sure fingerprint vectors are good for performing similarity/distance calculation...
#
# Arguments: error message prefix, the two fingerprints vectors and the calculation
# mode. Returns nothing useful; croaks with a message built from ClassName and the
# error message prefix when any of the following checks fail:
#
#   . Both arguments are fingerprints vectors
#   . Both vectors have the same type
#   . Calculation mode is AlgebraicForm, BinaryForm or SetTheoreticForm
#   . Both vectors contain at least one value
#   . OrderedNumericalValues: both vectors have same number of values
#   . NumericalValues: each vector has value IDs, one per value
#   . AlphaNumericalValues: neither vector has value IDs
#   . Vector type is one of the three types listed above
#
sub _ValidateFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode) = @_;

  # Make sure both are fingerprint vectors..
  if (!(IsFingerprintsVector($FingerprintsVectorA) && IsFingerprintsVector($FingerprintsVectorB))) {
    croak "Error: ${ClassName}->${ErrorMsg}: Both objects must be fingerprint vectors...";
  }

  # Check types...
  if ($FingerprintsVectorA->{Type} ne $FingerprintsVectorB->{Type}) {
    croak "Error: ${ClassName}->${ErrorMsg}: Type of first fingerprint vector, $FingerprintsVectorA->{Type}, must be same as type of second fingerprint vector, $FingerprintsVectorB->{Type}...";
  }

  # Check calculation mode...
  if ($CalculationMode !~ /^(AlgebraicForm|BinaryForm|SetTheoreticForm)$/i) {
    croak "Error: ${ClassName}->${ErrorMsg}: Specified similarity calculation mode, $CalculationMode, is not valid. Supported values: AlgebraicForm, BinaryForm, and SetTheoreticForm...";
  }

  # Check values and value IDs...
  my($Na, $Nb, $NIDa, $NIDb);
  $Na = $FingerprintsVectorA->GetNumOfValues(); $Nb = $FingerprintsVectorB->GetNumOfValues();
  $NIDa = $FingerprintsVectorA->GetNumOfValueIDs(); $NIDb = $FingerprintsVectorB->GetNumOfValueIDs();

  if ($Na == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
  }
  if ($Nb == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in second fingerprint vector, $Nb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
  }

  # Both vectors are known to have the same type at this point, so checking the
  # type of the first vector is sufficient...
  if ($FingerprintsVectorA->{Type} =~ /^OrderedNumericalValues$/i) {
    if ($Na != $Nb) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be equal to number of values, $Nb, in second fingerprint vector for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^NumericalValues$/i) {
    if ($NIDa == 0) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb == 0) {
      # Report the problem against the second vector, which is the one being checked...
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }

    if ($NIDa != $Na) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be equal to its number of values, $Na, for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb != $Nb) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be equal to its number of values, $Nb, for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^AlphaNumericalValues$/i) {
    if ($NIDa || $NIDb) {
      croak "Error: ${ClassName}->${ErrorMsg}: ValueIDs cann't be specified for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  else {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $FingerprintsVectorA->{Type} is not valid...";
  }
}
1492
# Process fingerprints vectors for similarity calculation by generating vectors
# containing ordered list of values...
#
# Any previously stored OrderedValuesRef and BitVector entries are cleared on both
# vectors first. The type of the first vector selects the type-specific processing;
# an unsupported type causes a croak. For BinaryForm calculation mode, the ordered
# values are additionally transformed into bit vectors.
#
sub _ProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $VectorA, $VectorB, $Mode) = @_;

  # Discard results left over from any earlier calculation...
  $VectorA->{OrderedValuesRef} = undef;
  $VectorB->{OrderedValuesRef} = undef;
  $VectorA->{BitVector} = undef;
  $VectorB->{BitVector} = undef;

  my $Type = $VectorA->{Type};

  if ($Type =~ /^OrderedNumericalValues$/i) {
    _ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation($VectorA, $VectorB);
  }
  elsif ($Type =~ /^NumericalValues$/i) {
    _ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation($VectorA, $VectorB);
  }
  elsif ($Type =~ /^AlphaNumericalValues$/i) {
    _ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation($VectorA, $VectorB);
  }
  else {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $VectorA->{Type} is not valid...";
  }

  # Binary form works on bit vectors derived from the ordered values...
  _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation($VectorA, $VectorB) if $Mode =~ /^BinaryForm$/i;
}
1518
# Process fingerprints vectors with ordered numerical values for similarity calculations...
#
# The values are already ordered, so OrderedValuesRef of each vector is simply
# pointed at that vector's own Values array; no copy is made.
#
sub _ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  for my $Vector ($FingerprintsVectorA, $FingerprintsVectorB) {
    $Vector->{OrderedValuesRef} = \@{$Vector->{Values}};
  }
}
1527
# Process fingerprints vectors with numerical values for similarity calculations...
#
# For each vector, values sharing the same value ID are added together. The union
# of value IDs from both vectors is then sorted (default string sort) and used to
# lay out two parallel arrays of values, with 0 used for any ID missing from a
# vector. References to these arrays are stored as OrderedValuesRef in the
# respective vectors.
#
sub _ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my(%SummedValuesA, %SummedValuesB, %SeenValueIDs);

  # Accumulate values by ID for the first and then the second vector...
  for my $Pair ([$FingerprintsVectorA, \%SummedValuesA], [$FingerprintsVectorB, \%SummedValuesB]) {
    my($Vector, $SummedValuesRef) = @{$Pair};

    for my $Position (0 .. $#{$Vector->{ValueIDs}}) {
      my $ID = $Vector->{ValueIDs}[$Position];
      my $Value = $Vector->{Values}[$Position];

      # First occurrence of an ID stores the value as is; later ones are added...
      $SummedValuesRef->{$ID} = exists $SummedValuesRef->{$ID} ? $SummedValuesRef->{$ID} + $Value : $Value;

      $SeenValueIDs{$ID} = 1;
    }
  }

  # Setup ordered values...
  my @SortedValueIDs = sort keys %SeenValueIDs;

  my @OrderedValuesA = map { exists $SummedValuesA{$_} ? $SummedValuesA{$_} : 0 } @SortedValueIDs;
  my @OrderedValuesB = map { exists $SummedValuesB{$_} ? $SummedValuesB{$_} : 0 } @SortedValueIDs;

  $FingerprintsVectorA->{OrderedValuesRef} = \@OrderedValuesA;
  $FingerprintsVectorB->{OrderedValuesRef} = \@OrderedValuesB;
}
1585
# Process fingerprints vectors with alphanumerical values for similarity calculations...
#
# For each vector, the number of occurrences of each distinct value is counted. The
# union of distinct values from both vectors is then sorted (default string sort)
# and used to lay out two parallel arrays of counts, with 0 used for any value
# missing from a vector. References to these arrays are stored as OrderedValuesRef
# in the respective vectors.
#
sub _ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my(%CountsInA, %CountsInB, %SeenValues);

  # Count occurrences in first vector...
  for my $Item (@{$FingerprintsVectorA->{Values}}) {
    $CountsInA{$Item}++;
    $SeenValues{$Item} = 1;
  }

  # Count occurrences in second vector...
  for my $Item (@{$FingerprintsVectorB->{Values}}) {
    $CountsInB{$Item}++;
    $SeenValues{$Item} = 1;
  }

  # Setup ordered values...
  my @SortedValues = sort keys %SeenValues;

  my @OrderedValuesA = map { exists $CountsInA{$_} ? $CountsInA{$_} : 0 } @SortedValues;
  my @OrderedValuesB = map { exists $CountsInB{$_} ? $CountsInB{$_} : 0 } @SortedValues;

  $FingerprintsVectorA->{OrderedValuesRef} = \@OrderedValuesA;
  $FingerprintsVectorB->{OrderedValuesRef} = \@OrderedValuesB;

}
1640
# Build presence/absence bit vectors from the final ordered values of two vectors...
#
# A BitVector sized to the number of ordered values in vector A is created for each
# vector and stored under its BitVector key. Bit i is set whenever the ordered value
# at position i is true in Perl's sense; no other bits are touched here.
#
# NOTE(review): both bit vectors are sized from vector A alone, which assumes the two
# OrderedValuesRef arrays have the same length - confirm against the callers.
#
sub _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation {
    my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

    my $NumOfBits = scalar @{$FingerprintsVectorA->{OrderedValuesRef}};

    $FingerprintsVectorA->{BitVector} = BitVector->new($NumOfBits);
    $FingerprintsVectorB->{BitVector} = BitVector->new($NumOfBits);

    # Passed as second argument to SetBit; presumably skips its bit number validation - confirm...
    my $SkipCheck = 1;

    for my $BitNum (0 .. ($NumOfBits - 1)) {
        $FingerprintsVectorA->{BitVector}->SetBit($BitNum, $SkipCheck) if $FingerprintsVectorA->{OrderedValuesRef}[$BitNum];
        $FingerprintsVectorB->{BitVector}->SetBit($BitNum, $SkipCheck) if $FingerprintsVectorB->{OrderedValuesRef}[$BitNum];
    }
}
1664
# Sum of the ordered values of a fingerprints vector...
#
# Hands the array reference stored under the vector's OrderedValuesRef key to
# StatisticsUtil::Sum and returns whatever that function returns.
#
sub _GetSumOfFingerprintsOrderedValues {
    my($FingerprintVector) = @_;
    my $OrderedValuesRef = $FingerprintVector->{OrderedValuesRef};

    return StatisticsUtil::Sum($OrderedValuesRef);
}
1672
# Sum of squares of the ordered values of a fingerprints vector...
#
# Hands the array reference stored under the vector's OrderedValuesRef key to
# StatisticsUtil::SumOfSquares and returns whatever that function returns.
#
sub _GetSumOfSquaresOfFingerprintsOrderedValues {
    my($FingerprintVector) = @_;
    my $OrderedValuesRef = $FingerprintVector->{OrderedValuesRef};

    return StatisticsUtil::SumOfSquares($OrderedValuesRef);
}
1680
# Sum of products of corresponding ordered values of two vectors: SUM ( Xai * Xbi )...
#
# Positions are taken from vector A's ordered values; the value at the same position in
# vector B is used as the other factor. Returns 0 when vector A has no ordered values.
#
sub _GetSumOfProductOfFingerprintsOrderedValues {
    my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
    my $ValuesOfARef = $FingerprintsVectorA->{OrderedValuesRef};
    my $ValuesOfBRef = $FingerprintsVectorB->{OrderedValuesRef};

    my $RunningTotal = 0;
    for my $Position (0 .. $#{$ValuesOfARef}) {
        $RunningTotal += $ValuesOfARef->[$Position] * $ValuesOfBRef->[$Position];
    }

    return $RunningTotal;
}
1693
# Sum of absolute differences of corresponding ordered values: SUM ( ABS ( Xai - Xbi ) )...
#
# Positions are taken from vector A's ordered values; the value at the same position in
# vector B is subtracted from it. Returns 0 when vector A has no ordered values.
#
sub _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues {
    my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
    my $ValuesOfARef = $FingerprintsVectorA->{OrderedValuesRef};
    my $ValuesOfBRef = $FingerprintsVectorB->{OrderedValuesRef};

    my $RunningTotal = 0;
    for my $Position (0 .. $#{$ValuesOfARef}) {
        $RunningTotal += abs($ValuesOfARef->[$Position] - $ValuesOfBRef->[$Position]);
    }

    return $RunningTotal;
}
1706
# Sum of squared differences of corresponding ordered values: SUM ( ( Xai - Xbi ) ** 2 )...
#
# Positions are taken from vector A's ordered values; the value at the same position in
# vector B is subtracted from it before squaring. Returns 0 when vector A has no ordered
# values.
#
sub _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues {
    my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
    my $ValuesOfARef = $FingerprintsVectorA->{OrderedValuesRef};
    my $ValuesOfBRef = $FingerprintsVectorB->{OrderedValuesRef};

    my $RunningTotal = 0;
    for my $Position (0 .. $#{$ValuesOfARef}) {
        $RunningTotal += ($ValuesOfARef->[$Position] - $ValuesOfBRef->[$Position]) ** 2;
    }

    return $RunningTotal;
}
1719
# Sum, over corresponding ordered values, of MathUtil::min ( Xai, Xbi )...
#
# Positions are taken from vector A's ordered values; each pair of values at the same
# position in A and B is passed to MathUtil::min and the results are added up. Returns
# 0 when vector A has no ordered values.
#
sub _GetSumOfMinimumOfFingerprintsOrderdedValues {
    my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
    my $ValuesOfARef = $FingerprintsVectorA->{OrderedValuesRef};
    my $ValuesOfBRef = $FingerprintsVectorB->{OrderedValuesRef};

    my $RunningTotal = 0;
    for my $Position (0 .. $#{$ValuesOfARef}) {
        $RunningTotal += MathUtil::min($ValuesOfARef->[$Position], $ValuesOfBRef->[$Position]);
    }

    return $RunningTotal;
}
1732
# Sum, over corresponding ordered values, of MathUtil::max ( Xai, Xbi )...
#
# Positions are taken from vector A's ordered values; each pair of values at the same
# position in A and B is passed to MathUtil::max and the results are added up. Returns
# 0 when vector A has no ordered values.
#
sub _GetSumOfMaximumOfFingerprintsOrderdedValues {
    my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
    my $ValuesOfARef = $FingerprintsVectorA->{OrderedValuesRef};
    my $ValuesOfBRef = $FingerprintsVectorB->{OrderedValuesRef};

    my $RunningTotal = 0;
    for my $Position (0 .. $#{$ValuesOfARef}) {
        $RunningTotal += MathUtil::max($ValuesOfARef->[$Position], $ValuesOfBRef->[$Position]);
    }

    return $RunningTotal;
}
1745
# Count set bits in A, in B, and in both, for BinaryForm calculations...
#
# Reads the objects stored under the BitVector key of each fingerprints vector and
# returns the list (Na, Nb, Nc):
#
#   Na = GetNumOfSetBits() of A's bit vector
#   Nb = GetNumOfSetBits() of B's bit vector
#   Nc = GetNumOfSetBits() of the result of applying & to the two bit vectors
#
# NOTE(review): relies on & being defined for the stored bit vector objects, presumably
# through operator overloading in BitVector - confirm.
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
    my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

    my $BitsOfA = $FingerprintsVectorA->{BitVector};
    my $BitsOfB = $FingerprintsVectorB->{BitVector};

    my $Na = $BitsOfA->GetNumOfSetBits();
    my $Nb = $BitsOfB->GetNumOfSetBits();
    my $Nc = ($BitsOfA & $BitsOfB)->GetNumOfSetBits();

    return ($Na, $Nb, $Nc);
}
1768
# List the names of the supported distance coefficients...
#
# Takes no arguments (empty prototype) and returns the contents of the file-level
# @DistanceCoefficients array, in the order in which that array was defined.
#
sub GetSupportedDistanceCoefficients () {
    return @DistanceCoefficients;
}
1775
# List the names of the supported similarity coefficients...
#
# Takes no arguments (empty prototype) and returns the contents of the file-level
# @SimilarityCoefficients array, in the order in which that array was defined.
#
sub GetSupportedSimilarityCoefficients () {
    return @SimilarityCoefficients;
}
1782
# List the names of all supported distance and similarity coefficients...
#
# Takes no arguments (empty prototype). The file-level @DistanceCoefficients and
# @SimilarityCoefficients arrays are combined into one list, which is returned in
# default (string) sort order; the file-level arrays themselves are not modified.
#
sub GetSupportedDistanceAndSimilarityCoefficients () {
    my @AllCoefficients = (@DistanceCoefficients, @SimilarityCoefficients);

    return sort @AllCoefficients;
}
1794
# Public check: is the specified object a fingerprints vector?
#
# Takes exactly one scalar argument (single scalar prototype) and returns the result
# of _IsFingerprintsVector for it.
#
sub IsFingerprintsVector ($) {
    my($Candidate) = @_;

    return _IsFingerprintsVector($Candidate);
}
1801
# Internal check: is the specified object a fingerprints vector?
#
# Returns 1 when the argument is a blessed reference whose class is, or inherits from,
# the class named by the file-level $ClassName; returns 0 for everything else, including
# undef, plain scalars and unblessed references.
#
sub _IsFingerprintsVector {
    my($Candidate) = @_;

    # Only blessed references can be asked about their class...
    return 0 unless Scalar::Util::blessed($Candidate);

    return $Candidate->isa($ClassName) ? 1 : 0;
}
1808
# Return a string describing the type, values and value IDs of a fingerprints vector...
#
# The string has the form:
#
#   Type: <Type>; NumOfValues: <N>[; NumOfNonZeroValues: <K>]; Values: <...>;
#   NumOfValueIDs: <M>; ValueIDs: <...>
#
# NumOfNonZeroValues is included only when Type matches OrderedNumericalValues or
# NumericalValues (case insensitive). An empty list of values or value IDs is shown
# as <None>.
#
# At most 500 values and 500 value IDs are written out. A list holding more than that
# is cut after its first 500 items, gets " ..." appended, and is labeled
# "(Truncated after 500)". Values and value IDs are labeled independently, each based
# on its own count, so a long list of values next to a short or empty list of value IDs
# no longer marks the value IDs as truncated. A list of exactly 500 items is written
# out completely and is not labeled as truncated.
#
sub StringifyFingerprintsVector {
    my($This) = @_;
    my $MaxValuesToStringify = 500;

    # Join up to $MaxValuesToStringify leading items of a list and report whether any
    # items had to be left out: returns (String, WasTruncated)...
    my $StringifyItems = sub {
        my($ItemsRef, $NumOfItems) = @_;

        return ('None', 0) if !$NumOfItems;
        return (join(' ', @{$ItemsRef}), 0) if $NumOfItems <= $MaxValuesToStringify;

        return (join(' ', @{$ItemsRef}[0 .. ($MaxValuesToStringify - 1)]) . " ...", 1);
    };

    my $NumOfValues = $This->GetNumOfValues();
    my($ValuesString, $ValuesTruncated) = $StringifyItems->($This->{Values}, $NumOfValues);

    my $NumOfValueIDs = $This->GetNumOfValueIDs();
    my($ValueIDsString, $ValueIDsTruncated) = $StringifyItems->($This->{ValueIDs}, $NumOfValueIDs);

    my $FingerprintsVectorString = "Type: $This->{Type}; NumOfValues: $NumOfValues";
    if ($This->{Type} =~ /^(OrderedNumericalValues|NumericalValues)$/i) {
        my $NumOfNonZeroValues = $This->GetNumOfNonZeroValues();
        $FingerprintsVectorString .= "; NumOfNonZeroValues: $NumOfNonZeroValues";
    }

    # Label each list according to its own truncation status...
    my $TruncatedSuffix = " (Truncated after $MaxValuesToStringify)";
    my $ValuesLabel = 'Values' . ($ValuesTruncated ? $TruncatedSuffix : '');
    my $ValueIDsLabel = 'ValueIDs' . ($ValueIDsTruncated ? $TruncatedSuffix : '');

    $FingerprintsVectorString .= "; ${ValuesLabel}: <$ValuesString>; NumOfValueIDs: $NumOfValueIDs; ${ValueIDsLabel}: <$ValueIDsString>";

    return $FingerprintsVectorString;
}
1865
1866 1;
1867
1868 __END__
1869
1870 =head1 NAME
1871
1872 FingerprintsVector
1873
1874 =head1 SYNOPSIS
1875
1876 use Fingerprints::FingerprintsVector;
1877
1878 use Fingerprints::FingerprintsVector qw(:all);
1879
1880 =head1 DESCRIPTION
1881
1882 B<FingerprintsVector> class provides the following methods:
1883
1884 new, AddValueIDs, AddValues, CityBlockDistanceCoefficient,
1885 CosineSimilarityCoefficient, CzekanowskiSimilarityCoefficient,
1886 DiceSimilarityCoefficient, EuclideanDistanceCoefficient, GetDescription,
1887 GetFingerprintsVectorString, GetID, GetIDsAndValuesPairsString,
1888 GetIDsAndValuesString, GetNumOfNonZeroValues, GetNumOfValueIDs, GetNumOfValues,
1889 GetSupportedDistanceAndSimilarityCoefficients, GetSupportedDistanceCoefficients,
1890 GetSupportedSimilarityCoefficients, GetType, GetValue, GetValueID, GetValueIDs,
1891 GetValueIDsString, GetValues, GetValuesAndIDsPairsString, GetValuesAndIDsString,
1892 GetValuesString, GetVectorType, HammingDistanceCoefficient, IsFingerprintsVector,
1893 JaccardSimilarityCoefficient, ManhattanDistanceCoefficient,
1894 NewFromIDsAndValuesPairsString, NewFromIDsAndValuesString,
1895 NewFromValuesAndIDsPairsString, NewFromValuesAndIDsString, NewFromValuesString,
1896 OchiaiSimilarityCoefficient, SetDescription, SetID, SetType, SetValue, SetValueID,
1897 SetValueIDs, SetValues, SetVectorType, SoergelDistanceCoefficient,
1898 SorensonSimilarityCoefficient, StringifyFingerprintsVector,
1899 TanimotoSimilarityCoefficient
1900
1901 The methods available to create fingerprints vector from strings and to calculate similarity
1902 and distance coefficients between two vectors can also be invoked as class functions.
1903
1904 B<FingerprintsVector> class provides support to perform comparison between vectors
1905 containing three different types of values:
1906
1907 Type I: OrderedNumericalValues
1908
1909 o Size of two vectors are same
1910 o Vectors contain real values in a specific order. For example: MACCS keys
1911 count, Topological pharmacophore atom pairs and so on.
1912
1913 Type II: UnorderedNumericalValues
1914
1915 o Size of two vectors might not be same
1916 o Vectors contain unordered real value identified by value IDs. For example:
1917 Topological atom pairs, Topological atom torsions and so on
1918
1919 Type III: AlphaNumericalValues
1920
1921 o Size of two vectors might not be same
1922 o Vectors contain unordered alphanumerical values. For example: Extended
1923 connectivity fingerprints, atom neighborhood fingerprints.
1924
1925 Before performing similarity or distance calculations between vectors containing UnorderedNumericalValues
1926 or AlphaNumericalValues, the vectors are transformed into vectors containing unique OrderedNumericalValues
1927 using value IDs for UnorderedNumericalValues and values themselves for AlphaNumericalValues.
1928
1929 Three forms of similarity and distance calculation between two vectors, specified using B<CalculationMode>
1930 option, are supported: I<AlgebraicForm, BinaryForm or SetTheoreticForm>.
1931
1932 For I<BinaryForm>, the ordered list of processed final vector values containing the value or
1933 count of each unique value type is simply converted into a binary vector containing 1s and 0s
1934 corresponding to presence or absence of values before calculating similarity or distance between
1935 two vectors.
1936
1937 For two fingerprint vectors A and B of same size containing OrderedNumericalValues, let:
1938
1939 N = Number of values in A or B
1940
1941 Xa = Values of vector A
1942 Xb = Values of vector B
1943
1944 Xai = Value of ith element in A
1945 Xbi = Value of ith element in B
1946
1947 SUM = Sum of i over N values
1948
1949 For SetTheoreticForm of calculation between two vectors, let:
1950
1951 SetIntersectionXaXb = SUM ( MIN ( Xai, Xbi ) )
1952 SetDifferenceXaXb = SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) )
1953
1954 For BinaryForm of calculation between two vectors, let:
1955
1956 Na = Number of bits set to "1" in A = SUM ( Xai )
1957 Nb = Number of bits set to "1" in B = SUM ( Xbi )
1958 Nc = Number of bits set to "1" in both A and B = SUM ( Xai * Xbi )
1959 Nd = Number of bits set to "0" in both A and B
1960 = SUM ( 1 - Xai - Xbi + Xai * Xbi)
1961
1962 N = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
1963
1964 Additionally, for BinaryForm various values also correspond to:
1965
1966 Na = | Xa |
1967 Nb = | Xb |
1968 Nc = | SetIntersectionXaXb |
1969 Nd = N - | SetDifferenceXaXb |
1970
1971 | SetDifferenceXaXb | = N - Nd = Na + Nb - Nc + Nd - Nd = Na + Nb - Nc
1972 = | Xa | + | Xb | - | SetIntersectionXaXb |
1973
1974 Various similarity and distance coefficients [ Ref 40, Ref 62, Ref 64 ] for a pair of vectors A and B
1975 in I<AlgebraicForm, BinaryForm and SetTheoreticForm> are defined as follows:
1976
1977 B<CityBlockDistance>: ( same as HammingDistance and ManhattanDistance)
1978
1979 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) )
1980
1981 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
1982
1983 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
1984
1985 B<CosineSimilarity>: ( same as OchiaiSimilarityCoefficient)
1986
1987 I<AlgebraicForm>: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
1988
1989 I<BinaryForm>: Nc / SQRT ( Na * Nb)
1990
1991 I<SetTheoreticForm>: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
1992
1993 B<CzekanowskiSimilarity>: ( same as DiceSimilarity and SorensonSimilarity)
1994
1995 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
1996
1997 I<BinaryForm>: 2 * Nc / ( Na + Nb )
1998
1999 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
2000
2001 B<DiceSimilarity>: ( same as CzekanowskiSimilarity and SorensonSimilarity)
2002
2003 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
2004
2005 I<BinaryForm>: 2 * Nc / ( Na + Nb )
2006
2007 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
2008
2009 B<EuclideanDistance>:
2010
2011 I<AlgebraicForm>: SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) )
2012
2013 I<BinaryForm>: SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc )
2014
2015 I<SetTheoreticForm>: SQRT ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) = SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
2016
2017 B<HammingDistance>: ( same as CityBlockDistance and ManhattanDistance)
2018
2019 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) )
2020
2021 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
2022
2023 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
2024
2025 B<JaccardSimilarity>: ( same as TanimotoSimilarity)
2026
2027 I<AlgebraicForm>: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
2028
2029 I<BinaryForm>: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
2030
2031 I<SetTheoreticForm>: | SetIntersectionXaXb | / | SetDifferenceXaXb | = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
2032
2033 B<ManhattanDistance>: ( same as CityBlockDistance and HammingDistance)
2034
2035 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) )
2036
2037 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
2038
2039 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
2040
2041 B<OchiaiSimilarity>: ( same as CosineSimilarity)
2042
2043 I<AlgebraicForm>: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
2044
2045 I<BinaryForm>: Nc / SQRT ( Na * Nb)
2046
2047 I<SetTheoreticForm>: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
2048
2049 B<SorensonSimilarity>: ( same as CzekanowskiSimilarity and DiceSimilarity)
2050
2051 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
2052
2053 I<BinaryForm>: 2 * Nc / ( Na + Nb )
2054
2055 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
2056
2057 B<SoergelDistance>:
2058
2059 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
2060
2061 I<BinaryForm>: 1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
2062
2063 I<SetTheoreticForm>: ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) / | SetDifferenceXaXb | = ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
2064
2065 B<TanimotoSimilarity>: ( same as JaccardSimilarity)
2066
2067 I<AlgebraicForm>: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
2068
2069 I<BinaryForm>: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
2070
2071 I<SetTheoreticForm>: | SetIntersectionXaXb | / | SetDifferenceXaXb | = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
2072
2073 =head2 METHODS
2074
2075 =over 4
2076
2077 =item B<new>
2078
2079 $FPVector = new Fingerprints::FingerprintsVector(%NamesAndValues);
2080
2081 Using specified I<FingerprintsVector> property names and values hash, B<new> method creates
2082 a new object and returns a reference to the newly created B<FingerprintsVector>
2083 object. By default, the following properties are initialized:
2084
2085 Type = ''
2086 @{Values} = ()
2087 @{ValueIDs} = ()
2088
2089 Examples:
2090
2091 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'OrderedNumericalValues',
2092 'Values' => [1, 2, 3, 4]);
2093 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'NumericalValues',
2094 'Values' => [10, 22, 33, 44],
2095 'ValueIDs' => ['ID1', 'ID2', 'ID3', 'ID4']);
2096 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'AlphaNumericalValues',
2097 'Values' => ['a1', 2, 'a3', 4]);
2098
2099 =item B<AddValueIDs>
2100
2101 $FingerprintsVector->AddValueIDs($ValueIDsRef);
2102 $FingerprintsVector->AddValueIDs(@ValueIDs);
2103
2104 Adds specified I<ValueIDs> to I<FingerprintsVector> and returns I<FingerprintsVector>.
2105
2106 =item B<AddValues>
2107
2108 $FingerprintsVector->AddValues($ValuesRef);
2109 $FingerprintsVector->AddValues(@Values);
2110 $FingerprintsVector->AddValues($Vector);
2111
2112 Adds specified I<Values> to I<FingerprintsVector> and returns I<FingerprintsVector>.
2113
2114 =item B<CityBlockDistanceCoefficient>
2115
2116 $Value = $FingerprintsVector->CityBlockDistanceCoefficient(
2117 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2118 $Value = Fingerprints::FingerprintsVector::CityBlockDistanceCoefficient(
2119 $FingerprintsVectorA, $FingerprintVectorB,
2120 [$CalculationMode, $SkipValuesCheck]);
2121
2122 Returns value of I<CityBlock> distance coefficient between two I<FingerprintsVectors> using
2123 optionally specified I<CalculationMode> and optional checking of vector values.
2124
2125 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2126 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2127
2128 =item B<CosineSimilarityCoefficient>
2129
2130 $Value = $FingerprintsVector->CosineSimilarityCoefficient(
2131 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2132 $Value = Fingerprints::FingerprintsVector::CosineSimilarityCoefficient(
2133 $FingerprintsVectorA, $FingerprintVectorB,
2134 [$CalculationMode, $SkipValuesCheck]);
2135
2136 Returns value of I<Cosine> similarity coefficient between two I<FingerprintsVectors> using
2137 optionally specified I<CalculationMode> and optional checking of vector values.
2138
2139 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2140 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2141
2142 =item B<CzekanowskiSimilarityCoefficient>
2143
2144 $Value = $FingerprintsVector->CzekanowskiSimilarityCoefficient(
2145 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2146 $Value = Fingerprints::FingerprintsVector::CzekanowskiSimilarityCoefficient(
2147 $FingerprintsVectorA, $FingerprintVectorB,
2148 [$CalculationMode, $SkipValuesCheck]);
2149
2150 Returns value of I<Czekanowski> similarity coefficient between two I<FingerprintsVectors> using
2151 optionally specified I<CalculationMode> and optional checking of vector values.
2152
2153 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2154 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2155
2156 =item B<DiceSimilarityCoefficient>
2157
2158 $Value = $FingerprintsVector->DiceSimilarityCoefficient(
2159 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2160 $Value = Fingerprints::FingerprintsVector::DiceSimilarityCoefficient(
2161 $FingerprintsVectorA, $FingerprintVectorB,
2162 [$CalculationMode, $SkipValuesCheck]);
2163
2164 Returns value of I<Dice> similarity coefficient between two I<FingerprintsVectors> using
2165 optionally specified I<CalculationMode> and optional checking of vector values.
2166
2167 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2168 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2169
2170 =item B<EuclideanDistanceCoefficient>
2171
2172 $Value = $FingerprintsVector->EuclideanDistanceCoefficient(
2173 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2174 $Value = Fingerprints::FingerprintsVector::EuclideanDistanceCoefficient(
2175 $FingerprintsVectorA, $FingerprintVectorB,
2176 [$CalculationMode, $SkipValuesCheck]);
2177
2178 Returns value of I<Euclidean> distance coefficient between two I<FingerprintsVectors> using
2179 optionally specified I<CalculationMode> and optional checking of vector values.
2180
2181 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2182 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2183
2184 =item B<GetDescription>
2185
2186 $Description = $FingerprintsVector->GetDescription();
2187
2188 Returns a string containing description of fingerprints vector.
2189
2190 =item B<GetFingerprintsVectorString>
2191
2192 $FPString = $FingerprintsVector->GetFingerprintsVectorString($Format);
2193
2194 Returns a B<FingerprintsString> containing vector values and/or IDs in I<FingerprintsVector>
2195 corresponding to specified I<Format>.
2196
2197 Possible I<Format> values: I<IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString,
2198 IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs,
2199 ValueIDsString, ValueIDs, ValuesString, or Values>.
2200
2201 =item B<GetID>
2202
2203 $ID = $FingerprintsVector->GetID();
2204
2205 Returns I<ID> of I<FingerprintsVector>.
2206
2207 =item B<GetVectorType>
2208
2209 $VectorType = $FingerprintsVector->GetVectorType();
2210
2211 Returns I<VectorType> of I<FingerprintsVector>.
2212
2213 =item B<GetIDsAndValuesPairsString>
2214
2215 $IDsValuesPairsString = $FingerprintsVector->GetIDsAndValuesPairsString();
2216
2217 Returns I<FingerprintsVector> value IDs and values as space delimited ID/value pair
2218 string.
2219
2220 =item B<GetIDsAndValuesString>
2221
2222 $IDsValuesString = $FingerprintsVector->GetIDsAndValuesString();
2223
2224 Returns I<FingerprintsVector> value IDs and values as string containing space delimited IDs followed by
2225 values with semicolon as IDs and values delimiter.
2226
2227 =item B<GetNumOfNonZeroValues>
2228
2229 $NumOfNonZeroValues = $FingerprintsVector->GetNumOfNonZeroValues();
2230
2231 Returns number of non-zero values in I<FingerprintsVector>.
2232
2233 =item B<GetNumOfValueIDs>
2234
2235 $NumOfValueIDs = $FingerprintsVector->GetNumOfValueIDs();
2236
2237 Returns number of value IDs in I<FingerprintsVector>.
2238
2239 =item B<GetNumOfValues>
2240
2241 $NumOfValues = $FingerprintsVector->GetNumOfValues();
2242
2243 Returns number of values in I<FingerprintsVector>.
2244
2245 =item B<GetSupportedDistanceAndSimilarityCoefficients>
2246
2247 @SupportedDistanceAndSimilarityCoefficientsReturn =
2248 Fingerprints::FingerprintsVector::GetSupportedDistanceAndSimilarityCoefficients();
2249
2250 Returns an array containing names of supported distance and similarity coefficients.
2251
2252 =item B<GetSupportedDistanceCoefficients>
2253
2254 @SupportedDistanceCoefficientsReturn =
2255 Fingerprints::FingerprintsVector::GetSupportedDistanceCoefficients();
2256
2257 Returns an array containing names of supported distance coefficients.
2258
2259 =item B<GetSupportedSimilarityCoefficients>
2260
2261 @SupportedSimilarityCoefficientsReturn =
2262 Fingerprints::FingerprintsVector::GetSupportedSimilarityCoefficients();
2263
2264 Returns an array containing names of supported similarity coefficients.
2265
2266 =item B<GetType>
2267
2268 $VectorType = $FingerprintsVector->GetType();
2269
2270 Returns I<FingerprintsVector> vector type.
2271
2272 =item B<GetValue>
2273
2274 $Value = $FingerprintsVector->GetValue($Index);
2275
2276 Returns fingerprints vector B<Value> specified using I<Index> starting at 0.
2277
2278 =item B<GetValueID>
2279
2280 $ValueID = $FingerprintsVector->GetValueID($Index);
2281
2282 Returns fingerprints vector B<ValueID> specified using I<Index> starting at 0.
2283
2284 =item B<GetValueIDs>
2285
2286 $ValueIDs = $FingerprintsVector->GetValueIDs();
2287 @ValueIDs = $FingerprintsVector->GetValueIDs();
2288
2289 Returns fingerprints vector B<ValueIDs> as an array or reference to an array.
2290
2291 =item B<GetValueIDsString>
2292
2293 $ValueIDsString = $FingerprintsVector->GetValueIDsString();
2294
2295 Returns fingerprints vector B<ValueIDsString> with value IDs delimited by space.
2296
2297 =item B<GetValues>
2298
2299 $ValuesRef = $FingerprintsVector->GetValues();
2300 @Values = $FingerprintsVector->GetValues();
2301
2302 Returns fingerprints vector B<Values> as an array or reference to an array.
2303
2304 =item B<GetValuesAndIDsPairsString>
2305
2306 $ValuesIDsPairsString = $FingerprintsVector->GetValuesAndIDsPairsString();
2307
2308 Returns I<FingerprintsVector> values and value IDs as space delimited value/ID pair
2309 string.
2310
2311 =item B<GetValuesAndIDsString>
2312
2313 $ValuesIDsString = $FingerprintsVector->GetValuesAndIDsString();
2314
2315 Returns I<FingerprintsVector> values and value IDs as string containing space delimited values followed by
2316 IDs with semicolon as values and IDs delimiter.
2317
2318 =item B<GetValuesString>
2319
2320 $Return = $FingerprintsVector->GetValuesString();
2321
2322 Returns I<FingerprintsVector> values as space delimited string.
2323
2324 =item B<HammingDistanceCoefficient>
2325
2326 $Value = $FingerprintsVector->HammingDistanceCoefficient(
2327 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2328 $Value = Fingerprints::FingerprintsVector::HammingDistanceCoefficient(
2329 $FingerprintsVectorA, $FingerprintVectorB,
2330 [$CalculationMode, $SkipValuesCheck]);
2331
2332 Returns value of I<Hamming> distance coefficient between two I<FingerprintsVectors> using
2333 optionally specified I<CalculationMode> and optional checking of vector values.
2334
2335 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2336 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2337
2338 =item B<IsFingerprintsVector>
2339
2340 $Status = Fingerprints::FingerprintsVector::IsFingerprintsVector($Object);
2341
2342 Returns 1 or 0 based on whether I<Object> is a I<FingerprintsVector>.
2343
2344 =item B<JaccardSimilarityCoefficient>
2345
2346 $Value = $FingerprintsVector->JaccardSimilarityCoefficient(
2347 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2348 $Value = Fingerprints::FingerprintsVector::JaccardSimilarityCoefficient(
2349 $FingerprintsVectorA, $FingerprintVectorB,
2350 [$CalculationMode, $SkipValuesCheck]);
2351
2352 Returns value of I<Jaccard> similarity coefficient between two I<FingerprintsVectors> using
2353 optionally specified I<CalculationMode> and optional checking of vector values.
2354
2355 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2356 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2357
2358 =item B<ManhattanDistanceCoefficient>
2359
2360 $Value = $FingerprintsVector->ManhattanDistanceCoefficient(
2361 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2362 $Value = Fingerprints::FingerprintsVector::ManhattanDistanceCoefficient(
2363 $FingerprintsVectorA, $FingerprintVectorB,
2364 [$CalculationMode, $SkipValuesCheck]);
2365
2366 Returns value of I<Manhattan> distance coefficient between two I<FingerprintsVectors> using
2367 optionally specified I<CalculationMode> and optional checking of vector values.
2368
2369 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2370 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2371
2372 =item B<NewFromIDsAndValuesPairsString>
2373
2374 $FingerprintsVector = $FingerprintsVector->NewFromIDsAndValuesPairsString(
2375 $ValuesType, $IDsAndValuesPairsString);
2376 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesPairsString(
2377 $ValuesType, $IDsAndValuesPairsString);
2378
2379 Creates a new I<FingerprintsVector> of I<ValuesType> using I<IDsAndValuesPairsString> containing
2380 space delimited value IDs and values pairs and returns new B<FingerprintsVector> object.
2381 Possible I<ValuesType> values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>.
2382
2383 =item B<NewFromIDsAndValuesString>
2384
2385 $FingerprintsVector = $FingerprintsVector->NewFromIDsAndValuesString(
2386 $ValuesType, $IDsAndValuesString);
2387 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesString(
2388 $ValuesType, $IDsAndValuesString);
2389
2390 Creates a new I<FingerprintsVector> of I<ValuesType> using I<IDsAndValuesString> containing
2391 semicolon delimited value IDs string followed by values strings and returns new B<FingerprintsVector>
2392 object. The values within value and value IDs string are delimited by spaces. Possible I<ValuesType>
2393 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>.
2394
2395 =item B<NewFromValuesAndIDsPairsString>
2396
2397 $FingerprintsVector = $FingerprintsVector->NewFromValuesAndIDsPairsString(
2398 $ValuesType, $ValuesAndIDsPairsString);
2399 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsPairsString(
2400 $ValuesType, $ValuesAndIDsPairsString);
2401
2402 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesAndIDsPairsString> containing
2403 space delimited value and value IDs pairs and returns new B<FingerprintsVector> object.
2404 Possible I<ValuesType> values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>.
2405
2406 =item B<NewFromValuesAndIDsString>
2407
2408 $FingerprintsVector = $FingerprintsVector->NewFromValuesAndIDsString(
2409 $ValuesType, $ValuesAndIDsString);
2410 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsString(
2411 $ValuesType, $ValuesAndIDsString);
2412
2413 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesAndIDsString> containing
2414 semicolon delimited values string followed by value IDs string and returns new B<FingerprintsVector>
2415 object. The values within values and value IDs strings are delimited by spaces. Possible I<ValuesType>
2416 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>.
2417
2418 =item B<NewFromValuesString>
2419
2420 $FingerprintsVector = $FingerprintsVector->NewFromValuesString(
2421 $ValuesType, $ValuesString);
2422 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesString(
2423 $ValuesType, $ValuesString);
2424
2425 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesString> containing space
2426 delimited values string and returns new B<FingerprintsVector> object. The values within the values
2427 string are delimited by spaces. Possible I<ValuesType> values: I<OrderedNumericalValues,
2428 NumericalValues, or AlphaNumericalValues>.
2429
2430 =item B<OchiaiSimilarityCoefficient>
2431
2432 $Value = $FingerprintsVector->OchiaiSimilarityCoefficient(
2433 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2434 $Value = Fingerprints::FingerprintsVector::OchiaiSimilarityCoefficient(
2435 $FingerprintsVectorA, $FingerprintVectorB,
2436 [$CalculationMode, $SkipValuesCheck]);
2437
2438 Returns value of I<Ochiai> similarity coefficient between two I<FingerprintsVectors> using
2439 optionally specified I<CalculationMode> and optional checking of vector values.
2440
2441 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2442 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2443
2444 =item B<SetDescription>
2445
2446 $FingerprintsVector->SetDescription($Description);
2447
2448 Sets I<Description> of fingerprints vector and returns I<FingerprintsVector>.
2449
2450 =item B<SetID>
2451
2452 $FingerprintsVector->SetID($ID);
2453
2454 Sets I<ID> of fingerprints vector and returns I<FingerprintsVector>.
2455
2456 =item B<SetVectorType>
2457
2458 $FingerprintsVector->SetVectorType($VectorType);
2459
2460 Sets I<VectorType> of fingerprints vector and returns I<FingerprintsVector>.
2461
2462 =item B<SetType>
2463
2464 $FingerprintsVector->SetType($Type);
2465
2466 Sets I<FingerprintsVector> values I<Type> and returns I<FingerprintsVector>. Possible I<Type>
2467 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>.
2468
2469 During calculation of similarity and distance coefficients between two I<FingerprintsVectors>, the
2470 following conditions apply to vector type, size, value and value IDs:
2471
2472 o For OrderedNumericalValues type, both vectors must be of the same size
2473 and contain similar types of numerical values in the same order.
2474
2475 o For NumericalValues type, vector value IDs for both vectors must be
2476 specified; however, their size and order of IDs and numerical values may
2477 be different. For each vector, value IDs must correspond to vector values.
2478
2479 o For AlphaNumericalValues type, vectors may contain both numerical and
2480 alphanumerical values and their sizes may be different.
2481
2482 =item B<SetValue>
2483
2484 $FingerprintsVector->SetValue($Index, $Value, [$SkipIndexCheck]);
2485
2486 Sets a I<FingerprintsVector> value specified by I<Index> starting at 0 to I<Value> along with
2487 optional index range check and returns I<FingerprintsVector>.
2488
2489 =item B<SetValueID>
2490
2491 $FingerprintsVector->SetValueID($Index, $ValueID, [$SkipIndexCheck]);
2492
2493 Sets a I<FingerprintsVector> value ID specified by I<Index> starting at 0 to I<ValueID> along with
2494 optional index range check and returns I<FingerprintsVector>.
2495
2496 =item B<SetValueIDs>
2497
2498 $FingerprintsVector->SetValueIDs($ValueIDsRef);
2499 $FingerprintsVector->SetValueIDs(@ValueIDs);
2500
2501 Sets I<FingerprintsVector> value IDs to specified I<ValueIDs> and returns I<FingerprintsVector>.
2502
2503 =item B<SetValues>
2504
2505 $FingerprintsVector->SetValues($ValuesRef);
2506 $FingerprintsVector->SetValues(@Values);
2507
2508 Sets I<FingerprintsVector> values to specified I<Values> and returns I<FingerprintsVector>.
2509
2510 =item B<SoergelDistanceCoefficient>
2511
2512 $Value = $FingerprintsVector->SoergelDistanceCoefficient(
2513 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2514 $Value = Fingerprints::FingerprintsVector::SoergelDistanceCoefficient(
2515 $FingerprintsVectorA, $FingerprintVectorB,
2516 [$CalculationMode, $SkipValuesCheck]);
2517
2518 Returns value of I<Soergel> distance coefficient between two I<FingerprintsVectors> using
2519 optionally specified I<CalculationMode> and optional checking of vector values.
2520
2521 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2522 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2523
2524 =item B<SorensonSimilarityCoefficient>
2525
2526 $Value = $FingerprintsVector->SorensonSimilarityCoefficient(
2527 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2528 $Value = Fingerprints::FingerprintsVector::SorensonSimilarityCoefficient(
2529 $FingerprintsVectorA, $FingerprintVectorB,
2530 [$CalculationMode, $SkipValuesCheck]);
2531
2532 Returns value of I<Sorenson> similarity coefficient between two I<FingerprintsVectors> using
2533 optionally specified I<CalculationMode> and optional checking of vector values.
2534
2535 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2536 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2537
2538 =item B<TanimotoSimilarityCoefficient>
2539
2540 $Value = $FingerprintsVector->TanimotoSimilarityCoefficient(
2541 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
2542 $Value = Fingerprints::FingerprintsVector::TanimotoSimilarityCoefficient(
2543 $FingerprintsVectorA, $FingerprintVectorB,
2544 [$CalculationMode, $SkipValuesCheck]);
2545
2546 Returns value of I<Tanimoto> similarity coefficient between two I<FingerprintsVectors> using
2547 optionally specified I<CalculationMode> and optional checking of vector values.
2548
2549 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
2550 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
2551
2552 =item B<StringifyFingerprintsVector>
2553
2554 $String = $FingerprintsVector->StringifyFingerprintsVector();
2555
2556 Returns a string containing information about I<FingerprintsVector> object.
2557
2558 =back
2559
2560 =head1 AUTHOR
2561
2562 Manish Sud <msud@san.rr.com>
2563
2564 =head1 SEE ALSO
2565
2566 BitVector.pm, FingerprintsStringUtil.pm, FingerprintsBitVector.pm, Vector.pm
2567
2568 =head1 COPYRIGHT
2569
2570 Copyright (C) 2015 Manish Sud. All rights reserved.
2571
2572 This file is part of MayaChemTools.
2573
2574 MayaChemTools is free software; you can redistribute it and/or modify it under
2575 the terms of the GNU Lesser General Public License as published by the Free
2576 Software Foundation; either version 3 of the License, or (at your option)
2577 any later version.
2578
2579 =cut