0
|
1 package Fingerprints::FingerprintsVector;
|
|
2 #
|
|
3 # $RCSfile: FingerprintsVector.pm,v $
|
|
4 # $Date: 2015/02/28 20:48:54 $
|
|
5 # $Revision: 1.31 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Scalar::Util ();
|
|
33 use MathUtil ();
|
|
34 use TextUtil ();
|
|
35 use StatisticsUtil ();
|
|
36 use BitVector;
|
|
37 use Vector;
|
|
38
|
|
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Exporter);

# Distance coefficients...
my(@DistanceCoefficients) = qw(CityBlockDistanceCoefficient EuclideanDistanceCoefficient HammingDistanceCoefficient ManhattanDistanceCoefficient SoergelDistanceCoefficient);

# Similarity coefficients...
my(@SimilarityCoefficients) = qw(CosineSimilarityCoefficient CzekanowskiSimilarityCoefficient DiceSimilarityCoefficient OchiaiSimilarityCoefficient JaccardSimilarityCoefficient SorensonSimilarityCoefficient TanimotoSimilarityCoefficient);

# New from string...
my(@NewFromString) = qw(NewFromValuesString NewFromValuesAndIDsString NewFromIDsAndValuesString NewFromValuesAndIDsPairsString NewFromIDsAndValuesPairsString);

# Names exported by default...
@EXPORT = qw(IsFingerprintsVector);

# Names exported on request. Exporter honors a tag only when every member of the tag is
# also listed in @EXPORT or @EXPORT_OK, so the tagged function names are appended as
# lists. They can't be placed inside qw() because qw() doesn't interpolate arrays: it
# would add the literal strings '@DistanceCoefficients' and '@SimilarityCoefficients'
# instead of the function names.
#
@EXPORT_OK = (
  qw(GetSupportedDistanceCoefficients GetSupportedSimilarityCoefficients GetSupportedDistanceAndSimilarityCoefficients),
  @NewFromString,
  @DistanceCoefficients,
  @SimilarityCoefficients,
);

# Tags for importing groups of names...
%EXPORT_TAGS = (
  new => [@NewFromString],
  distancecoefficients => [@DistanceCoefficients],
  similaritycoefficients => [@SimilarityCoefficients],
  all => [@EXPORT, @EXPORT_OK]
);
|
|
61
|
|
# Class level data: the name of this package, assigned by _InitializeClass...
my $ClassName;
_InitializeClass();

# Stringification of objects is delegated to the StringifyFingerprintsVector method...
use overload '""' => 'StringifyFingerprintsVector';
|
|
68
|
|
# Class constructor: creates an empty fingerprints vector object and hands the supplied
# name and value pairs over to _InitializeFingerprintsVectorProperties...
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # The invocant may be either a class name or an existing object...
  my $This = bless {}, ref($Class) || $Class;

  $This->_InitializeFingerprintsVector();
  $This->_InitializeFingerprintsVectorProperties(%NamesAndValues);

  return $This;
}
|
|
83
|
|
# Reset object data to its initial state: no type, no values and no value IDs.
# Returns the object itself...
#
sub _InitializeFingerprintsVector {
  my($This) = @_;

  # Type of fingerprint vector; it stays empty until SetType is called...
  $This->{Type} = '';

  # Fingerprint vector values and value IDs; existing arrays are emptied in place...
  for my $Key (qw(Values ValueIDs)) {
    @{$This->{$Key}} = ();
  }

  return $This;
}
|
|
100
|
|
# Initialize class level data...
sub _InitializeClass {
  # Remember the name of this package for use in error and warning messages...
  $ClassName = __PACKAGE__;
}
|
|
106
|
|
# Initialize object properties: each name and value pair is applied by calling the
# Set<Name> method with the value. A Type must be among the specified names; otherwise
# the method croaks. Returns the object itself...
#
sub _InitializeFingerprintsVectorProperties {
  my($This, %NamesAndValues) = @_;

  for my $Name (keys %NamesAndValues) {
    my $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # The check is made after the setters have run, as before...
  unless (exists $NamesAndValues{Type}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying type...";
  }

  return $This;
}
|
|
122
|
|
# Create a new fingerprints vector from a string of space delimited values. It may be
# invoked as a function, (Type, ValuesString), or as a class or object method,
# (Invocant, Type, ValuesString); the invocant itself is not used.
#
# An undefined, empty or 'None' values string leads to a vector without any values.
#
sub NewFromValuesString ($$;$) {
  # Skip over the invocant when three arguments are passed...
  my($Type, $ValuesString) = (@_ == 3) ? @_[1, 2] : @_[0, 1];

  my(@Values) = (defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i)
    ? split(' ', $ValuesString)
    : ();

  my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values);

  return $FingerprintsVector;
}
|
|
147
|
|
# Create a new fingerprints vector from a 'Values;ValueIDs' string: the values string
# comes first and the value IDs string second, separated by a semicolon, and the entries
# within each of them are delimited by spaces. A missing, empty or 'None' part
# contributes no entries.
#
# It may be invoked as a function or as a class or object method; the invocant itself is
# not used. A warning is issued and undef is returned when the number of values and
# value IDs differ.
#
sub NewFromValuesAndIDsString ($$;$) {
  # Skip over the invocant when three arguments are passed...
  my($Type, $ValuesAndIDsString) = (@_ == 3) ? @_[1, 2] : @_[0, 1];

  my($ValuesString, $ValueIDsString) = split(';', $ValuesAndIDsString);

  my(@Values) = (defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i)
    ? split(' ', $ValuesString)
    : ();
  my(@ValueIDs) = (defined($ValueIDsString) && length($ValueIDsString) && $ValueIDsString !~ /^None$/i)
    ? split(' ', $ValueIDsString)
    : ();

  my $NumOfValues = scalar @Values;
  my $NumOfValueIDs = scalar @ValueIDs;
  if ($NumOfValues != $NumOfValueIDs) {
    carp "Warning: ${ClassName}->NewFromValuesAndIDsString: Object can't be instantiated: Number specified values, " . $NumOfValues . ", must be equal to number of specified value IDs, " . $NumOfValueIDs . "...";
    return undef;
  }

  my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

  return $FingerprintsVector;
}
|
|
186
|
|
# Create a new fingerprints vector from a 'ValueIDs;Values' string: the value IDs string
# comes first and the values string second, separated by a semicolon, and the entries
# within each of them are delimited by spaces. A missing, empty or 'None' part
# contributes no entries.
#
# It may be invoked as a function or as a class or object method; the invocant itself is
# not used. A warning is issued and undef is returned when the number of values and
# value IDs differ.
#
sub NewFromIDsAndValuesString ($$;$) {
  # Skip over the invocant when three arguments are passed...
  my($Type, $IDsAndValuesString) = (@_ == 3) ? @_[1, 2] : @_[0, 1];

  my($ValueIDsString, $ValuesString) = split(';', $IDsAndValuesString);

  my(@Values) = (defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i)
    ? split(' ', $ValuesString)
    : ();
  my(@ValueIDs) = (defined($ValueIDsString) && length($ValueIDsString) && $ValueIDsString !~ /^None$/i)
    ? split(' ', $ValueIDsString)
    : ();

  my $NumOfValues = scalar @Values;
  my $NumOfValueIDs = scalar @ValueIDs;
  if ($NumOfValues != $NumOfValueIDs) {
    carp "Warning: ${ClassName}->NewFromIDsAndValuesString: Object can't be instantiated: Number specified values, " . $NumOfValues . ", must be equal to number of specified value IDs, " . $NumOfValueIDs . "...";
    return undef;
  }

  my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

  return $FingerprintsVector;
}
|
|
225
|
|
# Create a new fingerprints vector from a string of space delimited value and value ID
# pairs, with the value preceding its ID in each pair. The single pair 'None None'
# stands for a vector without any values.
#
# It may be invoked as a function or as a class or object method; the invocant itself is
# not used. A warning is issued and undef is returned for an odd number of entries.
#
sub NewFromValuesAndIDsPairsString ($$;$) {
  # Skip over the invocant when three arguments are passed...
  my($Type, $ValuesAndIDsPairsString) = (@_ == 3) ? @_[1, 2] : @_[0, 1];

  my(@Pairs) = split(' ', $ValuesAndIDsPairsString);
  if (@Pairs % 2) {
    carp "Warning: ${ClassName}->NewFromValuesAndIDsPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
    return undef;
  }

  my(@Values, @ValueIDs);
  my $NoDataPair = (@Pairs == 2 && $Pairs[0] =~ /^None$/i && $Pairs[1] =~ /^None$/i);
  if (!$NoDataPair) {
    # Consume the entries two at a time: value first, then its ID...
    while (my($Value, $ValueID) = splice(@Pairs, 0, 2)) {
      push @Values, $Value;
      push @ValueIDs, $ValueID;
    }
  }

  my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

  return $FingerprintsVector;
}
|
|
260
|
|
# Create a new fingerprints vector from a string of space delimited value ID and value
# pairs, with the ID preceding its value in each pair. The single pair 'None None'
# stands for a vector without any values.
#
# It may be invoked as a function or as a class or object method; the invocant itself is
# not used.
#
# For an odd number of entries, a warning is issued and undef is returned, in line with
# NewFromValuesAndIDsPairsString and the other string based constructors. Earlier, this
# function croaked at that point, which left its own 'return undef' unreachable.
#
sub NewFromIDsAndValuesPairsString ($$;$) {
  # Skip over the invocant when three arguments are passed...
  my($Type, $IDsAndValuesPairsString) = (@_ == 3) ? @_[1, 2] : @_[0, 1];

  my(@IDsAndValuesPairs) = split(' ', $IDsAndValuesPairsString);
  if (@IDsAndValuesPairs % 2) {
    carp "Warning: ${ClassName}->NewFromIDsAndValuesPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
    return undef;
  }

  my(@Values, @ValueIDs);
  my $NoDataPair = (@IDsAndValuesPairs == 2 && $IDsAndValuesPairs[0] =~ /^None$/i && $IDsAndValuesPairs[1] =~ /^None$/i);
  if (!$NoDataPair) {
    # Walk the entries two at a time: ID first, then its value...
    for (my $Index = 0; $Index < $#IDsAndValuesPairs; $Index += 2) {
      push @ValueIDs, $IDsAndValuesPairs[$Index];
      push @Values, $IDsAndValuesPairs[$Index + 1];
    }
  }

  my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

  return $FingerprintsVector;
}
|
|
295
|
|
# Set type of fingerprint vector. Supported types, matched without regard to case, are:
# OrderedNumericalValues, NumericalValues, and AlphaNumericalValues
#
# . For OrderedNumericalValues type, both vectors must be of the same size and contain similar
# types of numerical values in the same order.
#
# . For NumericalValues type, vector value IDs for both vectors must be specified; however, their
# size and order of IDs and numerical values may be different. For each vector, value IDs must
# correspond to vector values.
#
# . For AlphaNumericalValues type, vectors may contain both numerical and alphanumerical values
# and their sizes may be different.
#
# The type can be set only once: the method croaks for an unsupported type and for an
# object whose type is already set. Returns the object itself.
#
sub SetType {
  my($This, $Type) = @_;

  unless ($Type =~ /^(OrderedNumericalValues|NumericalValues|AlphaNumericalValues)$/i) {
    croak "Error: ${ClassName}->SetType: Specified value, $Type, for Type is not vaild. Supported types in current release of MayaChemTools: OrderedNumericalValues, NumericalValues or AlphaNumericalValues";
  }

  croak "Error: ${ClassName}->SetType: Can't change intial fingerprints vector type: It's already set..." if $This->{Type};

  $This->{Type} = $Type;

  return $This;
}
|
|
323
|
|
# Get fingerprints vector type...
#
sub GetType {
  my $This = shift;

  return $This->{Type};
}
|
|
331
|
|
# Set ID of the fingerprints vector and return the object itself...
sub SetID {
  my($This, $NewID) = @_;

  $This->{ID} = $NewID;

  return $This;
}
|
|
340
|
|
# Get ID of the fingerprints vector, or 'None' when no ID has been set...
sub GetID {
  my($This) = @_;

  if (exists $This->{ID}) {
    return $This->{ID};
  }
  return 'None';
}
|
|
347
|
|
# Set description of the fingerprints vector and return the object itself...
sub SetDescription {
  my($This, $NewDescription) = @_;

  $This->{Description} = $NewDescription;

  return $This;
}
|
|
356
|
|
# Get description of the fingerprints vector, or 'No description available' when no
# description has been set...
sub GetDescription {
  my($This) = @_;

  if (exists $This->{Description}) {
    return $This->{Description};
  }
  return 'No description available';
}
|
|
363
|
|
# Set vector type of the fingerprints vector and return the object itself...
sub SetVectorType {
  my($This, $NewVectorType) = @_;

  $This->{VectorType} = $NewVectorType;

  return $This;
}
|
|
372
|
|
# Get vector type, or 'FingerprintsVector' when no vector type has been set...
sub GetVectorType {
  my($This) = @_;

  if (exists $This->{VectorType}) {
    return $This->{VectorType};
  }
  return 'FingerprintsVector';
}
|
|
379
|
|
# Replace values of a fingerprint vector using a vector, reference to an array or a list
# of values. The work is done by _SetOrAddValuesOrValueIDs in SetValues mode; the object
# itself is returned...
#
sub SetValues {
  my $This = shift;

  $This->_SetOrAddValuesOrValueIDs("SetValues", @_);

  return $This;
}
|
|
389
|
|
# Replace value IDs of a fingerprint vector using a vector, reference to an array or a
# list of IDs. The work is done by _SetOrAddValuesOrValueIDs in SetValueIDs mode; the
# object itself is returned...
#
sub SetValueIDs {
  my $This = shift;

  $This->_SetOrAddValuesOrValueIDs("SetValueIDs", @_);

  return $This;
}
|
|
399
|
|
# Append values to a fingerprint vector using a vector, reference to an array or a list
# of values. The work is done by _SetOrAddValuesOrValueIDs in AddValues mode; the object
# itself is returned...
#
sub AddValues {
  my $This = shift;

  $This->_SetOrAddValuesOrValueIDs("AddValues", @_);

  return $This;
}
|
|
409
|
|
# Append value IDs to a fingerprint vector using a vector, reference to an array or a
# list of IDs. The work is done by _SetOrAddValuesOrValueIDs in AddValueIDs mode; the
# object itself is returned...
#
sub AddValueIDs {
  my $This = shift;

  $This->_SetOrAddValuesOrValueIDs("AddValueIDs", @_);

  return $This;
}
|
|
419
|
|
# Set or add values or value IDs. The data following the mode may be:
#
# o A list of values or value IDs
# o A reference to an array of values or value IDs
# o A vector containing values or value IDs
#
# Only the first item is inspected to decide which of these forms is in use. Supported
# modes, matched without regard to case: SetValues, SetValueIDs, AddValues and
# AddValueIDs. Nothing is done, and nothing is returned, when no data is passed;
# otherwise, the object itself is returned.
#
sub _SetOrAddValuesOrValueIDs {
  my($This, $Mode, @Values) = @_;

  return unless @Values;

  # Work out where the specified values or value IDs come from...
  my $FirstValue = $Values[0];
  my $TypeOfFirstValue = ref $FirstValue;

  if ($TypeOfFirstValue =~ /^(SCALAR|HASH|CODE|REF|GLOB)/) {
    croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Trying to add values to vector object with a reference to unsupported value format...";
  }

  my $ValuesRef;
  if (Vector::IsVector($FirstValue)) {
    # A vector object: take its values...
    $ValuesRef = $FirstValue->GetValues();
  }
  elsif ($TypeOfFirstValue =~ /^ARRAY/) {
    # An array reference: use it as is...
    $ValuesRef = $FirstValue;
  }
  else {
    # A plain list of values...
    $ValuesRef = \@Values;
  }

  # Apply the data. Set modes empty the target first and then append, so that the stored
  # array is always modified in place...
  if ($Mode =~ /^SetValues$/i) {
    @{$This->{Values}} = ();
    push @{$This->{Values}}, @{$ValuesRef};
  }
  elsif ($Mode =~ /^SetValueIDs$/i) {
    @{$This->{ValueIDs}} = ();
    push @{$This->{ValueIDs}}, @{$ValuesRef};
  }
  elsif ($Mode =~ /^AddValues$/i) {
    push @{$This->{Values}}, @{$ValuesRef};
  }
  elsif ($Mode =~ /^AddValueIDs$/i) {
    push @{$This->{ValueIDs}}, @{$ValuesRef};
  }
  else {
    croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Unknown mode $Mode...";
  }

  return $This;
}
|
|
465
|
|
# Set a specific value in fingerprint vector with indices starting from 0. The index is
# checked against the current number of values unless a true SkipCheck is passed; the
# method croaks for an index outside that range...
#
sub SetValue {
  my($This, $Index, $Value, $SkipCheck) = @_;

  unless ($SkipCheck) {
    croak "Error: ${ClassName}->SetValue: Index value must be a positive number..." if $Index < 0;
    croak "Error: ${ClassName}->SetValue: Index vaue must be less than number of values..." if $Index >= $This->GetNumOfValues();
  }

  return $This->_SetValue($Index, $Value);
}
|
|
486
|
|
# Store a fingerprint vector value at the specified index without any range checking
# and return the object itself...
#
sub _SetValue {
  my($This, $Position, $NewValue) = @_;

  $This->{Values}->[$Position] = $NewValue;

  return $This;
}
|
|
496
|
|
# Get a specific value from fingerprint vector with indices starting from 0. The method
# croaks for an index which is negative or not less than the number of values...
#
sub GetValue {
  my($This, $Index) = @_;

  croak "Error: ${ClassName}->GetValue: Index value must be a positive number..." if $Index < 0;
  croak "Error: ${ClassName}->GetValue: Index value must be less than number of values..." if $Index >= $This->GetNumOfValues();

  return $This->_GetValue($Index);
}
|
|
510
|
|
# Fetch a fingerprint vector value at the specified index without any range checking...
sub _GetValue {
  my($This, $Position) = @_;

  return $This->{Values}->[$Position];
}
|
|
517
|
|
# Return vector values as a list in list context, or as a reference to the stored array
# of values otherwise...
#
sub GetValues {
  my($This) = @_;

  if (wantarray) {
    return @{$This->{Values}};
  }
  return \@{$This->{Values}};
}
|
|
525
|
|
# Set a specific value ID in fingerprint vector with indices starting from 0. The index
# is checked against the current number of value IDs unless a true SkipCheck is passed;
# the method croaks for an index outside that range...
#
sub SetValueID {
  my($This, $Index, $Value, $SkipCheck) = @_;

  unless ($SkipCheck) {
    croak "Error: ${ClassName}->SetValueID: Index value must be a positive number..." if $Index < 0;
    croak "Error: ${ClassName}->SetValueID: Index vaue must be less than number of value IDs..." if $Index >= $This->GetNumOfValueIDs();
  }

  return $This->_SetValueID($Index, $Value);
}
|
|
546
|
|
# Store a fingerprint vector value ID at the specified index without any range checking
# and return the object itself...
#
sub _SetValueID {
  my($This, $Position, $NewValueID) = @_;

  $This->{ValueIDs}->[$Position] = $NewValueID;

  return $This;
}
|
|
556
|
|
# Get a specific value ID from fingerprint vector with indices starting from 0. The
# method croaks for an index which is negative or not less than the number of value IDs...
#
sub GetValueID {
  my($This, $Index) = @_;

  croak "Error: ${ClassName}->GetValueID: Index value must be a positive number..." if $Index < 0;
  croak "Error: ${ClassName}->GetValueID: Index value must be less than number of value IDs..." if $Index >= $This->GetNumOfValueIDs();

  return $This->_GetValueID($Index);
}
|
|
570
|
|
# Fetch a fingerprint vector value ID at the specified index without any range checking...
#
sub _GetValueID {
  my($This, $Position) = @_;

  return $This->{ValueIDs}->[$Position];
}
|
|
578
|
|
# Return vector value IDs as a list in list context, or as a reference to the stored
# array of value IDs otherwise...
#
sub GetValueIDs {
  my($This) = @_;

  if (wantarray) {
    return @{$This->{ValueIDs}};
  }
  return \@{$This->{ValueIDs}};
}
|
|
586
|
|
# Get fingerprints vector string containing values and/or IDs in a specified format.
# Format names are matched without regard to case and the 'String' suffix is optional;
# the method croaks for an unsupported format...
#
sub GetFingerprintsVectorString {
  my($This, $Format) = @_;

  # Format patterns along with the methods generating the strings, tried in order...
  my(@FormatsAndMethods) = (
    [qr/^(IDsAndValuesString|IDsAndValues)$/i, 'GetIDsAndValuesString'],
    [qr/^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i, 'GetIDsAndValuesPairsString'],
    [qr/^(ValuesAndIDsString|ValuesAndIDs)$/i, 'GetValuesAndIDsString'],
    [qr/^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i, 'GetValuesAndIDsPairsString'],
    [qr/^(ValueIDsString|ValueIDs)$/i, 'GetValueIDsString'],
    [qr/^(ValuesString|Values)$/i, 'GetValuesString'],
  );

  for my $FormatAndMethod (@FormatsAndMethods) {
    my($Pattern, $MethodName) = @{$FormatAndMethod};
    if ($Format =~ $Pattern) {
      return $This->$MethodName();
    }
  }

  croak "Error: ${ClassName}->GetFingerprintsVectorString: Specified vector string format, $Format, is not supported. Value values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValueIDsString, ValueIDs, ValuesString, Values...";
}
|
|
# Get vector value IDs and values as two space delimited ASCII strings joined by a
# semicolon, IDs first. 'None' replaces missing value IDs; 'None;None' is returned when
# no values are available, whether or not value IDs exist...
#
sub GetIDsAndValuesString {
  my($This) = @_;

  my $ValueIDsAvailable = @{$This->{ValueIDs}} ? 1 : 0;
  my $ValuesAvailable = @{$This->{Values}} ? 1 : 0;

  return "None;None" unless $ValuesAvailable;

  my $ValueIDsString = $ValueIDsAvailable ? join(' ', @{$This->{ValueIDs}}) : "None";

  return $ValueIDsString . ";" . join(' ', @{$This->{Values}});
}
|
|
622
|
|
# Get vector value ID and value pairs as a space delimited ASCII string, with the ID
# preceding its value in each pair. 'None' stands in for each ID when no value IDs are
# available; 'None None' is returned when no values are available...
#
sub GetIDsAndValuesPairsString {
  my($This) = @_;

  return "None None" unless @{$This->{Values}};

  my $ValueIDsPresent = @{$This->{ValueIDs}} ? 1 : 0;

  my(@IDsAndValuesPairs) = map {
    (($ValueIDsPresent ? $This->{ValueIDs}->[$_] : 'None'), $This->{Values}->[$_])
  } (0 .. $#{$This->{Values}});

  return join(' ', @IDsAndValuesPairs);
}
|
|
647
|
|
# Get vector values and value IDs as two space delimited ASCII strings joined by a
# semicolon, values first. 'None' replaces missing value IDs; 'None;None' is returned
# when no values are available, whether or not value IDs exist...
#
sub GetValuesAndIDsString {
  my($This) = @_;

  my $ValueIDsAvailable = @{$This->{ValueIDs}} ? 1 : 0;
  my $ValuesAvailable = @{$This->{Values}} ? 1 : 0;

  return "None;None" unless $ValuesAvailable;

  my $ValueIDsString = $ValueIDsAvailable ? join(' ', @{$This->{ValueIDs}}) : "None";

  return join(' ', @{$This->{Values}}) . ";" . $ValueIDsString;
}
|
|
667
|
|
# Get vector value and value ID pairs as a space delimited ASCII string, with the value
# preceding its ID in each pair. 'None' stands in for each ID when no value IDs are
# available; 'None None' is returned when no values are available...
#
sub GetValuesAndIDsPairsString {
  my($This) = @_;

  return "None None" unless @{$This->{Values}};

  my $ValueIDsPresent = @{$This->{ValueIDs}} ? 1 : 0;

  my(@ValuesAndIDsPairs) = map {
    ($This->{Values}->[$_], ($ValueIDsPresent ? $This->{ValueIDs}->[$_] : 'None'))
  } (0 .. $#{$This->{Values}});

  return join(' ', @ValuesAndIDsPairs);
}
|
|
692
|
|
# Get vector value IDs as a space delimited ASCII string, or 'None' when no value IDs
# are available...
#
sub GetValueIDsString {
  my($This) = @_;

  if (@{$This->{ValueIDs}}) {
    return join(' ', @{$This->{ValueIDs}});
  }
  return 'None';
}
|
|
700
|
|
# Get vector values as a space delimited ASCII string, or 'None' when no values are
# available...
#
sub GetValuesString {
  my($This) = @_;

  if (@{$This->{Values}}) {
    return join(' ', @{$This->{Values}});
  }
  return 'None';
}
|
|
708
|
|
# Get number of values in the fingerprints vector...
sub GetNumOfValues {
  my($This) = @_;

  my $NumOfValues = @{$This->{Values}};

  return $NumOfValues;
}
|
|
715
|
|
# Get number of non-zero values: the values at indices below GetNumOfValues() which
# differ numerically from zero...
sub GetNumOfNonZeroValues {
  my($This) = @_;

  my $LastIndex = $This->GetNumOfValues() - 1;

  # grep in scalar context supplies the count of matching indices...
  my $NonZeroCount = grep { $This->{Values}[$_] != 0 } (0 .. $LastIndex);

  return $NonZeroCount;
}
|
|
731
|
|
# Get number of value IDs in the fingerprints vector...
sub GetNumOfValueIDs {
  my($This) = @_;

  my $NumOfValueIDs = @{$This->{ValueIDs}};

  return $NumOfValueIDs;
}
|
|
738
|
|
739 # FingerprintsVector class provides methods to calculate similarity between vectors
|
|
740 # containing three different types of values:
|
|
741 #
|
|
742 # Type I: OrderedNumericalValues
|
|
743 #
|
|
744 # . Size of two vectors are same
|
|
745 # . Vectors contain real values in a specific order. For example: MACCS keys count, Topological
|
|
746 # pharmacophore atom pairs and so on.
|
|
747 # . Option to calculate similarity value using continuous values or binary values
|
|
748 #
|
|
749 # Type II: UnorderedNumericalValues
|
|
750 #
|
|
751 # . Size of two vectors might not be same
|
|
752 # . Vectors contain unordered real values identified by value IDs. For example: Topological atom pairs,
|
|
753 # Topological atom torsions and so on
|
|
754 # . Option to calculate similarity value using continuous values or binary values
|
|
755 #
|
|
756 # Type III: AlphaNumericalValues
|
|
757 #
|
|
758 # . Size of two vectors might not be same
|
|
759 # . Vectors contain unordered alphanumerical values. For example: Extended connectivity fingerprints,
|
|
760 # atom neighborhood fingerprints.
|
|
761 # . The vector values are treated as keys or bit indices and similarity value is calculated accordingly.
|
|
762 #
|
|
763 # Before performing similarity or distance calculations between vectors containing UnorderedNumericalValues
|
|
764 # or AlphaNumericalValues, the vectors are transformed into vectors containing unique OrderedNumericalValues
|
|
765 # using value IDs for UnorderedNumericalValues and values itself for AlphaNumericalValues.
|
|
766 #
|
|
767 # Three forms of similarity or distance calculation between two vectors are supported: AlgebraicForm, BinaryForm or
|
|
768 # SetTheoreticForm.
|
|
769 #
|
|
770 # The value of an extra parameter, CalculationMode, passed to each similarity or distance function
|
|
771 # controls the calculation. Supported values for CalculationMode: AlgebraicForm, BinaryForm and
|
|
772 # SetTheoreticForm. Default: AlgebraicForm.
|
|
773 #
|
|
774 # For BinaryForm CalculationMode, the ordered list of processed final vector values containing the value or
|
|
775 # count of each unique value type is simply converted into a binary vector containing 1s and 0s
|
|
776 # corresponding to presence or absence of values before calculating similarity or distance between
|
|
777 # two vectors.
|
|
778 #
|
|
779 # For two fingerprint vectors A and B of same size containing OrderedNumericalValues, let:
|
|
780 #
|
|
781 # N = Number values in A or B
|
|
782 #
|
|
783 # Xa = Values of vector A
|
|
784 # Xb = Values of vector B
|
|
785 #
|
|
786 # Xai = Value of ith element in A
|
|
787 # Xbi = Value of ith element in B
|
|
788 #
|
|
789 # SUM = Sum of i over N values
|
|
790 #
|
|
791 # For SetTheoreticForm of calculation between two vectors, let:
|
|
792 #
|
|
793 # SetIntersectionXaXb = SUM ( MIN ( Xai, Xbi ) )
|
|
794 # SetDifferenceXaXb = SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) )
|
|
795 #
|
|
796 # For BinaryForm of calculation between two vectors, let:
|
|
797 #
|
|
798 # Na = Number of bits set to "1" in A = SUM ( Xai )
|
|
799 # Nb = Number of bits set to "1" in B = SUM ( Xbi )
|
|
800 # Nc = Number of bits set to "1" in both A and B = SUM ( Xai * Xbi )
|
|
801 # Nd = Number of bits set to "0" in both A and B = SUM ( 1 - Xai - Xbi + Xai * Xbi)
|
|
802 #
|
|
803 # N = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
|
|
804 #
|
|
805 # Additionally, for BinaryForm various values also correspond to:
|
|
806 #
|
|
807 # Na = | Xa |
|
|
808 # Nb = | Xb |
|
|
809 # Nc = | SetIntersectionXaXb |
|
|
810 # Nd = N - | SetDifferenceXaXb |
|
|
811 #
|
|
812 # | SetDifferenceXaXb | = N - Nd = Na + Nb - Nc + Nd - Nd = Na + Nb - Nc
|
|
813 # = | Xa | + | Xb | - | SetIntersectionXaXb |
|
|
814 #
|
|
815 # Various distance coefficients and similarity coefficients [ Ref 40, Ref 62, Ref 64 ] for a pair of vectors A and B
|
|
816 # in AlgebraicForm and BinaryForm are defined as follows:
|
|
817 #
|
|
818 # . CityBlockDistanceCoefficient: ( same as HammingDistanceCoefficient and ManhattanDistanceCoefficient)
|
|
819 #
|
|
820 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
|
|
821 #
|
|
822 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
|
|
823 #
|
|
824 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
|
|
825 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
|
|
826 #
|
|
827 # . CosineSimilarityCoefficient: ( same as OchiaiSimilarityCoefficient)
|
|
828 #
|
|
829 # . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
|
|
830 #
|
|
831 # . BinaryForm: Nc / SQRT ( Na * Nb)
|
|
832 #
|
|
833 # . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| )
|
|
834 # = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
|
|
835 #
|
|
836 # . CzekanowskiSimilarityCoefficient: ( same as DiceSimilarityCoefficient and SorensonSimilarityCoefficient)
|
|
837 #
|
|
838 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
|
|
839 #
|
|
840 # . BinaryForm: 2 * Nc / ( Na + Nb )
|
|
841 #
|
|
842 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
|
|
843 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
|
|
844 #
|
|
845 # . DiceSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and SorensonSimilarityCoefficient)
|
|
846 #
|
|
847 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
|
|
848 #
|
|
849 # . BinaryForm: 2 * Nc / ( Na + Nb )
|
|
850 #
|
|
851 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
|
|
852 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
|
|
853 #
|
|
854 # . EuclideanDistanceCoefficient:
|
|
855 #
|
|
856 # . AlgebraicForm: SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) )
|
|
857 #
|
|
858 # . BinaryForm: SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc )
|
|
859 #
|
|
860 # . SetTheoreticForm: SQRT ( | SetDifferenceXaXb | - | SetIntersectionXaXb | )
|
|
861 # = SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
|
|
862 #
|
|
863 # . HammingDistanceCoefficient: ( same as CityBlockDistanceCoefficient and ManhattanDistanceCoefficient)
|
|
864 #
|
|
865 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
|
|
866 #
|
|
867 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
|
|
868 #
|
|
869 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
|
|
870 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
|
|
871 #
|
|
872 # . JaccardSimilarityCoefficient: ( same as TanimotoSimilarityCoefficient)
|
|
873 #
|
|
874 # . AlgebraicForm: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
|
|
875 #
|
|
876 # . BinaryForm: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
|
|
877 #
|
|
878 # . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb |
|
|
879 # = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
|
|
880 #
|
|
881 # . ManhattanDistanceCoefficient: ( same as CityBlockDistanceCoefficient and HammingDistanceCoefficient)
|
|
882 #
|
|
883 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
|
|
884 #
|
|
885 # . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
|
|
886 #
|
|
887 # . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
|
|
888 # = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
|
|
889 #
|
|
890 # . OchiaiSimilarityCoefficient: ( same as CosineSimilarityCoefficient)
|
|
891 #
|
|
892 # . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
|
|
893 #
|
|
894 # . BinaryForm: Nc / SQRT ( Na * Nb)
|
|
895 #
|
|
896 # . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| )
|
|
897 # = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
|
|
898 #
|
|
899 # . SorensonSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and DiceSimilarityCoefficient)
|
|
900 #
|
|
901 # . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
|
|
902 #
|
|
903 # . BinaryForm: 2 * Nc / ( Na + Nb )
|
|
904 #
|
|
905 # . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
|
|
906 # = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
|
|
907 #
|
|
908 # . SoergelDistanceCoefficient:
|
|
909 #
|
|
910 # . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
|
|
911 #
|
|
912 # . BinaryForm: 1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
|
|
913 #
|
|
914 # . SetTheoreticForm: ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) / | SetDifferenceXaXb |
|
|
915 # = ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
|
|
916 #
|
|
917 # . TanimotoSimilarityCoefficient: ( same as JaccardSimilarityCoefficient)
|
|
918 #
|
|
919 # . AlgebraicForm: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
|
|
920 #
|
|
921 # . BinaryForm: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
|
|
922 #
|
|
923 # . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb |
|
|
924 # = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
|
|
925 #
|
|
926 #
|
|
927
|
|
# Calculate Hamming distance coefficient between two fingerprint vectors.
#
# Hamming distance is the same as CityBlock distance: all four arguments are
# forwarded to CityBlockDistanceCoefficient and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub HammingDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return CityBlockDistanceCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
|
|
937
|
|
# Calculate Manhattan distance coefficient between two fingerprint vectors.
#
# Manhattan distance is the same as CityBlock distance: all four arguments are
# forwarded to CityBlockDistanceCoefficient and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub ManhattanDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return CityBlockDistanceCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
|
|
947
|
|
# Calculate CityBlock distance coefficient between two fingerprint vectors.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     ignoring case; AlgebraicForm is used when it is not defined
#   . SkipValuesCheck: flag passed on to the validation step; 0 when not defined
#
# Returns the value calculated by the helper for the requested form, or undef
# when CalculationMode matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CityBlockDistanceCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  # Fall back to default values for missing optional arguments...
  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("CityBlockDistanceCoefficient: Calculation failed", $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);

  # Hand over to the helper implementing the requested form...
  return _CityBlockDistanceCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _CityBlockDistanceCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _CityBlockDistanceCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  # Unknown calculation mode...
  return undef;
}
|
|
976
|
|
# Calculate CityBlock distance coefficient using algebraic form:
#
#   SUM ( ABS ( Xai - Xbi ) )
#
# The sum is obtained from a helper, called in scalar context, and returned as is.
#
sub _CityBlockDistanceCoefficientUsingAlgebraicForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  return scalar(_GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($FingerprintsVectorA, $FingerprintsVectorB));
}
|
|
987
|
|
# Calculate CityBlock distance coefficient using binary form:
#
#   ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
#
# Na, Nb and Nc are the three counts returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits.
#
sub _CityBlockDistanceCoefficientUsingBinaryForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($FingerprintsVectorA, $FingerprintsVectorB);

  return ($CountA + $CountB - 2 * $CountCommon);
}
|
|
998
|
|
# Calculate CityBlock distance coefficient using set theoretic form:
#
#   SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
#
sub _CityBlockDistanceCoefficientUsingSetTheoreticForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($FingerprintsVectorA, $FingerprintsVectorB);

  return ($TotalA + $TotalB - 2 * $TotalOfMinimums);
}
|
|
1011
|
|
# Calculate Ochiai similarity coefficient between two fingerprint vectors.
#
# Ochiai similarity is the same as Cosine similarity: all four arguments are
# forwarded to CosineSimilarityCoefficient and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub OchiaiSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return CosineSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
|
|
1021
|
|
# Calculate Cosine similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     ignoring case; AlgebraicForm is used when it is not defined
#   . SkipValuesCheck: flag passed on to the validation step; 0 when not defined
#
# Returns the value calculated by the helper for the requested form, or undef
# when CalculationMode matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CosineSimilarityCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  # Fall back to default values for missing optional arguments...
  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("CosineSimilarityCoefficient: Calculation failed", $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);

  # Hand over to the helper implementing the requested form...
  return _CosineSimilarityCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _CosineSimilarityCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _CosineSimilarityCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  # Unknown calculation mode...
  return undef;
}
|
|
1050
|
|
# Calculate Cosine similarity coefficient using algebraic form:
#
#   SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
#
# Returns 0 when the denominator is zero.
#
sub _CosineSimilarityCoefficientUsingAlgebraicForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $SquaresTotalA = _GetSumOfSquaresOfFingerprintsOrderedValues($FingerprintsVectorA);
  my $SquaresTotalB = _GetSumOfSquaresOfFingerprintsOrderedValues($FingerprintsVectorB);
  my $ProductsTotal = _GetSumOfProductOfFingerprintsOrderedValues($FingerprintsVectorA, $FingerprintsVectorB);

  my $Divisor = sqrt($SquaresTotalA * $SquaresTotalB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $ProductsTotal / $Divisor;
}
|
|
1066
|
|
# Calculate Cosine similarity coefficient using binary form:
#
#   Nc / SQRT ( Na * Nb)
#
# Na, Nb and Nc are the three counts returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is zero.
#
sub _CosineSimilarityCoefficientUsingBinaryForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($FingerprintsVectorA, $FingerprintsVectorB);

  my $Divisor = sqrt($CountA * $CountB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $CountCommon / $Divisor;
}
|
|
1080
|
|
# Calculate Cosine similarity coefficient using set theoretic form:
#
#   SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
#
# Returns 0 when the denominator is zero.
#
sub _CosineSimilarityCoefficientUsingSetTheoreticForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($FingerprintsVectorA, $FingerprintsVectorB);

  my $Divisor = sqrt($TotalA * $TotalB);

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $TotalOfMinimums / $Divisor;
}
|
|
1096
|
|
# Calculate Czekanowski similarity coefficient between two fingerprint vectors.
#
# Czekanowski similarity is the same as Dice similarity: all four arguments are
# forwarded to DiceSimilarityCoefficient and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CzekanowskiSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
|
|
1106
|
|
# Calculate Sorenson similarity coefficient between two fingerprint vectors.
#
# Sorenson similarity is the same as Dice similarity: all four arguments are
# forwarded to DiceSimilarityCoefficient and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SorensonSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
|
|
1116
|
|
# Calculate Dice similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     ignoring case; AlgebraicForm is used when it is not defined
#   . SkipValuesCheck: flag passed on to the validation step; 0 when not defined
#
# Returns the value calculated by the helper for the requested form, or undef
# when CalculationMode matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  # Fall back to default values for missing optional arguments...
  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("DiceSimilarityCoefficient: Calculation failed", $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);

  # Hand over to the helper implementing the requested form...
  return _DiceSimilarityCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _DiceSimilarityCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _DiceSimilarityCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  # Unknown calculation mode...
  return undef;
}
|
|
1145
|
|
# Calculate Dice similarity coefficient using algebraic form:
#
#   ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
#
# Returns 0 when the denominator is zero.
#
sub _DiceSimilarityCoefficientUsingAlgebraicForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $SquaresTotalA = _GetSumOfSquaresOfFingerprintsOrderedValues($FingerprintsVectorA);
  my $SquaresTotalB = _GetSumOfSquaresOfFingerprintsOrderedValues($FingerprintsVectorB);
  my $ProductsTotal = _GetSumOfProductOfFingerprintsOrderedValues($FingerprintsVectorA, $FingerprintsVectorB);

  my $Divisor = $SquaresTotalA + $SquaresTotalB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return (2 * $ProductsTotal) / $Divisor;
}
|
|
1161
|
|
# Calculate Dice similarity coefficient using binary form:
#
#   2 * Nc / ( Na + Nb )
#
# Na, Nb and Nc are the three counts returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is zero.
#
sub _DiceSimilarityCoefficientUsingBinaryForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($FingerprintsVectorA, $FingerprintsVectorB);

  my $Divisor = $CountA + $CountB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return (2 * $CountCommon) / $Divisor;
}
|
|
1175
|
|
# Calculate Dice similarity coefficient using set theoretic form:
#
#   2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
#
# Returns 0 when the denominator is zero.
#
sub _DiceSimilarityCoefficientUsingSetTheoreticForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($FingerprintsVectorA, $FingerprintsVectorB);

  my $Divisor = $TotalA + $TotalB;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return (2 * $TotalOfMinimums) / $Divisor;
}
|
|
1191
|
|
1192
|
|
# Calculate Euclidean distance coefficient between two fingerprint vectors.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     ignoring case; AlgebraicForm is used when it is not defined
#   . SkipValuesCheck: flag passed on to the validation step; 0 when not defined
#
# Returns the value calculated by the helper for the requested form, or undef
# when CalculationMode matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub EuclideanDistanceCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  # Fall back to default values for missing optional arguments...
  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("EuclideanDistanceCoefficient: Calculation failed", $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);

  # Hand over to the helper implementing the requested form...
  return _EuclideanDistanceCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _EuclideanDistanceCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _EuclideanDistanceCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  # Unknown calculation mode...
  return undef;
}
|
|
1221
|
|
# Calculate Euclidean distance coefficient using algebraic form:
#
#   SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) )
#
sub _EuclideanDistanceCoefficientUsingAlgebraicForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  return sqrt(_GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues($FingerprintsVectorA, $FingerprintsVectorB));
}
|
|
1232
|
|
# Calculate Euclidean distance coefficient using binary form:
#
#   SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc )
#
# Na, Nb and Nc are the three counts returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits.
#
sub _EuclideanDistanceCoefficientUsingBinaryForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($FingerprintsVectorA, $FingerprintsVectorB);

  return (sqrt($CountA + $CountB - 2 * $CountCommon));
}
|
|
1243
|
|
# Calculate Euclidean distance coefficient using set theoretic form:
#
#   SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
#
sub _EuclideanDistanceCoefficientUsingSetTheoreticForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($FingerprintsVectorA, $FingerprintsVectorB);

  return (sqrt($TotalA + $TotalB - 2 * $TotalOfMinimums));
}
|
|
1256
|
|
# Calculate Jaccard similarity coefficient between two fingerprint vectors.
#
# Jaccard similarity is the same as Tanimoto similarity: all four arguments are
# forwarded to TanimotoSimilarityCoefficient and its result is returned as is.
#
# This functionality can be either invoked as a class function or an object method.
#
sub JaccardSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return TanimotoSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
|
|
1266
|
|
# Calculate Tanimoto similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     ignoring case; AlgebraicForm is used when it is not defined
#   . SkipValuesCheck: flag passed on to the validation step; 0 when not defined
#
# Returns the value calculated by the helper for the requested form, or undef
# when CalculationMode matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TanimotoSimilarityCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  # Fall back to default values for missing optional arguments...
  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("TanimotoSimilarityCoefficient: Calculation failed", $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);

  # Hand over to the helper implementing the requested form...
  return _TanimotoSimilarityCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _TanimotoSimilarityCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _TanimotoSimilarityCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  # Unknown calculation mode...
  return undef;
}
|
|
1295
|
|
# Calculate Tanimoto similarity coefficient using algebraic form:
#
#   SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
#
# Returns 0 when the denominator is zero.
#
sub _TanimotoSimilarityCoefficientUsingAlgebraicForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $SquaresTotalA = _GetSumOfSquaresOfFingerprintsOrderedValues($FingerprintsVectorA);
  my $SquaresTotalB = _GetSumOfSquaresOfFingerprintsOrderedValues($FingerprintsVectorB);
  my $ProductsTotal = _GetSumOfProductOfFingerprintsOrderedValues($FingerprintsVectorA, $FingerprintsVectorB);

  my $Divisor = $SquaresTotalA + $SquaresTotalB - $ProductsTotal;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $ProductsTotal / $Divisor;
}
|
|
1311
|
|
# Calculate Tanimoto similarity coefficient using binary form:
#
#   Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
#
# Na, Nb and Nc are the three counts returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is zero.
#
sub _TanimotoSimilarityCoefficientUsingBinaryForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($FingerprintsVectorA, $FingerprintsVectorB);

  my $Divisor = $CountA + $CountB - $CountCommon;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $CountCommon / $Divisor;
}
|
|
1325
|
|
# Calculate Tanimoto similarity coefficient using set theoretic form:
#
#   SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# Returns 0 when the denominator is zero.
#
sub _TanimotoSimilarityCoefficientUsingSetTheoreticForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($FingerprintsVectorA, $FingerprintsVectorB);

  my $Divisor = $TotalA + $TotalB - $TotalOfMinimums;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return $TotalOfMinimums / $Divisor;
}
|
|
1341
|
|
1342
|
|
# Calculate Soergel distance coefficient between two fingerprint vectors.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: AlgebraicForm, BinaryForm or SetTheoreticForm, matched
#     ignoring case; AlgebraicForm is used when it is not defined
#   . SkipValuesCheck: flag passed on to the validation step; 0 when not defined
#
# Returns the value calculated by the helper for the requested form, or undef
# when CalculationMode matches none of the three forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SoergelDistanceCoefficient ($$;$$) {
  my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  # Fall back to default values for missing optional arguments...
  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  # Validate and process fingerprints vectors before the calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("SoergelDistanceCoefficient: Calculation failed", $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);

  # Hand over to the helper implementing the requested form...
  return _SoergelDistanceCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^AlgebraicForm$/i;

  return _SoergelDistanceCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^BinaryForm$/i;

  return _SoergelDistanceCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB)
    if $CalculationMode =~ /^SetTheoreticForm$/i;

  # Unknown calculation mode...
  return undef;
}
|
|
1371
|
|
# Calculate Soergel distance coefficient using algebraic form:
#
#   SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
#
# Returns 0 when the denominator is zero.
#
sub _SoergelDistanceCoefficientUsingAlgebraicForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $TotalOfAbsDifferences = _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($FingerprintsVectorA, $FingerprintsVectorB);
  my $TotalOfMaximums = _GetSumOfMaximumOfFingerprintsOrderdedValues($FingerprintsVectorA, $FingerprintsVectorB);

  # Avoid division by zero...
  return 0 unless $TotalOfMaximums;

  return $TotalOfAbsDifferences / $TotalOfMaximums;
}
|
|
1386
|
|
# Calculate Soergel distance coefficient using binary form:
#
#   1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
#
# Na, Nb and Nc are the three counts returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator is zero.
#
sub _SoergelDistanceCoefficientUsingBinaryForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($FingerprintsVectorA, $FingerprintsVectorB);

  my $Divisor = $CountA + $CountB - $CountCommon;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($CountA + $CountB - 2 * $CountCommon) / $Divisor;
}
|
|
1400
|
|
# Calculate Soergel distance coefficient using set theoretic form:
#
#   ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# Returns 0 when the denominator is zero.
#
sub _SoergelDistanceCoefficientUsingSetTheoreticForm {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorB);
  my $TotalOfMinimums = _GetSumOfMinimumOfFingerprintsOrderdedValues($FingerprintsVectorA, $FingerprintsVectorB);

  my $Divisor = $TotalA + $TotalB - $TotalOfMinimums;

  # Avoid division by zero...
  return 0 unless $Divisor;

  return ($TotalA + $TotalB - 2 * $TotalOfMinimums) / $Divisor;
}
|
|
1416
|
|
# Validate and process fingerprints vectors for similarity calculations...
#
# The validation step is performed only when SkipValuesCheck is false; the
# processing step is always performed. CalculationMode is set to AlgebraicForm
# and SkipValuesCheck to 0 when not defined. ErrorMsg is passed on to both
# steps for use in their error messages.
#
sub _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;

  # Fall back to default values for missing optional arguments...
  $CalculationMode = 'AlgebraicForm' unless defined $CalculationMode;
  $SkipValuesCheck = 0 unless defined $SkipValuesCheck;

  unless ($SkipValuesCheck) {
    _ValidateFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode);
  }

  _ProcessFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode);
}
|
|
1430
|
|
# Make sure fingerprint vectors are good for performing similarity/distance calculation...
#
# Croaks, using a message starting with the class name and ErrorMsg, when:
#
#   . Either object is not a fingerprints vector
#   . Types of the two fingerprints vectors are different
#   . CalculationMode is not AlgebraicForm, BinaryForm or SetTheoreticForm (case is ignored)
#   . Either fingerprints vector contains no values
#   . OrderedNumericalValues: number of values in the two vectors is different
#   . NumericalValues: a vector has no value IDs, or its number of value IDs
#     is different from its number of values
#   . AlphaNumericalValues: either vector has value IDs
#   . Type is none of the three types listed above
#
sub _ValidateFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode) = @_;

  # Make sure both are fingerprint vectors..
  if (!(IsFingerprintsVector($FingerprintsVectorA) && IsFingerprintsVector($FingerprintsVectorB))) {
    croak "Error: ${ClassName}->${ErrorMsg}: Both objects must be fingerprint vectors...";
  }

  # Check types...
  if ($FingerprintsVectorA->{Type} ne $FingerprintsVectorB->{Type}) {
    croak "Error: ${ClassName}->${ErrorMsg}: Type of first fingerprint vector, $FingerprintsVectorA->{Type}, must be same as type of second fingerprint vector, $FingerprintsVectorB->{Type}...";
  }

  # Check calculation mode...
  if ($CalculationMode !~ /^(AlgebraicForm|BinaryForm|SetTheoreticForm)$/i) {
    croak "Error: ${ClassName}->${ErrorMsg}: Specified similarity calculation mode, $CalculationMode, is not valid. Supported values: AlgebraicForm, BinaryForm, and SetTheoreticForm...";
  }

  # Check values and value IDs...
  my($Na, $Nb, $NIDa, $NIDb);
  $Na = $FingerprintsVectorA->GetNumOfValues(); $Nb = $FingerprintsVectorB->GetNumOfValues();
  $NIDa = $FingerprintsVectorA->GetNumOfValueIDs(); $NIDb = $FingerprintsVectorB->GetNumOfValueIDs();

  if ($Na == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
  }
  if ($Nb == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in second fingerprint vector, $Nb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
  }

  # Types of both vectors are the same at this point; perform type specific checks...
  if ($FingerprintsVectorA->{Type} =~ /^OrderedNumericalValues$/i) {
    if ($Na != $Nb) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be equal to number of values, $Nb, in second fingerprint vector for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^NumericalValues$/i) {
    if ($NIDa == 0) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb == 0) {
      # This check is for the second vector; the message must say so...
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }

    if ($NIDa != $Na) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be equal to its number of values, $Na, for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb != $Nb) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be equal to its number of values, $Nb, for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^AlphaNumericalValues$/i) {
    if ($NIDa || $NIDb) {
      croak "Error: ${ClassName}->${ErrorMsg}: ValueIDs cann't be specified for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  else {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $FingerprintsVectorA->{Type} is not valid...";
  }
}
|
|
1492
|
|
# Process fingerprints vectors for similarity calculation by generating vectors
# containing ordered list of values...
#
# OrderedValuesRef and BitVector are reset to undef in both fingerprints vectors
# before a type specific helper, selected using type of the first vector, is
# invoked. Croaks for an unknown type. For BinaryForm calculation mode, an
# additional helper is invoked to transform the final ordered values into bit
# vectors.
#
sub _ProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode) = @_;

  # Clear out any results from an earlier calculation...
  for my $FingerprintsVector ($FingerprintsVectorA, $FingerprintsVectorB) {
    $FingerprintsVector->{OrderedValuesRef} = undef;
    $FingerprintsVector->{BitVector} = undef;
  }

  # Generate ordered values using a type specific helper...
  my $Type = $FingerprintsVectorA->{Type};

  if ($Type =~ /^OrderedNumericalValues$/i) {
    _ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation($FingerprintsVectorA, $FingerprintsVectorB);
  }
  elsif ($Type =~ /^NumericalValues$/i) {
    _ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation($FingerprintsVectorA, $FingerprintsVectorB);
  }
  elsif ($Type =~ /^AlphaNumericalValues$/i) {
    _ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation($FingerprintsVectorA, $FingerprintsVectorB);
  }
  else {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $FingerprintsVectorA->{Type} is not valid...";
  }

  # Binary form calculations work on bit vectors...
  if ($CalculationMode =~ /^BinaryForm$/i) {
    _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation($FingerprintsVectorA, $FingerprintsVectorB);
  }
}
|
|
1518
|
|
# Prepare fingerprints vectors containing ordered numerical values for
# similarity calculations.
#
# The values are already in their final order, so OrderedValuesRef of each
# vector is simply pointed at that vector's own Values array; nothing is copied.
#
sub _ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $ValuesRefA = \@{$FingerprintsVectorA->{Values}};
  my $ValuesRefB = \@{$FingerprintsVectorB->{Values}};

  $FingerprintsVectorA->{OrderedValuesRef} = $ValuesRefA;
  $FingerprintsVectorB->{OrderedValuesRef} = $ValuesRefB;
}
|
|
1527
|
|
# Prepare fingerprints vectors containing numerical values identified by value
# IDs for similarity calculations.
#
# For each vector, values sharing a value ID are added together. The value IDs
# seen in either vector are then sorted using the default string sort, and each
# vector gets a new OrderedValuesRef array holding its summed value for every
# ID in that order; an ID missing from a vector contributes 0.
#
sub _ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my(%SummedValuesA, %SummedValuesB, %SeenValueIDs);

  # Collect summed values for each vector along with the IDs seen in either one...
  for my $VectorAndSums ([$FingerprintsVectorA, \%SummedValuesA], [$FingerprintsVectorB, \%SummedValuesB]) {
    my($Vector, $SummedValuesRef) = @{$VectorAndSums};

    for my $Position (0 .. $#{$Vector->{ValueIDs}}) {
      my $ID = $Vector->{ValueIDs}[$Position];
      my $Amount = $Vector->{Values}[$Position];

      # The first value for an ID is stored as is; later ones are added to it...
      if (exists $SummedValuesRef->{$ID}) {
        $SummedValuesRef->{$ID} += $Amount;
      }
      else {
        $SummedValuesRef->{$ID} = $Amount;
      }
      $SeenValueIDs{$ID} = 1;
    }
  }

  # Line up both vectors against the same sorted list of IDs...
  my @SortedValueIDs = sort keys %SeenValueIDs;

  my @AlignedValuesA = map { exists $SummedValuesA{$_} ? $SummedValuesA{$_} : 0 } @SortedValueIDs;
  my @AlignedValuesB = map { exists $SummedValuesB{$_} ? $SummedValuesB{$_} : 0 } @SortedValueIDs;

  $FingerprintsVectorA->{OrderedValuesRef} = \@AlignedValuesA;
  $FingerprintsVectorB->{OrderedValuesRef} = \@AlignedValuesB;
}
|
|
1585
|
|
# Prepare fingerprints vectors containing alphanumerical values for similarity
# calculations.
#
# The number of occurrences of each distinct value is counted per vector. The
# distinct values seen in either vector are then sorted using the default string
# sort, and each vector gets a new OrderedValuesRef array holding its count for
# every value in that order; a value missing from a vector contributes 0.
#
sub _ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my(%OccurrencesInA, %OccurrencesInB, %SeenValues);

  # Count values in each vector and note every value seen in either one...
  for my $VectorAndCounts ([$FingerprintsVectorA, \%OccurrencesInA], [$FingerprintsVectorB, \%OccurrencesInB]) {
    my($Vector, $OccurrencesRef) = @{$VectorAndCounts};

    for my $Item (@{$Vector->{Values}}) {
      $OccurrencesRef->{$Item}++;
      $SeenValues{$Item} = 1;
    }
  }

  # Line up both vectors against the same sorted list of distinct values...
  my @SortedValues = sort keys %SeenValues;

  my @AlignedCountsA = map { exists $OccurrencesInA{$_} ? $OccurrencesInA{$_} : 0 } @SortedValues;
  my @AlignedCountsB = map { exists $OccurrencesInB{$_} ? $OccurrencesInB{$_} : 0 } @SortedValues;

  $FingerprintsVectorA->{OrderedValuesRef} = \@AlignedCountsA;
  $FingerprintsVectorB->{OrderedValuesRef} = \@AlignedCountsB;
}
|
|
1640
|
|
# Transform final ordered values arrays into BitVector objects for similarity
# calculation.
#
# A BitVector is created for each fingerprints vector and stored under its
# BitVector key; a bit is set wherever the corresponding ordered value is true.
# Both bit vectors are sized from the ordered values of the first vector.
# NOTE(review): this relies on both OrderedValuesRef arrays having the same
# length - confirm the callers guarantee it.
#
sub _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($Index, $Size, $SkipCheck);

  # Create bit vectors...
  $Size = scalar @{$FingerprintsVectorA->{OrderedValuesRef}};

  # Use an explicit class method call: this package defines its own new(), and
  # the indirect object form "new BitVector(...)" is resolved heuristically...
  $FingerprintsVectorA->{BitVector} = BitVector->new($Size);
  $FingerprintsVectorB->{BitVector} = BitVector->new($Size);

  # Set bits...
  # $SkipCheck is passed as the second argument to SetBit; presumably it turns
  # off index validation there - confirm against BitVector::SetBit.
  $SkipCheck = 1;
  for $Index (0 .. ($Size - 1)) {
    if ($FingerprintsVectorA->{OrderedValuesRef}[$Index]) {
      $FingerprintsVectorA->{BitVector}->SetBit($Index, $SkipCheck);
    }
    if ($FingerprintsVectorB->{OrderedValuesRef}[$Index]) {
      $FingerprintsVectorB->{BitVector}->SetBit($Index, $SkipCheck);
    }
  }
}
|
|
1664
|
|
# Return the result of StatisticsUtil::Sum applied to the ordered values array
# reference of the specified fingerprints vector.
#
sub _GetSumOfFingerprintsOrderedValues {
  my($FingerprintVector) = @_;
  my $OrderedValuesRef = $FingerprintVector->{OrderedValuesRef};

  return StatisticsUtil::Sum($OrderedValuesRef);
}
|
|
1672
|
|
# Return the result of StatisticsUtil::SumOfSquares applied to the ordered
# values array reference of the specified fingerprints vector.
#
sub _GetSumOfSquaresOfFingerprintsOrderedValues {
  my($FingerprintVector) = @_;
  my $OrderedValuesRef = $FingerprintVector->{OrderedValuesRef};

  return StatisticsUtil::SumOfSquares($OrderedValuesRef);
}
|
|
1680
|
|
# Return the sum of products of corresponding ordered values of two
# fingerprints vectors: SUM ( Xai * Xbi ).
#
# The number of terms is taken from the ordered values of the first vector.
#
sub _GetSumOfProductOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $Total = 0;
  $Total += $FingerprintsVectorA->{OrderedValuesRef}[$_] * $FingerprintsVectorB->{OrderedValuesRef}[$_]
    for 0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}};

  return $Total;
}
|
|
1693
|
|
# Return the sum of absolute differences between corresponding ordered values
# of two fingerprints vectors: SUM ( ABS ( Xai - Xbi ) ).
#
# The number of terms is taken from the ordered values of the first vector.
#
sub _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $Total = 0;
  foreach my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    my $Difference = $FingerprintsVectorA->{OrderedValuesRef}[$Position] - $FingerprintsVectorB->{OrderedValuesRef}[$Position];
    $Total += abs($Difference);
  }

  return $Total;
}
|
|
1706
|
|
# Return the sum of squared differences between corresponding ordered values
# of two fingerprints vectors: SUM ( ( Xai - Xbi ) ** 2 ).
#
# The number of terms is taken from the ordered values of the first vector.
#
sub _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  # Squared differences in index order...
  my @SquaredDifferences = map {
    my $Difference = $FingerprintsVectorA->{OrderedValuesRef}[$_] - $FingerprintsVectorB->{OrderedValuesRef}[$_];
    $Difference**2;
  } 0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}};

  my $Total = 0;
  $Total += $_ for @SquaredDifferences;

  return $Total;
}
|
|
1719
|
|
# Return the sum, over corresponding ordered values of two fingerprints
# vectors, of the result of MathUtil::min for each pair: SUM ( MIN ( Xai, Xbi ) ).
#
# The number of terms is taken from the ordered values of the first vector.
#
sub _GetSumOfMinimumOfFingerprintsOrderdedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $Total = 0;
  $Total += MathUtil::min($FingerprintsVectorA->{OrderedValuesRef}[$_], $FingerprintsVectorB->{OrderedValuesRef}[$_])
    for 0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}};

  return $Total;
}
|
|
1732
|
|
# Return the sum, over corresponding ordered values of two fingerprints
# vectors, of the result of MathUtil::max for each pair: SUM ( MAX ( Xai, Xbi ) ).
#
# The number of terms is taken from the ordered values of the first vector.
#
sub _GetSumOfMaximumOfFingerprintsOrderdedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $Total = 0;
  foreach my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    $Total += MathUtil::max($FingerprintsVectorA->{OrderedValuesRef}[$Position], $FingerprintsVectorB->{OrderedValuesRef}[$Position]);
  }

  return $Total;
}
|
|
1745
|
|
# Return (Na, Nb, Nc) for the BinaryForm of calculation, using the objects
# stored under the BitVector key of the two fingerprints vectors:
#
#   Na - number of set bits reported for vector A
#   Nb - number of set bits reported for vector B
#   Nc - number of set bits reported for A & B
#
# NOTE(review): "&" is applied directly to the two BitVector objects and its
# result is asked for GetNumOfSetBits, so BitVector presumably overloads "&" to
# return another BitVector - confirm against the BitVector class.
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($BitsA, $BitsB) = ($FingerprintsVectorA->{BitVector}, $FingerprintsVectorB->{BitVector});

  my $NumSetInA = $BitsA->GetNumOfSetBits();
  my $NumSetInB = $BitsB->GetNumOfSetBits();

  # Bits set in both A and B...
  my $CommonBits = $BitsA & $BitsB;
  my $NumSetInBoth = $CommonBits->GetNumOfSetBits();

  return ($NumSetInA, $NumSetInB, $NumSetInBoth);
}
|
|
1768
|
|
# Return the names of the supported distance coefficients, as listed in the
# file-level @DistanceCoefficients array.
#
sub GetSupportedDistanceCoefficients () {
  my @SupportedCoefficients = @DistanceCoefficients;

  return @SupportedCoefficients;
}
|
|
1775
|
|
# Return the names of the supported similarity coefficients, as listed in the
# file-level @SimilarityCoefficients array.
#
sub GetSupportedSimilarityCoefficients () {
  my @SupportedCoefficients = @SimilarityCoefficients;

  return @SupportedCoefficients;
}
|
|
1782
|
|
# Return the names of all supported distance and similarity coefficients,
# combined from the file-level @DistanceCoefficients and @SimilarityCoefficients
# arrays and sorted using the default string sort.
#
# In list context the sorted names are returned; in scalar context the number
# of names is returned, as for the other GetSupported... functions.
#
sub GetSupportedDistanceAndSimilarityCoefficients () {
  my(@DistanceAndSimilarityCoefficients);

  # Sort into an array first: the behaviour of "return sort ..." is undefined
  # when the function is called in scalar context...
  @DistanceAndSimilarityCoefficients = sort(@DistanceCoefficients, @SimilarityCoefficients);

  return @DistanceAndSimilarityCoefficients;
}
|
|
1794
|
|
# Is it a fingerprints vector object?
#
# Takes a single scalar and returns whatever _IsFingerprintsVector returns
# for it.
#
sub IsFingerprintsVector ($) {
  my($Candidate) = @_;
  my $IsVector = _IsFingerprintsVector($Candidate);

  return $IsVector;
}
|
|
1801
|
|
# Is it a fingerprints vector object?
#
# Returns 1 when the specified value is a blessed reference whose isa() check
# against the file-level $ClassName succeeds; otherwise returns 0.
#
sub _IsFingerprintsVector {
  my($Candidate) = @_;

  if (Scalar::Util::blessed($Candidate) && $Candidate->isa($ClassName)) {
    return 1;
  }
  return 0;
}
|
|
1808
|
|
# Return a string describing the fingerprints vector: its type, number of
# values, number of non-zero values (for OrderedNumericalValues and
# NumericalValues types only), the values, the number of value IDs and the
# value IDs.
#
# Values and value IDs are each listed space delimited, or shown as 'None' when
# there are none. Once a list reaches 500 entries, only its first 500 are shown,
# followed by " ...".
#
# NOTE(review): the "(Truncated after ...)" labels for both values and value
# IDs are chosen from the number of values alone, not from the number of value
# IDs.
#
sub StringifyFingerprintsVector {
  my($This) = @_;
  my $MaxValuesToStringify = 500;

  # Format a list as a space delimited string, truncating it as needed...
  my $FormatItems = sub {
    my($NumOfItems, $ItemsRef) = @_;

    if ($NumOfItems >= $MaxValuesToStringify) {
      return join(' ', @{$ItemsRef}[0 .. ($MaxValuesToStringify - 1)]) . " ...";
    }
    return $NumOfItems ? join(' ', @{$ItemsRef}) : 'None';
  };

  my $NumOfValues = $This->GetNumOfValues();
  my $ValuesString = $FormatItems->($NumOfValues, $This->{Values});

  my $NumOfValueIDs = $This->GetNumOfValueIDs();
  my $ValueIDsString = $FormatItems->($NumOfValueIDs, $This->{ValueIDs});

  # Assemble the pieces in output order...
  my @Pieces = ("Type: $This->{Type}; NumOfValues: $NumOfValues");

  if ($This->{Type} =~ /^(OrderedNumericalValues|NumericalValues)$/i) {
    my $NumOfNonZeroValues = $This->GetNumOfNonZeroValues();
    push @Pieces, "; NumOfNonZeroValues: $NumOfNonZeroValues";
  }

  if ($NumOfValues >= $MaxValuesToStringify) {
    push @Pieces, "; Values (Truncated after $MaxValuesToStringify): <$ValuesString>; NumOfValueIDs: $NumOfValueIDs; ValueIDs (Truncated after $MaxValuesToStringify): <$ValueIDsString>";
  }
  else {
    push @Pieces, "; Values: <$ValuesString>; NumOfValueIDs: $NumOfValueIDs; ValueIDs: <$ValueIDsString>";
  }

  return join('', @Pieces);
}
|
|
1865
|
|
1866 1;
|
|
1867
|
|
1868 __END__
|
|
1869
|
|
1870 =head1 NAME
|
|
1871
|
|
1872 FingerprintsVector
|
|
1873
|
|
1874 =head1 SYNOPSIS
|
|
1875
|
|
1876 use Fingerprints::FingerprintsVector;
|
|
1877
|
|
1878 use Fingerprints::FingerprintsVector qw(:all);
|
|
1879
|
|
1880 =head1 DESCRIPTION
|
|
1881
|
|
1882 B<FingerprintsVector> class provides the following methods:
|
|
1883
|
|
1884 new, AddValueIDs, AddValues, CityBlockDistanceCoefficient,
|
|
1885 CosineSimilarityCoefficient, CzekanowskiSimilarityCoefficient,
|
|
1886 DiceSimilarityCoefficient, EuclideanDistanceCoefficient, GetDescription,
|
|
1887 GetFingerprintsVectorString, GetID, GetIDsAndValuesPairsString,
|
|
1888 GetIDsAndValuesString, GetNumOfNonZeroValues, GetNumOfValueIDs, GetNumOfValues,
|
|
1889 GetSupportedDistanceAndSimilarityCoefficients, GetSupportedDistanceCoefficients,
|
|
1890 GetSupportedSimilarityCoefficients, GetType, GetValue, GetValueID, GetValueIDs,
|
|
1891 GetValueIDsString, GetValues, GetValuesAndIDsPairsString, GetValuesAndIDsString,
|
|
1892 GetValuesString, GetVectorType, HammingDistanceCoefficient, IsFingerprintsVector,
|
|
1893 JaccardSimilarityCoefficient, ManhattanDistanceCoefficient,
|
|
1894 NewFromIDsAndValuesPairsString, NewFromIDsAndValuesString,
|
|
1895 NewFromValuesAndIDsPairsString, NewFromValuesAndIDsString, NewFromValuesString,
|
|
1896 OchiaiSimilarityCoefficient, SetDescription, SetID, SetType, SetValue, SetValueID,
|
|
1897 SetValueIDs, SetValues, SetVectorType, SoergelDistanceCoefficient,
|
|
1898 SorensonSimilarityCoefficient, StringifyFingerprintsVector,
|
|
1899 TanimotoSimilarityCoefficient
|
|
1900
|
|
1901 The methods available to create fingerprints vector from strings and to calculate similarity
|
|
1902 and distance coefficients between two vectors can also be invoked as class functions.
|
|
1903
|
|
1904 B<FingerprintsVector> class provides support to perform comparison between vectors
|
|
1905 containing three different types of values:
|
|
1906
|
|
1907 Type I: OrderedNumericalValues
|
|
1908
|
|
1909 o Sizes of the two vectors are the same
|
|
1910 o Vectors contain real values in a specific order. For example: MACCS keys
|
|
1911 count, Topological pharmacophore atom pairs and so on.
|
|
1912
|
|
1913 Type II: UnorderedNumericalValues
|
|
1914
|
|
1915 o Size of two vectors might not be same
|
|
1916 o Vectors contain unordered real value identified by value IDs. For example:
|
|
1917 Topological atom pairs, Topological atom torsions and so on
|
|
1918
|
|
1919 Type III: AlphaNumericalValues
|
|
1920
|
|
1921 o Size of two vectors might not be same
|
|
1922 o Vectors contain unordered alphanumerical values. For example: Extended
|
|
1923 connectivity fingerprints, atom neighborhood fingerprints.
|
|
1924
|
|
1925 Before performing similarity or distance calculations between vectors containing UnorderedNumericalValues
|
|
1926 or AlphaNumericalValues, the vectors are transformed into vectors containing unique OrderedNumericalValues
|
|
1927 using value IDs for UnorderedNumericalValues and the values themselves for AlphaNumericalValues.
|
|
1928
|
|
1929 Three forms of similarity and distance calculation between two vectors, specified using B<CalculationMode>
|
|
1930 option, are supported: I<AlgebraicForm, BinaryForm or SetTheoreticForm>.
|
|
1931
|
|
1932 For I<BinaryForm>, the ordered list of processed final vector values containing the value or
|
|
1933 count of each unique value type is simply converted into a binary vector containing 1s and 0s
|
|
1934 corresponding to presence or absence of values before calculating similarity or distance between
|
|
1935 two vectors.
|
|
1936
|
|
1937 For two fingerprint vectors A and B of same size containing OrderedNumericalValues, let:
|
|
1938
|
|
1939 N = Number of values in A or B
|
|
1940
|
|
1941 Xa = Values of vector A
|
|
1942 Xb = Values of vector B
|
|
1943
|
|
1944 Xai = Value of ith element in A
|
|
1945 Xbi = Value of ith element in B
|
|
1946
|
|
1947 SUM = Sum of i over N values
|
|
1948
|
|
1949 For SetTheoreticForm of calculation between two vectors, let:
|
|
1950
|
|
1951 SetIntersectionXaXb = SUM ( MIN ( Xai, Xbi ) )
|
|
1952 SetDifferenceXaXb = SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) )
|
|
1953
|
|
1954 For BinaryForm of calculation between two vectors, let:
|
|
1955
|
|
1956 Na = Number of bits set to "1" in A = SUM ( Xai )
|
|
1957 Nb = Number of bits set to "1" in B = SUM ( Xbi )
|
|
1958 Nc = Number of bits set to "1" in both A and B = SUM ( Xai * Xbi )
|
|
1959 Nd = Number of bits set to "0" in both A and B
|
|
1960 = SUM ( 1 - Xai - Xbi + Xai * Xbi)
|
|
1961
|
|
1962 N = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
|
|
1963
|
|
1964 Additionally, for BinaryForm various values also correspond to:
|
|
1965
|
|
1966 Na = | Xa |
|
|
1967 Nb = | Xb |
|
|
1968 Nc = | SetIntersectionXaXb |
|
|
1969 Nd = N - | SetDifferenceXaXb |
|
|
1970
|
|
1971 | SetDifferenceXaXb | = N - Nd = Na + Nb - Nc + Nd - Nd = Na + Nb - Nc
|
|
1972 = | Xa | + | Xb | - | SetIntersectionXaXb |
|
|
1973
|
|
1974 Various similarity and distance coefficients [ Ref 40, Ref 62, Ref 64 ] for a pair of vectors A and B
|
|
1975 in I<AlgebraicForm, BinaryForm and SetTheoreticForm> are defined as follows:
|
|
1976
|
|
1977 B<CityBlockDistance>: ( same as HammingDistance and ManhattanDistance)
|
|
1978
|
|
1979 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) )
|
|
1980
|
|
1981 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
|
|
1982
|
|
1983 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
|
|
1984
|
|
1985 B<CosineSimilarity>: ( same as OchiaiSimilarityCoefficient)
|
|
1986
|
|
1987 I<AlgebraicForm>: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
|
|
1988
|
|
1989 I<BinaryForm>: Nc / SQRT ( Na * Nb)
|
|
1990
|
|
1991 I<SetTheoreticForm>: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
|
|
1992
|
|
1993 B<CzekanowskiSimilarity>: ( same as DiceSimilarity and SorensonSimilarity)
|
|
1994
|
|
1995 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
|
|
1996
|
|
1997 I<BinaryForm>: 2 * Nc / ( Na + Nb )
|
|
1998
|
|
1999 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
|
|
2000
|
|
2001 B<DiceSimilarity>: ( same as CzekanowskiSimilarity and SorensonSimilarity)
|
|
2002
|
|
2003 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
|
|
2004
|
|
2005 I<BinaryForm>: 2 * Nc / ( Na + Nb )
|
|
2006
|
|
2007 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
|
|
2008
|
|
2009 B<EuclideanDistance>:
|
|
2010
|
|
2011 I<AlgebraicForm>: SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) )
|
|
2012
|
|
2013 I<BinaryForm>: SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc )
|
|
2014
|
|
2015 I<SetTheoreticForm>: SQRT ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) = SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
|
|
2016
|
|
2017 B<HammingDistance>: ( same as CityBlockDistance and ManhattanDistance)
|
|
2018
|
|
2019 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) )
|
|
2020
|
|
2021 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
|
|
2022
|
|
2023 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
|
|
2024
|
|
2025 B<JaccardSimilarity>: ( same as TanimotoSimilarity)
|
|
2026
|
|
2027 I<AlgebraicForm>: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
|
|
2028
|
|
2029 I<BinaryForm>: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
|
|
2030
|
|
2031 I<SetTheoreticForm>: | SetIntersectionXaXb | / | SetDifferenceXaXb | = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
|
|
2032
|
|
2033 B<ManhattanDistance>: ( same as CityBlockDistance and HammingDistance)
|
|
2034
|
|
2035 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) )
|
|
2036
|
|
2037 I<BinaryForm>: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
|
|
2038
|
|
2039 I<SetTheoreticForm>: | SetDifferenceXaXb | - | SetIntersectionXaXb | = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
|
|
2040
|
|
2041 B<OchiaiSimilarity>: ( same as CosineSimilarity)
|
|
2042
|
|
2043 I<AlgebraicForm>: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
|
|
2044
|
|
2045 I<BinaryForm>: Nc / SQRT ( Na * Nb)
|
|
2046
|
|
2047 I<SetTheoreticForm>: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| ) = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
|
|
2048
|
|
2049 B<SorensonSimilarity>: ( same as CzekanowskiSimilarity and DiceSimilarity)
|
|
2050
|
|
2051 I<AlgebraicForm>: ( 2 * ( SUM ( Xai * Xbi ) ) ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
|
|
2052
|
|
2053 I<BinaryForm>: 2 * Nc / ( Na + Nb )
|
|
2054
|
|
2055 I<SetTheoreticForm>: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
|
|
2056
|
|
2057 B<SoergelDistance>:
|
|
2058
|
|
2059 I<AlgebraicForm>: SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
|
|
2060
|
|
2061 I<BinaryForm>: 1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
|
|
2062
|
|
2063 I<SetTheoreticForm>: ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) / | SetDifferenceXaXb | = ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
|
|
2064
|
|
2065 B<TanimotoSimilarity>: ( same as JaccardSimilarity)
|
|
2066
|
|
2067 I<AlgebraicForm>: SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
|
|
2068
|
|
2069 I<BinaryForm>: Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
|
|
2070
|
|
2071 I<SetTheoreticForm>: | SetIntersectionXaXb | / | SetDifferenceXaXb | = SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
|
|
2072
|
|
2073 =head2 METHODS
|
|
2074
|
|
2075 =over 4
|
|
2076
|
|
2077 =item B<new>
|
|
2078
|
|
2079 $FPVector = new Fingerprints::FingerprintsVector(%NamesAndValues);
|
|
2080
|
|
2081 Using specified I<FingerprintsVector> property names and values hash, B<new> method creates
|
|
2082 a new object and returns a reference to newly created B<FingerprintsVector>
|
|
2083 object. By default, the following properties are initialized:
|
|
2084
|
|
2085 Type = ''
|
|
2086 @{Values} = ()
|
|
2087 @{ValueIDs} = ()
|
|
2088
|
|
2089 Examples:
|
|
2090
|
|
2091 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'OrderedNumericalValues',
|
|
2092 'Values' => [1, 2, 3, 4]);
|
|
2093 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'NumericalValues',
|
|
2094 'Values' => [10, 22, 33, 44],
|
|
2095 'ValueIDs' => ['ID1', 'ID2', 'ID3', 'ID4']);
|
|
2096 $FPVector = new Fingerprints::FingerprintsVector('Type' => 'AlphaNumericalValues',
|
|
2097 'Values' => ['a1', 2, 'a3', 4]);
|
|
2098
|
|
2099 =item B<AddValueIDs>
|
|
2100
|
|
2101 $FingerprintsVector->AddValueIDs($ValueIDsRef);
|
|
2102 $FingerprintsVector->AddValueIDs(@ValueIDs);
|
|
2103
|
|
2104 Adds specified I<ValueIDs> to I<FingerprintsVector> and returns I<FingerprintsVector>.
|
|
2105
|
|
2106 =item B<AddValues>
|
|
2107
|
|
2108 $FingerprintsVector->AddValues($ValuesRef);
|
|
2109 $FingerprintsVector->AddValues(@Values);
|
|
2110 $FingerprintsVector->AddValues($Vector);
|
|
2111
|
|
2112 Adds specified I<Values> to I<FingerprintsVector> and returns I<FingerprintsVector>.
|
|
2113
|
|
2114 =item B<CityBlockDistanceCoefficient>
|
|
2115
|
|
2116 $Value = $FingerprintsVector->CityBlockDistanceCoefficient(
|
|
2117 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2118 $Value = Fingerprints::FingerprintsVector::CityBlockDistanceCoefficient(
|
|
2119 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2120 [$CalculationMode, $SkipValuesCheck]);
|
|
2121
|
|
2122 Returns value of I<CityBlock> distance coefficient between two I<FingerprintsVectors> using
|
|
2123 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2124
|
|
2125 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2126 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2127
|
|
2128 =item B<CosineSimilarityCoefficient>
|
|
2129
|
|
2130 $Value = $FingerprintsVector->CosineSimilarityCoefficient(
|
|
2131 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2132 $Value = Fingerprints::FingerprintsVector::CosineSimilarityCoefficient(
|
|
2133 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2134 [$CalculationMode, $SkipValuesCheck]);
|
|
2135
|
|
2136 Returns value of I<Cosine> similarity coefficient between two I<FingerprintsVectors> using
|
|
2137 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2138
|
|
2139 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2140 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2141
|
|
2142 =item B<CzekanowskiSimilarityCoefficient>
|
|
2143
|
|
2144 $Value = $FingerprintsVector->CzekanowskiSimilarityCoefficient(
|
|
2145 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2146 $Value = Fingerprints::FingerprintsVector::CzekanowskiSimilarityCoefficient(
|
|
2147 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2148 [$CalculationMode, $SkipValuesCheck]);
|
|
2149
|
|
2150 Returns value of I<Czekanowski> similarity coefficient between two I<FingerprintsVectors> using
|
|
2151 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2152
|
|
2153 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2154 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2155
|
|
2156 =item B<DiceSimilarityCoefficient>
|
|
2157
|
|
2158 $Value = $FingerprintsVector->DiceSimilarityCoefficient(
|
|
2159 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2160 $Value = Fingerprints::FingerprintsVector::DiceSimilarityCoefficient(
|
|
2161 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2162 [$CalculationMode, $SkipValuesCheck]);
|
|
2163
|
|
2164 Returns value of I<Dice> similarity coefficient between two I<FingerprintsVectors> using
|
|
2165 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2166
|
|
2167 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2168 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2169
|
|
2170 =item B<EuclideanDistanceCoefficient>
|
|
2171
|
|
2172 $Value = $FingerprintsVector->EuclideanDistanceCoefficient(
|
|
2173 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2174 $Value = Fingerprints::FingerprintsVector::EuclideanDistanceCoefficient(
|
|
2175 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2176 [$CalculationMode, $SkipValuesCheck]);
|
|
2177
|
|
2178 Returns value of I<Euclidean> distance coefficient between two I<FingerprintsVectors> using
|
|
2179 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2180
|
|
2181 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2182 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2183
|
|
2184 =item B<GetDescription>
|
|
2185
|
|
2186 $Description = $FingerprintsVector->GetDescription();
|
|
2187
|
|
2188 Returns a string containing description of fingerprints vector.
|
|
2189
|
|
2190 =item B<GetFingerprintsVectorString>
|
|
2191
|
|
2192 $FPString = $FingerprintsVector->GetFingerprintsVectorString($Format);
|
|
2193
|
|
2194 Returns a B<FingerprintsString> containing vector values and/or IDs in I<FingerprintsVector>
|
|
2195 corresponding to specified I<Format>.
|
|
2196
|
|
2197 Possible I<Format> values: I<IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString,
|
|
2198 IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs,
|
|
2199 ValueIDsString, ValueIDs, ValuesString, or Values>.
|
|
2200
|
|
2201 =item B<GetID>
|
|
2202
|
|
2203 $ID = $FingerprintsVector->GetID();
|
|
2204
|
|
2205 Returns I<ID> of I<FingerprintsVector>.
|
|
2206
|
|
2207 =item B<GetVectorType>
|
|
2208
|
|
2209 $VectorType = $FingerprintsVector->GetVectorType();
|
|
2210
|
|
2211 Returns I<VectorType> of I<FingerprintsVector>.
|
|
2212
|
|
2213 =item B<GetIDsAndValuesPairsString>
|
|
2214
|
|
2215 $IDsValuesPairsString = $FingerprintsVector->GetIDsAndValuesPairsString();
|
|
2216
|
|
2217 Returns I<FingerprintsVector> value IDs and values as space delimited ID/value pair
|
|
2218 string.
|
|
2219
|
|
2220 =item B<GetIDsAndValuesString>
|
|
2221
|
|
2222 $IDsValuesString = $FingerprintsVector->GetIDsAndValuesString();
|
|
2223
|
|
2224 Returns I<FingerprintsVector> value IDs and values as string containing space delimited IDs followed by
|
|
2225 values with semicolon as IDs and values delimiter.
|
|
2226
|
|
2227 =item B<GetNumOfNonZeroValues>
|
|
2228
|
|
2229 $NumOfNonZeroValues = $FingerprintsVector->GetNumOfNonZeroValues();
|
|
2230
|
|
2231 Returns number of non-zero values in I<FingerprintsVector>.
|
|
2232
|
|
2233 =item B<GetNumOfValueIDs>
|
|
2234
|
|
2235 $NumOfValueIDs = $FingerprintsVector->GetNumOfValueIDs();
|
|
2236
|
|
2237 Returns number of value IDs in I<FingerprintsVector>.
|
|
2238
|
|
2239 =item B<GetNumOfValues>
|
|
2240
|
|
2241 $NumOfValues = $FingerprintsVector->GetNumOfValues();
|
|
2242
|
|
2243 Returns number of values in I<FingerprintsVector>.
|
|
2244
|
|
2245 =item B<GetSupportedDistanceAndSimilarityCoefficients>
|
|
2246
|
|
2247 @SupportedDistanceAndSimilarityCoefficientsReturn =
|
|
2248 Fingerprints::FingerprintsVector::GetSupportedDistanceAndSimilarityCoefficients();
|
|
2249
|
|
2250 Returns an array containing names of supported distance and similarity coefficients.
|
|
2251
|
|
2252 =item B<GetSupportedDistanceCoefficients>
|
|
2253
|
|
2254 @SupportedDistanceCoefficientsReturn =
|
|
2255 Fingerprints::FingerprintsVector::GetSupportedDistanceCoefficients();
|
|
2256
|
|
2257 Returns an array containing names of supported distance coefficients.
|
|
2258
|
|
2259 =item B<GetSupportedSimilarityCoefficients>
|
|
2260
|
|
2261 @SupportedSimilarityCoefficientsReturn =
|
|
2262 Fingerprints::FingerprintsVector::GetSupportedSimilarityCoefficients();
|
|
2263
|
|
2264 Returns an array containing names of supported similarity coefficients.
|
|
2265
|
|
2266 =item B<GetType>
|
|
2267
|
|
2268 $VectorType = $FingerprintsVector->GetType();
|
|
2269
|
|
2270 Returns I<FingerprintsVector> vector type.
|
|
2271
|
|
2272 =item B<GetValue>
|
|
2273
|
|
2274 $Value = $FingerprintsVector->GetValue($Index);
|
|
2275
|
|
2276 Returns fingerprints vector B<Value> specified using I<Index> starting at 0.
|
|
2277
|
|
2278 =item B<GetValueID>
|
|
2279
|
|
2280 $ValueID = $FingerprintsVector->GetValueID($Index);
|
|
2281
|
|
2282 Returns fingerprints vector B<ValueID> specified using I<Index> starting at 0.
|
|
2283
|
|
2284 =item B<GetValueIDs>
|
|
2285
|
|
2286 $ValueIDs = $FingerprintsVector->GetValueIDs();
|
|
2287 @ValueIDs = $FingerprintsVector->GetValueIDs();
|
|
2288
|
|
2289 Returns fingerprints vector B<ValueIDs> as an array or reference to an array.
|
|
2290
|
|
2291 =item B<GetValueIDsString>
|
|
2292
|
|
2293 $ValueIDsString = $FingerprintsVector->GetValueIDsString();
|
|
2294
|
|
2295 Returns fingerprints vector B<ValueIDsString> with value IDs delimited by space.
|
|
2296
|
|
2297 =item B<GetValues>
|
|
2298
|
|
2299 $ValuesRef = $FingerprintsVector->GetValues();
|
|
2300 @Values = $FingerprintsVector->GetValues();
|
|
2301
|
|
2302 Returns fingerprints vector B<Values> as an array or reference to an array.
|
|
2303
|
|
2304 =item B<GetValuesAndIDsPairsString>
|
|
2305
|
|
2306 $ValuesIDsPairsString = $FingerprintsVector->GetValuesAndIDsPairsString();
|
|
2307
|
|
2308 Returns I<FingerprintsVector> values and value IDs as space delimited value/ID pair
|
|
2309 string.
|
|
2310
|
|
2311 =item B<GetValuesAndIDsString>
|
|
2312
|
|
2313 $ValuesIDsString = $FingerprintsVector->GetValuesAndIDsString();
|
|
2314
|
|
2315 Returns I<FingerprintsVector> values and value IDs as string containing space delimited values followed by
|
|
2316 value IDs with semicolon as values and IDs delimiter.
|
|
2317
|
|
2318 =item B<GetValuesString>
|
|
2319
|
|
2320 $Return = $FingerprintsVector->GetValuesString();
|
|
2321
|
|
2322 Returns I<FingerprintsVector> values as space delimited string.
|
|
2323
|
|
2324 =item B<HammingDistanceCoefficient>
|
|
2325
|
|
2326 $Value = $FingerprintsVector->HammingDistanceCoefficient(
|
|
2327 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2328 $Value = Fingerprints::FingerprintsVector::HammingDistanceCoefficient(
|
|
2329 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2330 [$CalculationMode, $SkipValuesCheck]);
|
|
2331
|
|
2332 Returns value of I<Hamming> distance coefficient between two I<FingerprintsVectors> using
|
|
2333 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2334
|
|
2335 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2336 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2337
|
|
2338 =item B<IsFingerprintsVector>
|
|
2339
|
|
2340 $Status = Fingerprints::FingerprintsVector::IsFingerprintsVector($Object);
|
|
2341
|
|
2342 Returns 1 or 0 based on whether I<Object> is a I<FingerprintsVector>.
|
|
2343
|
|
2344 =item B<JaccardSimilarityCoefficient>
|
|
2345
|
|
2346 $Value = $FingerprintsVector->JaccardSimilarityCoefficient(
|
|
2347 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2348 $Value = Fingerprints::FingerprintsVector::JaccardSimilarityCoefficient(
|
|
2349 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2350 [$CalculationMode, $SkipValuesCheck]);
|
|
2351
|
|
2352 Returns value of I<Jaccard> similarity coefficient between two I<FingerprintsVectors> using
|
|
2353 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2354
|
|
2355 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2356 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2357
|
|
2358 =item B<ManhattanDistanceCoefficient>
|
|
2359
|
|
2360 $Value = $FingerprintsVector->ManhattanDistanceCoefficient(
|
|
2361 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2362 $Value = Fingerprints::FingerprintsVector::ManhattanDistanceCoefficient(
|
|
2363 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2364 [$CalculationMode, $SkipValuesCheck]);
|
|
2365
|
|
2366 Returns value of I<Manhattan> distance coefficient between two I<FingerprintsVectors> using
|
|
2367 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2368
|
|
2369 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2370 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2371
|
|
2372 =item B<NewFromIDsAndValuesPairsString>
|
|
2373
|
|
2374 $FingerprintsVector = $FingerprintsVector->NewFromIDsAndValuesPairsString(
|
|
2375 $ValuesType, $IDsAndValuesPairsString);
|
|
2376 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesPairsString(
|
|
2377 $ValuesType, $IDsAndValuesPairsString);
|
|
2378
|
|
2379 Creates a new I<FingerprintsVector> of I<ValuesType> using I<IDsAndValuesPairsString> containing
|
|
2380 space delimited value IDs and values pairs and returns new B<FingerprintsVector> object.
|
|
2381 Possible I<ValuesType> values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>.
|
|
2382
|
|
2383 =item B<NewFromIDsAndValuesString>
|
|
2384
|
|
2385 $FingerprintsVector = $FingerprintsVector->NewFromIDsAndValuesString(
|
|
2386 $ValuesType, $IDsAndValuesString);
|
|
2387 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesString(
|
|
2388 $ValuesType, $IDsAndValuesString);
|
|
2389
|
|
2390 Creates a new I<FingerprintsVector> of I<ValuesType> using I<IDsAndValuesString> containing
|
|
2391 semicolon delimited value IDs string followed by values strings and returns new B<FingerprintsVector>
|
|
2392 object. The values within value and value IDs string are delimited by spaces. Possible I<ValuesType>
|
|
2393 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>.
|
|
2394
|
|
2395 =item B<NewFromValuesAndIDsPairsString>
|
|
2396
|
|
2397 $FingerprintsVector = $FingerprintsVector->NewFromValuesAndIDsPairsString(
|
|
2398 $ValuesType, $ValuesAndIDsPairsString);
|
|
2399 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsPairsString(
|
|
2400 $ValuesType, $ValuesAndIDsPairsString);
|
|
2401
|
|
2402 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesAndIDsPairsString> containing
|
|
2403 space delimited value and value IDs pairs and returns new B<FingerprintsVector> object.
|
|
2404 Possible I<ValuesType> values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>.
|
|
2405
|
|
2406 =item B<NewFromValuesAndIDsString>
|
|
2407
|
|
2408 $FingerprintsVector = $FingerprintsVector->NewFromValuesAndIDsString(
|
|
2409 $ValuesType, $ValuesAndIDsString);
|
|
2410 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsString(
|
|
2411 $ValuesType, $ValuesAndIDsString);
|
|
2412
|
|
2413 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesAndIDsString> containing
|
|
2414 semicolon delimited values string followed by value IDs strings and returns new B<FingerprintsVector>
|
|
2415 object. The values within values and value IDs string are delimited by spaces. Possible I<ValuesType>
|
|
2416 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>.
|
|
2417
|
|
2418 =item B<NewFromValuesString>
|
|
2419
|
|
2420 $FingerprintsVector = $FingerprintsVector->NewFromValuesString(
|
|
2421 $ValuesType, $ValuesString);
|
|
2422 $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesString(
|
|
2423 $ValuesType, $ValuesString);
|
|
2424
|
|
2425 Creates a new I<FingerprintsVector> of I<ValuesType> using I<ValuesString> containing space
|
|
2426 delimited values string and returns new B<FingerprintsVector> object. The values within values
|
|
2427 and value IDs string are delimited by spaces. Possible I<ValuesType> values: I<OrderedNumericalValues,
|
|
2428 NumericalValues, or AlphaNumericalValues>.
|
|
2429
|
|
2430 =item B<OchiaiSimilarityCoefficient>
|
|
2431
|
|
2432 $Value = $FingerprintsVector->OchiaiSimilarityCoefficient(
|
|
2433 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2434 $Value = Fingerprints::FingerprintsVector::OchiaiSimilarityCoefficient(
|
|
2435 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2436 [$CalculationMode, $SkipValuesCheck]);
|
|
2437
|
|
2438 Returns value of I<Ochiai> similarity coefficient between two I<FingerprintsVectors> using
|
|
2439 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2440
|
|
2441 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2442 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2443
|
|
2444 =item B<SetDescription>
|
|
2445
|
|
2446 $FingerprintsVector->SetDescription($Description);
|
|
2447
|
|
2448 Sets I<Description> of fingerprints vector and returns I<FingerprintsVector>.
|
|
2449
|
|
2450 =item B<SetID>
|
|
2451
|
|
2452 $FingerprintsVector->SetID($ID);
|
|
2453
|
|
2454 Sets I<ID> of fingerprints vector and returns I<FingerprintsVector>.
|
|
2455
|
|
2456 =item B<SetVectorType>
|
|
2457
|
|
2458 $FingerprintsVector->SetVectorType($VectorType);
|
|
2459
|
|
2460 Sets I<VectorType> of fingerprints vector and returns I<FingerprintsVector>.
|
|
2461
|
|
2462 =item B<SetType>
|
|
2463
|
|
2464 $FingerprintsVector->SetType($Type);
|
|
2465
|
|
2466 Sets I<FingerprintsVector> values I<Type> and returns I<FingerprintsVector>. Possible I<Type>
|
|
2467 values: I<OrderedNumericalValues, NumericalValues, or AlphaNumericalValues>.
|
|
2468
|
|
2469 During calculation of similarity and distance coefficients between two I<FingerprintsVectors>, the
|
|
2470 following conditions apply to vector type, size, value and value IDs:
|
|
2471
|
|
2472 o For OrderedNumericalValues type, both vectors must be of the same size
|
|
2473 and contain similar types of numerical values in the same order.
|
|
2474
|
|
2475 o For NumericalValues type, vector value IDs for both vectors must be
|
|
2476 specified; however, their size and order of IDs and numerical values may
|
|
2477 be different. For each vector, value IDs must correspond to vector values.
|
|
2478
|
|
2479 o For AlphaNumericalValues type, vectors may contain both numerical and
|
|
2480 alphanumerical values and their sizes may be different.
|
|
2481
|
|
2482 =item B<SetValue>
|
|
2483
|
|
2484 $FingerprintsVector->SetValue($Index, $Value, [$SkipIndexCheck]);
|
|
2485
|
|
2486 Sets a I<FingerprintsVector> value specified by I<Index> starting at 0 to I<Value> along with
|
|
2487 optional index range check and returns I<FingerprintsVector>.
|
|
2488
|
|
2489 =item B<SetValueID>
|
|
2490
|
|
2491 $FingerprintsVector->SetValueID($Index, $ValueID, [$SkipIndexCheck]);
|
|
2492
|
|
2493 Sets a I<FingerprintsVector> value ID specified by I<Index> starting at 0 to I<ValueID> along with
|
|
2494 optional index range check and returns I<FingerprintsVector>.
|
|
2495
|
|
2496 =item B<SetValueIDs>
|
|
2497
|
|
2498 $FingerprintsVector->SetValueIDs($ValueIDsRef);
|
|
2499 $FingerprintsVector->SetValueIDs(@ValueIDs);
|
|
2500
|
|
2501 Sets I<FingerprintsVector> value IDs to specified I<ValueIDs> and returns I<FingerprintsVector>.
|
|
2502
|
|
2503 =item B<SetValues>
|
|
2504
|
|
2505 $FingerprintsVector->SetValues($ValuesRef);
|
|
2506 $FingerprintsVector->SetValues(@Values);
|
|
2507
|
|
2508 Sets I<FingerprintsVector> values to specified I<Values> and returns I<FingerprintsVector>.
|
|
2509
|
|
2510 =item B<SoergelDistanceCoefficient>
|
|
2511
|
|
2512 $Value = $FingerprintsVector->SoergelDistanceCoefficient(
|
|
2513 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2514 $Value = Fingerprints::FingerprintsVector::SoergelDistanceCoefficient(
|
|
2515 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2516 [$CalculationMode, $SkipValuesCheck]);
|
|
2517
|
|
2518 Returns value of I<Soergel> distance coefficient between two I<FingerprintsVectors> using
|
|
2519 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2520
|
|
2521 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2522 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2523
|
|
2524 =item B<SorensonSimilarityCoefficient>
|
|
2525
|
|
2526 $Value = $FingerprintsVector->SorensonSimilarityCoefficient(
|
|
2527 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2528 $Value = Fingerprints::FingerprintsVector::SorensonSimilarityCoefficient(
|
|
2529 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2530 [$CalculationMode, $SkipValuesCheck]);
|
|
2531
|
|
2532 Returns value of I<Sorenson> similarity coefficient between two I<FingerprintsVectors> using
|
|
2533 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2534
|
|
2535 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2536 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2537
|
|
2538 =item B<TanimotoSimilarityCoefficient>
|
|
2539
|
|
2540 $Value = $FingerprintsVector->TanimotoSimilarityCoefficient(
|
|
2541 $OtherFingerprintVector, [$CalculationMode, $SkipValuesCheck]);
|
|
2542 $Value = Fingerprints::FingerprintsVector::TanimotoSimilarityCoefficient(
|
|
2543 $FingerprintsVectorA, $FingerprintVectorB,
|
|
2544 [$CalculationMode, $SkipValuesCheck]);
|
|
2545
|
|
2546 Returns value of I<Tanimoto> similarity coefficient between two I<FingerprintsVectors> using
|
|
2547 optionally specified I<CalculationMode> and optional checking of vector values.
|
|
2548
|
|
2549 Possible I<CalculationMode> values: I<AlgebraicForm, BinaryForm or SetTheoreticForm>. Default
|
|
2550 I<CalculationMode> value: I<AlgebraicForm>. Default I<SkipValuesCheck> value: I<0>.
|
|
2551
|
|
2552 =item B<StringifyFingerprintsVector>
|
|
2553
|
|
2554 $String = $FingerprintsVector->StringifyFingerprintsVector();
|
|
2555
|
|
2556 Returns a string containing information about I<FingerprintsVector> object.
|
|
2557
|
|
2558 =back
|
|
2559
|
|
2560 =head1 AUTHOR
|
|
2561
|
|
2562 Manish Sud <msud@san.rr.com>
|
|
2563
|
|
2564 =head1 SEE ALSO
|
|
2565
|
|
2566 BitVector.pm, FingerprintsStringUtil.pm, FingerprintsBitVector.pm, Vector.pm
|
|
2567
|
|
2568 =head1 COPYRIGHT
|
|
2569
|
|
2570 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
2571
|
|
2572 This file is part of MayaChemTools.
|
|
2573
|
|
2574 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
2575 the terms of the GNU Lesser General Public License as published by the Free
|
|
2576 Software Foundation; either version 3 of the License, or (at your option)
|
|
2577 any later version.
|
|
2578
|
|
2579 =cut
|