comparison mayachemtools/lib/Fingerprints/FingerprintsStringUtil.pm @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73ae111cf86f
1 package Fingerprints::FingerprintsStringUtil;
2 #
3 # $RCSfile: FingerprintsStringUtil.pm,v $
4 # $Date: 2015/02/28 20:48:54 $
5 # $Revision: 1.24 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Exporter;
31 use Carp;
32 use TextUtil ();
33 use Fingerprints::FingerprintsBitVector;
34 use Fingerprints::FingerprintsVector;
35
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Exporter setup: nothing is exported by default; every public function can be
# imported by name or all at once through the ':all' tag...
#
@ISA = ('Exporter');
@EXPORT = ();
@EXPORT_OK = qw(
  AreFingerprintsStringValuesValid
  GenerateFingerprintsString
  GenerateFingerprintsBitVectorString
  GenerateFingerprintsVectorString
  GetFingerprintsStringTypeAndDescription
  GetDefaultBitsOrder
  GetDefaultBitStringFormat
  GetDefaultVectorStringFormat
  GetFingeprintsStringDelimiter
  GetFingerprintsStringValues
  ParseFingerprintsString
  ParseFingerprintsBitVectorString
  ParseFingerprintsVectorString
);

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Delimiter placed between the individual values of a fingerprints string...
my($FPStringDelim) = ';';
46
# Generate a fingerprints string for a fingerprints object by dispatching on the
# vector type reported by the object...
#
# Parameters: fingerprints object followed by any optional format arguments; the
#   complete argument list is forwarded unchanged to the type specific generator.
# Returns: string generated by GenerateFingerprintsBitVectorString or
#   GenerateFingerprintsVectorString.
# Croaks: vector type is neither FingerprintsBitVector nor FingerprintsVector.
#
sub GenerateFingerprintsString {
  my($FingerprintsObject) = @_;
  my $VectorType = $FingerprintsObject->GetVectorType();

  if ($VectorType =~ /^FingerprintsBitVector$/i) {
    return GenerateFingerprintsBitVectorString(@_);
  }
  elsif ($VectorType =~ /^FingerprintsVector$/i) {
    return GenerateFingerprintsVectorString(@_);
  }

  croak "Error: FingerprintsStringUtil::GenerateFingerprintsString: Fingerprints object vector type, $VectorType, is not supported. Valid values: FingerprintsBitVector or FingerprintsVector...";
}
62
# Generate a fingerprints bit vector string...
#
# Parameters: fingerprints object or fingerprints bit vector object; optional bit
#   string format and bits order, which fall back to GetDefaultBitStringFormat()
#   and GetDefaultBitsOrder() when false.
# Returns: delimited string made up of vector type, description, specified size,
#   bit string format, bits order and the bit string itself.
#
sub GenerateFingerprintsBitVectorString {
  my($FingerprintsObject, $BitStringFormat, $BitsOrder) = @_;

  $BitStringFormat = GetDefaultBitStringFormat() unless $BitStringFormat;
  $BitsOrder = GetDefaultBitsOrder() unless $BitsOrder;

  # The object passed in is either the bit vector itself or a fingerprints
  # object which holds one...
  my $BitVector;
  if (Fingerprints::FingerprintsBitVector::IsFingerprintsBitVector($FingerprintsObject)) {
    $BitVector = $FingerprintsObject;
  }
  else {
    $BitVector = $FingerprintsObject->GetFingerprintsBitVector();
  }

  # Use specified size instead of size: it corresponds to actual size of the
  # fingerprints bit vector; size reflects actual internal size including any
  # padding...
  my @HeaderValues = (
    $FingerprintsObject->GetVectorType(),
    _GetFingerprintsDescription($FingerprintsObject),
    $BitVector->GetSpecifiedSize(),
    $BitStringFormat,
    $BitsOrder,
  );
  my $BitString = _GetFingerprintBitVectorString($BitVector, $BitStringFormat, $BitsOrder);

  return join("${FPStringDelim}", @HeaderValues, $BitString);
}
86
# Get fingerprint bit vector string...
#
# Parameters: fingerprints bit vector object; optional bit string format and bits
#   order, which fall back to GetDefaultBitStringFormat() and
#   GetDefaultBitsOrder() when false.
# Returns: empty string for a missing bit vector; otherwise the string returned
#   by GetBitsAsBinaryString or GetBitsAsHexadecimalString of the bit vector.
# Croaks: bit string format is not one of the supported names.
#
sub _GetFingerprintBitVectorString {
  my($FingerprintsBitVector, $BitStringFormat, $BitsOrder) = @_;

  # Nothing to convert: no need to look up any defaults...
  if (!$FingerprintsBitVector) { return ''; }

  if (!$BitStringFormat) { $BitStringFormat = GetDefaultBitStringFormat(); }
  if (!$BitsOrder) { $BitsOrder = GetDefaultBitsOrder(); }

  if ($BitStringFormat =~ /^(BinaryString|Binary|Bin)$/i) {
    return $FingerprintsBitVector->GetBitsAsBinaryString($BitsOrder);
  }
  if ($BitStringFormat =~ /^(HexadecimalString|Hexadecimal|Hex)$/i) {
    return $FingerprintsBitVector->GetBitsAsHexadecimalString($BitsOrder);
  }

  # Name this function and list only format names accepted above...
  croak "Error: FingerprintsStringUtil::_GetFingerprintBitVectorString: Specified bit vector string format, $BitStringFormat, is not supported. Valid values: Binary, Bin, BinaryString, Hexadecimal, Hex, HexadecimalString...";
}
106
# Generate a fingerprints vector string...
#
# Parameters: fingerprints object or fingerprints vector object; optional vector
#   string format, which falls back to _GetDefaultVectorStringFormat() for the
#   vector when false.
# Returns: delimited string made up of vector type, description, number of
#   values, vector values type, vector string format and the vector string itself.
#
sub GenerateFingerprintsVectorString {
  my($FingerprintsObject, $VectorStringFormat) = @_;

  # The object passed in is either the vector itself or a fingerprints object
  # which holds one...
  my $Vector;
  if (Fingerprints::FingerprintsVector::IsFingerprintsVector($FingerprintsObject)) {
    $Vector = $FingerprintsObject;
  }
  else {
    $Vector = $FingerprintsObject->GetFingerprintsVector();
  }

  $VectorStringFormat = _GetDefaultVectorStringFormat($Vector) unless $VectorStringFormat;

  my @HeaderValues = (
    $FingerprintsObject->GetVectorType(),
    _GetFingerprintsDescription($FingerprintsObject),
    $Vector->GetNumOfValues(),
    $Vector->GetType(),
    $VectorStringFormat,
  );
  my $VectorString = _GetFingerprintVectorString($Vector, $VectorStringFormat);

  return join("${FPStringDelim}", @HeaderValues, $VectorString);
}
125
# Get fingerprint vector string...
#
# Parameters: fingerprints vector object; optional vector string format, which
#   falls back to _GetDefaultVectorStringFormat() for the vector when false.
# Returns: empty string for a missing vector; otherwise the string returned by
#   the vector method matching the format.
# Croaks: vector string format is not one of the supported names.
#
sub _GetFingerprintVectorString {
  my($FingerprintsVector, $VectorStringFormat) = @_;

  # Check for a missing vector before anything else: looking up the default
  # format invokes a method on the vector...
  if (!$FingerprintsVector) { return ''; }

  if (!$VectorStringFormat) { $VectorStringFormat = _GetDefaultVectorStringFormat($FingerprintsVector); }

  if ($VectorStringFormat =~ /^(IDsAndValuesString|IDsAndValues)$/i) {
    return $FingerprintsVector->GetIDsAndValuesString();
  }
  if ($VectorStringFormat =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) {
    return $FingerprintsVector->GetIDsAndValuesPairsString();
  }
  if ($VectorStringFormat =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) {
    return $FingerprintsVector->GetValuesAndIDsString();
  }
  if ($VectorStringFormat =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) {
    return $FingerprintsVector->GetValuesAndIDsPairsString();
  }
  if ($VectorStringFormat =~ /^(ValuesString|Values)$/i) {
    return $FingerprintsVector->GetValuesString();
  }

  croak "Error: FingerprintsStringUtil::_GetFingerprintVectorString: Specified vector string format, $VectorStringFormat, is not supported. Valid values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValuesString, Values...";
}
147
# Get the first two values of a fingerprints string: vector type and description...
#
# Parameters: fingerprints string.
# Returns: two element list (type, description); a value missing from the string
#   comes back undefined.
#
sub GetFingerprintsStringTypeAndDescription {
  my($FingerprintsString) = @_;
  my @Values = _ParseFingerprintsStringValues($FingerprintsString);

  return ($Values[0], $Values[1]);
}
157
# Get all values of a fingerprints string...
#
# Parameters: fingerprints string.
# Returns: list of the delimited values as produced by
#   _ParseFingerprintsStringValues.
#
sub GetFingerprintsStringValues {
  my($FingerprintsString) = @_;
  my @Values = _ParseFingerprintsStringValues($FingerprintsString);

  return @Values;
}
164
# Parse a fingerprints string by dispatching on the vector type name at the start
# of the string...
#
# Parameters: fingerprints string followed by any optional arguments; the
#   complete argument list is forwarded unchanged to the type specific parser.
# Returns: value returned by ParseFingerprintsBitVectorString or
#   ParseFingerprintsVectorString.
# Croaks: string starts with neither FingerprintsBitVector nor FingerprintsVector.
#
sub ParseFingerprintsString {
  my($FingerprintsString) = @_;

  if ($FingerprintsString =~ /^FingerprintsBitVector/i) {
    return ParseFingerprintsBitVectorString(@_);
  }
  elsif ($FingerprintsString =~ /^FingerprintsVector/i) {
    return ParseFingerprintsVectorString(@_);
  }

  croak "Error: FingerprintsStringUtil::ParseFingerprintsString: Fingerprints string vector type is not supported. Valid values: FingerprintsBitVector or FingerprintsVector...";
}
177
# Parse fingerprints bit vector string and return bit vector...
#
# Parameters: fingerprints bit vector string; optional flag requesting validation
#   of the parsed values.
# Returns: value returned by _GenerateFingerprintBitVector for the parsed values.
# Croaks: validation is requested and vector type, size, bit string format, bits
#   order or bit string is empty. Description is not part of the validation.
#
sub ParseFingerprintsBitVectorString {
  my($FingerprintsString, $ValidateValues) = @_;

  my($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString) = _ParseFingerprintsStringValues($FingerprintsString);

  if ($ValidateValues) {
    # Prefix names this function so a validation failure points at the right parser...
    my $ErrorMsgPrefix = "Error: ParseFingerprintsBitVectorString";
    _ValidateFingerprintsStringValues($ErrorMsgPrefix, $VectorType, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
  }

  return _GenerateFingerprintBitVector($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
}
192
# Generate fingerprints bit vector from already parsed fingerprints string values...
#
# Parameters: vector type, description, specified size, bit string format, bits
#   order and bit vector string.
# Returns: object created by NewFromBinaryString or NewFromHexadecimalString of
#   Fingerprints::FingerprintsBitVector; vector type, description and specified
#   size are set on it when it is defined.
# Croaks: bit string format is not one of the supported names.
#
sub _GenerateFingerprintBitVector {
  my($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString) = @_;
  my($FingerprintsBitVector);

  if ($BitStringFormat =~ /^(BinaryString|Binary|Bin)$/i) {
    $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::NewFromBinaryString($BitVectorString, $BitsOrder);
  }
  elsif ($BitStringFormat =~ /^(HexadecimalString|Hexadecimal|Hex)$/i) {
    $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::NewFromHexadecimalString($BitVectorString, $BitsOrder);
  }
  else {
    # List only format names accepted by the checks above...
    croak "Error: FingerprintsStringUtil::_GenerateFingerprintBitVector: Specified bit vector string format, $BitStringFormat, is not supported. Valid values: Binary, Bin, BinaryString, Hexadecimal, Hex, HexadecimalString...";
  }

  if (defined $FingerprintsBitVector) {
    # Set fingerprints vector type and description...
    $FingerprintsBitVector->SetVectorType($VectorType);
    $FingerprintsBitVector->SetDescription($Description);

    # Set specified size which might be different from the bit string size due to
    # padding used by Perl vec function to handle bit vectors in BitVector class...
    #
    $FingerprintsBitVector->SetSpecifiedSize($Size);
  }

  return $FingerprintsBitVector;
}
226
# Parse fingerprints vector string and return vector...
#
# Parameters: fingerprints vector string; optional flag requesting validation of
#   the parsed values.
# Returns: value returned by _GenerateFingerprintVector for the parsed values.
# Croaks: validation is requested and vector type, number of values, vector
#   values type or vector string format is empty.
#
sub ParseFingerprintsVectorString {
  my($FingerprintsString, $ValidateValues) = @_;

  my @Values = _ParseFingerprintsStringValues($FingerprintsString);
  my($VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat, $FirstVectorString, $SecondVectorString) = @Values;

  # The two vector strings are left unchecked here: they are examined later
  # during the creation of FingerprintsVector...
  #
  _ValidateFingerprintsStringValues("Error: ParseFingerprintsVectorString", $VectorType, $NumOfValues, $VectorValuesType, $VectorStringFormat) if $ValidateValues;

  return _GenerateFingerprintVector($VectorType, $Description, $VectorValuesType, $VectorStringFormat, $FirstVectorString, $SecondVectorString);
}
245
# Generate fingerprints vector from already parsed fingerprints string values...
#
# Parameters: vector type, description, vector values type, vector string format
#   and the one or two vector strings obtained by splitting the fingerprints
#   string on its delimiter.
# Returns: object created by the NewFrom...String function of
#   Fingerprints::FingerprintsVector matching the format; vector type and
#   description are set on it when it is defined.
# Croaks: vector string format is not one of the supported names.
#
sub _GenerateFingerprintVector {
  my($VectorType, $Description, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2) = @_;
  my($FingerprintsVector, $VectorString);

  # Put the two halves back together using the same delimiter used to split the
  # fingerprints string instead of a hard coded character...
  $VectorString = TextUtil::IsEmpty($VectorString2) ? $VectorString1 : join(GetFingeprintsStringDelimiter(), $VectorString1, $VectorString2);

  if ($VectorStringFormat =~ /^(ValuesString|Values)$/i) {
    $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesString($VectorValuesType, $VectorString);
  }
  elsif ($VectorStringFormat =~ /^(IDsAndValuesString|IDsAndValues)$/i) {
    $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesString($VectorValuesType, $VectorString);
  }
  elsif ($VectorStringFormat =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) {
    $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesPairsString($VectorValuesType, $VectorString);
  }
  elsif ($VectorStringFormat =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) {
    $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsString($VectorValuesType, $VectorString);
  }
  elsif ($VectorStringFormat =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) {
    $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsPairsString($VectorValuesType, $VectorString);
  }
  else {
    croak "Error: FingerprintsStringUtil::_GenerateFingerprintVector: Specified vector string format, $VectorStringFormat, is not supported. Valid values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValuesString, Values...";
  }

  if (defined $FingerprintsVector) {
    # Set fingerprints vector type and description...
    $FingerprintsVector->SetVectorType($VectorType);
    $FingerprintsVector->SetDescription($Description);
  }

  return $FingerprintsVector;
}
287
# Validate fingerprints string values...
#
# Parameters: fingerprints string.
# Returns: 0 when the string is undefined, zero length, yields no values at all,
#   or contains a value for which TextUtil::IsEmpty is true; 1 otherwise.
#
# Note: Perl split without a limit drops trailing empty fields, so empty values
# at the very end of the string are not seen by this check.
#
sub AreFingerprintsStringValuesValid {
  my($FingerprintsString) = @_;

  # An undefined or zero length string splits into an empty list, which would
  # pass through the loop below untouched and be reported as valid...
  if (!defined($FingerprintsString) || !length($FingerprintsString)) {
    return 0;
  }

  my @Values = _ParseFingerprintsStringValues($FingerprintsString);

  # A string made up of nothing but delimiters also yields no values...
  if (!@Values) {
    return 0;
  }

  for my $Value (@Values) {
    if (TextUtil::IsEmpty($Value)) {
      return 0;
    }
  }
  return 1;
}
301
# Get description of a fingerprints object for use in a fingerprints string...
#
# Parameters: object providing a GetDescription method.
# Returns: description reported by the object, or a fixed placeholder text when
#   TextUtil::IsEmpty is true for it.
#
sub _GetFingerprintsDescription {
  my($FingerprintsObject) = @_;
  my $Description = $FingerprintsObject->GetDescription();

  if (TextUtil::IsEmpty($Description)) {
    return 'No description available for fingerprints';
  }
  return $Description;
}
312
# Split a fingerprints string into its individual values using the fingerprints
# string delimiter...
#
# Parameters: fingerprints string.
# Returns: list of values; number of values in scalar context.
#
sub _ParseFingerprintsStringValues {
  my($FingerprintsString) = @_;
  my @Values = split "${FPStringDelim}", $FingerprintsString;

  return @Values;
}
320
# Make sure none of the already parsed fingerprints string values is empty...
#
# Parameters: error message prefix followed by the values to check.
# Croaks: on the first value for which TextUtil::IsEmpty is true; the message
#   starts with the error message prefix.
#
sub _ValidateFingerprintsStringValues {
  my($ErrorMsgPrefix, @Values) = @_;

  foreach my $Value (@Values) {
    next unless TextUtil::IsEmpty($Value);
    croak("${ErrorMsgPrefix}: _ValidateFingerprintsStringValues: Fingerprints string format is not valid: An empty value found...");
  }
}
333
# Name of the bit string format used when a caller does not specify one...
#
sub GetDefaultBitStringFormat {
  my $DefaultBitStringFormat = 'HexadecimalString';

  return $DefaultBitStringFormat;
}
339
# Name of the bits order used when a caller does not specify one...
#
sub GetDefaultBitsOrder {
  my $DefaultBitsOrder = 'Ascending';

  return $DefaultBitsOrder;
}
345
# Default vector string format for a fingerprints or fingerprints vector object...
#
# Parameters: fingerprints vector object, or an object providing a
#   GetFingerprintsVector method.
# Returns: format name chosen by _GetDefaultVectorStringFormat for the vector.
#
sub GetDefaultVectorStringFormat {
  my($FingerprintsObject) = @_;

  if (Fingerprints::FingerprintsVector::IsFingerprintsVector($FingerprintsObject)) {
    return _GetDefaultVectorStringFormat($FingerprintsObject);
  }
  return _GetDefaultVectorStringFormat($FingerprintsObject->GetFingerprintsVector());
}
356
# Default vector string format for a fingerprints vector object...
#
# Parameters: object providing a GetType method.
# Returns: 'IDsAndValuesString' when the type is NumericalValues, ignoring case;
#   'ValuesString' for any other type.
#
sub _GetDefaultVectorStringFormat {
  my($FingerprintsVector) = @_;

  if ($FingerprintsVector->GetType() =~ /^NumericalValues$/i) {
    return 'IDsAndValuesString';
  }
  return 'ValuesString';
}
367
# Delimiter used between the individual values of a fingerprints string...
#
sub GetFingeprintsStringDelimiter {
  my $Delimiter = $FPStringDelim;

  return $Delimiter;
}
373
374 1;
375
376 __END__
377
378 =head1 NAME
379
380 FingerprintsStringUtil
381
382 =head1 SYNOPSIS
383
384 use Fingerprints::FingerprintsStringUtil;
385
386 use Fingerprints::FingerprintsStringUtil qw(:all);
387
388 =head1 DESCRIPTION
389
390 B<FingerprintsStringUtil> module provides the following functions:
391
392 AreFingerprintsStringValuesValid, GenerateFingerprintsBitVectorString,
393 GenerateFingerprintsString, GenerateFingerprintsVectorString,
394 GetDefaultBitStringFormat, GetDefaultBitsOrder, GetDefaultVectorStringFormat,
395 GetFingeprintsStringDelimiter, GetFingerprintsStringTypeAndDescription,
396 GetFingerprintsStringValues, ParseFingerprintsBitVectorString,
397 ParseFingerprintsString, ParseFingerprintsVectorString
398
399 The current release of MayaChemTools supports the following types of fingerprint
400 bit-vector and vector strings:
401
402 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi
403 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT
404 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X
405 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A
406 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2
407 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B...
408
409 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS
410 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2
411 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1
412 O.X1.BO2;2 4 14 3 10 1 1 1 3 2
413
414 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume
415 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F
416 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1
417
418 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN
419 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C
420 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N
421 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8
422 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1
423 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0...
424
425 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs
426 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN
427 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3
428 .024 -2.270
429
430 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
431 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435
432 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1
433 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
434 0 0 0 0 0 0 0 0 0 0 0 0 0 0
435
436 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi
437 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391
438 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414
439 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103
440 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338
441 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303...
442
443 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes
444 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524
445 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649
446 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...;
447 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2
448 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1
449
450 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp
451 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100
452 0000000001010000000110000011000000000000100000000000000000000000100001
453 1000000110000000000000000000000000010011000000000000000000000000010000
454 0000000000000000000000000010000000000000000001000000000000000000000000
455 0000000000010000100001000000000000101000000000000000100000000000000...
456
457 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu
458 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8
459 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567
460 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012
461 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455
462 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404...
463
464 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp
465 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184
466 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450
467 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430
468 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134
469 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566...
470
471 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
472 0000000000000000000000000000000001001000010010000000010010000000011100
473 0100101010111100011011000100110110000011011110100110111111111111011111
474 11111111111110111000
475
476 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
477 1110011111100101111111000111101100110000000000000011100010000000000000
478 0000000000000000000000000000000000000000000000101000000000000000000000
479 0000000000000000000000000000000000000000000000000000000000000000000000
480 0000000000000000000000000000000000000011000000000000000000000000000000
481 0000000000000000000000000000000000000000
482
483 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
484 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
485 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
486 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
487 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
488 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
489
490 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
491 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
492 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
493 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
494 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
495 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
496
497 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
498 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
499 0100010101011000101001011100110001000010001001101000001001001001001000
500 0010110100000111001001000001001010100100100000000011000000101001011100
501 0010000001000101010100000100111100110111011011011000000010110111001101
502 0101100011000000010001000011000010100011101100001000001000100000000...
503
504 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
505 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
506 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
507 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
508 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
509 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
510
511 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt
512 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1
513 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N
514 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1
515 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR
516 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ...
517
518 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
519 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1
520 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3.
521 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...;
522 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1
523 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1...
524
525 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi
526 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar
527 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H
528 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...;
529 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4
530 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ...
531
532 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
533 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-
534 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO
535 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...;
536 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
537
538 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica
539 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC
540 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC-
541 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...;
542 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2
543
544 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
545 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1
546 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1
547 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1
548 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....;
549 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2
550 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8...
551
552 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1
553 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C
554 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3-
555 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2
556 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C.
557 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7...
558
559 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
560 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H
561 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2-
562 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H
563 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...;
564 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10
565 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
566
567 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
568 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0
569 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1
570 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0
571 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0
572 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18...
573
574 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize:
575 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1-
576 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1
577 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1-
578 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...;
579 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23
580 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1
581 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ...
582
583 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD
584 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106
585 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0
586 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26
587 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0
588 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ...
589
590 =head1 FUNCTIONS
591
592 =over 4
593
594 =item B<AreFingerprintsStringValuesValid>
595
596 $Status = AreFingerprintsStringValuesValid($FPString);
597
598 Returns 0 or 1 based on whether I<FingerprintsString> contains valid values.
599
600 =item B<GetDefaultBitStringFormat>
601
602 $BitStringFormat = GetDefaultBitStringFormat();
603
604 Returns default B<BitStringFormat> for fingerprints bit-vector strings.
605
606 =item B<GetDefaultBitsOrder>
607
608 $BitsOrder = GetDefaultBitsOrder();
609
610 Returns default B<BitsOrder> for fingerprints bit-vector strings.
611
612 =item B<GetDefaultVectorStringFormat>
613
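614 $StringFormat = GetDefaultVectorStringFormat($FingerprintsObject);
615
616 Returns default B<VectorStringFormat> for fingerprints vector strings using the specified fingerprints or fingerprints vector object: I<IDsAndValuesString> for I<NumericalValues> vector type; I<ValuesString> for all other vector types.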
617
618 =item B<GetFingeprintsStringDelimiter>
619
620 $Delimiter = GetFingeprintsStringDelimiter();
621
622 Returns string B<Delimiter> used to generate fingerprints bit-vector and vector strings.
623
624 =item B<GenerateFingerprintsBitVectorString>
625
626 $FPString = GenerateFingerprintsBitVectorString($FPBitVectorObject,
627 [$BitStringFormat, $BitsOrder]);
628
629 Returns a B<FingerprintsString> generated using I<FingerprintsBitVectorObject> and
630 optionally specified I<BitStringFormat> and I<BitsOrder> values.
631
632 Possible I<BitStringFormat> values: I<BinaryString, Binary, Bin, HexadecimalString,
633 Hexadecimal, or Hex>. Default I<BitStringFormat> value: I<HexadecimalString>.
634
635 Possible I<BitsOrder> values: I<Ascending or Descending>. Default I<BitsOrder> value:
636 I<Ascending>.
637
638 =item B<GenerateFingerprintsVectorString>
639
640 $FPString = GenerateFingerprintsVectorString($FPVectorObject,
641 [$VectorStringFormat]);
642
643 Returns a B<FingerprintsString> generated using I<FingerprintsVectorObject> and optionally
644 specified I<VectorStringFormat>.
645
646 Possible I<VectorStringFormat> values: I<IDsAndValuesString, IDsAndValues,
647 IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs,
648 ValuesAndIDsPairsString, ValuesAndIDsPairs, ValuesString, Values>.
649
650 Default I<VectorStringFormat> value: for I<NumericalValues> I<FPVectorType> -
651 I<IDsAndValuesString>; for all other I<FPVectorType>s - I<ValuesString>.
652
653 =item B<GenerateFingerprintsString>
654
655 $FPString = GenerateFingerprintsString($FPBitVectorObject,
656 [$BitStringFormat, $BitsOrder]);
657
658 $FPString = GenerateFingerprintsString($FPVectorObject,
659 [$VectorStringFormat]);
660
661 Returns a B<FingerprintsString> generated using I<FingerprintsBitVectorObject> or
662 I<FingerprintsVectorObject> and optionally specified parameters.
663
664 =item B<GetFingerprintsStringTypeAndDescription>
665
666 ($FPType, $FPDescription) = GetFingerprintsStringTypeAndDescription(
667 $FPString);
668
669 Returns B<FingerprintsStringType> and I<FingerprintsStringDescription> strings for
670 B<FingerprintsString> corresponding to B<FingerprintsBitVectorObject> or
671 B<FingerprintsVectorObject>.
672
673 =item B<GetFingerprintsStringValues>
674
675 @FPStringValues = GetFingerprintsStringValues($FPString);
676
677 Parses B<FingerprintsString> corresponding to B<FingerprintsBitVectorObject> or
678 B<FingerprintsVectorObject> and returns its individual component values as an
679 array.
680
681 =item B<ParseFingerprintsBitVectorString>
682
683 $FPBitVectorObject = ParseFingerprintsBitVectorString($FPBitVectorString,
684 [$ValidateValues]);
685
686 Returns B<FingerprintsBitVectorObject> generated by parsing I<FingerprintsBitVectorString>
687 with optional validation of its component values.
688
689 =item B<ParseFingerprintsString>
690
691 $FPBitVectorObject = ParseFingerprintsString($FPBitVectorString,
692 [$ValidateValues]);
693
694 $FPVectorObject = ParseFingerprintsString($FPVectorString,
695 [$ValidateValues]);
696
697 Returns B<FingerprintsBitVectorObject> or B<FingerprintsVectorObject> generated
698 by parsing I<FingerprintsBitVectorString> or I<FingerprintsVectorString> with
699 optional validation of its component values.
700
701 =item B<ParseFingerprintsVectorString>
702
703 $FPVectorObject = ParseFingerprintsVectorString($FPVectorString,
704 [$ValidateValues]);
705
706 Returns B<FingerprintsVectorObject> generated by parsing I<FingerprintsVectorString>
707 with optional validation of its component values.
708
709 =back
710
711 =head1 AUTHOR
712
713 Manish Sud <msud@san.rr.com>
714
715 =head1 SEE ALSO
716
717 BitVector.pm, FingerprintsBitVector.pm, FingerprintsVector.pm, Vector.pm
718
719 =head1 COPYRIGHT
720
721 Copyright (C) 2015 Manish Sud. All rights reserved.
722
723 This file is part of MayaChemTools.
724
725 MayaChemTools is free software; you can redistribute it and/or modify it under
726 the terms of the GNU Lesser General Public License as published by the Free
727 Software Foundation; either version 3 of the License, or (at your option)
728 any later version.
729
730 =cut