0
|
1 package FileIO::FingerprintsFPFileIO;
|
|
2 #
|
|
3 # $RCSfile: FingerprintsFPFileIO.pm,v $
|
|
4 # $Date: 2015/02/28 20:48:43 $
|
|
5 # $Revision: 1.19 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Scalar::Util ();
|
|
33 use TextUtil ();
|
|
34 use FileUtil ();
|
|
35 use TimeUtil ();
|
|
36 use Fingerprints::FingerprintsStringUtil ();
|
|
37 use PackageInfo ();
|
|
38 use FileIO::FileIO;
|
|
39
|
|
# Package globals consulted by Exporter and by method resolution...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Inherit from FileIO::FileIO and Exporter...
@ISA = qw(FileIO::FileIO Exporter);

# Nothing is exported by default; IsFingerprintsFPFile is exported on request...
@EXPORT = qw();
@EXPORT_OK = qw(IsFingerprintsFPFile);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables: $ClassName is filled in by _InitializeClass and
# used as a prefix in error and warning messages...
my $ClassName;
_InitializeClass();
|
|
51
|
|
# Class constructor...
#
# Takes the class name (or an existing object) followed by a list of
# property name and value pairs. The object is created by the parent class
# constructor, re-blessed into this class, populated with default values and
# finally configured from the specified names and values.
#
# Returns the new object.
#
sub new {
  my $Class = shift;
  my %NamesAndValues = @_;

  # Create the object using the parent class and bless it into this class...
  my $Package = ref($Class) || $Class;
  my $This = $Class->SUPER::new();
  bless $This, $Package;

  # Defaults first; caller specified properties afterwards...
  $This->_InitializeFingerprintsFPFileIO();
  $This->_InitializeFingerprintsFPFileIOProperties(%NamesAndValues);

  return $This;
}
|
|
65
|
|
# Initialize object data shared by read and write modes...
#
# Properties set to undef:
#
#   FingerprintsStringMode - Fingerprints string data format during read/write.
#       For file read: AutoDetect, FingerprintsBitVectorString or
#       FingerprintsVectorString; AutoDetect is applied later as the default.
#       For file write: FingerprintsBitVectorString or FingerprintsVectorString;
#       there is no default and it must be specified.
#   FingerprintsObject - For file read: fingerprints bit-vector or vector
#       object for the current fingerprints string. For file write: a
#       bit-vector or vector object, or any supported fingerprints object such
#       as PathLengthFingerprints, ExtendedConnectivity, and so on.
#   FingerprintsString - Fingerprints string for the current line.
#   PartialFingerprintsString - Partial fingerprints string corresponding to
#       what's on the current line.
#   DataLine - FP line data during read/write.
#
# Other properties:
#
#   RequiredHeaderDataKeys, RequiredHeaderDataKeysAndValues - emptied.
#   FirstDataLineIO - 1; indicates that first data line is yet to be read/written.
#   LineNum - 0; current fingerprints string data line number.
#
# Read and write specific defaults are set up by the corresponding methods.
#
sub _InitializeFingerprintsFPFileIO {
  my $This = shift;

  # Properties without a value until a line is read or written...
  for my $Property (qw(FingerprintsStringMode FingerprintsObject FingerprintsString PartialFingerprintsString DataLine)) {
    $This->{$Property} = undef;
  }

  # Required header data keys and values during read/write...
  @{$This->{RequiredHeaderDataKeys}} = ();
  %{$This->{RequiredHeaderDataKeysAndValues}} = ();

  # First data line read/write is pending and no line has been processed...
  $This->{FirstDataLineIO} = 1;
  $This->{LineNum} = 0;

  # Mode specific parameters...
  $This->_InitializeFingerprintsFPFileIORead();
  $This->_InitializeFingerprintsFPFileIOWrite();

  return $This;
}
|
|
129
|
|
# Initialize class data: record the package name for use in messages...
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}
|
|
136
|
|
# Initialize object data for reading fingerprints FP file...
#
# Header data containers are emptied and the following defaults are set:
#
#   ValidateData - 1; data lines are validated before fingerprints objects
#       are generated.
#   DetailLevel - 1; level of detail to print during validation of data for
#       invalid or missing data.
#   NumOfLinesWithMissingData, NumOfLinesWithInvalidData - 0; counters for
#       missing and invalid fingerprints string data lines.
#   CompoundID - undef; compound ID for current fingerprints string.
#   ValidFileData, ValidRequiredHeaderDataKeys, ValidFingerprintsStringMode -
#       0; status of data in fingerprints FP file.
#
sub _InitializeFingerprintsFPFileIORead {
  my $This = shift;

  # Header data keys and values...
  @{$This->{HeaderDataKeys}} = ();
  %{$This->{HeaderDataKeysAndValues}} = ();
  %{$This->{CannonicalHeaderDataKeysAndValues}} = ();

  my %ReadDefaults = (
    ValidateData => 1,
    DetailLevel => 1,
    NumOfLinesWithMissingData => 0,
    NumOfLinesWithInvalidData => 0,
    CompoundID => undef,
    ValidFileData => 0,
    ValidRequiredHeaderDataKeys => 0,
    ValidFingerprintsStringMode => 0,
  );
  @{$This}{keys %ReadDefaults} = values %ReadDefaults;

  return $This;
}
|
|
170
|
|
# Initialize object data for writing fingerprints FP file...
#
# Properties set to undef here; their defaults are set during first write:
#
#   BitStringFormat - Fingerprints bit vector string format. Possible values:
#       BinaryString or HexadecimalString [Default]. Default is set using
#       Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat.
#   BitsOrder - Bits order in fingerprints bit vector string. Ascending: first
#       bit in each byte as the lowest bit [Default]; Descending: first bit in
#       each byte as the highest bit. Default is set using
#       Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder.
#   VectorStringFormat - Fingerprints vector string format. Possible values:
#       IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString,
#       ValuesAndIDsPairsString, ValuesString. Default is set using
#       Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat: for
#       fingerprints vector object containing vector NumericalValues, it
#       corresponds to IDsAndValuesString; otherwise, it's set to ValuesString.
#
# Overwrite is set to 0: overwriting of existing file is off.
#
sub _InitializeFingerprintsFPFileIOWrite {
  my $This = shift;

  # Formats resolved during first write...
  for my $Property (qw(BitStringFormat BitsOrder VectorStringFormat)) {
    $This->{$Property} = undef;
  }

  # Overwriting existing file...
  $This->{Overwrite} = 0;

  return $This;
}
|
|
208
|
|
# Initialize object values from the names and values specified to the constructor...
#
# Each specified name is applied by calling the corresponding Set<Name>
# method. All other property names and values along with all
# Set/Get<PropertyName> methods are implemented on-demand using
# ObjectProperty class.
#
# Croaks when Name is not specified or when the named file doesn't appear
# to be a fingerprints FP file. Mode specific initialization is performed
# for Read and Write/Append modes.
#
sub _InitializeFingerprintsFPFileIOProperties {
  my $This = shift;
  my %NamesAndValues = @_;

  # Apply all specified properties using their set methods...
  for my $Property (keys %NamesAndValues) {
    my $Setter = "Set${Property}";
    $This->$Setter($NamesAndValues{$Property});
  }

  # File name is mandatory...
  exists $NamesAndValues{Name}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";

  # Make sure it's a fingerprints file...
  my $Name = $NamesAndValues{Name};
  $This->IsFingerprintsFPFile($Name)
    or croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be fingerprints format...";

  # Mode specific setup...
  if ($This->GetMode() =~ /^Read$/i) {
    $This->_InitializeFingerprintsFPFileIOReadProperties(%NamesAndValues);
  }
  elsif ($This->GetMode() =~ /^(Write|Append)$/i) {
    $This->_InitializeFingerprintsFPFileIOWriteProperties(%NamesAndValues);
  }

  return $This;
}
|
|
241
|
|
# Initialize object properties for reading fingerprints FP file...
#
# FingerprintsStringMode defaults to AutoDetect when it has no true value;
# the file header data is then retrieved and validated.
#
sub _InitializeFingerprintsFPFileIOReadProperties {
  my $This = shift;
  my %NamesAndValues = @_;

  # Set default value for FingerprintsStringMode...
  $This->{FingerprintsStringMode} ||= 'AutoDetect';

  $This->_PrepareForReadingFingerprintsFPFileData();

  return $This;
}
|
|
256
|
|
# Initialize object properties for writing fingerprints FP file...
#
# Croaks unless FingerprintsStringMode was specified and its current value is
# FingerprintsBitVectorString or FingerprintsVectorString; AutoDetect is not
# accepted for write/append.
#
sub _InitializeFingerprintsFPFileIOWriteProperties {
  my $This = shift;
  my %NamesAndValues = @_;

  # FingerprintsStringMode must be explicitly specified...
  unless (exists $NamesAndValues{FingerprintsStringMode}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode...";
  }

  # ... and must be one of the modes supported for write/append...
  unless ($This->{FingerprintsStringMode} =~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  $This->_PrepareForWritingFingerprintsFPFileData();

  return $This;
}
|
|
275
|
|
# Set FingerprintsStringMode...
#
# Supported values, matched without regard to case:
#
#   AutoDetect - automatically detect format of fingerprints string
#   FingerprintsBitVectorString - Bit vector fingerprints string format
#   FingerprintsVectorString - Vector fingerprints string format
#
# Croaks for any other value. Returns the object.
#
sub SetFingerprintsStringMode {
  my $This = shift;
  my $Value = shift;

  unless ($Value =~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  $This->{FingerprintsStringMode} = $Value;

  return $This;
}
|
|
293
|
|
# Set DetailLevel...
#
# The value is accepted only when TextUtil::IsPositiveInteger reports it as
# valid; otherwise the method croaks. Returns the object.
#
sub SetDetailLevel {
  my $This = shift;
  my $Value = shift;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0...";

  $This->{DetailLevel} = $Value;

  return $This;
}
|
|
307
|
|
# Set BitStringFormat...
#
# Supported values, matched without regard to case: BinaryString or
# HexadecimalString. Croaks for any other value. Returns the object.
#
sub SetBitStringFormat {
  my $This = shift;
  my $Value = shift;

  unless ($Value =~ /^(BinaryString|HexadecimalString)$/i) {
    croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";
  }

  $This->{BitStringFormat} = $Value;

  return $This;
}
|
|
321
|
|
# Set BitsOrder...
#
# Supported values, matched without regard to case:
#
#   Ascending - First bit in each byte as the lowest bit
#   Descending - First bit in each byte as the highest bit
#
# Croaks for any other value. Returns the object.
#
sub SetBitsOrder {
  my($This, $Value) = @_;

  if ($Value !~ /^(Ascending|Descending)$/i) {
    # Report the property actually being set; the message previously named
    # FingerprintsStringMode...
    croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
  }

  $This->{BitsOrder} = $Value;

  return $This;
}
|
|
338
|
|
# Set compound ID...
#
# Space characters are not allowed in compound ID: any present are removed
# and a warning is issued. Returns the object.
#
sub SetCompoundID {
  my $This = shift;
  my $Value = shift;

  # Substitution returns a true value only when at least one space was removed...
  if ($Value =~ s/ //g) {
    carp "Warning: ${ClassName}->SetCompoundID: Spaces are not allowed in compound ID; They have been removed...";
  }

  $This->{CompoundID} = $Value;

  return $This;
}
|
|
353
|
|
# Set VectorStringFormat...
#
# Supported values, matched without regard to case: IDsAndValuesString,
# IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or
# ValuesString. Croaks for any other value. Returns the object.
#
sub SetVectorStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
    # Report the property actually being set; the message previously named
    # FingerprintsStringMode...
    croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
  }

  $This->{VectorStringFormat} = $Value;

  return $This;
}
|
|
369
|
|
# Get header data keys or number of header data keys in header data block...
#
# Returns the list of keys in list context and their count in scalar context.
#
sub GetHeaderDataKeys {
  my $This = shift;
  my $Keys = $This->{HeaderDataKeys};

  if (wantarray) {
    return @{$Keys};
  }
  return scalar @{$Keys};
}
|
|
377
|
|
# Set header data keys...
#
# Header data keys can't be set by callers: this method always croaks.
#
sub SetHeaderDataKeys {
  my($This, @Keys) = @_;

  croak "Error: ${ClassName}->SetHeaderDataKeys: Can't set HeaderDataKeys: Not allowed...";
}
|
|
387
|
|
# Get header data keys and values hash...
#
# Returns the contents of the HeaderDataKeysAndValues hash.
#
sub GetHeaderDataKeysAndValues {
  my $This = shift;
  my $KeysAndValues = $This->{HeaderDataKeysAndValues};

  return %{$KeysAndValues};
}
|
|
395
|
|
# Set header data keys and values hash...
#
# Header data keys and values can't be set by callers: this method always
# croaks.
#
sub SetHeaderDataKeysAndValues {
  my($This, %KeysAndValues) = @_;

  croak "Error: ${ClassName}->SetHeaderDataKeysAndValues: Can't set HeaderDataKeysAndValues: Not allowed...";
}
|
|
405
|
|
# Get required header data keys or number of required header data keys...
#
# Returns the list of keys in list context and their count in scalar context.
#
sub GetRequiredHeaderDataKeys {
  my $This = shift;
  my $Keys = $This->{RequiredHeaderDataKeys};

  if (wantarray) {
    return @{$Keys};
  }
  return scalar @{$Keys};
}
|
|
413
|
|
# Set required header data keys...
#
# Required header data keys can't be set by callers: this method always
# croaks.
#
sub SetRequiredHeaderDataKeys {
  my($This, @Keys) = @_;

  croak "Error: ${ClassName}->SetRequiredHeaderDataKeys: Can't set RequiredHeaderDataKeys: Not allowed...";
}
|
|
423
|
|
# Get required header data keys and values hash...
#
# Returns the contents of the RequiredHeaderDataKeysAndValues hash.
#
sub GetRequiredHeaderDataKeysAndValues {
  my $This = shift;
  my $KeysAndValues = $This->{RequiredHeaderDataKeysAndValues};

  return %{$KeysAndValues};
}
|
|
431
|
|
# Set required header data keys and values hash...
#
# Required header data keys and values can't be set by callers: this method
# always croaks.
#
sub SetRequiredHeaderDataKeysAndValues {
  my($This, %KeysAndValues) = @_;

  croak "Error: ${ClassName}->SetRequiredHeaderDataKeysAndValues: Can't set RequiredHeaderDataKeysAndValues: Not allowed...";
}
|
|
441
|
|
# Get fingerprints object for current data line...
#
# Returns the stored FingerprintsObject value, which may be undef.
#
sub GetFingerprints {
  my $This = shift;

  return $This->{FingerprintsObject};
}
|
|
449
|
|
# Set fingerprints object for current data line...
#
# Stores the specified object without any validation. Returns the object
# on which the method was called.
#
sub SetFingerprints {
  my $This = shift;
  my $FingerprintsObject = shift;

  $This->{FingerprintsObject} = $FingerprintsObject;

  return $This;
}
|
|
459
|
|
# Get fingerprints string for current data line...
#
# Returns the stored fingerprints string, or the text 'None' when the stored
# value is not true.
#
sub GetFingerprintsString {
  my $This = shift;

  return $This->{FingerprintsString} || 'None';
}
|
|
467
|
|
# Set fingerprints string for current data line...
#
# Stores the specified string without any validation. Returns the object.
#
sub SetFingerprintsString {
  my $This = shift;
  my $FingerprintsString = shift;

  $This->{FingerprintsString} = $FingerprintsString;

  return $This;
}
|
|
477
|
|
# Get partial fingerprints string for current data line...
#
# Returns the stored partial fingerprints string, or the text 'None' when the
# stored value is not true.
#
sub GetPartialFingerprintsString {
  my $This = shift;

  return $This->{PartialFingerprintsString} || 'None';
}
|
|
485
|
|
# Set partial fingerprints string for current data line...
#
# Stores the specified string without any validation. Returns the object.
#
sub SetPartialFingerprintsString {
  my $This = shift;
  my $PartialFingerprintsString = shift;

  $This->{PartialFingerprintsString} = $PartialFingerprintsString;

  return $This;
}
|
|
495
|
|
# Does fingerprints FP file contain valid data?
#
# Returns 1 when the ValidFileData status is true; otherwise, returns 0.
#
sub IsFingerprintsFileDataValid {
  my $This = shift;

  if ($This->{ValidFileData}) {
    return 1;
  }
  return 0;
}
|
|
503
|
|
# Does current data line contain valid fingerprints object data?
#
# Returns 1 when a fingerprints object is defined for the current data line;
# otherwise, returns 0.
#
sub IsFingerprintsDataValid {
  my $This = shift;

  return 1 if defined $This->{FingerprintsObject};

  return 0;
}
|
|
511
|
|
# Check presence of a header data key...
#
# The key is lower cased before it is looked up in the canonical header data
# hash. Returns 1 when the key is present; otherwise, returns 0.
#
sub IsHeaderDataKeyPresent {
  my $This = shift;
  my $Key = shift;

  my $LookupKey = lc $Key;
  if (exists $This->{CannonicalHeaderDataKeysAndValues}{$LookupKey}) {
    return 1;
  }
  return 0;
}
|
|
522
|
|
# Get value of header data key...
#
# The key is lower cased before it is looked up in the canonical header data
# hash. Returns the stored value, or undef when the key is not present.
#
sub GetHeaderDataKeyValue {
  my $This = shift;
  my $Key = shift;

  my $LookupKey = lc $Key;
  if (exists $This->{CannonicalHeaderDataKeysAndValues}{$LookupKey}) {
    return $This->{CannonicalHeaderDataKeysAndValues}{$LookupKey};
  }
  return undef;
}
|
|
533
|
|
#
# Read next available fingerprints line, process it and generate appropriate
# fingerprints objects...
#
# Returns undef when no data line is available; otherwise, returns the object.
# The fingerprints object is generated only when the file data is valid, the
# data line passes validation (performed when ValidateData is set) and a
# fingerprints string is available for the line.
#
sub Read {
  my $This = shift;

  # Read data line...
  $This->_ReadDataLine() or return undef;

  # No need to process invalid FP file with invalid data: the line is
  # counted as missing data during validation...
  if (!$This->{ValidFileData}) {
    $This->{NumOfLinesWithMissingData} += 1 if $This->{ValidateData};
    return $This;
  }

  # Perform data validation...
  if ($This->{ValidateData} && !$This->_ValidateReadDataLine()) {
    return $This;
  }

  # Check again to handle problematic data for non-validated data lines...
  return $This unless $This->{FingerprintsString};

  # Generate fingerprints object and set its compound ID...
  $This->_GenerateFingerprintsObject();
  $This->_GenerateCompoundID();

  return $This;
}
|
|
574
|
|
# Read next available fingerprints line, process it and generate appropriate
# fingerprints objects...
#
# Calls Read and returns its result.
#
sub Next {
  my $This = shift;

  return $This->Read();
}
|
|
583
|
|
# Read fingerprints data line...
#
# Resets per line data, retrieves the next data line and processes it.
# Returns 0 when the retrieved data line is not true, which is treated as
# end of file; otherwise, returns 1.
#
sub _ReadDataLine {
  my $This = shift;

  # Initialize data for current line...
  $This->_InitializeReadDataLine();

  if (!$This->{FirstDataLineIO}) {
    # Get next data line...
    $This->{LineNum} += 1;
    $This->{DataLine} = TextUtil::GetTextLine($This->{FileHandle});
  }
  else {
    # Get first data line...
    $This->_ProcessFirstDataLineRead();
  }

  # Is it end of file?
  return 0 unless $This->{DataLine};

  # Process data line to retrieve compound ID and fingerprints string information...
  $This->_ProcessDataLineRead();

  return 1;
}
|
|
612
|
|
# Process data line to retrieve compound ID and fingerprints string information...
#
# The data line is split at the first run of space characters: the text
# before it is the compound ID and the text after it is the partial
# fingerprints string. Nothing is set when the line contains no space.
#
sub _ProcessDataLineRead {
  my $This = shift;

  # Data line without a space separator carries no fingerprints data...
  return $This unless $This->{DataLine} =~ /^(.*?)[ ]+(.*?)$/;

  @{$This}{qw(CompoundID PartialFingerprintsString)} = ($1, $2);

  # Set up fingerprints string...
  $This->_GenerateFingerprintsStringFromPartialFingerprintsString();

  return $This;
}
|
|
633
|
|
# Initialize data line for reading...
#
# Sets compound ID, data line, fingerprints object, fingerprints string and
# partial fingerprints string for the current line to undef.
#
sub _InitializeReadDataLine {
  my $This = shift;

  for my $Property (qw(CompoundID DataLine FingerprintsObject FingerprintsString PartialFingerprintsString)) {
    $This->{$Property} = undef;
  }

  return $This;
}
|
|
649
|
|
# Validate fingerprints string data line...
#
# Returns 0 and increments NumOfLinesWithMissingData when compound ID or
# partial fingerprints string is not available for the current line. Returns
# 0 and increments NumOfLinesWithInvalidData when the fingerprints string is
# not available or its values are not valid. Otherwise, returns 1.
#
# A warning is issued for DetailLevel >= 2; for DetailLevel >= 3, it also
# includes the data line.
#
sub _ValidateReadDataLine {
  my $This = shift;

  # Check for missing data...
  unless ($This->{CompoundID} && $This->{PartialFingerprintsString}) {
    $This->{NumOfLinesWithMissingData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data: $This->{DataLine}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data...";
    }
    return 0;
  }

  # Check for invalid data: a missing fingerprints string is invalid as well...
  my $ValidFingerprintsData = $This->{FingerprintsString}
    && Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($This->{FingerprintsString});

  return 1 if $ValidFingerprintsData;

  $This->{NumOfLinesWithInvalidData} += 1;
  if ($This->{DetailLevel} >= 3) {
    carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data: $This->{DataLine}...";
  }
  elsif ($This->{DetailLevel} >= 2) {
    carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data...";
  }

  return 0;
}
|
|
692
|
|
# Setup fingerprints compound ID for fingerprints string...
#
# When a fingerprints object is available for the current line, its ID is
# set to the compound ID of the line.
#
sub _GenerateCompoundID {
  my $This = shift;

  # Set fingerprints ID...
  $This->{FingerprintsObject}->SetID($This->{CompoundID}) if $This->{FingerprintsObject};

  return $This;
}
|
|
704
|
|
# Process first read...
#
# Clears the FirstDataLineIO flag, skips over header data lines starting
# with a hash mark and stores the first other line as the current data line.
# LineNum is incremented for each line retrieved, including the header lines.
#
sub _ProcessFirstDataLineRead {
  my $This = shift;

  $This->{FirstDataLineIO} = 0;

  # Skip over header data lines and collect first data line...
  while (my $Line = TextUtil::GetTextLine($This->{FileHandle})) {
    $This->{LineNum} += 1;

    # Is it a header data line?
    next if $Line =~ /^#/;

    $This->{DataLine} = $Line;
    last;
  }

  return $This;
}
|
|
728
|
|
# Get ready for reading fingerprints FP file...
#
# Retrieves and validates the header data. The fingerprints string mode is
# validated only when the required header data keys are valid. ValidFileData
# is set to 1 when both validations succeed; otherwise, it's set to 0.
#
sub _PrepareForReadingFingerprintsFPFileData {
  my $This = shift;

  # Retrieve FP file data headers information and validate it...
  $This->_RetrieveFPFileDataHeaders();
  $This->_ValidateReadHeaderDataKeysAndValues();

  # Validate fingerprints string mode information...
  $This->_ValidateReadFingerprintsStringMode() if $This->{ValidRequiredHeaderDataKeys};

  # Set status of FP file data...
  if ($This->{ValidRequiredHeaderDataKeys} && $This->{ValidFingerprintsStringMode}) {
    $This->{ValidFileData} = 1;
  }
  else {
    $This->{ValidFileData} = 0;
  }

  return $This;
}
|
|
750
|
|
# Retrieve information about fingerprints data header in FP file...
#
# The file named by the Name property is opened separately from the data
# file handle and its leading lines starting with a hash mark are processed.
# Each header line contains "Key = Value" pairs delimited by semicolons.
# Processing stops at the first line not starting with a hash mark.
#
# Results are stored in HeaderDataKeys (keys in file order),
# HeaderDataKeysAndValues and CannonicalHeaderDataKeysAndValues (lower case
# keys). A header line containing a pair with an empty key or value is
# ignored as a whole, with a warning.
#
# Croaks when the file doesn't exist or can't be opened.
#
sub _RetrieveFPFileDataHeaders {
  my($This) = @_;
  my($FPFile, $FPFileHandle, $Line, $KeyValuePair, $Key, $Value, $KeyValueDelimiter, $KeyValuePairDelimiter, @LineKeyValuePairs);

  $FPFile = $This->{Name};

  if (!(-e $FPFile)) {
    croak "Error: ${ClassName}->_RetrieveFPFileDataHeaders: File, $FPFile, doesn't exist...";
  }

  # Three argument open with a lexical file handle: file name is never
  # interpreted as an open mode and no package level handle is used...
  if (!open $FPFileHandle, '<', $FPFile) {
    croak "Error: ${ClassName}->_RetrieveFPFileDataHeaders: Couldn't open input FP file $FPFile: $! ...";
  }

  # Process header key/value pair data...
  #
  $KeyValueDelimiter = '=';
  $KeyValuePairDelimiter = ';';

  @{$This->{HeaderDataKeys}} = ();
  %{$This->{HeaderDataKeysAndValues}} = ();
  %{$This->{CannonicalHeaderDataKeysAndValues}} = ();

  LINE: while ($Line = TextUtil::GetTextLine($FPFileHandle)) {
    # Is it a key/value pairs line?
    if ($Line !~ /^#/) {
      last LINE;
    }

    # Take out starting hash mark before processing key/value pairs...
    $Line =~ s/^#//;
    if (TextUtil::IsEmpty($Line)) {
      next LINE;
    }

    @LineKeyValuePairs = ();

    for $KeyValuePair (split /\Q$KeyValuePairDelimiter\E/, $Line) {
      # Split at the first delimiter only so that a value containing the
      # delimiter is retained as is...
      ($Key, $Value) = split /\Q$KeyValueDelimiter\E/, $KeyValuePair, 2;

      $Key = defined($Key) ? TextUtil::RemoveLeadingAndTrailingWhiteSpaces($Key) : '';
      $Value = defined($Value) ? TextUtil::RemoveLeadingAndTrailingWhiteSpaces($Value) : '';

      if (TextUtil::IsEmpty($Key) || TextUtil::IsEmpty($Value)) {
        carp "Warning: ${ClassName}->_RetrieveFPFileDataHeaders: Data header line containing \"Key = Value\" pairs is not valid: It must contain even number of \"Key = Value\" pairs with valid values. Ignoring data header line: \"$Line\"...";
        next LINE;
      }
      push @LineKeyValuePairs, ($Key, $Value);
    }

    # Record keys and values only after the whole line has been validated:
    # an ignored line contributes neither keys nor values...
    while (@LineKeyValuePairs) {
      ($Key, $Value) = splice @LineKeyValuePairs, 0, 2;

      push @{$This->{HeaderDataKeys}}, $Key;
      $This->{HeaderDataKeysAndValues}{$Key} = $Value;
      $This->{CannonicalHeaderDataKeysAndValues}{lc($Key)} = $Value;
    }
  }
  close $FPFileHandle;

  return $This;
}
|
|
815
|
|
# Validate header data keys and values...
#
# Checks that the FingerprintsStringType data header key is present with a
# supported value, that all header data keys required for that type are
# present and that their values are valid. On success, the required keys
# and values are processed, ValidRequiredHeaderDataKeys is set to 1 and 1
# is returned.
#
# Returns 0, after issuing a warning, for a missing or unsupported
# FingerprintsStringType and for invalid required key values. Croaks when
# any other required data header key is missing.
#
sub _ValidateReadHeaderDataKeysAndValues {
  my($This) = @_;
  my($FingerprintsStringType, $Key, @RequiredHeaderDataKeys);

  $This->{ValidRequiredHeaderDataKeys} = 0;
  @{$This->{RequiredHeaderDataKeys}} = ();

  # Is FingerprintsStringType key present?
  if (!$This->IsHeaderDataKeyPresent('FingerprintsStringType')) {
    carp "Warning: ${ClassName}->_ValidateReadHeaderDataKeysAndValues: FingerprintsStringType data header key is missing in fingerprints file...";
    return 0;
  }
  $FingerprintsStringType = $This->GetHeaderDataKeyValue('FingerprintsStringType');

  # Are all required data header keys present?
  #
  @RequiredHeaderDataKeys = ();

  if ($FingerprintsStringType =~ /^(FingerprintsBitVector|FingerprintsVector)$/i) {
    push @RequiredHeaderDataKeys, $This->_GetRequiredHeaderDataKeys($FingerprintsStringType);
  }
  else {
    carp "Warning: ${ClassName}->_ValidateReadHeaderDataKeysAndValues: FingerprintsStringType data header key value, $FingerprintsStringType, is not valid. Supported values: FingerprintsBitVector or FingerprintsVector...";
    return 0;
  }

  for $Key (@RequiredHeaderDataKeys) {
    if (!$This->IsHeaderDataKeyPresent($Key)) {
      croak "Error: ${ClassName}->_ValidateReadHeaderDataKeysAndValues: Requires data header key, $Key, is missing in fingerprints file...";
    }
  }

  push @{$This->{RequiredHeaderDataKeys}}, @RequiredHeaderDataKeys;

  # Are all required data header key values valid?
  #
  if (!$This->_ValidateRequiredHeaderDataKeyValues()) {
    return 0;
  }

  # Process required header key values...
  #
  $This->_ProcessRequiredHeaderDataKeyValues();

  $This->{ValidRequiredHeaderDataKeys} = 1;

  return 1;
}
|
|
866
|
|
# Validate required data header key values...
#
# Each key in RequiredHeaderDataKeys is matched, without regard to case,
# against the supported keys and its value is checked:
#
#   FingerprintsStringType - FingerprintsBitVector or FingerprintsVector
#   Size - A positive integer as reported by TextUtil::IsPositiveInteger
#   BitStringFormat - BinaryString or HexadecimalString
#   BitsOrder - Ascending or Descending
#   VectorStringFormat - IDsAndValuesString, IDsAndValuesPairsString,
#       ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString
#   VectorValuesType - OrderedNumericalValues, NumericalValues or
#       AlphaNumericalValues
#   Description - A non-empty text string
#
# Returns 0, after issuing a warning, at the first invalid value or
# unsupported key; otherwise, returns 1.
#
sub _ValidateRequiredHeaderDataKeyValues {
  my($This) = @_;
  my($Key, $Value);

  for $Key (@{$This->{RequiredHeaderDataKeys}}) {
    $Value = $This->GetHeaderDataKeyValue($Key);

    if ($Key =~ /^FingerprintsStringType$/i) {
      if ($Value !~ /^(FingerprintsBitVector|FingerprintsVector)$/i) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: FingerprintsBitVector or FingerprintsVector...";
        return 0;
      }
    }
    elsif ($Key =~ /^Size$/i) {
      if (!TextUtil::IsPositiveInteger($Value)) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: > 0...";
        return 0;
      }
    }
    elsif ($Key =~ /^BitStringFormat$/i) {
      if ($Value !~ /^(BinaryString|HexadecimalString)$/i) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: BinaryString or HexadecimalString ...";
        return 0;
      }
    }
    elsif ($Key =~ /^BitsOrder$/i) {
      if ($Value !~ /^(Ascending|Descending)$/i) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: Ascending or Descending...";
        return 0;
      }
    }
    elsif ($Key =~ /^VectorStringFormat$/i) {
      if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString ...";
        return 0;
      }
    }
    elsif ($Key =~ /^VectorValuesType$/i) {
      if ($Value !~ /^(OrderedNumericalValues|NumericalValues|AlphaNumericalValues)$/i) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: OrderedNumericalValues, NumericalValues or AlphaNumericalValues...";
        return 0;
      }
    }
    elsif ($Key =~ /^Description$/i) {
      if (TextUtil::IsEmpty($Value)) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value is not valid. Supported value: A non-empty text string...";
        return 0;
      }
    }
    else {
      # Key is not one of the supported required data header keys...
      carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key is not supported...";
      return 0;
    }
  }

  return 1;
}
|
|
932
|
|
# Cache the required header data key values on the object and build the prefix
# strings used to generate a complete fingerprints string from the partial
# fingerprints string specified on a fingerprints data line...
#
# Sets RequiredHeaderDataKeysAndValues along with FingerprintsBitVectorStringPrefix,
# FingerprintsVectorStringPrefix1 and FingerprintsVectorStringPrefix2; prefixes which
# don't apply to the fingerprints string type are left as empty strings.
#
sub _ProcessRequiredHeaderDataKeyValues {
  my($This) = @_;

  # Empty the existing hash in place and refill it from header data...
  %{$This->{RequiredHeaderDataKeysAndValues}} = ();

  for my $HeaderKey (@{$This->{RequiredHeaderDataKeys}}) {
    $This->{RequiredHeaderDataKeysAndValues}{$HeaderKey} = $This->GetHeaderDataKeyValue($HeaderKey);
  }

  # No prefix is available until the fingerprints string type is recognized...
  for my $PrefixName (qw(FingerprintsBitVectorStringPrefix FingerprintsVectorStringPrefix1 FingerprintsVectorStringPrefix2)) {
    $This->{$PrefixName} = '';
  }

  my $StringType = $This->{RequiredHeaderDataKeysAndValues}{FingerprintsStringType};

  if ($StringType =~ /^FingerprintsBitVector$/i) {
    my @PrefixKeys = qw(FingerprintsStringType Description Size BitStringFormat BitsOrder);
    $This->{FingerprintsBitVectorStringPrefix} = $This->_GenerateFingerprintsPrefixUsingKeys(@PrefixKeys);
  }
  elsif ($StringType =~ /^FingerprintsVector$/i) {
    # Two prefixes are needed: number of values from the data line goes in between...
    my @LeadingKeys = qw(FingerprintsStringType Description);
    $This->{FingerprintsVectorStringPrefix1} = $This->_GenerateFingerprintsPrefixUsingKeys(@LeadingKeys);

    my @TrailingKeys = qw(VectorValuesType VectorStringFormat);
    $This->{FingerprintsVectorStringPrefix2} = $This->_GenerateFingerprintsPrefixUsingKeys(@TrailingKeys);
  }

  return $This;
}
|
|
967
|
|
# Generate fingerprints prefix using header keys data...
#
# Looks up the cached required header data value for each specified key, in the
# specified order, and returns the values joined by the fingerprints string
# delimiter.
#
sub _GenerateFingerprintsPrefixUsingKeys {
  my($This, @Keys) = @_;

  my $Separator = Fingerprints::FingerprintsStringUtil::GetFingeprintsStringDelimiter();
  my @PrefixParts = map { $This->{RequiredHeaderDataKeysAndValues}{$_} } @Keys;

  return join($Separator, @PrefixParts);
}
|
|
983
|
|
# Get required header data keys for a fingerprints string type...
#
# Returns the ordered list of header data keys required for the specified
# FingerprintsStringType. For an unrecognized type, a warning is issued and an
# empty list is returned.
#
sub _GetRequiredHeaderDataKeys {
  my($This, $FingerprintsStringType) = @_;
  my(@RequiredKeys);

  @RequiredKeys = ();

  # NOTE(review): each pattern is anchored at only one end, so values with extra
  # leading or trailing text are accepted as well - confirm whether callers rely on it.
  if ($FingerprintsStringType =~ /FingerprintsBitVector$/i) {
    push @RequiredKeys, qw(FingerprintsStringType Description Size BitStringFormat BitsOrder);
  }
  elsif ($FingerprintsStringType =~ /^FingerprintsVector/i) {
    push @RequiredKeys, qw(FingerprintsStringType Description VectorStringFormat VectorValuesType);
  }
  else {
    # Report the actual name of this method in the warning...
    carp "Warning: ${ClassName}->_GetRequiredHeaderDataKeys: FingerprintsStringType value, $FingerprintsStringType, is not valid. Supported values: FingerprintsBitVector or FingerprintsVector...";
  }

  return @RequiredKeys;
}
|
|
1004
|
|
# Validate fingerprints string mode information...
#
# Compares the FingerprintsStringMode value set on the object against the
# FingerprintsStringType header data value. Any mode value other than
# FingerprintsBitVectorString or FingerprintsVectorString is handled as AutoDetect
# and the type is taken from header data.
#
# On success, sets ValidFingerprintsStringMode, FingerprintsBitVectorStringMode,
# FingerprintsVectorStringMode, FirstFingerprintsStringType and
# FirstFingerprintsStringDescription, and returns 1. On failure, issues a warning,
# leaves those values reset to 0 or an empty string, and returns 0.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;
  my($FingerprintsStringType, $FingerprintsStringDescription, $FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType);

  # Reset status before validation so that a failure leaves a clean state...
  $This->{ValidFingerprintsStringMode} = 0;
  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;

  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  ($FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode) = (0, 0);
  $FirstFingerprintsStringType = '';

  $FingerprintsStringType = $This->GetHeaderDataKeyValue('FingerprintsStringType');
  $FingerprintsStringDescription = $This->GetHeaderDataKeyValue('Description');

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    if ($FingerprintsStringType !~ /^FingerprintsBitVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: Fingerprints string data type, $FingerprintsStringType, doesn't correspond to, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsBitVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsBitVector';
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    if ($FingerprintsStringType !~ /^FingerprintsVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: Fingerprints string data type, $FingerprintsStringType, doesn't correspond to, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsVector';
  }
  else {
    # AutoDetect mode: header data decides the type...
    if ($FingerprintsStringType =~ /^FingerprintsBitVector$/i) {
      $FingerprintsBitVectorStringMode = 1;
    }
    elsif ($FingerprintsStringType =~ /^FingerprintsVector$/i) {
      $FingerprintsVectorStringMode = 1;
    }
    else {
      # List the type names exactly as they are accepted above...
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: Fingerprints string data type, $FingerprintsStringType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintsBitVector or FingerprintsVector...";
      return 0;
    }
    # Keep the type value as it's specified in header data...
    $FirstFingerprintsStringType = $FingerprintsStringType;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;

  $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
  $This->{FirstFingerprintsStringDescription} = $FingerprintsStringDescription;

  return 1;
}
|
|
1071
|
|
# Write fingerprints string generated from specified fingerprints - fingerprints bit-vector
# or fingerprints vector - object and other data to FP file...
#
# Takes a fingerprints object and a compound ID; returns the FP file IO object.
#
sub WriteFingerprints {
  my($This, $FingerprintsObject, $CompoundID) = @_;

  # Start with an empty data line and record what's being written...
  $This->_InitializeWriteDataLine();
  $This->{FingerprintsObject} = $FingerprintsObject;
  $This->SetCompoundID($CompoundID);

  # Fingerprints object => fingerprints string => partial fingerprints string => data line...
  for my $Step (qw(_GenerateFingerprintsString _GeneratePartialFingerprintsStringFromFingerprintsString _WriteDataLine)) {
    $This->$Step();
  }

  return $This;
}
|
|
1096
|
|
# Write fingerprints string and other data to FP file...
#
# Notes:
#   o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat values
#     are ignored during writing of fingerprints string: it's written to the file as it is.
#   o FingerprintsString is a regular fingerprints string as opposed to a partial
#     fingerprints string.
#
# Takes a fingerprints string and a compound ID; returns the FP file IO object.
#
sub WriteFingerprintsString {
  my($This, $FingerprintsString, $CompoundID) = @_;

  # Start with an empty data line and record what's being written...
  $This->_InitializeWriteDataLine();
  $This->{FingerprintsString} = $FingerprintsString;
  $This->SetCompoundID($CompoundID);

  # Fingerprints string => fingerprints object and partial fingerprints string => data line...
  for my $Step (qw(_GenerateFingerprintsObject _GeneratePartialFingerprintsStringFromFingerprintsString _WriteDataLine)) {
    $This->$Step();
  }

  return $This;
}
|
|
1126
|
|
# Initialize data line for writing...
#
# Sets all the per line values - data line, compound ID, fingerprints object,
# fingerprints string and partial fingerprints string - to undef.
#
sub _InitializeWriteDataLine {
  my($This) = @_;

  for my $LineDataName (qw(DataLine CompoundID FingerprintsObject FingerprintsString PartialFingerprintsString)) {
    $This->{$LineDataName} = undef;
  }

  return $This;
}
|
|
1142
|
|
# Write fingerprints data line...
#
# The data line corresponds to compound ID and partial fingerprints string
# separated by a space. The line is also saved as DataLine on the object and
# LineNum is incremented.
#
sub _WriteDataLine {
  my($This) = @_;

  # Header lines are taken care of before the first data line...
  $This->_ProcessFirstDataLineWrite() if $This->{FirstDataLineIO};

  my $DataLine = join(' ', $This->{CompoundID}, $This->{PartialFingerprintsString});

  $This->{LineNum} += 1;
  my $Out = $This->{FileHandle};
  print $Out "$DataLine\n";

  $This->{DataLine} = $DataLine;

  return $This;
}
|
|
1164
|
|
# Process first write...
#
# Clears FirstDataLineIO and, for Write mode only, writes out the header lines.
#
sub _ProcessFirstDataLineWrite {
  my($This) = @_;

  $This->{FirstDataLineIO} = 0;

  # Skip header lines for append mode...
  return $This unless $This->GetMode() =~ /^Write$/i;

  $This->_WritePackageAndTimeStampHeaderKeys();
  $This->_WriteRequiredHeaderDataKeys();

  return $This;
}
|
|
1181
|
|
# Write out package and time stamp information...
#
# Writes four header lines - Package, Release Date, an empty comment line and
# TimeStamp - and increments LineNum for each line.
#
sub _WritePackageAndTimeStampHeaderKeys {
  my($This) = @_;
  my $Out = $This->{FileHandle};

  # Each line written counts towards the line number...
  my $WriteHeaderLine = sub {
    my($Text) = @_;

    $This->{LineNum} += 1;
    print $Out "$Text\n";
  };

  # Package information...
  $WriteHeaderLine->("# Package = " . PackageInfo::GetPackageName() . " " . PackageInfo::GetVersionNumber());
  $WriteHeaderLine->("# Release Date = " . PackageInfo::GetReleaseDate());

  # Timestamp information...
  $WriteHeaderLine->("#");
  $WriteHeaderLine->("# TimeStamp = " . TimeUtil::FPFileTimeStamp());

  return $This;
}
|
|
1209
|
|
# Write out required header data keys...
#
# Generates the required header data keys and values and writes them out as
# "# Key = Value" lines. The block is enclosed by empty comment lines and an
# extra empty comment line follows the FingerprintsStringType key. LineNum is
# incremented for each line.
#
sub _WriteRequiredHeaderDataKeys {
  my($This) = @_;
  my $Out = $This->{FileHandle};

  $This->_GenerateWriteRequiredHeaderDataKeys();

  # Each line written counts towards the line number...
  my $WriteHeaderLine = sub {
    my($Text) = @_;

    $This->{LineNum} += 1;
    print $Out "$Text\n";
  };

  $WriteHeaderLine->("#");

  for my $HeaderKey (@{$This->{RequiredHeaderDataKeys}}) {
    my $HeaderValue = $This->{RequiredHeaderDataKeysAndValues}{$HeaderKey};

    $WriteHeaderLine->("# $HeaderKey = $HeaderValue");

    # Set the type line apart from the rest of the keys...
    $WriteHeaderLine->("#") if $HeaderKey =~ /^FingerprintsStringType$/i;
  }

  $WriteHeaderLine->("#");

  return $This;
}
|
|
1240
|
|
# Generate required header data keys and values for writing using the current
# fingerprints string...
#
# The type at the start of the current fingerprints string must agree with the
# fingerprints string mode set up for writing; otherwise, it's a fatal error.
#
sub _GenerateWriteRequiredHeaderDataKeys {
  my($This) = @_;

  my $WritingBitVector = $This->{FingerprintsBitVectorStringMode} && ($This->{FingerprintsString} =~ /^FingerprintsBitVector/i);
  if ($WritingBitVector) {
    $This->_GenerateWriteRequiredHeaderDataKeysForBitVectorString();
    return $This;
  }

  my $WritingVector = $This->{FingerprintsVectorStringMode} && ($This->{FingerprintsString} =~ /^FingerprintsVector/i);
  if ($WritingVector) {
    $This->_GenerateWriteRequiredHeaderDataKeysForVectorString();
    return $This;
  }

  croak "Error: ${ClassName}->_GenerateWriteRequiredHeaderDataKeys: Required header data keys can't be generated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, doesn't correspond to type of first FingerprintsString: $This->{FingerprintsString}...";
}
|
|
1256
|
|
# Generate required data header keys and values for writing fingerprints bit vector string...
#
# The values are the leading fields of the current fingerprints string: type,
# description, size, bit string format and bits order. A required key without a
# matching field is a fatal error.
#
sub _GenerateWriteRequiredHeaderDataKeysForBitVectorString {
  my($This) = @_;

  @{$This->{RequiredHeaderDataKeys}} = ();
  push @{$This->{RequiredHeaderDataKeys}}, $This->_GetRequiredHeaderDataKeys('FingerprintsBitVector');

  my($Type, $Desc, $NumOfBits, $Format, $Order) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString});

  # Key name patterns paired with fingerprints string fields...
  my @FieldsByKeyPattern = (
    [qr/^FingerprintsStringType$/i, $Type],
    [qr/^Description$/i, $Desc],
    [qr/^Size$/i, $NumOfBits],
    [qr/^BitStringFormat$/i, $Format],
    [qr/^BitsOrder$/i, $Order],
  );

  %{$This->{RequiredHeaderDataKeysAndValues}} = ();

  KEY: for my $Key (@{$This->{RequiredHeaderDataKeys}}) {
    for my $Field (@FieldsByKeyPattern) {
      my($KeyPattern, $FieldValue) = @{$Field};

      next unless $Key =~ $KeyPattern;

      $This->{RequiredHeaderDataKeysAndValues}{$Key} = $FieldValue;
      next KEY;
    }
    croak "Error: ${ClassName}->_GenerateWriteRequiredHeaderDataKeysForBitVectorString: Required header data key, $Key, value can't be generated: It's not a known key ...";
  }

  return $This;
}
|
|
1298
|
|
# Generate required data header keys and values for writing fingerprints vector string...
#
# The values are fields of the current fingerprints string: type, description,
# vector values type and vector string format. The number of values field isn't
# a header key. A required key without a matching field is a fatal error.
#
sub _GenerateWriteRequiredHeaderDataKeysForVectorString {
  my($This) = @_;

  @{$This->{RequiredHeaderDataKeys}} = ();
  push @{$This->{RequiredHeaderDataKeys}}, $This->_GetRequiredHeaderDataKeys('FingerprintsVector');

  my($Type, $Desc, $NumOfValues, $ValuesType, $StringFormat) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString});

  # Key name patterns paired with fingerprints string fields...
  my @FieldsByKeyPattern = (
    [qr/^FingerprintsStringType$/i, $Type],
    [qr/^Description$/i, $Desc],
    [qr/^VectorValuesType$/i, $ValuesType],
    [qr/^VectorStringFormat$/i, $StringFormat],
  );

  %{$This->{RequiredHeaderDataKeysAndValues}} = ();

  KEY: for my $Key (@{$This->{RequiredHeaderDataKeys}}) {
    for my $Field (@FieldsByKeyPattern) {
      my($KeyPattern, $FieldValue) = @{$Field};

      next unless $Key =~ $KeyPattern;

      $This->{RequiredHeaderDataKeysAndValues}{$Key} = $FieldValue;
      next KEY;
    }
    croak "Error: ${ClassName}->_GenerateWriteRequiredHeaderDataKeysForVectorString: Required header data key, $Key, value can't be generated: It's not a known key ...";
  }

  return $This;
}
|
|
1336
|
|
1337
|
|
# Get ready for writing fingerprints FP file...
#
# Refuses to proceed when the file already exists and Overwrite isn't set, sets
# up the fingerprints string mode status values using FingerprintsStringMode, and
# sets default string format values for the mode.
#
sub _PrepareForWritingFingerprintsFPFileData {
  my($This) = @_;

  my $FPFile = $This->{Name};
  if (!$This->{Overwrite} && -e $FPFile) {
    croak "Error: ${ClassName}->_PrepareForWritingFingerprintsFPFileData: File, $FPFile, already exist. Use overwrite option...";
  }

  # Setup FingerprintsStringMode status: only explicit modes are valid for writing...
  my $StringMode = $This->{FingerprintsStringMode};

  $This->{FingerprintsBitVectorStringMode} = ($StringMode =~ /^FingerprintsBitVectorString$/i) ? 1 : 0;
  $This->{FingerprintsVectorStringMode} = ($StringMode =~ /^FingerprintsVectorString$/i) ? 1 : 0;
  $This->{ValidFingerprintsStringMode} = ($This->{FingerprintsBitVectorStringMode} || $This->{FingerprintsVectorStringMode}) ? 1 : 0;

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
    return $This;
  }

  $This->_SetDefaultVectorStringFormat() if $This->{FingerprintsVectorStringMode};

  return $This;
}
|
|
1376
|
|
# Set default value for bit string format...
#
# A BitStringFormat value already set on the object is left unchanged.
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}
|
|
1388
|
|
# Set default value for bits order...
#
# A BitsOrder value already set on the object is left unchanged.
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  $This->{BitsOrder} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();

  return $This;
}
|
|
1400
|
|
# Set default value for vector string format...
#
# The default is determined from the current fingerprints object: nothing is
# set without one. A VectorStringFormat value already set on the object is left
# unchanged.
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  return $This if $This->{VectorStringFormat};
  return $This unless $This->{FingerprintsObject};

  $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});

  return $This;
}
|
|
1412
|
|
# Generate fingerprints object using current fingerprints string...
#
# Returns the FP file IO object after setting FingerprintsObject; FingerprintsObject
# stays undef for an empty fingerprints string. Returns undef when neither the
# bit-vector nor the vector string mode is set.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  # Nothing to parse...
  return $This if !$This->{FingerprintsString};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});
    return $This;
  }

  return undef;
}
|
|
1436
|
|
# Generate fingerprints string using current fingerprints object...
#
# FingerprintsString stays an empty string when there is no fingerprints object
# or neither the bit-vector nor the vector string mode is set. Bit string format
# and bits order are used for bit-vector strings; vector string format is used
# for vector strings.
#
sub _GenerateFingerprintsString {
  my($This) = @_;

  $This->{FingerprintsString} = '';

  # Nothing to generate the string from...
  return $This if !$This->{FingerprintsObject};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{BitStringFormat}, $This->{BitsOrder});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{VectorStringFormat});
  }

  return $This;
}
|
|
1457
|
|
# Generate fingerprints string using partial fingerprints string and header keys data...
#
# Notes:
#   o FP file fingerprints data line only contain partial fingerprints data which
#     can't be used directly to create fingerprints bit-vector or vector objects
#     using functions available in FingerprintsStringUtil.pm module
#   o For bit-vector strings, the prefix generated from header data is placed in
#     front of the partial string. For vector strings, the partial string is split
#     at its first delimiter into number of values and vector data; the number of
#     values goes in between the two prefixes generated from header data.
#   o FingerprintsString stays an empty string for an empty partial string or a
#     vector partial string which can't be split or contains no vector data.
#
sub _GenerateFingerprintsStringFromPartialFingerprintsString {
  my($This) = @_;
  my($FPStringDelim);

  $This->{FingerprintsString} = '';

  if (!$This->{PartialFingerprintsString}) {
    return $This;
  }

  $FPStringDelim = Fingerprints::FingerprintsStringUtil::GetFingeprintsStringDelimiter();

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = join($FPStringDelim, $This->{FingerprintsBitVectorStringPrefix}, $This->{PartialFingerprintsString});
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    my($NumOfValues, $VectorStringData);

    # Match the delimiter literally: it's data and not a pattern...
    ($NumOfValues, $VectorStringData) = $This->{PartialFingerprintsString} =~ /^(.*?)\Q$FPStringDelim\E(.*?)$/;
    if (!(defined($NumOfValues) && defined($VectorStringData) && $VectorStringData)) {
      return $This;
    }

    $This->{FingerprintsString} = join($FPStringDelim, $This->{FingerprintsVectorStringPrefix1}, $NumOfValues, $This->{FingerprintsVectorStringPrefix2}, $VectorStringData);
  }

  return $This;
}
|
|
1493
|
|
# Generate partial fingerprints string using fingerprints string and header keys data...
#
# Notes:
#   o FP file fingerprints data line only contain partial fingerprints data which
#     can't be used directly to create fingerprints bit-vector or vector objects
#     using functions available in FingerprintsStringUtil.pm module
#   o For bit-vector strings, the partial string is the sixth fingerprints string
#     value. For vector strings, it's the number of values followed by the sixth
#     and, when not empty, the seventh fingerprints string values.
#
sub _GeneratePartialFingerprintsStringFromFingerprintsString {
  my($This) = @_;

  $This->{PartialFingerprintsString} = '';

  return $This if !$This->{FingerprintsString};

  if ($This->{FingerprintsBitVectorStringMode}) {
    my @StringValues = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString});

    # Type, description, size, format and order are written as header data...
    $This->{PartialFingerprintsString} = $StringValues[5];
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    my $Separator = Fingerprints::FingerprintsStringUtil::GetFingeprintsStringDelimiter();
    my @StringValues = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString});

    my($ValuesCount, $FirstVectorData, $SecondVectorData) = @StringValues[2, 5, 6];

    my $VectorData = $FirstVectorData;
    if (!TextUtil::IsEmpty($SecondVectorData)) {
      $VectorData = "${FirstVectorData}${Separator}${SecondVectorData}";
    }

    $This->{PartialFingerprintsString} = $ValuesCount . $Separator . $VectorData;
  }

  return $This;
}
|
|
1529
|
|
# Is it a fingerprints file?
#
# Can be called as a function with a file name or as a method on a
# FingerprintsFPFileIO object with a file name. Returns the status of the
# file type check performed using fpf and fp file extensions.
#
sub IsFingerprintsFPFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # Method call: file name follows the object...
  my $CalledOnObject = (@_ == 2) && (_IsFingerprintsFPFileIO($FirstParameter));
  my $FileName = $CalledOnObject ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "fpf fp");

  return $Status;
}
|
|
1547
|
|
# Is it a FingerprintsFPFileIO object?
#
# Returns 1 for a blessed reference which isa this class; otherwise, returns 0.
#
sub _IsFingerprintsFPFileIO {
  my($Object) = @_;

  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
|
|
1554
|
|
1555 1;
|
|
1556
|
|
1557 __END__
|
|
1558
|
|
1559 =head1 NAME
|
|
1560
|
|
1561 FingerprintsFPFileIO
|
|
1562
|
|
1563 =head1 SYNOPSIS
|
|
1564
|
|
1565 use FileIO::FingerprintsFPFileIO;
|
|
1566
|
|
1567 use FileIO::FingerprintsFPFileIO qw(:all);
|
|
1568
|
|
1569 =head1 DESCRIPTION
|
|
1570
|
|
1571 B<FingerprintsFPFileIO> class provides the following methods:
|
|
1572
|
|
1573 new, GetFingerprints, GetFingerprintsString, GetHeaderDataKeyValue,
|
|
1574 GetHeaderDataKeys, GetHeaderDataKeysAndValues, GetPartialFingerprintsString,
|
|
1575 GetRequiredHeaderDataKeys, GetRequiredHeaderDataKeysAndValues,
|
|
1576 IsFingerprintsDataValid, IsFingerprintsFPFile, IsFingerprintsFileDataValid,
|
|
1577 IsHeaderDataKeyPresent, Next, Read, SetBitStringFormat, SetBitsOrder,
|
|
1578 SetCompoundID, SetDetailLevel, SetFingerprints, SetFingerprintsString,
|
|
1579 SetFingerprintsStringMode, SetPartialFingerprintsString, SetVectorStringFormat,
|
|
1580 WriteFingerprints, WriteFingerprintsString
|
|
1581
|
|
1582 The following methods can also be used as functions:
|
|
1583
|
|
1584 IsFingerprintsFPFile
|
|
1585
|
|
1586 B<FingerprintsFPFileIO> class is derived from I<FileIO> class and uses its methods to support
|
|
1587 generic file related functionality.
|
|
1588
|
|
1589 The MayaChemTools fingerprints file (FP) format with B<.fpf> or B<.fp> file extensions supports
|
|
1590 two types of fingerprints data: fingerprints bit-vectors and fingerprints vectors.
|
|
1591
|
|
1592 Example of FP file format containing fingerprints bit-vector string data:
|
|
1593
|
|
1594 #
|
|
1595 # Package = MayaChemTools 7.4
|
|
1596 # ReleaseDate = Oct 21, 2010
|
|
1597 #
|
|
1598 # TimeStamp = Mon Mar 7 15:14:01 2011
|
|
1599 #
|
|
1600 # FingerprintsStringType = FingerprintsBitVector
|
|
1601 #
|
|
1602 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
|
|
1603 # Size = 1024
|
|
1604 # BitStringFormat = HexadecimalString
|
|
1605 # BitsOrder = Ascending
|
|
1606 #
|
|
1607 Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510...
|
|
1608 Cmpd2 000000249400840040100042011001001980410c000000001010088001120...
|
|
1609 ... ...
|
|
1610 ... ..
|
|
1611
|
|
1612 Example of FP file format containing fingerprints vector string data:
|
|
1613
|
|
1614 #
|
|
1615 # Package = MayaChemTools 7.4
|
|
1616 # ReleaseDate = Oct 21, 2010
|
|
1617 #
|
|
1618 # TimeStamp = Mon Mar 7 15:14:01 2011
|
|
1619 #
|
|
1620 # FingerprintsStringType = FingerprintsVector
|
|
1621 #
|
|
1622 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
|
|
1623 # VectorStringFormat = IDsAndValuesString
|
|
1624 # VectorValuesType = NumericalValues
|
|
1625 #
|
|
1626 Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C:
|
|
1627 N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...;
|
|
1628 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2
|
|
1629 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ...
|
|
1630 Cmpd2 103;C N O C=N C=O CC CN CO CC=O CCC CCN CCO CNC N=CN NC=O NCN O=C
|
|
1631 O C CC=O CCCC CCCN CCCO CCNC CNC=N CNC=O CNCN CCCC=O CCCCC CCCCN CC...;
|
|
1632 15 4 4 1 2 13 5 2 2 15 5 3 2 2 1 1 1 2 17 7 6 5 1 1 1 2 15 8 5 7 2 2 2 2
|
|
1633 1 2 1 1 3 15 7 6 8 3 4 4 3 2 2 1 2 3 14 2 4 7 4 4 4 4 1 1 1 2 1 1 1 ...
|
|
1634 ... ...
|
|
1635 ... ...
|
|
1636
|
|
1637 B<FP> file data format consists of two main sections: header section and fingerprints string
|
|
1638 data section. The header section lines start with # and the first line not starting with # represents
|
|
1639 the start of fingerprints string data section. The header section contains both the required and
|
|
1640 optional information which is specified as key = value pairs. The required information
|
|
1641 describes fingerprints bit-vector and vector strings and is used to generate fingerprints objects;
|
|
1642 the optional information is ignored during generation of fingerprints objects.
|
|
1643
|
|
1644 The key = value data specification in the header section and its processing follows these
|
|
1645 rules:
|
|
1646
|
|
1647 o Leading and trailing spaces for key = value pairs are ignored
|
|
1648 o Key and value strings may contain spaces
|
|
1649 o Multiple key = value pairs on a single line are delimited by semicolon
|
|
1650
|
|
1651 The default optional header data section key = value pairs are:
|
|
1652
|
|
1653 # Package = MayaChemTools 7.4
|
|
1654 # ReleaseDate = Oct 21, 2010
|
|
1655
|
|
1656 The B<FingerprintsStringType> key is required data header key for both fingerprints bit-vector
|
|
1657 and vector strings. Possible key values: I<FingerprintsBitVector or FingerprintsVector>.
|
|
1658 For example:
|
|
1659
|
|
1660 # FingerprintsStringType = FingerprintsBitVector
|
|
1661
|
|
1662 The required data header keys for fingerprints bit-vector string are: B<Description, Size,
|
|
1663 BitStringFormat, and BitsOrder>. Possible values for B<BitStringFormat>: I<HexadecimalString
|
|
1664 or BinaryString>. Possible values for B<BitsOrder>: I<Ascending or Descending>. The B<Description>
|
|
1665 key contains information about various parameters used to generate fingerprints bit-vector
|
|
1666 string. The B<Size> corresponds to number of fingerprints bits and is always less than or equal
|
|
1667 to number of bits in bit-vector string which might contain extra bits at the end to round off the
|
|
1668 size to make it multiple of 8. For example:
|
|
1669
|
|
1670 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
|
|
1671 # Size = 1024
|
|
1672 # BitStringFormat = HexadecimalString
|
|
1673 # BitsOrder = Ascending
|
|
1674
|
|
1675 The required data header keys for fingerprints vector string are: B<Description, VectorStringFormat,
|
|
1676 and VectorValuesType>. Possible values for B<VectorStringFormat>: I<IDsAndValuesString,
|
|
1677 IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString or ValuesString>.
|
|
1678 Possible values for B<VectorValuesType>: I<NumericalValues, OrderedNumericalValues or
|
|
1679 AlphaNumericalValues>. The B<Description> key contains information about various parameters used
|
|
1680 to generate fingerprints vector string. For example:
|
|
1681
|
|
1682 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
|
|
1683 # VectorStringFormat = IDsAndValuesString
|
|
1684 # VectorValuesType = NumericalValues
|
|
1685
|
|
1686 The fingerprints data section for fingerprints bit-vector string contains data in the following
|
|
1687 format:
|
|
1688
|
|
1689 ... ...
|
|
1690 CmpdID FingerprintsPartialBitVectorString
|
|
1691 ... ...
|
|
1692
|
|
1693 For example:
|
|
1694
|
|
1695 ... ...
|
|
1696 Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510...
|
|
1697 ... ...
|
|
1698
|
|
1699 The fingerprints data section for fingerprints vector string contains data in the following
|
|
1700 format:
|
|
1701
|
|
1702 ... ...
|
|
1703 CmpdID Size;FingerprintsPartialVectorString
|
|
1704 ... ...
|
|
1705
|
|
1706 For example:
|
|
1707
|
|
1708 ... ...
|
|
1709 Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C:
|
|
1710 N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...;
|
|
1711 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2
|
|
1712 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ...
|
|
1713 ... ...
|
|
1714
|
|
1715 Unlike fingerprints bit-vector string, I<Size> is specified for each partial fingerprints vector string.
|
|
1716 It may change from molecule to molecule for same type of fingerprints.
|
|
1717
|
|
1718 Values IDs are optional for fingerprints vector string containing I<OrderedNumericalValues or
|
|
1719 AlphaNumericalValues>; however, they must be present for I<NumericalValues>. Due to
|
|
1720 various possible values for B<VectorStringFormat>, the fingerprints data section for fingerprints
|
|
1721 vector string supports the following types of data formats:
|
|
1722
|
|
1723 CmpdID Size;ID1 ID2 ID3...;Value1 Value2 Value3...
|
|
1724 CmpdID Size;ID1 Value1 ID2 Value2 ID3 Value3... ...
|
|
1725 CmpdID Size;ValuesAndIDsString: Value1 Value2 Value3...;ID1 ID2 ID3...
|
|
1726 CmpdID Size;ValuesAndIDsPairsString: Value1 ID1 Value2 ID2 Value3 ID3... ...
|
|
1727 CmpdID Size;Value1 Value2 Value3 ...
|
|
1728
|
|
1729 However, all the fingerprints vector string data present in FP file must correspond to only
|
|
1730 one of the formats shown above; multiple data formats in the same file are not allowed.
|
|
1731
|
|
1732 The current release of MayaChemTools supports the following types of fingerprint
|
|
1733 bit-vector and vector strings:
|
|
1734
|
|
1735 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi
|
|
1736 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT
|
|
1737 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X
|
|
1738 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A
|
|
1739 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2
|
|
1740 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B...
|
|
1741
|
|
1742 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS
|
|
1743 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2
|
|
1744 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1
|
|
1745 O.X1.BO2;2 4 14 3 10 1 1 1 3 2
|
|
1746
|
|
1747 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume
|
|
1748 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F
|
|
1749 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1
|
|
1750
|
|
1751 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN
|
|
1752 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C
|
|
1753 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N
|
|
1754 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8
|
|
1755 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1
|
|
1756 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0...
|
|
1757
|
|
1758 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs
|
|
1759 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN
|
|
1760 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3
|
|
1761 .024 -2.270
|
|
1762
|
|
1763 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
|
|
1764 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435
|
|
1765 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1
|
|
1766 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1767 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1768
|
|
1769 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi
|
|
1770 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391
|
|
1771 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414
|
|
1772 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103
|
|
1773 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338
|
|
1774 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303...
|
|
1775
|
|
1776 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes
|
|
1777 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524
|
|
1778 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649
|
|
1779 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...;
|
|
1780 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2
|
|
1781 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1
|
|
1782
|
|
1783 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp
|
|
1784 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100
|
|
1785 0000000001010000000110000011000000000000100000000000000000000000100001
|
|
1786 1000000110000000000000000000000000010011000000000000000000000000010000
|
|
1787 0000000000000000000000000010000000000000000001000000000000000000000000
|
|
1788 0000000000010000100001000000000000101000000000000000100000000000000...
|
|
1789
|
|
1790 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu
|
|
1791 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8
|
|
1792 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567
|
|
1793 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012
|
|
1794 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455
|
|
1795 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404...
|
|
1796
|
|
1797 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp
|
|
1798 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184
|
|
1799 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450
|
|
1800 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430
|
|
1801 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134
|
|
1802 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566...
|
|
1803
|
|
1804 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
|
|
1805 0000000000000000000000000000000001001000010010000000010010000000011100
|
|
1806 0100101010111100011011000100110110000011011110100110111111111111011111
|
|
1807 11111111111110111000
|
|
1808
|
|
1809 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
|
|
1810 1110011111100101111111000111101100110000000000000011100010000000000000
|
|
1811 0000000000000000000000000000000000000000000000101000000000000000000000
|
|
1812 0000000000000000000000000000000000000000000000000000000000000000000000
|
|
1813 0000000000000000000000000000000000000011000000000000000000000000000000
|
|
1814 0000000000000000000000000000000000000000
|
|
1815
|
|
1816 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
|
|
1817 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1818 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
|
|
1819 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
|
|
1820 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
|
|
1821 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
|
|
1822
|
|
1823 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
|
|
1824 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
|
|
1825 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
|
|
1826 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1827 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
|
|
1828 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
|
|
1829
|
|
1830 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
|
|
1831 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
|
|
1832 0100010101011000101001011100110001000010001001101000001001001001001000
|
|
1833 0010110100000111001001000001001010100100100000000011000000101001011100
|
|
1834 0010000001000101010100000100111100110111011011011000000010110111001101
|
|
1835 0101100011000000010001000011000010100011101100001000001000100000000...
|
|
1836
|
|
1837 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
|
|
1838 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
|
|
1839 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
|
|
1840 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
|
|
1841 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
|
|
1842 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
|
|
1843
|
|
1844 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt
|
|
1845 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1
|
|
1846 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N
|
|
1847 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1
|
|
1848 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR
|
|
1849 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ...
|
|
1850
|
|
1851 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
|
|
1852 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1
|
|
1853 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3.
|
|
1854 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...;
|
|
1855 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1
|
|
1856 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1...
|
|
1857
|
|
1858 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi
|
|
1859 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar
|
|
1860 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H
|
|
1861 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...;
|
|
1862 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4
|
|
1863 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ...
|
|
1864
|
|
1865 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
|
|
1866 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-
|
|
1867 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO
|
|
1868 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...;
|
|
1869 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
|
|
1870
|
|
1871 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica
|
|
1872 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC
|
|
1873 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC-
|
|
1874 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...;
|
|
1875 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2
|
|
1876
|
|
1877 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
|
|
1878 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1
|
|
1879 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1
|
|
1880 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1
|
|
1881 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....;
|
|
1882 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2
|
|
1883 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8...
|
|
1884
|
|
1885 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1
|
|
1886 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C
|
|
1887 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3-
|
|
1888 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2
|
|
1889 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C.
|
|
1890 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7...
|
|
1891
|
|
1892 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
|
|
1893 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H
|
|
1894 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2-
|
|
1895 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H
|
|
1896 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...;
|
|
1897 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10
|
|
1898 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
|
|
1899
|
|
1900 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
|
|
1901 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0
|
|
1902 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1
|
|
1903 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0
|
|
1904 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0
|
|
1905 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18...
|
|
1906
|
|
1907 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize:
|
|
1908 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1-
|
|
1909 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1
|
|
1910 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1-
|
|
1911 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...;
|
|
1912 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23
|
|
1913 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1
|
|
1914 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ...
|
|
1915
|
|
1916 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD
|
|
1917 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106
|
|
1918 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0
|
|
1919 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26
|
|
1920 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0
|
|
1921 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ...
|
|
1922
|
|
1923 =head2 METHODS
|
|
1924
|
|
1925 =over 4
|
|
1926
|
|
1927 =item B<new>
|
|
1928
|
|
1929 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO(%IOParameters);
|
|
1930
|
|
1931 Using specified I<IOParameters> names and values hash, B<new> method creates a new
|
|
1932 object and returns a reference to a newly created B<FingerprintsFPFileIO> object. By default,
|
|
1933 the following properties are initialized during I<Read> mode:
|
|
1934
|
|
1935 Name = '';
|
|
1936 Mode = 'Read';
|
|
1937 Status = 0;
|
|
1938 FingerprintsStringMode = 'AutoDetect';
|
|
1939 ValidateData = 1;
|
|
1940 DetailLevel = 1;
|
|
1941
|
|
1942 During I<Write> mode, the following properties get initialized by default:
|
|
1943
|
|
1944 FingerprintsStringMode = undef;
|
|
1945
|
|
1946 BitStringFormat = HexadecimalString;
|
|
1947 BitsOrder = Ascending;
|
|
1948
|
|
1949 VectorStringFormat = NumericalValuesString or ValuesString;
|
|
1950
|
|
1951 Examples:
|
|
1952
|
|
1953 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO(
|
|
1954 'Name' => 'Sample.fpf',
|
|
1955 'Mode' => 'Read',
|
|
1956 'FingerprintsStringMode' =>
|
|
1957 'AutoDetect');
|
|
1958
|
|
1959 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO(
|
|
1960 'Name' => 'Sample.fpf',
|
|
1961 'Mode' => 'Write',
|
|
1962 'FingerprintsStringMode' =>
|
|
1963 'FingerprintsBitVectorString',
|
|
1964 'Overwrite' => 1,
|
|
1965 'BitStringFormat' => 'HexadecimalString',
|
|
1966 'BitsOrder' => 'Ascending');
|
|
1967
|
|
1968 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO(
|
|
1969 'Name' => 'Sample.fp',
|
|
1970 'Mode' => 'Write',
|
|
1971 'FingerprintsStringMode' =>
|
|
1972 'FingerprintsVectorString',
|
|
1973 'Overwrite' => 1,
|
|
1974 'VectorStringFormat' => 'IDsAndValuesString');
|
|
1975
|
|
1976 =item B<GetFingerprints>
|
|
1977
|
|
1978 $FingerprintsObject = $FingerprintsFPFileIO->GetFingerprints();
|
|
1979
|
|
1980 Returns B<FingerprintsObject> generated for current data line using fingerprints bit-vector
|
|
1981 or vector string data. The fingerprints object corresponds to any of the supported fingerprints
|
|
1982 such as PathLengthFingerprints, ExtendedConnectivity, and so on.
|
|
1983
|
|
1984 =item B<GetFingerprintsString>
|
|
1985
|
|
1986 $FingerprintsString = $FingerprintsFPFileIO->GetFingerprintsString();
|
|
1987
|
|
1988 Returns B<FingerprintsString> for current data line.
|
|
1989
|
|
1990 =item B<GetHeaderDataKeyValue>
|
|
1991
|
|
1992 $KeyValue = $FingerprintsFPFileIO->GetHeaderDataKeyValue($Key);
|
|
1993
|
|
1994 Returns B<KeyValue> of a data header I<Key>.
|
|
1995
|
|
1996 =item B<GetHeaderDataKeys>
|
|
1997
|
|
1998 @Keys = $FingerprintsFPFileIO->GetHeaderDataKeys();
|
|
1999 $NumOfKeys = $FingerprintsFPFileIO->GetHeaderDataKeys();
|
|
2000
|
|
2001 Returns an array of data header B<Keys> retrieved from data header section of fingerprints
|
|
2002 file. In scalar context, it returns number of keys.
|
|
2003
|
|
2004 =item B<GetHeaderDataKeysAndValues>
|
|
2005
|
|
2006 %KeysAndValues = $FingerprintsFPFileIO->GetHeaderDataKeysAndValues();
|
|
2007
|
|
2008 Returns a hash of data header keys and values retrieved from data header section of fingerprints
|
|
2009 file.
|
|
2010
|
|
2011 =item B<GetPartialFingerprintsString>
|
|
2012
|
|
2013 $FingerprintsString = $FingerprintsFPFileIO->GetPartialFingerprintsString();
|
|
2014
|
|
2015 Returns partial B<FingerprintsString> for current data line. It corresponds to fingerprints string
|
|
2016 present in a line.
|
|
2017
|
|
2018 =item B<GetRequiredHeaderDataKeys>
|
|
2019
|
|
2020 @Keys = $FingerprintsFPFileIO->GetRequiredHeaderDataKeys();
|
|
2021 $NumOfKeys = $FingerprintsFPFileIO->GetRequiredHeaderDataKeys();
|
|
2022
|
|
2023 Returns an array of required data header B<Keys> for a fingerprints file containing bit-vector or
|
|
2024 vector strings data. In scalar context, it returns number of keys.
|
|
2025
|
|
2026 =item B<GetRequiredHeaderDataKeysAndValues>
|
|
2027
|
|
2028 %KeysAndValues = $FingerprintsFPFileIO->
|
|
2029 GetRequiredHeaderDataKeysAndValues();
|
|
2030
|
|
2031 Returns a hash of required data header keys and values for a fingerprints file containing bit-vector or
|
|
2032 vector strings data.
|
|
2033
|
|
2034 =item B<IsFingerprintsDataValid>
|
|
2035
|
|
2036 $Status = $FingerprintsFPFileIO->IsFingerprintsDataValid();
|
|
2037
|
|
2038 Returns 1 or 0 based on whether B<FingerprintsObject> is valid.
|
|
2039
|
|
2040 =item B<IsFingerprintsFPFile>
|
|
2041
|
|
2042 $Status = $FingerprintsFPFileIO->IsFingerprintsFPFile($FileName);
|
|
2043 $Status = FileIO::FingerprintsFPFileIO::IsFingerprintsFPFile($FileName);
|
|
2044
|
|
2045 Returns 1 or 0 based on whether I<FileName> is a FP file.
|
|
2046
|
|
2047 =item B<IsFingerprintsFileDataValid>
|
|
2048
|
|
2049 $Status = $FingerprintsFPFileIO->IsFingerprintsFileDataValid();
|
|
2050
|
|
2051 Returns 1 or 0 based on whether fingerprints file contains valid fingerprints data.
|
|
2052
|
|
2053 =item B<IsHeaderDataKeyPresent>
|
|
2054
|
|
2055 $Status = $FingerprintsFPFileIO->IsHeaderDataKeyPresent($Key);
|
|
2056
|
|
2057 Returns 1 or 0 based on whether data header I<Key> is present in data header
|
|
2058 section of a FP file.
|
|
2059
|
|
2060 =item B<Next or Read>
|
|
2061
|
|
2062 $FingerprintsFPFileIO = $FingerprintsFPFileIO->Next();
|
|
2063 $FingerprintsFPFileIO = $FingerprintsFPFileIO->Read();
|
|
2064
|
|
2065 Reads next available fingerprints line in FP file, processes the data, generates appropriate fingerprints
|
|
2066 object, and returns B<FingerprintsFPFileIO>. The generated fingerprints object is available using
|
|
2067 method B<GetFingerprints>.
|
|
2068
|
|
2069 =item B<SetBitStringFormat>
|
|
2070
|
|
2071 $FingerprintsFPFileIO->SetBitStringFormat($Format);
|
|
2072
|
|
2073 Sets bit string I<Format> for fingerprints bit-vector string data in a FP file and returns B<FingerprintsFPFileIO>.
|
|
2074 Possible values for B<BitStringFormat>: I<BinaryString or HexadecimalString>.
|
|
2075
|
|
2076 =item B<SetBitsOrder>
|
|
2077
|
|
2078 $FingerprintsFPFileIO->SetBitsOrder($BitsOrder);
|
|
2079
|
|
2080 Sets I<BitsOrder> for fingerprints bit-vector string data in a FP file and returns B<FingerprintsFPFileIO>.
|
|
2081 Possible values for B<BitsOrder>: I<Ascending or Descending>.
|
|
2082
|
|
2083 =item B<SetCompoundID>
|
|
2084
|
|
2085 $FingerprintsFPFileIO->SetCompoundID($ID);
|
|
2086
|
|
2087 Sets compound ID for current data line and returns B<FingerprintsFPFileIO>. Spaces are not allowed
|
|
2088 in compound IDs.
|
|
2089
|
|
2090 =item B<SetDetailLevel>
|
|
2091
|
|
2092 $FingerprintsFPFileIO->SetDetailLevel($Level);
|
|
2093
|
|
2094 Sets details I<Level> for generating diagnostics messages during FP file processing and returns
|
|
2095 B<FingerprintsFPFileIO>. Possible values: I<Positive integers>.
|
|
2096
|
|
2097 =item B<SetFingerprints>
|
|
2098
|
|
2099 $FingerprintsFPFileIO->SetFingerprints($FingerprintsObject);
|
|
2100
|
|
2101 Sets I<FingerprintsObject> for current data line and returns B<FingerprintsFPFileIO>.
|
|
2102
|
|
2103 =item B<SetFingerprintsString>
|
|
2104
|
|
2105 $FingerprintsFPFileIO->SetFingerprintsString($FingerprintsString);
|
|
2106
|
|
2107 Sets I<FingerprintsString> for current data line and returns B<FingerprintsFPFileIO>.
|
|
2108
|
|
2109 =item B<SetFingerprintsStringMode>
|
|
2110
|
|
2111 $FingerprintsFPFileIO->SetFingerprintsStringMode($Mode);
|
|
2112
|
|
2113 Sets I<FingerprintsStringMode> for FP file and returns B<FingerprintsFPFileIO>.
|
|
2114 Possible values: I<AutoDetect, FingerprintsBitVectorString, or FingerprintsVectorString>.
|
|
2115
|
|
2116 =item B<SetPartialFingerprintsString>
|
|
2117
|
|
2118 $FingerprintsFPFileIO->SetPartialFingerprintsString($PartialString);
|
|
2119
|
|
2120 Sets I<PartialFingerprintsString> for current data line and returns B<FingerprintsFPFileIO>.
|
|
2121
|
|
2122 =item B<SetVectorStringFormat>
|
|
2123
|
|
2124 $FingerprintsFPFileIO->SetVectorStringFormat($Format);
|
|
2125
|
|
2126 Sets I<VectorStringFormat> for FP file and returns B<FingerprintsFPFileIO>. Possible values:
|
|
2127 I<IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString>.
|
|
2128
|
|
2129 =item B<WriteFingerprints>
|
|
2130
|
|
2131 $FingerprintsFPFileIO->WriteFingerprints($FingerprintsObject,
|
|
2132 $CompoundID);
|
|
2133
|
|
2134 Writes fingerprints string generated from I<FingerprintsObject> object and other data including
|
|
2135 I<CompoundID> to FP file and returns B<FingerprintsFPFileIO>.
|
|
2136
|
|
2137 =item B<WriteFingerprintsString>
|
|
2138
|
|
2139 $FingerprintsFPFileIO->WriteFingerprintsString($FingerprintsString,
|
|
2140 $CompoundID);
|
|
2141
|
|
2142 Writes I<FingerprintsString> and other data including I<CompoundID> to FP file and returns
|
|
2143 B<FingerprintsFPFileIO>.
|
|
2144
|
|
2145 Caveats:
|
|
2146
|
|
2147 o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat
|
|
2148 values are ignored during writing of fingerprints and it's written to
|
|
2149 the file as it is.
|
|
2150 o FingerprintsString is a regular fingerprints string as opposed to a
|
|
2151 partial fingerprints string.
|
|
2152
|
|
2153 =back
|
|
2154
|
|
2155 =head1 AUTHOR
|
|
2156
|
|
2157 Manish Sud <msud@san.rr.com>
|
|
2158
|
|
2159 =head1 SEE ALSO
|
|
2160
|
|
2161 FingerprintsSDFileIO.pm, FingerprintsTextFileIO.pm
|
|
2162
|
|
2163 =head1 COPYRIGHT
|
|
2164
|
|
2165 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
2166
|
|
2167 This file is part of MayaChemTools.
|
|
2168
|
|
2169 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
2170 the terms of the GNU Lesser General Public License as published by the Free
|
|
2171 Software Foundation; either version 3 of the License, or (at your option)
|
|
2172 any later version.
|
|
2173
|
|
2174 =cut
|