Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/FileIO/FingerprintsFPFileIO.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 package FileIO::FingerprintsFPFileIO; | |
2 # | |
3 # $RCSfile: FingerprintsFPFileIO.pm,v $ | |
4 # $Date: 2015/02/28 20:48:43 $ | |
5 # $Revision: 1.19 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability or fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use Scalar::Util (); | |
33 use TextUtil (); | |
34 use FileUtil (); | |
35 use TimeUtil (); | |
36 use Fingerprints::FingerprintsStringUtil (); | |
37 use PackageInfo (); | |
38 use FileIO::FileIO; | |
39 | |
40 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
41 | |
42 @ISA = qw(FileIO::FileIO Exporter); | |
43 @EXPORT = qw(); | |
44 @EXPORT_OK = qw(IsFingerprintsFPFile); | |
45 | |
46 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); | |
47 | |
48 # Setup class variables... | |
49 my($ClassName); | |
50 _InitializeClass(); | |
51 | |
# Class constructor: asks the parent class for the underlying object, blesses
# it into this class, assigns default object data, and finally applies the
# property names and values supplied by the caller.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Parent class creates the object; claim it for this class...
  my $This = bless $Class->SUPER::new(), (ref($Class) || $Class);

  # Defaults first, caller specified properties second...
  $This->_InitializeFingerprintsFPFileIO();
  $This->_InitializeFingerprintsFPFileIOProperties(%NamesAndValues);

  return $This;
}
65 | |
# Assign default values to the object data shared by read and write
# operations, then invoke the read and write specific initializers.
#
# Returns the object itself.
#
sub _InitializeFingerprintsFPFileIO {
  my $This = shift;

  # Values which stay undefined until set by caller or by IO operations:
  #
  #   FingerprintsStringMode - fingerprints string data format during read/write.
  #       Read:  AutoDetect, FingerprintsBitVectorString, or FingerprintsVectorString
  #       Write: FingerprintsBitVectorString or FingerprintsVectorString
  #   FingerprintsObject - fingerprints bit-vector or vector object for current
  #       fingerprints string; during write, any supported fingerprints object:
  #       PathLengthFingerprints, ExtendedConnectivity, and so on
  #   FingerprintsString - complete fingerprints string for current line
  #   PartialFingerprintsString - portion of fingerprints string present on
  #       current line
  #   DataLine - FP line data during read/write
  #
  for my $Key (qw(FingerprintsStringMode FingerprintsObject FingerprintsString PartialFingerprintsString DataLine)) {
    $This->{$Key} = undef;
  }

  # Required header data keys and values during read/write...
  @{ $This->{RequiredHeaderDataKeys} } = ();
  %{ $This->{RequiredHeaderDataKeysAndValues} } = ();

  # Flag indicating first data line read/write is still pending...
  $This->{FirstDataLineIO} = 1;

  # Current fingerprints string data line number during read/write...
  $This->{LineNum} = 0;

  # Mode specific parameters: read first, write second...
  $This->_InitializeFingerprintsFPFileIORead();
  $This->_InitializeFingerprintsFPFileIOWrite();

  return $This;
}
129 | |
# Set up class level data: the package name is kept in $ClassName for use
# in error and warning messages...
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}
136 | |
# Assign default values to object data used while reading a fingerprints
# FP file.
#
# Returns the object itself.
#
sub _InitializeFingerprintsFPFileIORead {
  my $This = shift;

  # Start with empty header data keys and values...
  @{ $This->{HeaderDataKeys} } = ();
  for my $Key (qw(HeaderDataKeysAndValues CannonicalHeaderDataKeysAndValues)) {
    %{ $This->{$Key} } = ();
  }

  my %Defaults = (
    # Validation of fingerprints data lines is turned on by default...
    ValidateData => 1,

    # Level of detail to print during validation for invalid or missing data...
    DetailLevel => 1,

    # Number of fingerprints string data lines with missing and invalid data...
    NumOfLinesWithMissingData => 0,
    NumOfLinesWithInvalidData => 0,

    # Compound ID for current fingerprints string...
    CompoundID => undef,

    # Status of data in fingerprints FP file...
    ValidFileData => 0,
    ValidRequiredHeaderDataKeys => 0,
    ValidFingerprintsStringMode => 0,
  );
  for my $Key (keys %Defaults) {
    $This->{$Key} = $Defaults{$Key};
  }

  return $This;
}
170 | |
# Assign default values to object data used while writing a fingerprints
# FP file.
#
# Returns the object itself.
#
sub _InitializeFingerprintsFPFileIOWrite {
  my $This = shift;

  # Output format parameters left undefined here:
  #
  #   BitStringFormat - fingerprints bit vector string format. Possible values:
  #       BinaryString or HexadecimalString [Default]. Default is set during first
  #       write using Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat.
  #
  #   BitsOrder - bits order in fingerprints bit vector string:
  #       Ascending - First bit in each byte as the lowest bit [Default]
  #       Descending - First bit in each byte as the highest bit
  #       Default is set during first write using
  #       Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder.
  #
  #   VectorStringFormat - fingerprints vector string format. Possible values:
  #       IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString,
  #       ValuesAndIDsPairsString, ValuesString. Default is set during first write
  #       using Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat:
  #       IDsAndValuesString for fingerprints vector object containing vector
  #       NumericalValues; otherwise, ValuesString.
  #
  for my $Key (qw(BitStringFormat BitsOrder VectorStringFormat)) {
    $This->{$Key} = undef;
  }

  # Existing files are not overwritten by default...
  $This->{Overwrite} = 0;

  return $This;
}
208 | |
# Apply caller specified property names and values to the object and finish
# initialization for the requested file mode.
#
# Each property is assigned through its Set<PropertyName> method; methods not
# defined explicitly in this class are implemented on-demand using
# ObjectProperty class. A Name property corresponding to a fingerprints FP
# file must be present; otherwise, it croaks.
#
# Returns the object itself.
#
sub _InitializeFingerprintsFPFileIOProperties {
  my($This, %NamesAndValues) = @_;

  for my $PropertyName (keys %NamesAndValues) {
    my $SetMethodName = "Set${PropertyName}";
    $This->$SetMethodName($NamesAndValues{$PropertyName});
  }

  unless (exists $NamesAndValues{Name}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";
  }

  # Make sure it's a fingerprints file...
  my $Name = $NamesAndValues{Name};
  unless ($This->IsFingerprintsFPFile($Name)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be fingerprints format...";
  }

  # Complete mode specific setup...
  if ($This->GetMode() =~ /^Read$/i) {
    $This->_InitializeFingerprintsFPFileIOReadProperties(%NamesAndValues);
  }
  elsif ($This->GetMode() =~ /^(Write|Append)$/i) {
    $This->_InitializeFingerprintsFPFileIOWriteProperties(%NamesAndValues);
  }

  return $This;
}
241 | |
# Finish setting up object properties for reading a fingerprints FP file:
# FingerprintsStringMode falls back to AutoDetect when it has not been set
# and the file is then prepared for reading.
#
# Returns the object itself.
#
sub _InitializeFingerprintsFPFileIOReadProperties {
  my($This, %NamesAndValues) = @_;

  # Default value for FingerprintsStringMode...
  $This->{FingerprintsStringMode} ||= 'AutoDetect';

  $This->_PrepareForReadingFingerprintsFPFileData();

  return $This;
}
256 | |
# Finish setting up object properties for writing a fingerprints FP file.
#
# FingerprintsStringMode must have been specified explicitly and must be
# FingerprintsBitVectorString or FingerprintsVectorString; otherwise, it
# croaks. The file is then prepared for writing.
#
# Returns the object itself.
#
sub _InitializeFingerprintsFPFileIOWriteProperties {
  my($This, %NamesAndValues) = @_;

  # FingerprintsStringMode has no default for write/append...
  unless (exists $NamesAndValues{FingerprintsStringMode}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode...";
  }

  unless ($This->{FingerprintsStringMode} =~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  $This->_PrepareForWritingFingerprintsFPFileData();

  return $This;
}
275 | |
# Set FingerprintsStringMode after validating the specified value.
#
# Supported values, matched case-insensitively:
#
#   AutoDetect - automatically detect format of fingerprints string
#   FingerprintsBitVectorString - Bit vector fingerprints string format
#   FingerprintsVectorString - Vector fingerprints string format
#
# Croaks for any other value. Returns the object itself.
#
sub SetFingerprintsStringMode {
  my($This, $Value) = @_;

  unless ($Value =~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString...";
  }
  $This->{FingerprintsStringMode} = $Value;

  return $This;
}
293 | |
# Set DetailLevel, the level of detail printed during validation of data.
#
# The value must satisfy TextUtil::IsPositiveInteger; otherwise, it croaks.
# Returns the object itself.
#
sub SetDetailLevel {
  my($This, $Value) = @_;

  unless (TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0...";
  }
  $This->{DetailLevel} = $Value;

  return $This;
}
307 | |
# Set BitStringFormat after validating the specified value.
#
# Supported values, matched case-insensitively: BinaryString or
# HexadecimalString. Croaks for any other value. Returns the object itself.
#
sub SetBitStringFormat {
  my($This, $Value) = @_;

  unless ($Value =~ /^(BinaryString|HexadecimalString)$/i) {
    croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";
  }
  $This->{BitStringFormat} = $Value;

  return $This;
}
321 | |
# Set BitsOrder after validating the specified value.
#
# Supported values, matched case-insensitively:
#
#   Ascending - First bit in each byte as the lowest bit
#   Descending - First bit in each byte as the highest bit
#
# Croaks for any other value. Returns the object itself.
#
sub SetBitsOrder {
  my($This, $Value) = @_;

  if ($Value !~ /^(Ascending|Descending)$/i) {
    # Error message names the property actually being set: BitsOrder...
    croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
  }

  $This->{BitsOrder} = $Value;

  return $This;
}
338 | |
# Set compound ID for current fingerprints string.
#
# Space characters are not allowed in compound ID: any present in the
# specified value are removed and a warning is issued. Returns the object
# itself.
#
sub SetCompoundID {
  my($This, $Value) = @_;

  my $ContainsSpaces = ($Value =~ / /) ? 1 : 0;
  if ($ContainsSpaces) {
    $Value =~ s/ //g;
    carp "Warning: ${ClassName}->SetCompoundID: Spaces are not allowed in compound ID; They have been removed...";
  }
  $This->{CompoundID} = $Value;

  return $This;
}
353 | |
# Set VectorStringFormat after validating the specified value.
#
# Supported values, matched case-insensitively: IDsAndValuesString,
# IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or
# ValuesString.
#
# Croaks for any other value. Returns the object itself.
#
sub SetVectorStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
    # Error message names the property actually being set: VectorStringFormat...
    croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
  }

  $This->{VectorStringFormat} = $Value;

  return $This;
}
369 | |
# Get header data keys in list context or number of header data keys in
# scalar context...
#
sub GetHeaderDataKeys {
  my $This = shift;

  if (wantarray) {
    return @{ $This->{HeaderDataKeys} };
  }
  return scalar @{ $This->{HeaderDataKeys} };
}
377 | |
# Setting header data keys is not allowed: this method always croaks...
#
sub SetHeaderDataKeys {
  my($This, @Keys) = @_;

  croak "Error: ${ClassName}->SetHeaderDataKeys: Can't set HeaderDataKeys: Not allowed...";
}
387 | |
# Get a copy of header data keys and values as a hash...
#
sub GetHeaderDataKeysAndValues {
  my $This = shift;

  return %{ $This->{HeaderDataKeysAndValues} };
}
395 | |
# Setting header data keys and values is not allowed: this method always
# croaks...
#
sub SetHeaderDataKeysAndValues {
  my($This, %KeysAndValues) = @_;

  croak "Error: ${ClassName}->SetHeaderDataKeysAndValues: Can't set HeaderDataKeysAndValues: Not allowed...";
}
405 | |
# Get required header data keys in list context or number of required header
# data keys in scalar context...
#
sub GetRequiredHeaderDataKeys {
  my $This = shift;

  if (wantarray) {
    return @{ $This->{RequiredHeaderDataKeys} };
  }
  return scalar @{ $This->{RequiredHeaderDataKeys} };
}
413 | |
# Setting required header data keys is not allowed: this method always
# croaks...
#
sub SetRequiredHeaderDataKeys {
  my($This, @Keys) = @_;

  croak "Error: ${ClassName}->SetRequiredHeaderDataKeys: Can't set RequiredHeaderDataKeys: Not allowed...";
}
423 | |
# Get a copy of required header data keys and values as a hash...
#
sub GetRequiredHeaderDataKeysAndValues {
  my $This = shift;

  return %{ $This->{RequiredHeaderDataKeysAndValues} };
}
431 | |
# Setting required header data keys and values is not allowed: this method
# always croaks...
#
sub SetRequiredHeaderDataKeysAndValues {
  my($This, %KeysAndValues) = @_;

  croak "Error: ${ClassName}->SetRequiredHeaderDataKeysAndValues: Can't set RequiredHeaderDataKeysAndValues: Not allowed...";
}
441 | |
# Get fingerprints object stored for current data line; it's undefined when
# no object is available...
#
sub GetFingerprints {
  my $This = shift;

  return $This->{FingerprintsObject};
}
449 | |
# Store specified fingerprints object for current data line and return the
# object itself...
#
sub SetFingerprints {
  my $This = shift;
  my $FingerprintsObject = shift;

  $This->{FingerprintsObject} = $FingerprintsObject;

  return $This;
}
459 | |
# Get fingerprints string for current data line; the literal string 'None' is
# returned when stored value is false (undefined, empty, or 0)...
#
sub GetFingerprintsString {
  my $This = shift;

  return $This->{FingerprintsString} || 'None';
}
467 | |
# Store specified fingerprints string for current data line and return the
# object itself...
#
sub SetFingerprintsString {
  my $This = shift;
  my $FingerprintsString = shift;

  $This->{FingerprintsString} = $FingerprintsString;

  return $This;
}
477 | |
# Get partial fingerprints string for current data line; the literal string
# 'None' is returned when stored value is false (undefined, empty, or 0)...
#
sub GetPartialFingerprintsString {
  my $This = shift;

  return $This->{PartialFingerprintsString} || 'None';
}
485 | |
# Store specified partial fingerprints string for current data line and
# return the object itself...
#
sub SetPartialFingerprintsString {
  my $This = shift;
  my $PartialFingerprintsString = shift;

  $This->{PartialFingerprintsString} = $PartialFingerprintsString;

  return $This;
}
495 | |
# Does fingerprints FP file contain valid data? Returns 1 when ValidFileData
# status is true; otherwise, 0...
#
sub IsFingerprintsFileDataValid {
  my $This = shift;

  return 1 if $This->{ValidFileData};

  return 0;
}
503 | |
# Does current data line contain valid fingerprints object data? Returns 1
# when a fingerprints object is defined for current line; otherwise, 0...
#
sub IsFingerprintsDataValid {
  my $This = shift;

  return 1 if defined $This->{FingerprintsObject};

  return 0;
}
511 | |
# Check presence of a header data key. The lookup is case-insensitive: the
# specified key is lowercased before checking canonical header data.
#
# Returns 1 when the key is present; otherwise, 0.
#
sub IsHeaderDataKeyPresent {
  my($This, $Key) = @_;

  if (exists $This->{CannonicalHeaderDataKeysAndValues}{lc $Key}) {
    return 1;
  }
  return 0;
}
522 | |
# Get value of a header data key. The lookup is case-insensitive: the
# specified key is lowercased before accessing canonical header data.
#
# Returns undef when the key is not present.
#
sub GetHeaderDataKeyValue {
  my($This, $Key) = @_;

  my $LowerCaseKey = lc $Key;
  if (exists $This->{CannonicalHeaderDataKeysAndValues}{$LowerCaseKey}) {
    return $This->{CannonicalHeaderDataKeysAndValues}{$LowerCaseKey};
  }

  return undef;
}
533 | |
#
# Read next available fingerprints line, process it, and generate appropriate
# fingerprints object.
#
# Returns undef when no more data lines are available; otherwise, the object
# itself. The fingerprints object for the line is left undefined when file
# data is invalid, line validation fails, or no fingerprints string could be
# set up for the line.
#
sub Read {
  my $This = shift;

  # Any more data lines?
  $This->_ReadDataLine() or return undef;

  # Lines in a FP file with invalid data are not processed any further; they
  # are simply counted as missing data during validation...
  unless ($This->{ValidFileData}) {
    $This->{NumOfLinesWithMissingData} += 1 if $This->{ValidateData};
    return $This;
  }

  # Perform data validation...
  if ($This->{ValidateData} && !$This->_ValidateReadDataLine()) {
    return $This;
  }

  # Check again to handle problematic data for non-validated data lines...
  return $This unless $This->{FingerprintsString};

  # Generate fingerprints object and assign compound ID to it...
  $This->_GenerateFingerprintsObject();
  $This->_GenerateCompoundID();

  return $This;
}
574 | |
# Alias for Read: read next available fingerprints line, process it, and
# generate appropriate fingerprints object...
#
sub Next {
  my $This = shift;

  return $This->Read();
}
583 | |
# Read next fingerprints data line into DataLine and process it.
#
# Returns 0 when no data line is available, which is treated as end of file;
# otherwise, 1.
#
sub _ReadDataLine {
  my $This = shift;

  # Clear data corresponding to previous line...
  $This->_InitializeReadDataLine();

  if (!$This->{FirstDataLineIO}) {
    # Get next data line...
    $This->{LineNum} += 1;
    $This->{DataLine} = TextUtil::GetTextLine($This->{FileHandle});
  }
  else {
    # First read skips over header data lines to get first data line...
    $This->_ProcessFirstDataLineRead();
  }

  # Is it end of file?
  return 0 unless $This->{DataLine};

  # Retrieve compound ID and fingerprints string information...
  $This->_ProcessDataLineRead();

  return 1;
}
612 | |
# Process current data line to retrieve compound ID and partial fingerprints
# string.
#
# Data line format is: compound ID, one or more spaces, partial fingerprints
# string. The compound ID is everything before first run of spaces. For lines
# not matching this format, nothing is set. Returns the object itself.
#
sub _ProcessDataLineRead {
  my $This = shift;

  unless ($This->{DataLine} =~ /^(.*?)[ ]+(.*?)$/) {
    return $This;
  }
  my($ID, $PartialString) = ($1, $2);

  $This->{CompoundID} = $ID;
  $This->{PartialFingerprintsString} = $PartialString;

  # Set up complete fingerprints string...
  $This->_GenerateFingerprintsStringFromPartialFingerprintsString();

  return $This;
}
633 | |
# Reset all data corresponding to current line before reading next data
# line. Returns the object itself.
#
sub _InitializeReadDataLine {
  my $This = shift;

  for my $Key (qw(CompoundID DataLine FingerprintsObject FingerprintsString PartialFingerprintsString)) {
    $This->{$Key} = undef;
  }

  return $This;
}
649 | |
# Validate current fingerprints string data line.
#
# A line without compound ID or partial fingerprints string is counted as
# missing data; a line without fingerprints string, or with fingerprints
# string values rejected by
# Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid, is
# counted as invalid data. Warnings are issued for DetailLevel >= 2 and
# include the data line itself for DetailLevel >= 3.
#
# Returns 1 for valid data line; otherwise, 0.
#
sub _ValidateReadDataLine {
  my $This = shift;
  my $DetailLevel = $This->{DetailLevel};

  # Check for missing data...
  unless ($This->{CompoundID} && $This->{PartialFingerprintsString}) {
    $This->{NumOfLinesWithMissingData} += 1;
    if ($DetailLevel >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data: $This->{DataLine}...";
    }
    elsif ($DetailLevel >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data...";
    }
    return 0;
  }

  # Check for invalid data...
  my $ValidFingerprintsData = 0;
  if ($This->{FingerprintsString}) {
    $ValidFingerprintsData = Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($This->{FingerprintsString}) ? 1 : 0;
  }
  return 1 if $ValidFingerprintsData;

  $This->{NumOfLinesWithInvalidData} += 1;
  if ($DetailLevel >= 3) {
    carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data: $This->{DataLine}...";
  }
  elsif ($DetailLevel >= 2) {
    carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data...";
  }

  return 0;
}
692 | |
# Assign compound ID for current line to fingerprints object using its SetID
# method; nothing is done when no fingerprints object is available. Returns
# the object itself.
#
sub _GenerateCompoundID {
  my $This = shift;

  return $This unless $This->{FingerprintsObject};

  # Set fingerprints ID...
  $This->{FingerprintsObject}->SetID($This->{CompoundID});

  return $This;
}
704 | |
# Process first read: skip over header data lines, which start with a hash
# mark, and store first data line in DataLine. LineNum is incremented for
# each line retrieved, including header data lines. DataLine is left
# unchanged when no data line is found. Returns the object itself.
#
sub _ProcessFirstDataLineRead {
  my $This = shift;

  # Subsequent reads retrieve lines directly...
  $This->{FirstDataLineIO} = 0;

  while (my $Line = TextUtil::GetTextLine($This->{FileHandle})) {
    $This->{LineNum} += 1;

    # Ignore header data lines...
    next if $Line =~ /^#/;

    $This->{DataLine} = $Line;
    last;
  }

  return $This;
}
728 | |
# Get ready for reading fingerprints FP file: retrieve header data, validate
# it, and record overall status of file data in ValidFileData. Returns the
# object itself.
#
sub _PrepareForReadingFingerprintsFPFileData {
  my $This = shift;

  # Retrieve and validate FP file data headers information...
  $This->_RetrieveFPFileDataHeaders();
  $This->_ValidateReadHeaderDataKeysAndValues();

  # Fingerprints string mode is validated only for valid required header data...
  $This->_ValidateReadFingerprintsStringMode() if $This->{ValidRequiredHeaderDataKeys};

  # File data is valid only when both validations have succeeded...
  if ($This->{ValidRequiredHeaderDataKeys} && $This->{ValidFingerprintsStringMode}) {
    $This->{ValidFileData} = 1;
  }
  else {
    $This->{ValidFileData} = 0;
  }

  return $This;
}
750 | |
# Retrieve information about fingerprints data header in FP file.
#
# Header data lines are the lines at the start of the file beginning with a
# hash mark; processing stops at the first line without one. Each header line
# contains "Key = Value" pairs delimited by semicolons. Keys are collected in
# HeaderDataKeys in the order they appear; values are stored both by original
# key in HeaderDataKeysAndValues and by lowercased key in
# CannonicalHeaderDataKeysAndValues.
#
# A header line containing any pair with an empty key or value is ignored as
# a whole with a warning: none of its keys or values are stored.
#
# Croaks when the file doesn't exist or can't be opened. Returns the object
# itself.
#
sub _RetrieveFPFileDataHeaders {
  my($This) = @_;
  my($FPFile, $FPFileHandle, $Line, $Index, $KeyValuePair, $Key, $Value, $KeyValueDelimiter, $KeyValuePairDelimiter, @LineKeys, @LineKeyValuePairs);

  $FPFile = $This->{Name};

  if (!(-e $FPFile)) {
    croak "Error: ${ClassName}->_RetrieveFPFileDataHeaders: File, $FPFile, doesn't exist...";
  }

  # Three-argument open on a lexical file handle: file name is never interpreted
  # as a mode or a command and no package level file handle is clobbered...
  if (!open $FPFileHandle, '<', $FPFile) {
    croak "Error: ${ClassName}->_RetrieveFPFileDataHeaders: Couldn't open input FP file $FPFile: $! ...";
  }

  # Process header key/value pair data...
  #
  $KeyValueDelimiter = '=';
  $KeyValuePairDelimiter = ';';

  @{$This->{HeaderDataKeys}} = ();
  %{$This->{HeaderDataKeysAndValues}} = ();
  %{$This->{CannonicalHeaderDataKeysAndValues}} = ();

  LINE: while ($Line = TextUtil::GetTextLine($FPFileHandle)) {
    # Is it a key/value pairs line?
    if ($Line !~ /^#/) {
      last LINE;
    }

    # Take out starting hash mark before processing key/value pairs...
    $Line =~ s/^#//;
    if (TextUtil::IsEmpty($Line)) {
      next LINE;
    }

    @LineKeys = ();
    @LineKeyValuePairs = ();

    for $KeyValuePair (split "$KeyValuePairDelimiter", $Line) {
      ($Key, $Value) = split "$KeyValueDelimiter", $KeyValuePair;

      $Key = defined($Key) ? TextUtil::RemoveLeadingAndTrailingWhiteSpaces($Key) : '';
      $Value = defined($Value) ? TextUtil::RemoveLeadingAndTrailingWhiteSpaces($Value) : '';

      if (TextUtil::IsEmpty($Key) || TextUtil::IsEmpty($Value)) {
        carp "Warning: ${ClassName}->_RetrieveFPFileDataHeaders: Data header line containing \"Key = Value\" pairs is not valid: It must contain even number of \"Key = Value\" pairs with valid values. Ignoring data header line: \"$Line\"...";
        next LINE;
      }
      push @LineKeys, $Key;
      push @LineKeyValuePairs, ($Key, $Value);
    }

    # Keys are recorded only after the whole line has been validated, so that
    # HeaderDataKeys never lists keys of an ignored line without stored values...
    push @{$This->{HeaderDataKeys}}, @LineKeys;

    for ($Index = 0; $Index < $#LineKeyValuePairs; $Index += 2) {
      $Key = $LineKeyValuePairs[$Index]; $Value = $LineKeyValuePairs[$Index + 1];

      $This->{HeaderDataKeysAndValues}{$Key} = $Value;
      $This->{CannonicalHeaderDataKeysAndValues}{lc($Key)} = $Value;
    }
  }
  close $FPFileHandle;

  return $This;
}
815 | |
# Validate header data keys and values retrieved from FP file.
#
# FingerprintsStringType header key must be present with a value of
# FingerprintsBitVector or FingerprintsVector; it determines the list of
# required header data keys, obtained using _GetRequiredHeaderDataKeys. All
# required keys must be present and their values must be valid.
#
# On success, RequiredHeaderDataKeys is set up, required header data key
# values are processed, and ValidRequiredHeaderDataKeys is set to 1.
#
# Returns 1 for valid header data; otherwise, 0, after issuing a warning.
# Croaks when a required header data key is missing.
#
sub _ValidateReadHeaderDataKeysAndValues {
  my($This) = @_;
  my($FingerprintsStringType, $Key, @RequiredHeaderDataKeys);

  $This->{ValidRequiredHeaderDataKeys} = 0;
  @{$This->{RequiredHeaderDataKeys}} = ();

  # Is FingerprintsStringType key present?
  if (!$This->IsHeaderDataKeyPresent('FingerprintsStringType')) {
    carp "Warning: ${ClassName}->_ValidateReadHeaderDataKeysAndValues: FingerprintsStringType data header key is missing in fingerprints file...";
    return 0;
  }
  $FingerprintsStringType = $This->GetHeaderDataKeyValue('FingerprintsStringType');

  # Are all required data header keys present?
  #
  @RequiredHeaderDataKeys = ();

  if ($FingerprintsStringType =~ /^(FingerprintsBitVector|FingerprintsVector)$/i) {
    push @RequiredHeaderDataKeys, $This->_GetRequiredHeaderDataKeys($FingerprintsStringType);
  }
  else {
    carp "Warning: ${ClassName}->_ValidateReadHeaderDataKeysAndValues: FingerprintsStringType data header key value, $FingerprintsStringType, is not valid. Supported values: FingerprintsBitVector or FingerprintsVector...";
    return 0;
  }

  for $Key (@RequiredHeaderDataKeys) {
    if (!$This->IsHeaderDataKeyPresent($Key)) {
      croak "Error: ${ClassName}->_ValidateReadHeaderDataKeysAndValues: Required data header key, $Key, is missing in fingerprints file...";
    }
  }

  push @{$This->{RequiredHeaderDataKeys}}, @RequiredHeaderDataKeys;

  # Are all required data header key values valid?
  #
  if (!$This->_ValidateRequiredHeaderDataKeyValues()) {
    return 0;
  }

  # Process required header key values...
  #
  $This->_ProcessRequiredHeaderDataKeyValues();

  $This->{ValidRequiredHeaderDataKeys} = 1;

  return 1;
}
866 | |
# Validate values of all required data header keys.
#
# Each key in RequiredHeaderDataKeys is matched case-insensitively against
# supported key names and its value is checked against values supported for
# that key:
#
#   FingerprintsStringType - FingerprintsBitVector or FingerprintsVector
#   Size - a positive integer
#   BitStringFormat - BinaryString or HexadecimalString
#   BitsOrder - Ascending or Descending
#   VectorStringFormat - IDsAndValuesString, IDsAndValuesPairsString,
#       ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString
#   VectorValuesType - OrderedNumericalValues, NumericalValues, or
#       AlphaNumericalValues
#   Description - a non-empty text string
#
# Returns 1 when all values are valid; otherwise, a warning is issued for
# first invalid value or unsupported key and 0 is returned.
#
sub _ValidateRequiredHeaderDataKeyValues {
  my($This) = @_;
  my($Key, $Value);

  for $Key (@{$This->{RequiredHeaderDataKeys}}) {
    $Value = $This->GetHeaderDataKeyValue($Key);

    if ($Key =~ /^FingerprintsStringType$/i) {
      if ($Value !~ /^(FingerprintsBitVector|FingerprintsVector)$/i) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: FingerprintsBitVector or FingerprintsVector...";
        return 0;
      }
    }
    elsif ($Key =~ /^Size$/i) {
      if (!TextUtil::IsPositiveInteger($Value)) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: > 0...";
        return 0;
      }
    }
    elsif ($Key =~ /^BitStringFormat$/i) {
      if ($Value !~ /^(BinaryString|HexadecimalString)$/i) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: BinaryString or HexadecimalString ...";
        return 0;
      }
    }
    elsif ($Key =~ /^BitsOrder$/i) {
      if ($Value !~ /^(Ascending|Descending)$/i) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: Ascending or Descending...";
        return 0;
      }
    }
    elsif ($Key =~ /^VectorStringFormat$/i) {
      if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString ...";
        return 0;
      }
    }
    elsif ($Key =~ /^VectorValuesType$/i) {
      if ($Value !~ /^(OrderedNumericalValues|NumericalValues|AlphaNumericalValues)$/i) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value, $Value, is not valid. Supported values: OrderedNumericalValues, NumericalValues or AlphaNumericalValues...";
        return 0;
      }
    }
    elsif ($Key =~ /^Description$/i) {
      if (TextUtil::IsEmpty($Value)) {
        carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key value is not valid. Supported value: A non-empty text string...";
        return 0;
      }
    }
    else {
      # Key doesn't correspond to any supported required data header key...
      carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: Required $Key data header key is not supported...";
      return 0;
    }
  }

  return 1;
}
932 | |
# Cache values of the required header data keys and set up the prefix strings
# used to rebuild a complete fingerprints string from the partial fingerprints
# string specified on a fingerprints data line...
#
sub _ProcessRequiredHeaderDataKeyValues {
  my($This) = @_;
  my($HeaderKey, $StringType);

  # Empty the cache in place and refill it from the header data...
  %{$This->{RequiredHeaderDataKeysAndValues}} = ();
  for $HeaderKey (@{$This->{RequiredHeaderDataKeys}}) {
    $This->{RequiredHeaderDataKeysAndValues}{$HeaderKey} = $This->GetHeaderDataKeyValue($HeaderKey);
  }

  # All prefixes start out empty; only the ones matching the fingerprints
  # string type are filled in below...
  $This->{FingerprintsBitVectorStringPrefix} = '';
  $This->{FingerprintsVectorStringPrefix1} = '';
  $This->{FingerprintsVectorStringPrefix2} = '';

  $StringType = $This->{RequiredHeaderDataKeysAndValues}{FingerprintsStringType};

  if ($StringType =~ /^FingerprintsBitVector$/i) {
    $This->{FingerprintsBitVectorStringPrefix} = $This->_GenerateFingerprintsPrefixUsingKeys(qw(FingerprintsStringType Description Size BitStringFormat BitsOrder));
  }
  elsif ($StringType =~ /^FingerprintsVector$/i) {
    # Vector strings need two prefixes: the number of values read from each
    # data line is placed between them...
    $This->{FingerprintsVectorStringPrefix1} = $This->_GenerateFingerprintsPrefixUsingKeys(qw(FingerprintsStringType Description));
    $This->{FingerprintsVectorStringPrefix2} = $This->_GenerateFingerprintsPrefixUsingKeys(qw(VectorValuesType VectorStringFormat));
  }

  return $This;
}
967 | |
# Join cached values of the specified required header data keys, in the order
# given, using the fingerprints string delimiter...
#
sub _GenerateFingerprintsPrefixUsingKeys {
  my($This, @Keys) = @_;
  my($Separator);

  $Separator = Fingerprints::FingerprintsStringUtil::GetFingeprintsStringDelimiter();

  return join($Separator, map { $This->{RequiredHeaderDataKeysAndValues}{$_} } @Keys);
}
983 | |
# Get required header data keys for the specified fingerprints string type...
#
# Returns the list of required key names for FingerprintsBitVector or
# FingerprintsVector type; for any other value, a warning is issued and an
# empty list is returned.
#
sub _GetRequiredHeaderDataKeys {
  my($This, $FingerprintsStringType) = @_;
  my(@RequiredKeys);

  @RequiredKeys = ();

  # A missing type is reported as an invalid (empty) value...
  if (!defined $FingerprintsStringType) {
    $FingerprintsStringType = '';
  }

  # Match the complete type name: partially anchored patterns would accept
  # values such as XFingerprintsBitVector or FingerprintsVectorFoo...
  if ($FingerprintsStringType =~ /^FingerprintsBitVector$/i) {
    push @RequiredKeys, qw(FingerprintsStringType Description Size BitStringFormat BitsOrder);
  }
  elsif ($FingerprintsStringType =~ /^FingerprintsVector$/i) {
    push @RequiredKeys, qw(FingerprintsStringType Description VectorStringFormat VectorValuesType);
  }
  else {
    carp "Warning: ${ClassName}->_GetRequiredHeaderDataKeys: FingerprintsStringType value, $FingerprintsStringType, is not valid. Supported values: FingerprintsBitVector or FingerprintsVector...";
  }

  return @RequiredKeys;
}
1004 | |
# Validate fingerprints string mode information against the header data...
#
# The FingerprintsStringType header value must agree with an explicitly specified
# FingerprintsStringMode; for any other mode value (AutoDetect), the mode is
# taken from the header value itself.
#
# On success, the mode flags along with type and description of the first
# fingerprints string are stored in the object and 1 is returned. On failure,
# a warning is issued, those values are left reset and 0 is returned.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;
  my($FingerprintsStringType, $FingerprintsStringDescription, $FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType);

  # Reset everything up front so that a failed validation leaves a clean state...
  $This->{ValidFingerprintsStringMode} = 0;
  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;

  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  $FingerprintsBitVectorStringMode = 0;
  $FingerprintsVectorStringMode = 0;
  $FirstFingerprintsStringType = '';

  $FingerprintsStringType = $This->GetHeaderDataKeyValue('FingerprintsStringType');
  $FingerprintsStringDescription = $This->GetHeaderDataKeyValue('Description');

  # A missing type is reported as an invalid (empty) value...
  if (!defined $FingerprintsStringType) {
    $FingerprintsStringType = '';
  }

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    if ($FingerprintsStringType !~ /^FingerprintsBitVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: Fingerprints string data type, $FingerprintsStringType, doesn't correspond to, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsBitVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsBitVector';
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    if ($FingerprintsStringType !~ /^FingerprintsVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: Fingerprints string data type, $FingerprintsStringType, doesn't correspond to, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsVector';
  }
  else {
    # AutoDetect mode: trust the type specified in the header data...
    if ($FingerprintsStringType =~ /^FingerprintsBitVector$/i) {
      $FingerprintsBitVectorStringMode = 1;
    }
    elsif ($FingerprintsStringType =~ /^FingerprintsVector$/i) {
      $FingerprintsVectorStringMode = 1;
    }
    else {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: Fingerprints string data type, $FingerprintsStringType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintsBitVector or FingerprintsVector...";
      return 0;
    }
    # Keep the type exactly as it's spelled in the header data...
    $FirstFingerprintsStringType = $FingerprintsStringType;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;

  # The description always comes straight from the header data...
  $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
  $This->{FirstFingerprintsStringDescription} = $FingerprintsStringDescription;

  return 1;
}
1071 | |
# Write fingerprints string generated from specified fingerprints object -
# fingerprints bit-vector or fingerprints vector - along with compound ID as a
# data line to FP file...
#
sub WriteFingerprints {
  my $This = shift;
  my($FingerprintsObject, $CompoundID) = @_;

  # Start with a clean slate for the line about to be written...
  $This->_InitializeWriteDataLine();

  $This->{FingerprintsObject} = $FingerprintsObject;
  $This->SetCompoundID($CompoundID);

  # Fingerprints object => fingerprints string => partial fingerprints string...
  $This->_GenerateFingerprintsString();
  $This->_GeneratePartialFingerprintsStringFromFingerprintsString();

  $This->_WriteDataLine();

  return $This;
}
1096 | |
# Write specified fingerprints string along with compound ID as a data line to
# FP file...
#
# Notes:
#   o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat values
#     are ignored during writing of the fingerprints string: it's written to the
#     file as it is.
#   o FingerprintsString is a regular fingerprints string as opposed to a partial
#     fingerprints string.
#
sub WriteFingerprintsString {
  my $This = shift;
  my($FingerprintsString, $CompoundID) = @_;

  # Start with a clean slate for the line about to be written...
  $This->_InitializeWriteDataLine();

  $This->{FingerprintsString} = $FingerprintsString;
  $This->SetCompoundID($CompoundID);

  # Fingerprints string => fingerprints object and partial fingerprints string...
  $This->_GenerateFingerprintsObject();
  $This->_GeneratePartialFingerprintsStringFromFingerprintsString();

  $This->_WriteDataLine();

  return $This;
}
1126 | |
# Initialize data line for writing: reset all per line data to undef...
#
sub _InitializeWriteDataLine {
  my($This) = @_;

  # Assigning an empty list to a hash slice sets every listed key to undef...
  @{$This}{qw(DataLine CompoundID FingerprintsObject FingerprintsString PartialFingerprintsString)} = ();

  return $This;
}
1142 | |
# Write fingerprints data line: compound ID followed by a space and the partial
# fingerprints string...
#
sub _WriteDataLine {
  my($This) = @_;
  my($DataLine);

  # Header lines are written out just before the first data line...
  $This->_ProcessFirstDataLineWrite() if $This->{FirstDataLineIO};

  $DataLine = join(' ', $This->{CompoundID}, $This->{PartialFingerprintsString});

  $This->{LineNum} += 1;
  print {$This->{FileHandle}} "$DataLine\n";

  # Keep a copy of the line without the trailing newline...
  $This->{DataLine} = $DataLine;

  return $This;
}
1164 | |
# Process first write: mark first data line as handled and, for Write mode,
# write out the header lines...
#
sub _ProcessFirstDataLineWrite {
  my($This) = @_;

  $This->{FirstDataLineIO} = 0;

  # Any mode other than Write - e.g. append - skips the header lines...
  if ($This->GetMode() !~ /^Write$/i) {
    return $This;
  }

  $This->_WritePackageAndTimeStampHeaderKeys();
  $This->_WriteRequiredHeaderDataKeys();

  return $This;
}
1181 | |
# Write out package and time stamp information as header lines: Package, Release
# Date, an empty header line, and TimeStamp. The line number is advanced for each
# line written...
#
sub _WritePackageAndTimeStampHeaderKeys {
  my($This) = @_;
  my($Out, $PackageValue, $ReleaseDateValue, $TimeStampValue);

  $Out = $This->{FileHandle};

  # Package name and version...
  $This->{LineNum} += 1;
  $PackageValue = PackageInfo::GetPackageName() . " " . PackageInfo::GetVersionNumber();
  print {$Out} "# Package = $PackageValue\n";

  # Release date...
  $This->{LineNum} += 1;
  $ReleaseDateValue = PackageInfo::GetReleaseDate();
  print {$Out} "# Release Date = $ReleaseDateValue\n";

  # Empty header line separating package and timestamp information...
  $This->{LineNum} += 1;
  print {$Out} "#\n";

  # Timestamp...
  $This->{LineNum} += 1;
  $TimeStampValue = TimeUtil::FPFileTimeStamp();
  print {$Out} "# TimeStamp = $TimeStampValue\n";

  return $This;
}
1209 | |
# Write out required header data keys and values as header lines...
#
# The key = value lines are surrounded by empty header lines and an additional
# empty header line follows the FingerprintsStringType key. The line number is
# advanced for each line written.
#
sub _WriteRequiredHeaderDataKeys {
  my($This) = @_;
  my($Out, $HeaderKey);

  $Out = $This->{FileHandle};

  # Required keys and values are derived from the first fingerprints string...
  $This->_GenerateWriteRequiredHeaderDataKeys();

  $This->{LineNum} += 1;
  print {$Out} "#\n";

  for $HeaderKey (@{$This->{RequiredHeaderDataKeys}}) {
    $This->{LineNum} += 1;
    print {$Out} "# $HeaderKey = $This->{RequiredHeaderDataKeysAndValues}{$HeaderKey}\n";

    # Only the fingerprints string type line is followed by an empty header line...
    next unless $HeaderKey =~ /^FingerprintsStringType$/i;

    $This->{LineNum} += 1;
    print {$Out} "#\n";
  }

  $This->{LineNum} += 1;
  print {$Out} "#\n";

  return $This;
}
1240 | |
# Generate required header data keys and values for writing using the current
# fingerprints string; it's a fatal error for the fingerprints string mode and
# the type of the fingerprints string to disagree...
#
sub _GenerateWriteRequiredHeaderDataKeys {
  my($This) = @_;

  if ($This->{FingerprintsBitVectorStringMode} && ($This->{FingerprintsString} =~ /^FingerprintsBitVector/i)) {
    $This->_GenerateWriteRequiredHeaderDataKeysForBitVectorString();
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode} && ($This->{FingerprintsString} =~ /^FingerprintsVector/i)) {
    $This->_GenerateWriteRequiredHeaderDataKeysForVectorString();
    return $This;
  }

  croak "Error: ${ClassName}->_GenerateWriteRequiredHeaderDataKeys: Required header data keys can't be generated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, doesn't correspond to type of first FingerprintsString: $This->{FingerprintsString}...";
}
1256 | |
# Generate required data header keys and values for writing fingerprints bit vector
# string: values are taken from the leading fields of the current fingerprints
# string...
#
sub _GenerateWriteRequiredHeaderDataKeysForBitVectorString {
  my($This) = @_;
  my($Key, $PatternAndValue, $VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, @KeyPatternsAndValues);

  @{$This->{RequiredHeaderDataKeys}} = $This->_GetRequiredHeaderDataKeys('FingerprintsBitVector');

  ($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString});

  # Key name patterns paired with the values to store for them...
  @KeyPatternsAndValues = (
    [qr/^FingerprintsStringType$/i, $VectorType],
    [qr/^Description$/i, $Description],
    [qr/^Size$/i, $Size],
    [qr/^BitStringFormat$/i, $BitStringFormat],
    [qr/^BitsOrder$/i, $BitsOrder],
  );

  %{$This->{RequiredHeaderDataKeysAndValues}} = ();

  KEY: for $Key (@{$This->{RequiredHeaderDataKeys}}) {
    for $PatternAndValue (@KeyPatternsAndValues) {
      if ($Key =~ $PatternAndValue->[0]) {
        $This->{RequiredHeaderDataKeysAndValues}{$Key} = $PatternAndValue->[1];
        next KEY;
      }
    }
    # No pattern matched the key...
    croak "Error: ${ClassName}->_GenerateWriteRequiredHeaderDataKeysForBitVectorString: Required header data key, $Key, value can't be generated: It's not a known key ...";
  }

  return $This;
}
1298 | |
# Generate required data header keys and values for writing fingerprints vector
# string: values are taken from the leading fields of the current fingerprints
# string...
#
sub _GenerateWriteRequiredHeaderDataKeysForVectorString {
  my($This) = @_;
  my($Key, $PatternAndValue, $VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat, @KeyPatternsAndValues);

  @{$This->{RequiredHeaderDataKeys}} = $This->_GetRequiredHeaderDataKeys('FingerprintsVector');

  # Number of values is part of each data line and not of the header data: it's
  # retrieved only to get to the fields following it...
  ($VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString});

  # Key name patterns paired with the values to store for them...
  @KeyPatternsAndValues = (
    [qr/^FingerprintsStringType$/i, $VectorType],
    [qr/^Description$/i, $Description],
    [qr/^VectorValuesType$/i, $VectorValuesType],
    [qr/^VectorStringFormat$/i, $VectorStringFormat],
  );

  %{$This->{RequiredHeaderDataKeysAndValues}} = ();

  KEY: for $Key (@{$This->{RequiredHeaderDataKeys}}) {
    for $PatternAndValue (@KeyPatternsAndValues) {
      if ($Key =~ $PatternAndValue->[0]) {
        $This->{RequiredHeaderDataKeysAndValues}{$Key} = $PatternAndValue->[1];
        next KEY;
      }
    }
    # No pattern matched the key...
    croak "Error: ${ClassName}->_GenerateWriteRequiredHeaderDataKeysForVectorString: Required header data key, $Key, value can't be generated: It's not a known key ...";
  }

  return $This;
}
1336 | |
1337 | |
# Get ready for writing fingerprints FP file: refuse to clobber an existing file
# unless overwrite is set, set up fingerprints string mode flags and fill in
# default formatting values for the selected mode...
#
sub _PrepareForWritingFingerprintsFPFileData {
  my($This) = @_;
  my($FPFile, $StringMode);

  $FPFile = $This->{Name};
  if (!$This->{Overwrite} && -e $FPFile) {
    croak "Error: ${ClassName}->_PrepareForWritingFingerprintsFPFileData: File, $FPFile, already exist. Use overwrite option...";
  }

  # Setup FingerprintsStringMode status: the two patterns can't both match...
  #
  $StringMode = $This->{FingerprintsStringMode};

  $This->{FingerprintsBitVectorStringMode} = ($StringMode =~ /^FingerprintsBitVectorString$/i) ? 1 : 0;
  $This->{FingerprintsVectorStringMode} = ($StringMode =~ /^FingerprintsVectorString$/i) ? 1 : 0;

  $This->{ValidFingerprintsStringMode} = ($This->{FingerprintsBitVectorStringMode} || $This->{FingerprintsVectorStringMode}) ? 1 : 0;

  # Fill in default formatting values for the selected mode...
  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    $This->_SetDefaultVectorStringFormat();
  }

  return $This;
}
1376 | |
# Set default value for bit string format unless a value is already set...
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}
1388 | |
# Set default value for bits order unless a value is already set...
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  $This->{BitsOrder} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();

  return $This;
}
1400 | |
# Set default value for vector string format unless a value is already set; the
# default is determined using the current fingerprints object and nothing is done
# without one...
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  return $This if $This->{VectorStringFormat};
  return $This unless $This->{FingerprintsObject};

  $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});

  return $This;
}
1412 | |
# Generate fingerprints object using current fingerprints string...
#
# The fingerprints object is reset first. Nothing is parsed for an empty
# fingerprints string; undef is returned when neither bit-vector nor vector
# string mode is set.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  return $This unless $This->{FingerprintsString};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});
    return $This;
  }

  # No valid mode: explicit undef, unlike all the other exits...
  return undef;
}
1436 | |
# Generate fingerprints string using current fingerprints object...
#
# The fingerprints string is reset to an empty string first and stays empty
# without a fingerprints object or a valid fingerprints string mode.
#
sub _GenerateFingerprintsString {
  my($This) = @_;
  my($FingerprintsObject);

  $This->{FingerprintsString} = '';

  $FingerprintsObject = $This->{FingerprintsObject};
  return $This unless $FingerprintsObject;

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($FingerprintsObject, $This->{BitStringFormat}, $This->{BitsOrder});
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($FingerprintsObject, $This->{VectorStringFormat});
  }

  return $This;
}
1457 | |
# Generate fingerprints string using partial fingerprints string and header keys data...
#
# Notes:
#   o FP file fingerprints data line only contain partial fingerprints data which
#     can't be used directly to create fingerprints bit-vector or vector objects
#     using functions available in FingerprintsStringUtil.pm module
#   o The fingerprints string is reset to an empty string first and stays empty
#     for a missing or empty partial fingerprints string, for a partial vector
#     string without any vector data after the number of values, and without a
#     valid fingerprints string mode
#   o Emptiness is checked using string length instead of truth value: a partial
#     string such as "0", or vector data consisting of a single 0 value, is valid
#     data
#
sub _GenerateFingerprintsStringFromPartialFingerprintsString {
  my($This) = @_;
  my($FPStringDelim, $PartialFingerprintsString);

  $This->{FingerprintsString} = '';

  $PartialFingerprintsString = $This->{PartialFingerprintsString};
  if (!(defined($PartialFingerprintsString) && length($PartialFingerprintsString))) {
    return $This;
  }

  $FPStringDelim = Fingerprints::FingerprintsStringUtil::GetFingeprintsStringDelimiter();

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = $This->{FingerprintsBitVectorStringPrefix} . $FPStringDelim . $PartialFingerprintsString;
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    my($NumOfValues, $VectorStringData);

    # Split at the first delimiter; the delimiter is quoted to make sure it's
    # always matched literally...
    ($NumOfValues, $VectorStringData) = $PartialFingerprintsString =~ /^(.*?)\Q$FPStringDelim\E(.*?)$/;
    if (!(defined($NumOfValues) && defined($VectorStringData) && length($VectorStringData))) {
      return $This;
    }

    # Number of values goes in between the two header data prefixes...
    $This->{FingerprintsString} = $This->{FingerprintsVectorStringPrefix1} . $FPStringDelim . $NumOfValues . $FPStringDelim . $This->{FingerprintsVectorStringPrefix2} . $FPStringDelim . $VectorStringData;
  }

  return $This;
}
1493 | |
# Generate partial fingerprints string using current fingerprints string...
#
# Notes:
#   o FP file fingerprints data line only contain partial fingerprints data: the
#     bit-vector string itself for fingerprints bit-vector strings; number of
#     values followed by vector string data for fingerprints vector strings
#   o The partial fingerprints string is reset to an empty string first and stays
#     empty without a fingerprints string or a valid fingerprints string mode
#
sub _GeneratePartialFingerprintsStringFromFingerprintsString {
  my($This) = @_;

  $This->{PartialFingerprintsString} = '';

  return $This unless $This->{FingerprintsString};

  if ($This->{FingerprintsBitVectorStringMode}) {
    # Bit-vector string is the sixth value, after type, description, size,
    # bit string format and bits order...
    $This->{PartialFingerprintsString} = (Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString}))[5];
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    my($Delim, $NumOfValues, $FirstVectorString, $SecondVectorString, $VectorData);

    $Delim = Fingerprints::FingerprintsStringUtil::GetFingeprintsStringDelimiter();

    # Number of values is the third value; vector string data makes up the sixth
    # and, when present, the seventh value...
    ($NumOfValues, $FirstVectorString, $SecondVectorString) = (Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString}))[2, 5, 6];

    $VectorData = TextUtil::IsEmpty($SecondVectorString) ? $FirstVectorString : "${FirstVectorString}${Delim}${SecondVectorString}";

    $This->{PartialFingerprintsString} = $NumOfValues . $Delim . $VectorData;
  }

  return $This;
}
1529 | |
# Is it a fingerprints FP file? The check is based on the file extension only...
#
# It may be used as a function with a file name or as a method, in which case the
# file name is the second parameter.
#
sub IsFingerprintsFPFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;
  my($FileName, $Status);

  # For a method call, the first parameter is the object...
  $FileName = ((@_ == 2) && (_IsFingerprintsFPFileIO($FirstParameter))) ? $SecondParameter : $FirstParameter;

  # Check file extension...
  $Status = FileUtil::CheckFileType($FileName, "fpf fp");

  return $Status;
}
1547 | |
# Is it a FingerprintsFPFileIO object? Returns 1 for a blessed reference which is
# an instance of this class or a class derived from it; otherwise, returns 0...
#
sub _IsFingerprintsFPFileIO {
  my($Object) = @_;

  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
1554 | |
1555 1; | |
1556 | |
1557 __END__ | |
1558 | |
1559 =head1 NAME | |
1560 | |
1561 FingerprintsFPFileIO | |
1562 | |
1563 =head1 SYNOPSIS | |
1564 | |
1565 use FileIO::FingerprintsFPFileIO; | |
1566 | |
1567 use FileIO::FingerprintsFPFileIO qw(:all); | |
1568 | |
1569 =head1 DESCRIPTION | |
1570 | |
1571 B<FingerprintsFPFileIO> class provides the following methods: | |
1572 | |
1573 new, GetFingerprints, GetFingerprintsString, GetHeaderDataKeyValue, | |
1574 GetHeaderDataKeys, GetHeaderDataKeysAndValues, GetPartialFingerprintsString, | |
1575 GetRequiredHeaderDataKeys, GetRequiredHeaderDataKeysAndValues, | |
1576 IsFingerprintsDataValid, IsFingerprintsFPFile, IsFingerprintsFileDataValid, | |
1577 IsHeaderDataKeyPresent, Next, Read, SetBitStringFormat, SetBitsOrder, | |
1578 SetCompoundID, SetDetailLevel, SetFingerprints, SetFingerprintsString, | |
1579 SetFingerprintsStringMode, SetPartialFingerprintsString, SetVectorStringFormat, | |
1580 WriteFingerprints, WriteFingerprintsString | |
1581 | |
1582 The following methods can also be used as functions: | |
1583 | |
1584 IsFingerprintsFPFile | |
1585 | |
1586 B<FingerprintsFPFileIO> class is derived from I<FileIO> class and uses its methods to support | |
1587 generic file related functionality. | |
1588 | |
1589 The MayaChemTools fingerprints file (FP) format with B<.fpf> or B<.fp> file extensions supports | |
1590 two types of fingerprints data: fingerprints bit-vectors and fingerprints vectors. | |
1591 | |
1592 Example of FP file format containing fingerprints bit-vector string data: | |
1593 | |
1594 # | |
1595 # Package = MayaChemTools 7.4 | |
1596 # ReleaseDate = Oct 21, 2010 | |
1597 # | |
1598 # TimeStamp = Mon Mar 7 15:14:01 2011 | |
1599 # | |
1600 # FingerprintsStringType = FingerprintsBitVector | |
1601 # | |
1602 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... | |
1603 # Size = 1024 | |
1604 # BitStringFormat = HexadecimalString | |
1605 # BitsOrder = Ascending | |
1606 # | |
1607 Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510... | |
1608 Cmpd2 000000249400840040100042011001001980410c000000001010088001120... | |
1609 ... ... | |
1610 ... .. | |
1611 | |
1612 Example of FP file format containing fingerprints vector string data: | |
1613 | |
1614 # | |
1615 # Package = MayaChemTools 7.4 | |
1616 # ReleaseDate = Oct 21, 2010 | |
1617 # | |
1618 # TimeStamp = Mon Mar 7 15:14:01 2011 | |
1619 # | |
1620 # FingerprintsStringType = FingerprintsVector | |
1621 # | |
1622 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... | |
1623 # VectorStringFormat = IDsAndValuesString | |
1624 # VectorValuesType = NumericalValues | |
1625 # | |
1626 Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C: | |
1627 N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...; | |
1628 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2 | |
1629 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ... | |
1630 Cmpd2 103;C N O C=N C=O CC CN CO CC=O CCC CCN CCO CNC N=CN NC=O NCN O=C | |
1631 O C CC=O CCCC CCCN CCCO CCNC CNC=N CNC=O CNCN CCCC=O CCCCC CCCCN CC...; | |
1632 15 4 4 1 2 13 5 2 2 15 5 3 2 2 1 1 1 2 17 7 6 5 1 1 1 2 15 8 5 7 2 2 2 2 | |
1633 1 2 1 1 3 15 7 6 8 3 4 4 3 2 2 1 2 3 14 2 4 7 4 4 4 4 1 1 1 2 1 1 1 ... | |
1634 ... ... | |
1635 ... ... | |
1636 | |
1637 B<FP> file data format consists of two main sections: header section and fingerprints string | |
1638 data section. The header section lines start with # and the first line not starting with # represents | |
1639 the start of fingerprints string data section. The header section contains both the required and | |
1640 optional information which is specified as key = value pairs. The required information | |
1641 describes fingerprints bit-vector and vector strings and used to generate fingerprints objects; | |
1642 the optional information is ignored during generation of fingerprints objects. | |
1643 | |
1644 The key = value data specification in the header section and its processing follows these | |
1645 rules: | |
1646 | |
1647 o Leading and trailing spaces for key = value pairs are ignored | |
1648 o Key and value strings may contain spaces | |
1649 o Multiple key = value pairs on a single line are delimited by semicolons | |
1650 | |
1651 The default optional header data section key = value pairs are: | |
1652 | |
1653 # Package = MayaChemTools 7.4 | |
1654 # ReleaseDate = Oct 21, 2010 | |
1655 | |
1656 The B<FingerprintsStringType> key is required data header key for both fingerprints bit-vector | |
1657 and vector strings. Possible key values: I<FingerprintsBitVector or FingerprintsVector>. | |
1658 For example: | |
1659 | |
1660 # FingerprintsStringType = FingerprintsBitVector | |
1661 | |
1662 The required data header keys for fingerprints bit-vector string are: B<Description, Size, | |
1663 BitStringFormat, and BitsOrder>. Possible values for B<BitStringFormat>: I<HexadecimalString | |
1664 or BinaryString>. Possible values for B<BitsOrder>: I<Ascending or Descending>. The B<Description> | |
1665 key contains information about various parameters used to generate fingerprints bit-vector | |
1666 string. The B<Size> corresponds to number of fingerprints bits and is always less than or equal | |
1667 to number of bits in bit-vector string which might contain extra bits at the end to round off the | |
1668 size to make it multiple of 8. For example: | |
1669 | |
1670 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... | |
1671 # Size = 1024 | |
1672 # BitStringFormat = HexadecimalString | |
1673 # BitsOrder = Ascending | |
1674 | |
1675 The required data header keys for fingerprints vector string are: B<Description, VectorStringFormat, | |
1676 and VectorValuesType>. Possible values for B<VectorStringFormat>: I<IDsAndValuesString, | |
1677 IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString or ValuesString>. | |
1678 Possible values for B<VectorValuesType>: I<NumericalValues, OrderedNumericalValues or | |
1679 AlphaNumericalValues>. The B<Description> key contains information about various parameters used | |
1680 to generate fingerprints vector string. For example: | |
1681 | |
1682 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... | |
1683 # VectorStringFormat = IDsAndValuesString | |
1684 # VectorValuesType = NumericalValues | |
1685 | |
1686 The fingerprints data section for fingerprints bit-vector string contains data in the following | |
1687 format: | |
1688 | |
1689 ... ... | |
1690 CmpdID FingerprintsPartialBitVectorString | |
1691 ... ... | |
1692 | |
1693 For example: | |
1694 | |
1695 ... ... | |
1696 Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510... | |
1697 ... ... | |
1698 | |
1699 The fingerprints data section for fingerprints vector string contains data in the following | |
1700 format: | |
1701 | |
1702 ... ... | |
1703 CmpdID Size;FingerprintsPartialVectorString | |
1704 ... ... | |
1705 | |
1706 For example: | |
1707 | |
1708 ... ... | |
1709 Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C: | |
1710 N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...; | |
1711 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2 | |
1712 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ... | |
1713 ... ... | |
1714 | |
1715 Unlike fingerprints bit-vector string, I<Size> is specified for each partial fingerprints vector string: | |
1716 It may change from molecule to molecule for same type of fingerprints. | |
1717 | |
1718 Values IDs are optional for fingerprints vector string containing I<OrderedNumericalValues or | |
1719 AlphaNumericalValues>; however, they must be present for I<NumericalValues>. Due to | |
1720 various possible values for B<VectorStringFormat>, the fingerprints data section for fingerprints | |
1721 vector string supports following type of data formats: | |
1722 | |
1723 CmpdID Size;ID1 ID2 ID3...;Value1 Value2 Value3... | |
1724 CmpdID Size;ID1 Value1 ID2 Value2 ID3 Value3... ... | |
1725 CmpdID Size;ValuesAndIDsString: Value1 Value2 Value3...;ID1 ID2 ID3... | |
1726 CmpdID Size;ValuesAndIDsPairsString: Value1 ID1 Value2 ID2 Value3 ID3... ... | |
1727 CmpdID Size;Value1 Value2 Value3 ... | |
1728 | |
1729 However, all the fingerprints vector string data present in FP file must correspond to only | |
1730 one of the formats shown above; multiple data formats in the same file are not allowed. | |
1731 | |
1732 The current release of MayaChemTools supports the following types of fingerprint | |
1733 bit-vector and vector strings: | |
1734 | |
1735 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi | |
1736 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT | |
1737 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X | |
1738 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A | |
1739 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2 | |
1740 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B... | |
1741 | |
1742 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS | |
1743 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2 | |
1744 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1 | |
1745 O.X1.BO2;2 4 14 3 10 1 1 1 3 2 | |
1746 | |
1747 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume | |
1748 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F | |
1749 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1 | |
1750 | |
1751 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN | |
1752 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C | |
1753 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N | |
1754 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8 | |
1755 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1 | |
1756 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0... | |
1757 | |
1758 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs | |
1759 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN | |
1760 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3 | |
1761 .024 -2.270 | |
1762 | |
1763 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues; | |
1764 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435 | |
1765 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1 | |
1766 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1767 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1768 | |
1769 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi | |
1770 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 | |
1771 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 | |
1772 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 | |
1773 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 | |
1774 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... | |
1775 | |
1776 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes | |
1777 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 | |
1778 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
1779 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; | |
1780 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 | |
1781 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 | |
1782 | |
1783 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
1784 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 | |
1785 0000000001010000000110000011000000000000100000000000000000000000100001 | |
1786 1000000110000000000000000000000000010011000000000000000000000000010000 | |
1787 0000000000000000000000000010000000000000000001000000000000000000000000 | |
1788 0000000000010000100001000000000000101000000000000000100000000000000... | |
1789 | |
1790 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu | |
1791 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8 | |
1792 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567 | |
1793 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012 | |
1794 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455 | |
1795 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404... | |
1796 | |
1797 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp | |
1798 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184 | |
1799 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450 | |
1800 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430 | |
1801 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134 | |
1802 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566... | |
1803 | |
1804 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000 | |
1805 0000000000000000000000000000000001001000010010000000010010000000011100 | |
1806 0100101010111100011011000100110110000011011110100110111111111111011111 | |
1807 11111111111110111000 | |
1808 | |
1809 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011 | |
1810 1110011111100101111111000111101100110000000000000011100010000000000000 | |
1811 0000000000000000000000000000000000000000000000101000000000000000000000 | |
1812 0000000000000000000000000000000000000000000000000000000000000000000000 | |
1813 0000000000000000000000000000000000000011000000000000000000000000000000 | |
1814 0000000000000000000000000000000000000000 | |
1815 | |
1816 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri | |
1817 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1818 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 | |
1819 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0 | |
1820 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1 | |
1821 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1 | |
1822 | |
1823 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri | |
1824 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0 | |
1825 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0 | |
1826 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1827 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0 | |
1828 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... | |
1829 | |
1830 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng | |
1831 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110 | |
1832 0100010101011000101001011100110001000010001001101000001001001001001000 | |
1833 0010110100000111001001000001001010100100100000000011000000101001011100 | |
1834 0010000001000101010100000100111100110111011011011000000010110111001101 | |
1835 0101100011000000010001000011000010100011101100001000001000100000000... | |
1836 | |
1837 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength | |
1838 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2 | |
1839 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X | |
1840 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1 | |
1841 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO | |
1842 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C.... | |
1843 | |
1844 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt | |
1845 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1 | |
1846 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N | |
1847 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1 | |
1848 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR | |
1849 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ... | |
1850 | |
1851 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD | |
1852 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1 | |
1853 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3. | |
1854 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...; | |
1855 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1 | |
1856 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1... | |
1857 | |
1858 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi | |
1859 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar | |
1860 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H | |
1861 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...; | |
1862 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4 | |
1863 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ... | |
1864 | |
1865 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3 | |
1866 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4- | |
1867 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO | |
1868 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...; | |
1869 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1 | |
1870 | |
1871 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica | |
1872 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC | |
1873 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC- | |
1874 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...; | |
1875 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2 | |
1876 | |
1877 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M | |
1878 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1 | |
1879 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1 | |
1880 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1 | |
1881 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....; | |
1882 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2 | |
1883 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8... | |
1884 | |
1885 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1 | |
1886 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C | |
1887 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3- | |
1888 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2 | |
1889 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C. | |
1890 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7... | |
1891 | |
1892 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min | |
1893 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H | |
1894 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2- | |
1895 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H | |
1896 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...; | |
1897 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10 | |
1898 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1 | |
1899 | |
1900 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist | |
1901 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0 | |
1902 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1 | |
1903 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0 | |
1904 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0 | |
1905 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18... | |
1906 | |
1907 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize: | |
1908 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1- | |
1909 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1 | |
1910 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1- | |
1911 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...; | |
1912 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23 | |
1913 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1 | |
1914 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ... | |
1915 | |
1916 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD | |
1917 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106 | |
1918 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0 | |
1919 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26 | |
1920 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0 | |
1921 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ... | |
1922 | |
1923 =head2 METHODS | |
1924 | |
1925 =over 4 | |
1926 | |
1927 =item B<new> | |
1928 | |
1929 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO(%IOParameters); | |
1930 | |
1931 Using specified I<IOParameters> names and values hash, B<new> method creates a new | |
1932 object and returns a reference to a newly created B<FingerprintsFPFileIO> object. By default, | |
1933 the following properties are initialized during I<Read> mode: | |
1934 | |
1935 Name = ''; | |
1936 Mode = 'Read'; | |
1937 Status = 0; | |
1938 FingerprintsStringMode = 'AutoDetect'; | |
1939 ValidateData = 1; | |
1940 DetailLevel = 1; | |
1941 | |
1942 During I<Write> mode, the following properties get initialized by default: | |
1943 | |
1944 FingerprintsStringMode = undef; | |
1945 | |
1946 BitStringFormat = HexadecimalString; | |
1947 BitsOrder = Ascending; | |
1948 | |
1949 VectorStringFormat = NumericalValuesString or ValuesString; | |
1950 | |
1951 Examples: | |
1952 | |
1953 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO( | |
1954 'Name' => 'Sample.fpf', | |
1955 'Mode' => 'Read', | |
1956 'FingerprintsStringMode' => | |
1957 'AutoDetect'); | |
1958 | |
1959 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO( | |
1960 'Name' => 'Sample.fpf', | |
1961 'Mode' => 'Write', | |
1962 'FingerprintsStringMode' => | |
1963 'FingerprintsBitVectorString', | |
1964 'Overwrite' => 1, | |
1965 'BitStringFormat' => 'HexadecimalString', | |
1966 'BitsOrder' => 'Ascending'); | |
1967 | |
1968 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO( | |
1969 'Name' => 'Sample.fp', | |
1970 'Mode' => 'Write', | |
1971 'FingerprintsStringMode' => | |
1972 'FingerprintsVectorString', | |
1973 'Overwrite' => 1, | |
1974 'VectorStringFormat' => 'IDsAndValuesString'); | |
1975 | |
1976 =item B<GetFingerprints> | |
1977 | |
1978 $FingerprintsObject = $FingerprintsFPFileIO->GetFingerprints(); | |
1979 | |
1980 Returns B<FingerprintsObject> generated for current data line using fingerprints bit-vector | |
1981 or vector string data. The fingerprints object corresponds to any of the supported fingerprints | |
1982 such as PathLengthFingerprints, ExtendedConnectivity, and so on. | |
1983 | |
1984 =item B<GetFingerprintsString> | |
1985 | |
1986 $FingerprintsString = $FingerprintsFPFileIO->GetFingerprintsString(); | |
1987 | |
1988 Returns B<FingerprintsString> for current data line. | |
1989 | |
1990 =item B<GetHeaderDataKeyValue> | |
1991 | |
1992 $KeyValue = $FingerprintsFPFileIO->GetHeaderDataKeyValue($Key); | |
1993 | |
1994 Returns B<KeyValue> of a data header I<Key>. | |
1995 | |
1996 =item B<GetHeaderDataKeys> | |
1997 | |
1998 @Keys = $FingerprintsFPFileIO->GetHeaderDataKeys(); | |
1999 $NumOfKeys = $FingerprintsFPFileIO->GetHeaderDataKeys(); | |
2000 | |
2001 Returns an array of data header B<Keys> retrieved from data header section of fingerprints | |
2002 file. In scalar context, it returns number of keys. | |
2003 | |
2004 =item B<GetHeaderDataKeysAndValues> | |
2005 | |
2006 %KeysAndValues = $FingerprintsFPFileIO->GetHeaderDataKeysAndValues(); | |
2007 | |
2008 Returns a hash of data header keys and values retrieved from data header section of fingerprints | |
2009 file. | |
2010 | |
2011 =item B<GetPartialFingerprintsString> | |
2012 | |
2013 $FingerprintsString = $FingerprintsFPFileIO->GetPartialFingerprintsString(); | |
2014 | |
2015 Returns partial B<FingerprintsString> for current data line. It corresponds to the fingerprints string | |
2016 as it is present in a data line. | |
2017 | |
2018 =item B<GetRequiredHeaderDataKeys> | |
2019 | |
2020 @Keys = $FingerprintsFPFileIO->GetRequiredHeaderDataKeys(); | |
2021 $NumOfKeys = $FingerprintsFPFileIO->GetRequiredHeaderDataKeys(); | |
2022 | |
2023 Returns an array of required data header B<Keys> for a fingerprints file containing bit-vector or | |
2024 vector strings data. In scalar context, it returns number of keys. | |
2025 | |
2026 =item B<GetRequiredHeaderDataKeysAndValues> | |
2027 | |
2028 %KeysAndValues = $FingerprintsFPFileIO-> | |
2029 GetRequiredHeaderDataKeysAndValues(); | |
2030 | |
2031 Returns a hash of required data header keys and values for a fingerprints file containing bit-vector or | |
2032 vector strings data. | |
2033 | |
2034 =item B<IsFingerprintsDataValid> | |
2035 | |
2036 $Status = $FingerprintsFPFileIO->IsFingerprintsDataValid(); | |
2037 | |
2038 Returns 1 or 0 based on whether B<FingerprintsObject> is valid. | |
2039 | |
2040 =item B<IsFingerprintsFPFile> | |
2041 | |
2042 $Status = $FingerprintsFPFileIO->IsFingerprintsFPFile($FileName); | |
2043 $Status = FileIO::FingerprintsFPFileIO::IsFingerprintsFPFile($FileName); | |
2044 | |
2045 Returns 1 or 0 based on whether I<FileName> is a FP file. | |
2046 | |
2047 =item B<IsFingerprintsFileDataValid> | |
2048 | |
2049 $Status = $FingerprintsFPFileIO->IsFingerprintsFileDataValid(); | |
2050 | |
2051 Returns 1 or 0 based on whether fingerprints file contains valid fingerprints data. | |
2052 | |
2053 =item B<IsHeaderDataKeyPresent> | |
2054 | |
2055 $Status = $FingerprintsFPFileIO->IsHeaderDataKeyPresent($Key); | |
2056 | |
2057 Returns 1 or 0 based on whether data header I<Key> is present in data header | |
2058 section of a FP file. | |
2059 | |
2060 =item B<Next or Read> | |
2061 | |
2062 $FingerprintsFPFileIO = $FingerprintsFPFileIO->Next(); | |
2063 $FingerprintsFPFileIO = $FingerprintsFPFileIO->Read(); | |
2064 | |
2065 Reads next available fingerprints line in FP file, processes the data, generates appropriate fingerprints | |
2066 object, and returns B<FingerprintsFPFileIO>. The generated fingerprints object is available using | |
2067 method B<GetFingerprints>. | |
2068 | |
2069 =item B<SetBitStringFormat> | |
2070 | |
2071 $FingerprintsFPFileIO->SetBitStringFormat($Format); | |
2072 | |
2073 Sets bit string I<Format> for fingerprints bit-vector string data in a FP file and returns B<FingerprintsFPFileIO>. | |
2074 Possible values for B<BitStringFormat>: I<BinaryString or HexadecimalString>. | |
2075 | |
2076 =item B<SetBitsOrder> | |
2077 | |
2078 $FingerprintsFPFileIO->SetBitsOrder($BitsOrder); | |
2079 | |
2080 Sets I<BitsOrder> for fingerprints bit-vector string data in a FP file and returns B<FingerprintsFPFileIO>. | |
2081 Possible values for B<BitsOrder>: I<Ascending or Descending>. | |
2082 | |
2083 =item B<SetCompoundID> | |
2084 | |
2085 $FingerprintsFPFileIO->SetCompoundID($ID); | |
2086 | |
2087 Sets compound ID for current data line and returns B<FingerprintsFPFileIO>. Spaces are not allowed | |
2088 in compound IDs. | |
2089 | |
2090 =item B<SetDetailLevel> | |
2091 | |
2092 $FingerprintsFPFileIO->SetDetailLevel($Level); | |
2093 | |
2094 Sets details I<Level> for generating diagnostics messages during FP file processing and returns | |
2095 B<FingerprintsFPFileIO>. Possible values: I<Positive integers>. | |
2096 | |
2097 =item B<SetFingerprints> | |
2098 | |
2099 $FingerprintsFPFileIO->SetFingerprints($FingerprintsObject); | |
2100 | |
2101 Sets I<FingerprintsObject> for current data line and returns B<FingerprintsFPFileIO>. | |
2102 | |
2103 =item B<SetFingerprintsString> | |
2104 | |
2105 $FingerprintsFPFileIO->SetFingerprintsString($FingerprintsString); | |
2106 | |
2107 Sets I<FingerprintsString> for current data line and returns B<FingerprintsFPFileIO>. | |
2108 | |
2109 =item B<SetFingerprintsStringMode> | |
2110 | |
2111 $FingerprintsFPFileIO->SetFingerprintsStringMode($Mode); | |
2112 | |
2113 Sets I<FingerprintsStringMode> for FP file and returns B<FingerprintsFPFileIO>. | |
2114 Possible values: I<AutoDetect, FingerprintsBitVectorString, or FingerprintsVectorString>. | |
2115 | |
2116 =item B<SetPartialFingerprintsString> | |
2117 | |
2118 $FingerprintsFPFileIO->SetPartialFingerprintsString($PartialString); | |
2119 | |
2120 Sets I<PartialFingerprintsString> for current data line and returns B<FingerprintsFPFileIO>. | |
2121 | |
2122 =item B<SetVectorStringFormat> | |
2123 | |
2124 $FingerprintsFPFileIO->SetVectorStringFormat($Format); | |
2125 | |
2126 Sets I<VectorStringFormat> for FP file and returns B<FingerprintsFPFileIO>. Possible values: | |
2127 I<IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString>. | |
2128 | |
2129 =item B<WriteFingerprints> | |
2130 | |
2131 $FingerprintsFPFileIO->WriteFingerprints($FingerprintsObject, | |
2132 $CompoundID); | |
2133 | |
2134 Writes fingerprints string generated from I<FingerprintsObject> object and other data including | |
2135 I<CompoundID> to FP file and returns B<FingerprintsFPFileIO>. | |
2136 | |
2137 =item B<WriteFingerprintsString> | |
2138 | |
2139 $FingerprintsFPFileIO->WriteFingerprintsString($FingerprintsString, | |
2140 $CompoundID); | |
2141 | |
2142 Writes I<FingerprintsString> and other data including I<CompoundID> to FP file and returns | |
2143 B<FingerprintsFPFileIO>. | |
2144 | |
2145 Caveats: | |
2146 | |
2147 o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat | |
2148 values are ignored during writing of fingerprints and it's written to | |
2149 the file as it is. | |
2150 o FingerprintsString is a regular fingerprints string as opposed to a | |
2151 partial fingerprints string. | |
2152 | |
2153 =back | |
2154 | |
2155 =head1 AUTHOR | |
2156 | |
2157 Manish Sud <msud@san.rr.com> | |
2158 | |
2159 =head1 SEE ALSO | |
2160 | |
2161 FingerprintsSDFileIO.pm, FingerprintsTextFileIO.pm | |
2162 | |
2163 =head1 COPYRIGHT | |
2164 | |
2165 Copyright (C) 2015 Manish Sud. All rights reserved. | |
2166 | |
2167 This file is part of MayaChemTools. | |
2168 | |
2169 MayaChemTools is free software; you can redistribute it and/or modify it under | |
2170 the terms of the GNU Lesser General Public License as published by the Free | |
2171 Software Foundation; either version 3 of the License, or (at your option) | |
2172 any later version. | |
2173 | |
2174 =cut |