Mercurial > repos > deepakjadmin > mayatool3_test3
comparison mayachemtools/lib/FileIO/FingerprintsSDFileIO.pm @ 0:73ae111cf86f draft
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 11:55:01 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:73ae111cf86f |
---|---|
1 package FileIO::FingerprintsSDFileIO; | |
2 # | |
3 # $RCSfile: FingerprintsSDFileIO.pm,v $ | |
4 # $Date: 2015/02/28 20:48:43 $ | |
5 # $Revision: 1.18 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability or fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use Scalar::Util (); | |
33 use TextUtil (); | |
34 use FileUtil (); | |
35 use SDFileUtil (); | |
36 use Fingerprints::FingerprintsStringUtil (); | |
37 use FileIO::FileIO; | |
38 | |
39 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
40 | |
41 @ISA = qw(FileIO::FileIO Exporter); | |
42 @EXPORT = qw(); | |
43 @EXPORT_OK = qw(IsFingerprintsSDFile); | |
44 | |
45 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); | |
46 | |
47 # Setup class variables... | |
48 my($ClassName); | |
49 _InitializeClass(); | |
50 | |
# Class constructor...
#
# Creates the underlying object through the parent class constructor, re-blesses it
# into this class, installs default object data and finally applies the property
# names and values supplied by the caller.
#
# Returns the new object.
#
sub new {
  my $Class = shift;
  my %NamesAndValues = @_;

  # Let the parent class build the object and then make it one of ours...
  my $This = $Class->SUPER::new();
  my $BlessInto = ref($Class) || $Class;
  bless $This, $BlessInto;

  # Defaults go in first; caller specified properties are applied afterwards...
  $This->_InitializeFingerprintsSDFileIO();
  $This->_InitializeFingerprintsSDFileIOProperties(%NamesAndValues);

  return $This;
}
64 | |
# Initialize object data shared by read and write modes and then delegate to the
# read and write specific initializers...
#
# Returns the object.
#
sub _InitializeFingerprintsSDFileIO {
  my($This) = @_;

  my %Defaults = (
    # Fingerprints string data format during read/write...
    #
    # For file read: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString;
    # the read properties initializer falls back to AutoDetect when nothing is specified.
    #
    # For file write: FingerprintsBitVectorString or FingerprintsVectorString; there is
    # no default and the write properties initializer insists on an explicit value.
    #
    FingerprintsStringMode => undef,

    # For file read: fingerprints bit-vector or vector object for current fingerprints string.
    # For file write: same, or any supported fingerprints object such as
    # PathLengthFingerprints, ExtendedConnectivity, and so on.
    #
    FingerprintsObject => undef,

    # SD file data field label holding the fingerprints string...
    #
    # For file read: AutoDetect picks a data field whose label contains the word
    # Fingerprints; otherwise, a valid data field label must be specified.
    #
    # For file write: label used to write out fingerprints string. Default: Fingerprints
    #
    FingerprintsFieldLabel => undef,

    # Fingerprints string for current compound during read/write...
    FingerprintsString => undef,

    # Flag indicating that no compound data string has been read/written yet...
    FirstCompoundDataIO => 1,

    # Number of current compound during read/write...
    CompoundNum => 0,

    # Compound data string during read/write...
    CompoundString => undef,
  );

  # keys and values walk an unmodified hash in the same order...
  @{$This}{keys %Defaults} = values %Defaults;

  # Mode specific parameters...
  $This->_InitializeFingerprintsSDFileIORead();
  $This->_InitializeFingerprintsSDFileIOWrite();

  return $This;
}
134 | |
# Initialize class data...
#
# Stores name of this package in the file level ClassName variable which is used
# as prefix in error and warning messages.
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}
141 | |
# Initialize object data used during reading of fingerprints SD file...
#
# Returns the object.
#
sub _InitializeFingerprintsSDFileIORead {
  my($This) = @_;

  my %ReadDefaults = (
    # How compound IDs are generated for fingerprints: use value of a SD file data field;
    # use molname line from SD file; generate a sequential ID with a specific prefix; or
    # use molname line and fall back to a sequential ID for empty molname lines.
    #
    # Possible values: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix.
    #
    CompoundIDMode => 'LabelPrefix',

    # Data field label whose value is used as compound ID during DataField value of
    # CompoundIDMode...
    #
    CompoundIDFieldLabel => undef,

    # Prefix for sequential compound IDs during LabelPrefix or MolNameOrLabelPrefix value
    # of CompoundIDMode. Default value, Cmpd, generates IDs which look like Cmpd<Number>.
    #
    CompoundIDPrefix => 'Cmpd',

    # Fingerprints data for each compound is validated by default before fingerprints
    # objects are generated; a false value skips validation...
    #
    ValidateData => 1,

    # Level of detail to print during validation of data for invalid or missing data...
    DetailLevel => 1,

    # Number of compounds with missing and invalid fingerprints string data...
    NumOfCmpdsWithMissingData => 0,
    NumOfCmpdsWithInvalidData => 0,

    # Compound ID for current fingerprints string...
    CompoundID => undef,

    # Status of data in fingerprints SD file...
    ValidFileData => 0,
    ValidCompoundIDField => 0,
    ValidFingerprintsField => 0,
    ValidFingerprintsStringMode => 0,
  );

  # keys and values walk an unmodified hash in the same order...
  @{$This}{keys %ReadDefaults} = values %ReadDefaults;

  # Data field labels and values map for current compound data: emptied in place...
  %{$This->{DataFieldLabelsAndValues}} = ();

  return $This;
}
201 | |
# Initialize object data used during writing of fingerprints SD file...
#
# Returns the object.
#
sub _InitializeFingerprintsSDFileIOWrite {
  my($This) = @_;

  my %WriteDefaults = (
    # Fingerprints bit vector string format: BinaryString or HexadecimalString...
    #
    # Left undefined here; according to the original notes, default value is set during
    # first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat.
    #
    BitStringFormat => undef,

    # Bits order in fingerprints bit vector string...
    #
    # Ascending - First bit in each byte as the lowest bit
    # Descending - First bit in each byte as the highest bit
    #
    # Left undefined here; according to the original notes, default value is set during
    # first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder.
    #
    BitsOrder => undef,

    # Fingerprints vector string format: IDsAndValuesString, IDsAndValuesPairsString,
    # ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...
    #
    # Left undefined here; according to the original notes, default value is set during
    # first write using Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat.
    #
    VectorStringFormat => undef,

    # Overwriting existing file...
    Overwrite => 0,
  );

  # keys and values walk an unmodified hash in the same order...
  @{$This}{keys %WriteDefaults} = values %WriteDefaults;

  return $This;
}
239 | |
# Apply caller specified property names and values to the object...
#
# Each property is set by calling Set<PropertyName> method; methods without an explicit
# implementation in this class are expected to be provided on-demand by ObjectProperty
# class. After properties are set, presence of Name property and fingerprints SD file
# status of named file are checked, and mode specific setup is performed.
#
# Croaks on missing Name or on a file which doesn't appear to be fingerprints SD file.
# Returns the object.
#
sub _InitializeFingerprintsSDFileIOProperties {
  my($This, %NamesAndValues) = @_;

  for my $PropertyName (keys %NamesAndValues) {
    my $MethodName = "Set${PropertyName}";
    $This->$MethodName($NamesAndValues{$PropertyName});
  }

  exists $NamesAndValues{Name}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";

  # Make sure it's a fingerprints file...
  my $Name = $NamesAndValues{Name};
  $This->IsFingerprintsSDFile($Name)
    or croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be fingerprints format...";

  # Mode specific setup; any other mode needs no further work here...
  my $Mode = $This->GetMode();
  if ($Mode =~ /^Read$/i) {
    $This->_InitializeFingerprintsSDFileIOReadProperties(%NamesAndValues);
  }
  elsif ($Mode =~ /^(Write|Append)$/i) {
    $This->_InitializeFingerprintsSDFileIOWriteProperties(%NamesAndValues);
  }

  return $This;
}
272 | |
# Finish setting up object properties for reading fingerprints SD file...
#
# Unspecified FingerprintsStringMode and FingerprintsFieldLabel both default to
# AutoDetect. Croaks when CompoundIDMode is DataField and CompoundIDFieldLabel hasn't
# been specified. Returns the object.
#
sub _InitializeFingerprintsSDFileIOReadProperties {
  my($This, %NamesAndValues) = @_;

  # Default values for properties not specified by the caller...
  $This->{FingerprintsStringMode} ||= 'AutoDetect';
  $This->{FingerprintsFieldLabel} ||= 'AutoDetect';

  # Compound ID data field label is required in DataField mode...
  my $DataFieldMode = ($This->{CompoundIDMode} =~ /^DataField$/i) ? 1 : 0;
  if ($DataFieldMode && !defined($This->{CompoundIDFieldLabel})) {
    croak "Error: ${ClassName}->: Object can't be instantiated: Compound ID data field lable must be specifed using \"CompoundIDFieldLabel\" during \"DataField\" value of \"CompoundIDMode\"...";
  }

  $This->_PrepareForReadingFingerprintsSDFileData();

  return $This;
}
297 | |
# Finish setting up object properties for writing fingerprints SD file...
#
# FingerprintsStringMode must be specified by the caller and be either
# FingerprintsBitVectorString or FingerprintsVectorString; otherwise, it croaks.
# Unspecified FingerprintsFieldLabel defaults to Fingerprints. Returns the object.
#
sub _InitializeFingerprintsSDFileIOWriteProperties {
  my($This, %NamesAndValues) = @_;

  # There is no default for FingerprintsStringMode during write/append...
  exists $NamesAndValues{FingerprintsStringMode}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode...";

  # AutoDetect is a read only value...
  $This->{FingerprintsStringMode} =~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i
    or croak "Error: ${ClassName}->: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";

  # Default value for FingerprintsFieldLabel...
  $This->{FingerprintsFieldLabel} ||= 'Fingerprints';

  $This->_PrepareForWritingFingerprintsSDFileData();

  return $This;
}
321 | |
# Set FingerprintsStringMode...
#
# Supported values, matched case insensitively:
#
# AutoDetect - automatically detect format of fingerprints string
# FingerprintsBitVectorString - Bit vector fingerprints string format
# FingerprintsVectorString - Vector fingerprints string format
#
# Croaks on any other value. Returns the object.
#
sub SetFingerprintsStringMode {
  my($This, $Value) = @_;

  $Value =~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i
    or croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString...";

  $This->{FingerprintsStringMode} = $Value;

  return $This;
}
339 | |
# Set CompoundIDMode...
#
# Supported values, matched case insensitively: DataField, MolName, LabelPrefix, or
# MolNameOrLabelPrefix.
#
# Croaks on any other value. Returns the object.
#
sub SetCompoundIDMode {
  my($This, $Value) = @_;

  if ($Value !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
    # Report this method as source of the error...
    croak "Error: ${ClassName}->SetCompoundIDMode: CompoundIDMode value, $Value, is not valid; Supported values: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix...";
  }

  $This->{CompoundIDMode} = $Value;

  return $This;
}
353 | |
# Set DetailLevel...
#
# Value must be accepted by TextUtil::IsPositiveInteger; otherwise, it croaks.
# Returns the object.
#
sub SetDetailLevel {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0...";

  $This->{DetailLevel} = $Value;

  return $This;
}
367 | |
# Set BitStringFormat...
#
# Supported values, matched case insensitively: BinaryString or HexadecimalString.
#
# Croaks on any other value. Returns the object.
#
sub SetBitStringFormat {
  my($This, $Value) = @_;

  $Value =~ /^(BinaryString|HexadecimalString)$/i
    or croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";

  $This->{BitStringFormat} = $Value;

  return $This;
}
381 | |
# Set BitsOrder...
#
# Supported values, matched case insensitively:
#
# Ascending - First bit in each byte as the lowest bit
# Descending - First bit in each byte as the highest bit
#
# Croaks on any other value. Returns the object.
#
sub SetBitsOrder {
  my($This, $Value) = @_;

  if ($Value !~ /^(Ascending|Descending)$/i) {
    # Name the property which was actually rejected...
    croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
  }

  $This->{BitsOrder} = $Value;

  return $This;
}
398 | |
# Set VectorStringFormat...
#
# Supported values, matched case insensitively: IDsAndValuesString, IDsAndValuesPairsString,
# ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString.
#
# Croaks on any other value. Returns the object.
#
sub SetVectorStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
    # Name the property which was actually rejected...
    croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
  }

  $This->{VectorStringFormat} = $Value;

  return $This;
}
414 | |
# Get compound string for current compound with optional removal of fingerprints data...
#
# A true RemoveFingerprintsData value returns compound string without fingerprints data
# field label and value, provided fingerprints data is present in the compound string;
# otherwise, stored compound string is returned as it is.
#
sub GetCompoundString {
  my($This, $RemoveFingerprintsData) = @_;

  # An unspecified flag means keep fingerprints data...
  my $StripFingerprintsData = defined($RemoveFingerprintsData) ? $RemoveFingerprintsData : 0;

  if (!$StripFingerprintsData) {
    return $This->{CompoundString};
  }
  if (!$This->_IsFingerprintsDataPresentInCompoundString()) {
    return $This->{CompoundString};
  }

  return SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
}
428 | |
# Set compound string for current compound...
#
# Returns the object.
#
sub SetCompoundString {
  my $This = shift;
  my($CompoundString) = @_;

  $This->{CompoundString} = $CompoundString;

  return $This;
}
438 | |
# Get fingerprints object for current compound...
#
# During read, it's the fingerprints bit-vector or fingerprints vector object generated
# for current fingerprints string; during write, it may also be any supported fingerprints
# object such as PathLengthFingerprints, ExtendedConnectivity, and so on.
#
sub GetFingerprints {
  my $This = shift;

  return $This->{FingerprintsObject};
}
448 | |
# Set fingerprints object for current compound...
#
# Returns the object.
#
sub SetFingerprints {
  my $This = shift;
  my($FingerprintsObject) = @_;

  $This->{FingerprintsObject} = $FingerprintsObject;

  return $This;
}
458 | |
# Get fingerprints string for current compound...
#
# Returns string None whenever stored fingerprints string is false: undefined, empty or 0.
#
sub GetFingerprintsString {
  my($This) = @_;

  return $This->{FingerprintsString} || 'None';
}
466 | |
# Set fingerprints string for current compound...
#
# Returns the object.
#
sub SetFingerprintsString {
  my $This = shift;
  my($FingerprintsString) = @_;

  $This->{FingerprintsString} = $FingerprintsString;

  return $This;
}
476 | |
# Does fingerprints SD file contain valid data?
#
# Returns 1 or 0 based on ValidFileData status of the object.
#
sub IsFingerprintsFileDataValid {
  my($This) = @_;

  if ($This->{ValidFileData}) {
    return 1;
  }
  return 0;
}
484 | |
# Does current compound contain valid fingerprints object data?
#
# Returns 1 when a fingerprints object is defined for current compound; otherwise, 0.
#
sub IsFingerprintsDataValid {
  my($This) = @_;

  if (defined $This->{FingerprintsObject}) {
    return 1;
  }
  return 0;
}
492 | |
# Read next available compound data string, process it and generate appropriate fingerprints
# object...
#
# Returns undef when no more compound data is available; otherwise, returns the object.
# The object is also returned, without generating fingerprints object, for SD file with
# invalid data and for compounds which fail validation; the corresponding counters keep
# track of those compounds.
#
sub Read {
  my($This) = @_;

  # Read compound data string...
  $This->_ReadCompoundDataString()
    or return undef;

  # Nothing to process for SD file with invalid data; it's counted as missing data
  # during validation...
  if (!$This->{ValidFileData}) {
    $This->{NumOfCmpdsWithMissingData} += 1 if $This->{ValidateData};
    return $This;
  }

  # Perform data validation...
  if ($This->{ValidateData} && !$This->_ValidateReadCompoundDataString()) {
    return $This;
  }

  # Setup fingerprints string after checking again to handle problematic data for
  # non-validated compound string data...
  #
  my $FieldLabel = $This->{FingerprintsFieldLabel};
  if (exists $This->{DataFieldLabelsAndValues}{$FieldLabel}) {
    $This->{FingerprintsString} = $This->{DataFieldLabelsAndValues}{$FieldLabel};
  }

  # Generate fingerprints object and compound ID for fingerprints string...
  $This->_GenerateFingerprintsObject();
  $This->_GenerateCompoundID();

  return $This;
}
536 | |
# Read next available compound data string: same as Read method and returns whatever
# Read returns...
#
sub Next {
  my $This = shift;

  return $This->Read();
}
545 | |
# Read next compound data string from SD file and set up its data field labels and
# values map...
#
# Returns 0 when no compound string is available; otherwise, returns 1.
#
sub _ReadCompoundDataString {
  my($This) = @_;

  # One time processing before first compound is read...
  $This->_ProcessFirstCompoundDataStringRead() if $This->{FirstCompoundDataIO};

  # Initialize data for current compound data string...
  $This->_InitializeReadCompoundDataString();

  # Get next compound data string...
  my $CmpdString = SDFileUtil::ReadCmpdString($This->{FileHandle});
  $This->{CompoundString} = $CmpdString;
  return 0 if !$CmpdString;

  $This->{CompoundNum} += 1;

  # Set up data field labels and values...
  my @CmpdLines = split "\n", $CmpdString;
  %{$This->{DataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

  return 1;
}
573 | |
# Reset per compound data before reading next compound data string...
#
# Returns the object.
#
sub _InitializeReadCompoundDataString {
  my($This) = @_;

  # Assigning an empty list to the slice sets all of these to undef...
  @{$This}{qw(CompoundID CompoundString FingerprintsObject FingerprintsString)} = ();

  # Data field labels and values map is emptied in place...
  %{$This->{DataFieldLabelsAndValues}} = ();

  return $This;
}
589 | |
# Validate fingerprints data in current compound data string...
#
# A compound without the fingerprints data field is counted as missing data. A compound
# whose fingerprints string has invalid values, whose type and description can't be
# retrieved, or whose type and description differ from the ones recorded for the first
# compound in SD file, is counted as invalid data. Warnings are issued for DetailLevel
# values of 2 or higher; the compound string is included for values of 3 or higher.
#
# Returns 1 for valid fingerprints data; otherwise, returns 0.
#
sub _ValidateReadCompoundDataString {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  # Check for missing data...
  if (!exists $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
    $This->{NumOfCmpdsWithMissingData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data: $This->{CompoundString}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data...";
    }
    return 0;
  }

  # Check for invalid data...
  my($FingerprintsString, $InvalidFingerprintsData);

  $FingerprintsString = $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel};
  $InvalidFingerprintsData = 1;

  if (Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($FingerprintsString)) {
    my($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($FingerprintsString);
    if (defined($FingerprintsType) && defined($FingerprintsDescription)) {
      # Compare as plain strings, ignoring case: descriptions may contain characters
      # which are special in a regular expression...
      my($FirstType, $FirstDescription);
      $FirstType = defined($This->{FirstFingerprintsStringType}) ? $This->{FirstFingerprintsStringType} : '';
      $FirstDescription = defined($This->{FirstFingerprintsStringDescription}) ? $This->{FirstFingerprintsStringDescription} : '';
      if (lc($FirstType) eq lc($FingerprintsType) && lc($FirstDescription) eq lc($FingerprintsDescription)) {
        $InvalidFingerprintsData = 0;
      }
    }
  }

  if ($InvalidFingerprintsData) {
    $This->{NumOfCmpdsWithInvalidData} += 1;
    if ($This->{DetailLevel} >= 3) {
      # Report the compound string like the missing data warning does...
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data: $This->{CompoundString}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data...";
    }
    return 0;
  }

  return 1;
}
644 | |
# Set up compound ID for current fingerprints string based on CompoundIDMode...
#
# LabelPrefix - CompoundIDPrefix followed by compound number
# DataField - Value of CompoundIDFieldLabel data field; empty string for a missing field
# MolNameOrLabelPrefix - First line of compound string; for an empty first line,
# CompoundIDPrefix followed by compound number
# MolName - First line of compound string
#
# Any other CompoundIDMode value leads to an empty compound ID. Returns the object.
#
sub _GenerateCompoundID {
  my($This) = @_;

  my $Mode = $This->{CompoundIDMode};
  my $ID = '';

  if ($Mode =~ /^LabelPrefix$/i) {
    $ID = $This->{CompoundIDPrefix} . $This->{CompoundNum};
  }
  elsif ($Mode =~ /^DataField$/i) {
    my $FieldLabel = $This->{CompoundIDFieldLabel};
    if (exists $This->{DataFieldLabelsAndValues}{$FieldLabel}) {
      $ID = $This->{DataFieldLabelsAndValues}{$FieldLabel};
    }
  }
  elsif ($Mode =~ /^MolNameOrLabelPrefix$/i) {
    my($FirstLine) = split "\n", $This->{CompoundString};
    if (TextUtil::IsNotEmpty($FirstLine)) {
      $ID = $FirstLine;
    }
    else {
      $ID = $This->{CompoundIDPrefix} . $This->{CompoundNum};
    }
  }
  elsif ($Mode =~ /^MolName$/i) {
    ($ID) = split "\n", $This->{CompoundString};
  }

  $This->{CompoundID} = $ID;

  return $This;
}
673 | |
# Process first compound data string read...
#
# Only the first compound IO flag is cleared. Returns the object.
#
sub _ProcessFirstCompoundDataStringRead {
  my $This = shift;

  $This->{FirstCompoundDataIO} = 0;

  return $This;
}
684 | |
# Get ready for reading fingerprints SD file...
#
# Retrieves data fields of first compound, validates compound ID field, fingerprints
# field and, for a valid fingerprints field, fingerprints string mode. ValidFileData is
# set to 1 only when all three validation flags are true. Returns the object.
#
sub _PrepareForReadingFingerprintsSDFileData {
  my($This) = @_;

  # Retrieve SD file data fields information...
  $This->_RetrieveSDFileDataFields();

  # Validate compound and fingerprints field information...
  $This->_ValidateReadCompoundIDField();
  $This->_ValidateReadFingerprintsField();

  # Fingerprints string mode can only be checked against a valid fingerprints field...
  $This->_ValidateReadFingerprintsStringMode() if $This->{ValidFingerprintsField};

  # Set status of SD file data...
  my @FailedChecks = grep { !$This->{$_} } qw(ValidCompoundIDField ValidFingerprintsField ValidFingerprintsStringMode);
  $This->{ValidFileData} = @FailedChecks ? 0 : 1;

  return $This;
}
707 | |
# Retrieve data field labels and values for first compound in SD file...
#
# The file named by Name property is opened for reading, its first compound string is
# read using SDFileUtil::ReadCmpdString and the data field labels and values map is
# stored as FirstDataFieldLabelsAndValues. A file without any compound string leads to
# processing of an empty list of lines.
#
# Croaks when file doesn't exist or can't be opened. Returns the object.
#
sub _RetrieveSDFileDataFields {
  my($This) = @_;
  my($SDFile, $SDFileHandle, $CmpdString, @CmpdLines);

  $SDFile = $This->{Name};

  if (!(-e $SDFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, doesn't exist...";
  }

  # Three argument open uses file name exactly as specified: no special treatment of
  # leading mode characters or surrounding whitespace...
  if (!open($SDFileHandle, '<', $SDFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input SD file $SDFile: $! ...";
  }
  $CmpdString = SDFileUtil::ReadCmpdString($SDFileHandle);
  close $SDFileHandle;

  # Set up data field labels and values for first compound string data...
  @CmpdLines = defined($CmpdString) ? split("\n", $CmpdString) : ();

  %{$This->{FirstDataFieldLabelsAndValues}} = ();
  %{$This->{FirstDataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

  return $This;
}
734 | |
# Validate compound ID field information...
#
# During DataField value of CompoundIDMode, data field specified by CompoundIDFieldLabel
# must be present in data fields of first compound; otherwise, a warning is issued and
# 0 is returned. Other CompoundIDMode values need no checking.
#
# Sets ValidCompoundIDField and returns 1 for a valid compound ID field; otherwise, 0.
#
sub _ValidateReadCompoundIDField {
  my($This) = @_;

  $This->{ValidCompoundIDField} = 0;

  my $DataFieldMode = ($This->{CompoundIDMode} =~ /^DataField$/i) ? 1 : 0;
  if ($DataFieldMode) {
    my $SpecifiedDataField = $This->{CompoundIDFieldLabel};
    unless (exists $This->{FirstDataFieldLabelsAndValues}{$SpecifiedDataField}) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundIDField: Compound ID data field, $SpecifiedDataField, specified using \"CompoundIDField\" in \"DataField\" \"CompoundIDMode\" doesn't exist...";
      return 0;
    }
  }

  $This->{ValidCompoundIDField} = 1;

  return 1;
}
755 | |
# Validate fingerprints string field information...
#
# An explicitly specified FingerprintsFieldLabel must be present in data fields of first
# compound. During AutoDetect value of FingerprintsFieldLabel, a data field label
# containing the word Fingerprints, in any case, is looked for; whenever multiple labels
# match, the alphabetically first one is used to make the choice repeatable across runs.
# NOTE(review): order of data fields in SD file isn't available from the labels and
# values map, so file order can't be honored here.
#
# On success, sets ValidFingerprintsField, stores the resolved label as
# FingerprintsFieldLabel and returns 1; otherwise, issues a warning and returns 0.
#
sub _ValidateReadFingerprintsField {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  $This->{ValidFingerprintsField} = 0;

  $FingerprintsFieldLabel = '';

  if ($This->{FingerprintsFieldLabel} !~ /^AutoDetect$/i) {
    $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
    if (! exists $This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Fingerprints data field value, $FingerprintsFieldLabel, specified using \"FingerprintsFieldLabel\" doesn't exist...";
      return 0;
    }
  }
  else {
    # Make sure default fingerprints field does exist...
    my($FingerprintsFieldFound, $DataFieldLabel);
    $FingerprintsFieldFound = 0;

    # Hash keys come back in no particular order; sort them for a stable choice...
    DATAFIELDLABEL: for $DataFieldLabel (sort keys %{$This->{FirstDataFieldLabelsAndValues}}) {
      if ($DataFieldLabel =~ /Fingerprints/i) {
        $FingerprintsFieldFound = 1;
        $FingerprintsFieldLabel = $DataFieldLabel;
        last DATAFIELDLABEL;
      }
    }
    if (!$FingerprintsFieldFound) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Data field label containing \"Fingerprints\" string in its name doesn't exist...";
      return 0;
    }
  }

  $This->{ValidFingerprintsField} = 1;
  $This->{FingerprintsFieldLabel} = $FingerprintsFieldLabel;

  return 1;
}
796 | |
# Validate fingerprints string mode information...
#
# Type and description of fingerprints string for first compound are retrieved using
# Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription and
# checked against FingerprintsStringMode:
#
# FingerprintsBitVectorString - Type must be FingerprintsBitVector
# FingerprintsVectorString - Type must be FingerprintsVector
# Any other value (AutoDetect) - Type must be one of the two types above
#
# On success, ValidFingerprintsStringMode, the bit-vector/vector mode flags, and type and
# description of first fingerprints string are recorded and 1 is returned; otherwise,
# a warning is issued, those values are left reset and 0 is returned.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;
  my($FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType, $FingerprintsFieldLabel, $FingerprintsType, $FingerprintsDescription);

  # Reset status before validation...
  $This->{ValidFingerprintsStringMode} = 0;

  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;

  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  $FingerprintsBitVectorStringMode = 0;
  $FingerprintsVectorStringMode = 0;

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel});

  # A fingerprints string whose type couldn't be determined is handled like an
  # unsupported type instead of matching against and printing an undefined value...
  if (!defined $FingerprintsType) {
    $FingerprintsType = '';
  }

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsBitVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsBitVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsBitVector';
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsVector';
  }
  else {
    # AutoDetect mode...
    if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
      $FingerprintsBitVectorStringMode = 1;
    }
    elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
      $FingerprintsVectorStringMode = 1;
    }
    else {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintsBitVector or FingerprintsVector...";
      return 0;
    }
    # Type is recorded as found in fingerprints string...
    $FirstFingerprintsStringType = $FingerprintsType;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;

  $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
  $This->{FirstFingerprintsStringDescription} = $FingerprintsDescription;

  return 1;
}
865 | |
# Write a compound record to SD file with fingerprints data generated from a specified
# fingerprints, fingerprints-bit vector, or fingerprints vector object...
#
# Parameters:
#   o FingerprintsObject - object used to generate the fingerprints string
#   o CompoundString - compound data to which the fingerprints data field is added
#
# Returns the object itself.
#
sub WriteFingerprints {
  my $This = shift;
  my($FingerprintsObject, $CompoundString) = @_;

  # Start with a clean slate for the current compound...
  $This->_InitializeWriteCompoundDataString();

  # The fingerprints string is generated from the stored fingerprints object...
  $This->{FingerprintsObject} = $FingerprintsObject;
  $This->_GenerateFingerprintsString();

  # The compound string has to be in place before fingerprints data is added to it...
  $This->{CompoundString} = $CompoundString;
  $This->_AddFingerprintsDataToCompoundString();

  # Send the updated compound string to the file...
  $This->_WriteCompoundDataString();

  return $This;
}
890 | |
# Write a compound record to SD file using an already generated fingerprints string...
#
# Parameters:
#   o FingerprintsString - fingerprints string written to the file as it is
#   o CompoundString - compound data to which the fingerprints data field is added
#
# Notes:
#   o BitStringFormat, BitsOrder, and VectorStringFormat values play no part in what
#     gets written: the specified fingerprints string is not reformatted.
#   o A fingerprints object is also generated from the string for the current compound.
#   o CompoundString is expected to contain no fingerprints data.
#
# Returns the object itself.
#
sub WriteFingerprintsString {
  my $This = shift;
  my($FingerprintsString, $CompoundString) = @_;

  # Start with a clean slate for the current compound...
  $This->_InitializeWriteCompoundDataString();

  # The fingerprints object is generated from the stored fingerprints string...
  $This->{FingerprintsString} = $FingerprintsString;
  $This->_GenerateFingerprintsObject();

  # The compound string has to be in place before fingerprints data is added to it...
  $This->{CompoundString} = $CompoundString;
  $This->_AddFingerprintsDataToCompoundString();

  # Send the updated compound string to the file...
  $This->_WriteCompoundDataString();

  return $This;
}
919 | |
# Reset data tracked for the current compound before writing: compound string,
# fingerprints object, and fingerprints string are all set to undef...
#
# Returns the object itself.
#
sub _InitializeWriteCompoundDataString {
  my $This = shift;
  my($DataKey);

  for $DataKey (qw(CompoundString FingerprintsObject FingerprintsString)) {
    $This->{$DataKey} = undef;
  }

  return $This;
}
932 | |
# Write current compound string, followed by a new line, to the file handle...
#
# First compound write processing is triggered before anything is written out and
# the compound count is incremented for each call.
#
# Returns the object itself.
#
sub _WriteCompoundDataString {
  my $This = shift;

  $This->_ProcessFirstCompoundDataStringWrite() if $This->{FirstCompoundDataIO};

  $This->{CompoundNum} += 1;

  print {$This->{FileHandle}} "$This->{CompoundString}\n";

  return $This;
}
950 | |
# Process first compound data string write...
#
# The only processing performed is to turn off the first compound data IO flag
# so that this method is not triggered again for subsequent compounds.
#
# Returns the object itself.
#
sub _ProcessFirstCompoundDataStringWrite {
  my($This) = @_;

  $This->{FirstCompoundDataIO} = 0;

  return $This;
}
961 | |
# Get ready for writing fingerprints SD file...
#
# Croaks when the file already exists and Overwrite is not set. Otherwise, sets up
# FingerprintsBitVectorStringMode, FingerprintsVectorStringMode, and
# ValidFingerprintsStringMode flags from FingerprintsStringMode value and fills in
# default format values for the mode in use.
#
# Returns the object itself.
#
sub _PrepareForWritingFingerprintsSDFileData {
  my $This = shift;
  my($SDFile, $StringMode, $BitVectorMode, $VectorMode);

  $SDFile = $This->{Name};
  if (!$This->{Overwrite} && -e $SDFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, already exist. Use overwrite option...";
  }

  # Figure out which one of the two supported modes, if any, has been specified...
  $StringMode = $This->{FingerprintsStringMode};

  $BitVectorMode = ($StringMode =~ /^FingerprintsBitVectorString$/i) ? 1 : 0;
  $VectorMode = (!$BitVectorMode && $StringMode =~ /^FingerprintsVectorString$/i) ? 1 : 0;

  $This->{FingerprintsBitVectorStringMode} = $BitVectorMode;
  $This->{FingerprintsVectorStringMode} = $VectorMode;
  $This->{ValidFingerprintsStringMode} = ($BitVectorMode || $VectorMode) ? 1 : 0;

  # Setup default format values for the mode in use...
  if ($BitVectorMode) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
  }
  elsif ($VectorMode) {
    $This->_SetDefaultVectorStringFormat();
  }

  return $This;
}
1000 | |
# Use default bit string format unless a value has already been set...
#
# Returns the object itself.
#
sub _SetDefaultBitStringFormat {
  my $This = shift;

  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}
1012 | |
# Use default bits order unless a value has already been set...
#
# Returns the object itself.
#
sub _SetDefaultBitsOrder {
  my $This = shift;

  unless ($This->{BitsOrder}) {
    $This->{BitsOrder} = Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();
  }

  return $This;
}
1024 | |
# Use default vector string format unless a value has already been set...
#
# The default value is retrieved using the current fingerprints object; nothing is
# changed when no fingerprints object is available.
#
# Returns the object itself.
#
sub _SetDefaultVectorStringFormat {
  my $This = shift;

  return $This if $This->{VectorStringFormat};
  return $This unless $This->{FingerprintsObject};

  $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});

  return $This;
}
1036 | |
# Add fingerprints string to compound string as value of fingerprints field label...
#
# Any existing data for the fingerprints field label is replaced after issuing a
# warning. The new data field is placed at the end of the compound string, right
# before the record terminator.
#
# Returns the object itself.
#
sub _AddFingerprintsDataToCompoundString {
  my $This = shift;
  my($RecordData);

  # Warn about and get rid of any fingerprints data already in place...
  if ($This->_IsFingerprintsDataPresentInCompoundString()) {
    carp "Warning: ${ClassName}->_AddFingerprintsDataToCompoundString: The compound string already contains fingerprints data corresponding to fingerprints field label $This->{FingerprintsFieldLabel}; It has been replaced with new fingerprints data...";
    $This->{CompoundString} = SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
  }

  # Drop record terminator at the end; it's put back after the new data field...
  $RecordData = $This->{CompoundString};
  $RecordData =~ s/\$\$\$\$$//;

  $This->{CompoundString} = "${RecordData}> <$This->{FingerprintsFieldLabel}>\n$This->{FingerprintsString}\n\n\$\$\$\$";

  return $This;
}
1057 | |
# Is fingerprints data already present in compound string?
#
# Returns 1 when compound string contains fingerprints field label enclosed in
# angle brackets; otherwise, returns 0. An empty compound string or an empty
# fingerprints field label always leads to 0.
#
sub _IsFingerprintsDataPresentInCompoundString {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  if (TextUtil::IsEmpty($This->{CompoundString}) || TextUtil::IsEmpty($This->{FingerprintsFieldLabel})) {
    return 0;
  }

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  # Match the label literally: without quoting, characters such as ".", "+", or
  # parentheses in the label would be treated as regular expression operators...
  return ($This->{CompoundString} =~ /<\Q$FingerprintsFieldLabel\E>/) ? 1 : 0;
}
1072 | |
# Generate fingerprints object by parsing current fingerprints string...
#
# Fingerprints object is always reset to undef first. The string is parsed as a
# bit-vector or vector string based on the fingerprints string mode in use.
#
# Returns the object itself; undef is returned when a fingerprints string is
# available but neither one of the two fingerprints string modes is set.
#
sub _GenerateFingerprintsObject {
  my $This = shift;

  $This->{FingerprintsObject} = undef;

  # Nothing to parse...
  return $This unless $This->{FingerprintsString};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});
    return $This;
  }

  return undef;
}
1096 | |
# Generate fingerprints string from current fingerprints object...
#
# Fingerprints string is always reset to an empty string first. Bit string format
# and bits order are used in bit-vector string mode; vector string format is used
# in vector string mode. The string stays empty when no fingerprints object is
# available or neither mode is set.
#
# Returns the object itself.
#
sub _GenerateFingerprintsString {
  my $This = shift;

  $This->{FingerprintsString} = '';

  # Nothing to generate from...
  return $This unless $This->{FingerprintsObject};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{BitStringFormat}, $This->{BitsOrder});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{VectorStringFormat});
  }

  return $This;
}
1117 | |
# Is it a fingerprints SD file?
#
# Can be used both as a method and as a function: the file name is taken from the
# second parameter when two parameters are specified and the first one is a
# FingerprintsSDFileIO object; otherwise, the first parameter is the file name.
#
# Returns status of checking file name against "sdf" and "sd" file types.
#
sub IsFingerprintsSDFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;
  my($FileName, $Status);

  $FileName = ((@_ == 2) && (_IsFingerprintsSDFileIO($FirstParameter))) ? $SecondParameter : $FirstParameter;

  # Check file extension...
  $Status = FileUtil::CheckFileType($FileName, "sdf sd");

  return $Status;
}
1135 | |
# Is it a FingerprintsSDFileIO object?
#
# Returns 1 for a blessed reference which is an object of this class or one of
# its derived classes; otherwise, returns 0.
#
sub _IsFingerprintsSDFileIO {
  my($Object) = @_;

  # Only blessed references can be checked for class membership...
  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
1142 | |
1143 1; | |
1144 | |
1145 __END__ | |
1146 | |
1147 =head1 NAME | |
1148 | |
1149 FingerprintsSDFileIO | |
1150 | |
1151 =head1 SYNOPSIS | |
1152 | |
1153 use FileIO::FingerprintsSDFileIO; | |
1154 | |
1155 use FileIO::FingerprintsSDFileIO qw(:all); | |
1156 | |
1157 =head1 DESCRIPTION | |
1158 | |
1159 B<FingerprintsSDFileIO> class provides the following methods: | |
1160 | |
1161 new, GetCompoundString, GetFingerprints, GetFingerprintsString, | |
1162 IsFingerprintsDataValid, IsFingerprintsFileDataValid, IsFingerprintsSDFile, Next, | |
1163 Read, SetBitStringFormat, SetBitsOrder, SetCompoundIDMode, SetCompoundString, | |
1164 SetDetailLevel, SetFingerprints, SetFingerprintsString, SetFingerprintsStringMode, | |
1165 SetVectorStringFormat, WriteFingerprints, WriteFingerprintsString | |
1166 | |
1167 The following methods can also be used as functions: | |
1168 | |
1169 IsFingerprintsSDFile | |
1170 | |
1171 B<FingerprintsSDFileIO> class is derived from I<FileIO> class and uses its methods to support | |
1172 generic file related functionality. | |
1173 | |
1174 The fingerprints SD file format with B<.sdf> or B<.sd> file extensions supports two types of | |
1175 fingerprints string data: fingerprints bit-vectors and fingerprints vector strings. The fingerprints | |
1176 string data is treated as value of a fingerprints data field label in a SD file. | |
1177 | |
1178 Example of SD file format containing fingerprints string data: | |
1179 | |
1180 ... ... | |
1181 ... ... | |
1182 $$$$ | |
1183 ... ... | |
1184 ... ... | |
1185 ... ... | |
1186 41 44 0 0 0 0 0 0 0 0999 V2000 | |
1187 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 | |
1188 ... ... | |
1189 2 3 1 0 0 0 0 | |
1190 ... ... | |
1191 M END | |
1192 > <CmpdID> | |
1193 Test | |
1194 | |
1195 > <PathLengthFingerprints> | |
1196 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLengt | |
1197 h1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a49913991a66 | |
1198 03130b0a19e8051c89184414953800cc2151082844a201042800130860308e8204d4028 | |
1199 00831048940e44281c00060449a5000ac80c894114e006321264401600846c050164462 | |
1200 08190410805000304a10205b0100e04c0038ba0fad0209c0ca8b1200012268b61c0026a | |
1201 aa0660a11014a011d46 | |
1202 | |
1203 $$$$ | |
1204 ... ... | |
1205 ... ... | |
1206 | |
1207 The current release of MayaChemTools supports the following types of fingerprint | |
1208 bit-vector and vector strings: | |
1209 | |
1210 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi | |
1211 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT | |
1212 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X | |
1213 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A | |
1214 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2 | |
1215 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B... | |
1216 | |
1217 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS | |
1218 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2 | |
1219 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1 | |
1220 O.X1.BO2;2 4 14 3 10 1 1 1 3 2 | |
1221 | |
1222 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume | |
1223 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F | |
1224 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1 | |
1225 | |
1226 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN | |
1227 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C | |
1228 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N | |
1229 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8 | |
1230 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1 | |
1231 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0... | |
1232 | |
1233 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs | |
1234 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN | |
1235 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3 | |
1236 .024 -2.270 | |
1237 | |
1238 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues; | |
1239 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435 | |
1240 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1 | |
1241 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1242 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1243 | |
1244 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi | |
1245 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 | |
1246 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 | |
1247 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 | |
1248 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 | |
1249 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... | |
1250 | |
1251 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes | |
1252 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 | |
1253 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
1254 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; | |
1255 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 | |
1256 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 | |
1257 | |
1258 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
1259 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 | |
1260 0000000001010000000110000011000000000000100000000000000000000000100001 | |
1261 1000000110000000000000000000000000010011000000000000000000000000010000 | |
1262 0000000000000000000000000010000000000000000001000000000000000000000000 | |
1263 0000000000010000100001000000000000101000000000000000100000000000000... | |
1264 | |
1265 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu | |
1266 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8 | |
1267 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567 | |
1268 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012 | |
1269 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455 | |
1270 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404... | |
1271 | |
1272 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp | |
1273 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184 | |
1274 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450 | |
1275 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430 | |
1276 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134 | |
1277 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566... | |
1278 | |
1279 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000 | |
1280 0000000000000000000000000000000001001000010010000000010010000000011100 | |
1281 0100101010111100011011000100110110000011011110100110111111111111011111 | |
1282 11111111111110111000 | |
1283 | |
1284 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011 | |
1285 1110011111100101111111000111101100110000000000000011100010000000000000 | |
1286 0000000000000000000000000000000000000000000000101000000000000000000000 | |
1287 0000000000000000000000000000000000000000000000000000000000000000000000 | |
1288 0000000000000000000000000000000000000011000000000000000000000000000000 | |
1289 0000000000000000000000000000000000000000 | |
1290 | |
1291 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri | |
1292 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1293 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 | |
1294 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0 | |
1295 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1 | |
1296 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1 | |
1297 | |
1298 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri | |
1299 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0 | |
1300 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0 | |
1301 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1302 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0 | |
1303 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... | |
1304 | |
1305 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng | |
1306 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110 | |
1307 0100010101011000101001011100110001000010001001101000001001001001001000 | |
1308 0010110100000111001001000001001010100100100000000011000000101001011100 | |
1309 0010000001000101010100000100111100110111011011011000000010110111001101 | |
1310 0101100011000000010001000011000010100011101100001000001000100000000... | |
1311 | |
1312 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength | |
1313 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2 | |
1314 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X | |
1315 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1 | |
1316 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO | |
1317 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C.... | |
1318 | |
1319 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt | |
1320 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1 | |
1321 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N | |
1322 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1 | |
1323 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR | |
1324 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ... | |
1325 | |
1326 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD | |
1327 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1 | |
1328 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3. | |
1329 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...; | |
1330 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1 | |
1331 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1... | |
1332 | |
1333 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi | |
1334 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar | |
1335 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H | |
1336 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...; | |
1337 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4 | |
1338 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ... | |
1339 | |
1340 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3 | |
1341 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4- | |
1342 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO | |
1343 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...; | |
1344 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1 | |
1345 | |
1346 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica | |
1347 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC | |
1348 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC- | |
1349 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...; | |
1350 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2 | |
1351 | |
1352 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M | |
1353 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1 | |
1354 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1 | |
1355 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1 | |
1356 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....; | |
1357 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2 | |
1358 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8... | |
1359 | |
1360 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1 | |
1361 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C | |
1362 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3- | |
1363 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2 | |
1364 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C. | |
1365 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7... | |
1366 | |
1367 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min | |
1368 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H | |
1369 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2- | |
1370 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H | |
1371 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...; | |
1372 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10 | |
1373 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1 | |
1374 | |
1375 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist | |
1376 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0 | |
1377 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1 | |
1378 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0 | |
1379 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0 | |
1380 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18... | |
1381 | |
1382 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize: | |
1383 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1- | |
1384 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1 | |
1385 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1- | |
1386 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...; | |
1387 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23 | |
1388 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1 | |
1389 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ... | |
1390 | |
1391 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD | |
1392 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106 | |
1393 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0 | |
1394 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26 | |
1395 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0 | |
1396 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ... | |
1397 | |
1398 =head2 METHODS | |
1399 | |
1400 =over 4 | |
1401 | |
1402 =item B<new> | |
1403 | |
1404 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO(%IOParameters); | |
1405 | |
1406 Using specified I<IOParameters> names and values hash, B<new> method creates a new | |
1407 object and returns a reference to a newly created B<FingerprintsSDFileIO> object. By default, | |
1408 the following properties are initialized during I<Read> mode: | |
1409 | |
1410 Name = ''; | |
1411 Mode = 'Read'; | |
1412 Status = 0; | |
1413 FingerprintsStringMode = 'AutoDetect'; | |
1414 FingerprintsFieldLabel = 'AutoDetect'; | |
1415 CompoundIDMode = 'LabelPrefix'; | |
1416 CompoundIDFieldLabel = undef; | |
1417 CompoundIDPrefix = 'Cmpd'; | |
1418 ValidateData = 1; | |
1419 DetailLevel = 1; | |
1420 | |
1421 During I<Write> mode, the following properties get initialized by default: | |
1422 | |
1423 FingerprintsStringMode = undef; | |
1424 | |
1425 BitStringFormat = HexadecimalString; | |
1426 BitsOrder = Ascending; | |
1427 | |
1428 VectorStringFormat = NumericalValuesString or ValuesString; | |
1429 | |
1430 Examples: | |
1431 | |
1432 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO( | |
1433 'Name' => 'Sample.sdf', | |
1434 'Mode' => 'Read'); | |
1435 | |
1436 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO( | |
1437 'Name' => 'Sample.sdf', | |
1438 'Mode' => 'Read', | |
1439 'FingerprintsStringMode' => | |
1440 'AutoDetect', | |
1441 'FingerprintsFieldLabel' => | |
1442 'Fingerprints', | |
1443 'CompoundIDMode' => | |
1444 'DataField', | |
1445 'CompoundIDFieldLabel' => | |
1446 'CompoundID'); | |
1447 | |
1448 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO( | |
1449 'Name' => 'Sample.sdf', | |
1450 'Mode' => 'Write', | |
1451 'FingerprintsStringMode' => | |
1452 'FingerprintsBitVectorString', | |
1453 'Overwrite' => 1, | |
1454 'BitStringFormat' => 'HexadecimalString', | |
1455 'BitsOrder' => 'Ascending'); | |
1456 | |
1457 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO( | |
1458 'Name' => 'Sample.sd', | |
1459 'Mode' => 'Write', | |
1460 'FingerprintsStringMode' => | |
1461 'FingerprintsVectorString', | |
1462 'Overwrite' => 1, | |
1463 'VectorStringFormat' => 'IDsAndValuesString', | |
1464 'FingerprintsFieldLabel' => 'Fingerprints'); | |
1465 | |
1466 =item B<GetCompoundString> | |
1467 | |
1468 $CompoundString = $FingerprintsSDFileIO->GetCompoundString(); | |
1469 | |
1470 Returns B<CompoundString> for current compound. | |
1471 | |
1472 =item B<GetFingerprints> | |
1473 | |
1474 $FingerprintsObject = $FingerprintsSDFileIO->GetFingerprints(); | |
1475 | |
1476 Returns B<FingerprintsObject> generated for current compound using fingerprints bit-vector | |
1477 or vector string data. The fingerprints object corresponds to any of the supported fingerprints | |
1478 such as PathLengthFingerprints, ExtendedConnectivity, and so on. | |
1479 | |
1480 =item B<GetFingerprintsString> | |
1481 | |
1482 $FingerprintsString = $FingerprintsSDFileIO->GetFingerprintsString(); | |
1483 | |
1484 Returns B<FingerprintsString> for current compound. | |
1485 | |
1486 =item B<IsFingerprintsDataValid> | |
1487 | |
1488 $Status = $FingerprintsSDFileIO->IsFingerprintsDataValid(); | |
1489 | |
1490 Returns 1 or 0 based on whether B<FingerprintsObject> is valid. | |
1491 | |
1492 =item B<IsFingerprintsFileDataValid> | |
1493 | |
1494 $Status = $FingerprintsSDFileIO->IsFingerprintsFileDataValid(); | |
1495 | |
1496 Returns 1 or 0 based on whether fingerprints file contains valid fingerprints data. | |
1497 | |
1498 =item B<IsFingerprintsSDFile> | |
1499 | |
1500 $Status = $FingerprintsSDFileIO->IsFingerprintsSDFile($FileName); | |
1501 $Status = FileIO::FingerprintsSDFileIO::IsFingerprintsSDFile($FileName); | |
1502 | |
1503 Returns 1 or 0 based on whether I<FileName> is a SD file. | |
1504 | |
1505 =item B<Next or Read> | |
1506 | |
1507 $FingerprintsSDFileIO = $FingerprintsSDFileIO->Next(); | |
1508 $FingerprintsSDFileIO = $FingerprintsSDFileIO->Read(); | |
1509 | |
1510 Reads next available compound fingerprints in SD file, processes the data, generates appropriate | |
1511 fingerprints object, and returns B<FingerprintsSDFileIO>. The generated fingerprints object is available | |
1512 using method B<GetFingerprints>. | |
1513 | |
1514 =item B<SetBitStringFormat> | |
1515 | |
1516 $FingerprintsSDFileIO->SetBitStringFormat($Format); | |
1517 | |
1518 Sets bit string I<Format> for fingerprints bit-vector string data in a SD file and returns B<FingerprintsSDFileIO>. | |
1519 Possible values for B<BitStringFormat>: I<BinaryString or HexadecimalString>. | |
1520 | |
1521 =item B<SetBitsOrder> | |
1522 | |
1523 $FingerprintsSDFileIO->SetBitsOrder($BitsOrder); | |
1524 | |
1525 Sets I<BitsOrder> for fingerprints bit-vector string data in SD file and returns B<FingerprintsSDFileIO>. | |
1526 Possible values for B<BitsOrder>: I<Ascending or Descending>. | |
1527 | |
1528 =item B<SetCompoundIDMode> | |
1529 | |
1530 $FingerprintsSDFileIO->SetCompoundIDMode($Mode); | |
1531 | |
1532 Sets compound ID I<Mode> for fingerprints bit-vector string data in a SD file and returns B<FingerprintsSDFileIO>. | |
1533 Possible values for B<CompoundIDMode>: I<DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix>. | |
1534 | |
1535 =item B<SetCompoundString> | |
1536 | |
1537 $FingerprintsSDFileIO->SetCompoundString($CompoundString); | |
1538 | |
1539 Sets I<CompoundString> and returns B<FingerprintsSDFileIO>. | |
1540 | |
1541 =item B<SetDetailLevel> | |
1542 | |
1543 $FingerprintsSDFileIO->SetDetailLevel($Level); | |
1544 | |
1545 Sets details I<Level> for generating diagnostics messages during SD file processing and returns | |
1546 B<FingerprintsSDFileIO>. Possible values: I<Positive integers>. | |
1547 | |
1548 =item B<SetFingerprints> | |
1549 | |
1550 $FingerprintsSDFileIO->SetFingerprints($FingerprintsObject); | |
1551 | |
1552 Sets I<FingerprintsObject> for current data line and returns B<FingerprintsSDFileIO>. | |
1553 | |
1554 =item B<SetFingerprintsString> | |
1555 | |
1556 $FingerprintsSDFileIO->SetFingerprintsString($FingerprintsString); | |
1557 | |
1558 Sets I<FingerprintsString> for current data line and returns B<FingerprintsSDFileIO>. | |
1559 | |
1560 =item B<SetFingerprintsStringMode> | |
1561 | |
1562 $FingerprintsSDFileIO->SetFingerprintsStringMode($Mode); | |
1563 | |
1564 Sets I<FingerprintsStringMode> for SD file and returns B<FingerprintsSDFileIO>. | |
1565 Possible values: I<AutoDetect, FingerprintsBitVectorString, or FingerprintsVectorString>. | |
1566 | |
1567 =item B<SetVectorStringFormat> | |
1568 | |
1569 $FingerprintsSDFileIO->SetVectorStringFormat($Format); | |
1570 | |
1571 Sets I<VectorStringFormat> for SD file and returns B<FingerprintsSDFileIO>. Possible values: | |
1572 I<IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString>. | |
1573 | |
1574 =item B<WriteFingerprints> | |
1575 | |
1576 $FingerprintsSDFileIO->WriteFingerprints($FingerprintsObject, | |
1577 $CompoundID); | |
1578 | |
1579 Writes fingerprints string generated from I<FingerprintsObject> object and other data including | |
1580 I<CompoundID> to SD file and returns B<FingerprintsSDFileIO>. | |
1581 | |
1582 =item B<WriteFingerprintsString> | |
1583 | |
1584 $FingerprintsSDFileIO->WriteFingerprintsString($FingerprintsString, | |
1585 $CompoundID); | |
1586 | |
1587 Writes I<FingerprintsString> and other data including I<CompoundID> to SD file and returns | |
1588 B<FingerprintsSDFileIO>. | |
1589 | |
1590 Caveats: | |
1591 | |
1592 o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat | |
1593 values are ignored during writing; the fingerprints string is written to | |
1594 the file as it is. | |
1595 o CompoundString is not checked to remove any existing fingerprints data | |
1596 | |
1597 | |
1598 =back | |
1599 | |
1600 =head1 AUTHOR | |
1601 | |
1602 Manish Sud <msud@san.rr.com> | |
1603 | |
1604 =head1 SEE ALSO | |
1605 | |
1606 FingerprintsTextFileIO.pm, FingerprintsFPFileIO.pm, SDFileIO.pm | |
1607 | |
1608 =head1 COPYRIGHT | |
1609 | |
1610 Copyright (C) 2015 Manish Sud. All rights reserved. | |
1611 | |
1612 This file is part of MayaChemTools. | |
1613 | |
1614 MayaChemTools is free software; you can redistribute it and/or modify it under | |
1615 the terms of the GNU Lesser General Public License as published by the Free | |
1616 Software Foundation; either version 3 of the License, or (at your option) | |
1617 any later version. | |
1618 | |
1619 =cut |