0
|
1 package FileIO::FingerprintsSDFileIO;
|
|
2 #
|
|
3 # $RCSfile: FingerprintsSDFileIO.pm,v $
|
|
4 # $Date: 2015/02/28 20:48:43 $
|
|
5 # $Revision: 1.18 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Scalar::Util ();
|
|
33 use TextUtil ();
|
|
34 use FileUtil ();
|
|
35 use SDFileUtil ();
|
|
36 use Fingerprints::FingerprintsStringUtil ();
|
|
37 use FileIO::FileIO;
|
|
38
|
|
39 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
|
|
40
|
|
41 @ISA = qw(FileIO::FileIO Exporter);
|
|
42 @EXPORT = qw();
|
|
43 @EXPORT_OK = qw(IsFingerprintsSDFile);
|
|
44
|
|
45 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
|
|
46
|
|
47 # Setup class variables...
|
|
48 my($ClassName);
|
|
49 _InitializeClass();
|
|
50
|
|
# Class constructor: create a FingerprintsSDFileIO object from property
# names and values...
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # The parent class builds the base object; rebless it into the requested
  # class, which may have been given as a class name or as an object...
  my $This = bless $Class->SUPER::new(), (ref($Class) || $Class);

  # Defaults first; caller specified properties are applied on top of them...
  $This->_InitializeFingerprintsSDFileIO();
  $This->_InitializeFingerprintsSDFileIOProperties(%NamesAndValues);

  return $This;
}
|
|
64
|
|
# Set up default values for object data shared by read and write modes and
# then initialize the read and write specific data...
#
sub _InitializeFingerprintsSDFileIO {
  my($This) = @_;

  my %Defaults = (
    # Fingerprints string data format: AutoDetect (read only),
    # FingerprintsBitVectorString or FingerprintsVectorString. It stays
    # undefined here: AutoDetect is filled in for read mode later on, and
    # write/append modes require an explicit value...
    FingerprintsStringMode => undef,

    # Fingerprints bit-vector or vector object for the current fingerprints
    # string; during write, any supported fingerprints object such as
    # PathLengthFingerprints, ExtendedConnectivity, and so on...
    FingerprintsObject => undef,

    # SD file data field label holding the fingerprints string. During read,
    # AutoDetect implies the use of a data field containing the word
    # Fingerprints in its label; otherwise, a valid data field name must be
    # specified. During write, the label defaults to Fingerprints...
    FingerprintsFieldLabel => undef,

    # Fingerprints string for the current compound during read/write...
    FingerprintsString => undef,

    # Set until the first compound data string is read/written...
    FirstCompoundDataIO => 1,

    # Number of the current compound during read/write...
    CompoundNum => 0,

    # Compound data string during read/write...
    CompoundString => undef,
  );
  @{$This}{keys %Defaults} = values %Defaults;

  # Parameters specific to reading...
  $This->_InitializeFingerprintsSDFileIORead();

  # Parameters specific to writing...
  $This->_InitializeFingerprintsSDFileIOWrite();

  return $This;
}
|
|
134
|
|
# Initialize class level data...
sub _InitializeClass {
  # Remember the package name; it's interpolated into the error and warning
  # messages generated by the methods of this class...
  $ClassName = __PACKAGE__;
}
|
|
141
|
|
# Set up default values for object data used while reading a fingerprints
# SD file...
#
sub _InitializeFingerprintsSDFileIORead {
  my($This) = @_;

  my %ReadDefaults = (
    # How compound IDs are generated for fingerprints. Possible values:
    # DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix.
    #
    #   DataField            - value of data field CompoundIDFieldLabel
    #   MolName              - molname line of the compound
    #   LabelPrefix          - CompoundIDPrefix followed by compound number
    #   MolNameOrLabelPrefix - molname line; empty molname lines fall back to
    #                          the LabelPrefix style ID
    CompoundIDMode => 'LabelPrefix',

    # Data field label whose value is used as compound ID during DataField
    # value of CompoundIDMode...
    CompoundIDFieldLabel => undef,

    # Prefix for sequential compound IDs; the default generates IDs which
    # look like Cmpd<Number>...
    CompoundIDPrefix => 'Cmpd',

    # Validation of fingerprints data before generating fingerprints objects
    # is turned on by default; Read skips the validation step for a false value...
    ValidateData => 1,

    # Level of detail for warnings about missing or invalid data: nothing is
    # printed below 2 and compound data is included starting at 3...
    DetailLevel => 1,

    # Number of compounds with missing and invalid fingerprints data...
    NumOfCmpdsWithMissingData => 0,
    NumOfCmpdsWithInvalidData => 0,

    # Compound ID for current fingerprints string...
    CompoundID => undef,

    # Status of data in fingerprints SD file along with the individual
    # checks it's derived from...
    ValidFileData => 0,
    ValidCompoundIDField => 0,
    ValidFingerprintsField => 0,
    ValidFingerprintsStringMode => 0,
  );
  @{$This}{keys %ReadDefaults} = values %ReadDefaults;

  # Data field labels and values map for current compound data...
  %{$This->{DataFieldLabelsAndValues}} = ();

  return $This;
}
|
|
201
|
|
# Set up default values for object data used while writing a fingerprints
# SD file...
#
sub _InitializeFingerprintsSDFileIOWrite {
  my($This) = @_;

  my %WriteDefaults = (
    # Fingerprints bit vector string format: BinaryString or HexadecimalString.
    # Left undefined here; according to the original notes, the default is
    # picked up during first write using
    # Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat...
    BitStringFormat => undef,

    # Bits order in fingerprints bit vector string:
    #
    #   Ascending  - First bit in each byte as the lowest bit
    #   Descending - First bit in each byte as the highest bit
    #
    # Left undefined here; according to the original notes, the default is
    # picked up during first write using
    # Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder...
    BitsOrder => undef,

    # Fingerprints vector string format: IDsAndValuesString,
    # IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or
    # ValuesString. Left undefined here; according to the original notes, the
    # default is picked up during first write using
    # Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat...
    VectorStringFormat => undef,

    # Overwriting of an existing file is off by default...
    Overwrite => 0,
  );
  @{$This}{keys %WriteDefaults} = values %WriteDefaults;

  return $This;
}
|
|
239
|
|
# Apply caller specified property names and values to the object and run the
# mode specific initialization...
#
sub _InitializeFingerprintsSDFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # Every property goes through its Set<PropertyName> method. As noted in the
  # original code, setters not defined in this class are implemented on-demand
  # using ObjectProperty class...
  for my $PropertyName (keys %NamesAndValues) {
    my $SetMethodName = "Set${PropertyName}";
    $This->$SetMethodName($NamesAndValues{$PropertyName});
  }

  # A file name is mandatory...
  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name..."
    unless exists $NamesAndValues{Name};

  # ... and it must look like a fingerprints SD file...
  my $FileName = $NamesAndValues{Name};
  croak "Error: ${ClassName}->New: Object can't be instantiated: File, $FileName, doesn't appear to be fingerprints format..."
    unless $This->IsFingerprintsSDFile($FileName);

  # Mode specific set up; any other mode value needs no further work here...
  if ($This->GetMode() =~ /^Read$/i) {
    $This->_InitializeFingerprintsSDFileIOReadProperties(%NamesAndValues);
  }
  elsif ($This->GetMode() =~ /^(Write|Append)$/i) {
    $This->_InitializeFingerprintsSDFileIOWriteProperties(%NamesAndValues);
  }

  return $This;
}
|
|
272
|
|
# Finish set up of object properties for reading a fingerprints SD file:
# fill in read defaults, check the compound ID settings and examine the file...
#
sub _InitializeFingerprintsSDFileIOReadProperties {
  my($This, %NamesAndValues) = @_;

  # Both FingerprintsStringMode and FingerprintsFieldLabel fall back to
  # AutoDetect during read...
  $This->{FingerprintsStringMode} ||= 'AutoDetect';
  $This->{FingerprintsFieldLabel} ||= 'AutoDetect';

  # DataField value of CompoundIDMode is meaningless without a data field label...
  my $UsesDataField = $This->{CompoundIDMode} =~ /^DataField$/i;
  if ($UsesDataField && !defined($This->{CompoundIDFieldLabel})) {
    croak "Error: ${ClassName}->: Object can't be instantiated: Compound ID data field lable must be specifed using \"CompoundIDFieldLabel\" during \"DataField\" value of \"CompoundIDMode\"...";
  }

  $This->_PrepareForReadingFingerprintsSDFileData();

  return $This;
}
|
|
297
|
|
# Finish set up of object properties for writing a fingerprints SD file:
# check FingerprintsStringMode, fill in write defaults and get ready to write...
#
sub _InitializeFingerprintsSDFileIOWriteProperties {
  my($This, %NamesAndValues) = @_;

  # FingerprintsStringMode must be specified explicitly for write/append...
  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode..."
    unless exists $NamesAndValues{FingerprintsStringMode};

  # ... and AutoDetect is not an acceptable value in these modes...
  unless ($This->{FingerprintsStringMode} =~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  # Data field label used for the fingerprints string defaults to Fingerprints...
  $This->{FingerprintsFieldLabel} ||= 'Fingerprints';

  $This->_PrepareForWritingFingerprintsSDFileData();

  return $This;
}
|
|
321
|
|
# Set FingerprintsStringMode after validating its value. Supported values:
#
#   AutoDetect                  - automatically detect format of fingerprints string
#   FingerprintsBitVectorString - Bit vector fingerprints string format
#   FingerprintsVectorString    - Vector fingerprints string format
#
# Returns the object; croaks on any other value.
#
sub SetFingerprintsStringMode {
  my($This, $Value) = @_;

  unless ($Value =~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString...";
  }
  $This->{FingerprintsStringMode} = $Value;

  return $This;
}
|
|
339
|
|
# Set CompoundIDMode after validating its value. Supported values, matched
# case-insensitively: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix.
#
# Returns the object; croaks on any other value.
#
sub SetCompoundIDMode {
  my($This, $Value) = @_;

  if ($Value !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
    # The message names this method; it used to point at SetFingerprintsStringMode...
    croak "Error: ${ClassName}->SetCompoundIDMode: CompoundIDMode value, $Value, is not valid; Supported values: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix...";
  }

  $This->{CompoundIDMode} = $Value;

  return $This;
}
|
|
353
|
|
# Set DetailLevel, the level of detail used for warnings about missing or
# invalid data. The value must satisfy TextUtil::IsPositiveInteger.
#
# Returns the object; croaks on any other value.
#
sub SetDetailLevel {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0..."
    unless TextUtil::IsPositiveInteger($Value);

  $This->{DetailLevel} = $Value;

  return $This;
}
|
|
367
|
|
# Set BitStringFormat after validating its value. Supported values, matched
# case-insensitively: BinaryString or HexadecimalString.
#
# Returns the object; croaks on any other value.
#
sub SetBitStringFormat {
  my($This, $Value) = @_;

  unless ($Value =~ /^(BinaryString|HexadecimalString)$/i) {
    croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";
  }
  $This->{BitStringFormat} = $Value;

  return $This;
}
|
|
381
|
|
# Set BitsOrder after validating its value. Supported values, matched
# case-insensitively:
#
#   Ascending  - First bit in each byte as the lowest bit
#   Descending - First bit in each byte as the highest bit
#
# Returns the object; croaks on any other value.
#
sub SetBitsOrder {
  my($This, $Value) = @_;

  if ($Value !~ /^(Ascending|Descending)$/i) {
    # The message reports the BitsOrder property; it used to blame FingerprintsStringMode...
    croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
  }

  $This->{BitsOrder} = $Value;

  return $This;
}
|
|
398
|
|
# Set VectorStringFormat after validating its value. Supported values, matched
# case-insensitively: IDsAndValuesString, IDsAndValuesPairsString,
# ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString.
#
# Returns the object; croaks on any other value.
#
sub SetVectorStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
    # The message reports the VectorStringFormat property; it used to blame FingerprintsStringMode...
    croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
  }

  $This->{VectorStringFormat} = $Value;

  return $This;
}
|
|
414
|
|
# Get compound string for current compound. For a true value of the optional
# RemoveFingerprintsData flag, the fingerprints data field is removed from the
# returned string provided it's present in the compound string...
#
sub GetCompoundString {
  my($This, $RemoveFingerprintsData) = @_;

  # Nothing requested to be removed, or nothing there to remove: hand back the
  # stored string as it is...
  return $This->{CompoundString}
    unless $RemoveFingerprintsData && $This->_IsFingerprintsDataPresentInCompoundString();

  return SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
}
|
|
428
|
|
# Store compound string for current compound and return the object...
#
sub SetCompoundString {
  my $This = shift;

  ($This->{CompoundString}) = @_;

  return $This;
}
|
|
438
|
|
# Get fingerprints object for current compound. According to the notes in the
# object initialization, it's a fingerprints bit-vector or vector object
# during read, and may be any supported fingerprints object such as
# PathLengthFingerprints, ExtendedConnectivity, and so on during write.
#
sub GetFingerprints {
  my $This = shift;

  return $This->{FingerprintsObject};
}
|
|
448
|
|
# Store fingerprints object for current compound and return the object...
#
sub SetFingerprints {
  my $This = shift;

  ($This->{FingerprintsObject}) = @_;

  return $This;
}
|
|
458
|
|
# Get fingerprints string for current compound; the string 'None' is returned
# whenever the stored fingerprints string is undefined, empty or otherwise false...
#
sub GetFingerprintsString {
  my $This = shift;

  return $This->{FingerprintsString} || 'None';
}
|
|
466
|
|
# Store fingerprints string for current compound and return the object...
#
sub SetFingerprintsString {
  my $This = shift;

  ($This->{FingerprintsString}) = @_;

  return $This;
}
|
|
476
|
|
# Does fingerprints SD file contain valid data? Returns 1 or 0 based on the
# status determined while getting ready to read the file...
#
sub IsFingerprintsFileDataValid {
  my $This = shift;

  if ($This->{ValidFileData}) {
    return 1;
  }
  return 0;
}
|
|
484
|
|
# Does current compound contain valid fingerprints object data? Returns 1
# when a fingerprints object is defined for the current compound and 0 otherwise...
#
sub IsFingerprintsDataValid {
  my $This = shift;

  if (defined $This->{FingerprintsObject}) {
    return 1;
  }
  return 0;
}
|
|
492
|
|
# Read next available compound data string, process it and generate
# appropriate fingerprints objects...
#
# Returns undef once no more compound data is available; otherwise, the object
# is returned - even for compounds with missing or invalid fingerprints data,
# which leave no fingerprints object behind.
#
sub Read {
  my($This) = @_;

  # Out of compound data...
  return undef unless $This->_ReadCompoundDataString();

  # A SD file already found to contain invalid data isn't processed any
  # further; during validation, its compounds are counted as missing data...
  if (!$This->{ValidFileData}) {
    $This->{NumOfCmpdsWithMissingData} += 1 if $This->{ValidateData};
    return $This;
  }

  # Optional data validation; compounds failing it are skipped...
  return $This if $This->{ValidateData} && !$This->_ValidateReadCompoundDataString();

  # Set up fingerprints string; the data field is checked again to handle
  # problematic data for non-validated compound string data...
  my $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
  $This->{FingerprintsString} = $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel}
    if exists $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel};

  # Generate fingerprints object followed by compound ID for fingerprints string...
  $This->_GenerateFingerprintsObject();
  $This->_GenerateCompoundID();

  return $This;
}
|
|
536
|
|
# Synonym for Read: read next available compound data string, process it and
# generate appropriate fingerprints objects...
#
sub Next {
  my $This = shift;

  return $This->Read();
}
|
|
545
|
|
# Read next compound data string from the file handle and set up its data
# field labels and values. Returns 1 on success and 0 when no compound string
# could be read...
#
sub _ReadCompoundDataString {
  my($This) = @_;

  # One time processing before the first compound is read...
  $This->_ProcessFirstCompoundDataStringRead() if $This->{FirstCompoundDataIO};

  # Forget everything about the previous compound...
  $This->_InitializeReadCompoundDataString();

  # Get next compound data string...
  my $CompoundString = SDFileUtil::ReadCmpdString($This->{FileHandle});
  $This->{CompoundString} = $CompoundString;
  return 0 unless $CompoundString;

  $This->{CompoundNum} += 1;

  # Set up data field labels and values...
  my @CompoundLines = split "\n", $CompoundString;
  %{$This->{DataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CompoundLines);

  return 1;
}
|
|
573
|
|
# Clear out data for current compound before the next compound data string
# is read...
#
sub _InitializeReadCompoundDataString {
  my($This) = @_;

  for my $Key (qw(CompoundID CompoundString FingerprintsObject FingerprintsString)) {
    $This->{$Key} = undef;
  }

  # The existing labels and values map is emptied in place...
  %{$This->{DataFieldLabelsAndValues}} = ();

  return $This;
}
|
|
589
|
|
# Validate fingerprints data in current compound data string...
#
# The fingerprints data field must be present, its value must satisfy
# Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid, and
# its type and description must be the same, ignoring case, as the ones
# recorded for the first compound in the file.
#
# Returns 1 for valid data. Otherwise, NumOfCmpdsWithMissingData or
# NumOfCmpdsWithInvalidData is incremented, a warning is issued for
# DetailLevel >= 2 - including compound string for DetailLevel >= 3 - and 0
# is returned.
#
sub _ValidateReadCompoundDataString {
  my($This) = @_;
  my($FingerprintsFieldLabel, $FingerprintsString);

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  # Check for missing data...
  if (!exists $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
    $This->{NumOfCmpdsWithMissingData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data: $This->{CompoundString}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data...";
    }
    return 0;
  }
  $FingerprintsString = $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel};

  # Check for invalid data; data is invalid until proven otherwise...
  my($InvalidFingerprintsData, $FingerprintsType, $FingerprintsDescription);

  $InvalidFingerprintsData = 1;

  if (Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($FingerprintsString)) {
    ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($FingerprintsString);

    # Type and description are data, not patterns: \Q...\E keeps any regular
    # expression metacharacters in them from matching something else...
    if (defined($FingerprintsType) && defined($FingerprintsDescription)
        && $This->{FirstFingerprintsStringType} =~ /^\Q$FingerprintsType\E$/i
        && $This->{FirstFingerprintsStringDescription} =~ /^\Q$FingerprintsDescription\E$/i) {
      $InvalidFingerprintsData = 0;
    }
  }

  if ($InvalidFingerprintsData) {
    $This->{NumOfCmpdsWithInvalidData} += 1;
    if ($This->{DetailLevel} >= 3) {
      # Compound data lives in CompoundString; this class has no DataLine...
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data: $This->{CompoundString}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data...";
    }
    return 0;
  }

  return 1;
}
|
|
644
|
|
# Set up compound ID for current fingerprints string based on CompoundIDMode:
#
#   LabelPrefix          - CompoundIDPrefix followed by compound number
#   DataField            - value of data field CompoundIDFieldLabel; empty
#                          string for a missing data field
#   MolNameOrLabelPrefix - first line of compound string; LabelPrefix style ID
#                          for an empty first line
#   MolName              - first line of compound string
#
# Any other mode leaves an empty compound ID behind.
#
sub _GenerateCompoundID {
  my($This) = @_;
  my $Mode = $This->{CompoundIDMode};
  my $CompoundID = '';

  if ($Mode =~ /^LabelPrefix$/i) {
    $CompoundID = "$This->{CompoundIDPrefix}$This->{CompoundNum}";
  }
  elsif ($Mode =~ /^DataField$/i) {
    my $DataFieldLabel = $This->{CompoundIDFieldLabel};
    if (exists $This->{DataFieldLabelsAndValues}{$DataFieldLabel}) {
      $CompoundID = $This->{DataFieldLabelsAndValues}{$DataFieldLabel};
    }
  }
  elsif ($Mode =~ /^MolNameOrLabelPrefix$/i) {
    my($MolName) = split "\n", $This->{CompoundString};
    if (TextUtil::IsNotEmpty($MolName)) {
      $CompoundID = $MolName;
    }
    else {
      $CompoundID = "$This->{CompoundIDPrefix}$This->{CompoundNum}";
    }
  }
  elsif ($Mode =~ /^MolName$/i) {
    # Molname line is used as it is, even when empty...
    ($CompoundID) = split "\n", $This->{CompoundString};
  }

  $This->{CompoundID} = $CompoundID;

  return $This;
}
|
|
673
|
|
# Process first compound data string read: all there is to do is to note
# that the first read is under way, so this isn't invoked again...
#
sub _ProcessFirstCompoundDataStringRead {
  my $This = shift;

  $This->{FirstCompoundDataIO} = 0;

  return $This;
}
|
|
684
|
|
# Get ready for reading fingerprints SD file: examine data fields of the first
# compound and work out whether the file contains usable data...
#
sub _PrepareForReadingFingerprintsSDFileData {
  my($This) = @_;

  # Retrieve SD file data fields information...
  $This->_RetrieveSDFileDataFields();

  # Validate compound ID and fingerprints field information...
  $This->_ValidateReadCompoundIDField();
  $This->_ValidateReadFingerprintsField();

  # Fingerprints string mode can only be validated for a valid fingerprints field...
  $This->_ValidateReadFingerprintsStringMode() if $This->{ValidFingerprintsField};

  # SD file data is valid only when all three checks have succeeded...
  my $ValidFileData = 0;
  if ($This->{ValidCompoundIDField} && $This->{ValidFingerprintsField} && $This->{ValidFingerprintsStringMode}) {
    $ValidFileData = 1;
  }
  $This->{ValidFileData} = $ValidFileData;

  return $This;
}
|
|
707
|
|
# Retrieve data field labels and values for the first compound in SD file and
# store them as FirstDataFieldLabelsAndValues...
#
# The file named by the Name property is opened on a private file handle,
# leaving the object's own file handle alone. Croaks when the file doesn't
# exist or can't be opened. Returns the object.
#
sub _RetrieveSDFileDataFields {
  my($This) = @_;
  my($SDFile, $SDFileHandle, $CmpdString, @CmpdLines);

  $SDFile = $This->{Name};

  if (!(-e $SDFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, doesn't exist...";
  }

  # Three-argument open with a lexical file handle: the file name is used as
  # it is, so mode characters or whitespace in it can't alter what's opened...
  if (!open $SDFileHandle, '<', $SDFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input SD file $SDFile: $! ...";
  }
  $CmpdString = SDFileUtil::ReadCmpdString($SDFileHandle);
  close $SDFileHandle;

  # Set up data field labels and values for first compound string data; a
  # file without any compound data is treated as having no lines...
  @CmpdLines = defined($CmpdString) ? split("\n", $CmpdString) : ();

  %{$This->{FirstDataFieldLabelsAndValues}} = ();
  %{$This->{FirstDataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

  return $This;
}
|
|
734
|
|
# Validate compound ID field information and record the outcome as
# ValidCompoundIDField...
#
# During DataField value of CompoundIDMode, the data field CompoundIDFieldLabel
# must exist in the first compound; otherwise, a warning is issued and 0 is
# returned. All other modes are always valid. Returns 1 on success.
#
sub _ValidateReadCompoundIDField {
  my($This) = @_;

  $This->{ValidCompoundIDField} = 0;

  my $UsesDataField = $This->{CompoundIDMode} =~ /^DataField$/i;
  if ($UsesDataField) {
    my $SpecifiedDataField = $This->{CompoundIDFieldLabel};
    unless (exists $This->{FirstDataFieldLabelsAndValues}{$SpecifiedDataField}) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundIDField: Compound ID data field, $SpecifiedDataField, specified using \"CompoundIDField\" in \"DataField\" \"CompoundIDMode\" doesn't exist...";
      return 0;
    }
  }

  $This->{ValidCompoundIDField} = 1;

  return 1;
}
|
|
755
|
|
# Validate fingerprints string field information and record the outcome as
# ValidFingerprintsField...
#
# An explicitly specified FingerprintsFieldLabel must exist in the first
# compound. During AutoDetect, a data field label containing the word
# Fingerprints, ignoring case, is looked for; on success, FingerprintsFieldLabel
# is updated to the label found.
#
# Returns 1 on success; otherwise, a warning is issued and 0 is returned.
#
sub _ValidateReadFingerprintsField {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  $This->{ValidFingerprintsField} = 0;

  if ($This->{FingerprintsFieldLabel} !~ /^AutoDetect$/i) {
    $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
    if (!exists $This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Fingerprints data field value, $FingerprintsFieldLabel, specified using \"FingerprintsFieldLabel\" doesn't exist...";
      return 0;
    }
  }
  else {
    # Hash keys come back in no particular order, which may differ from run to
    # run. Labels are sorted so that the same label is selected every time
    # when more than one of them contains Fingerprints...
    ($FingerprintsFieldLabel) = grep { /Fingerprints/i } sort keys %{$This->{FirstDataFieldLabelsAndValues}};
    if (!defined $FingerprintsFieldLabel) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Data field label containing \"Fingerprints\" string in its name doesn't exist...";
      return 0;
    }
  }

  $This->{ValidFingerprintsField} = 1;
  $This->{FingerprintsFieldLabel} = $FingerprintsFieldLabel;

  return 1;
}
|
|
796
|
|
# Validate fingerprints string mode information against the fingerprints
# string of the first compound...
#
# On success, ValidFingerprintsStringMode is set along with
# FingerprintsBitVectorStringMode or FingerprintsVectorStringMode, type and
# description of the first fingerprints string are recorded and 1 is returned.
# On failure, a warning is issued, these values stay reset and 0 is returned.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;

  # Reset everything; this is what stays in place after any failure below...
  $This->{ValidFingerprintsStringMode} = 0;
  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;
  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  # Type and description of fingerprints string for the first compound...
  my $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
  my($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel});

  my $IsBitVectorType = ($FingerprintsType =~ /^FingerprintsBitVector$/i) ? 1 : 0;
  my $IsVectorType = ($FingerprintsType =~ /^FingerprintsVector$/i) ? 1 : 0;
  my $FirstFingerprintsStringType;

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    if (!$IsBitVectorType) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FirstFingerprintsStringType = 'FingerprintsBitVector';
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    if (!$IsVectorType) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FirstFingerprintsStringType = 'FingerprintsVector';
  }
  else {
    # AutoDetect mode: go along with whatever supported type is found...
    if (!$IsBitVectorType && !$IsVectorType) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintBitVector or FingerprintsVector...";
      return 0;
    }
    $FirstFingerprintsStringType = $FingerprintsType;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  # The two type patterns can't both match; at most one of these is set...
  $This->{FingerprintsBitVectorStringMode} = $IsBitVectorType;
  $This->{FingerprintsVectorStringMode} = $IsVectorType;

  $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
  $This->{FirstFingerprintsStringDescription} = $FingerprintsDescription;

  return 1;
}
|
|
865
|
|
# Generate fingerprints string from the specified fingerprints, fingerprints-bit vector, or
# fingerprints vector object, add it to the compound string as fingerprints field data, and
# write the compound string to SD file...
#
# Parameters:
#   o FingerprintsObject - Object used to generate fingerprints string
#   o CompoundString - SD file data for the compound
#
# Returns FingerprintsSDFileIO object.
#
sub WriteFingerprints {
  my $This = shift;
  my($FingerprintsObject, $CompoundString) = @_;

  # Start with a clean slate: compound string, fingerprints object, and fingerprints
  # string for the previous compound are cleared...
  $This->_InitializeWriteCompoundDataString();

  # Turn fingerprints object into fingerprints string...
  $This->{FingerprintsObject} = $FingerprintsObject;
  $This->_GenerateFingerprintsString();

  # Attach fingerprints string to the compound string as fingerprints field data...
  $This->{CompoundString} = $CompoundString;
  $This->_AddFingerprintsDataToCompoundString();

  # And send it to the file...
  $This->_WriteCompoundDataString();

  return $This;
}
|
|
890
|
|
# Add the specified fingerprints string to the compound string as fingerprints field data
# and write the compound string to SD file...
#
# Parameters:
#   o FingerprintsString - Fingerprints string written to the file as it is
#   o CompoundString - SD file data for the compound
#
# Notes:
#   o BitStringFormat, BitsOrder, and VectorStringFormat values are not applied to
#     FingerprintsString; it's written to the file as it is.
#   o FingerprintsStringMode flags are only used to generate fingerprints object
#     corresponding to FingerprintsString.
#
# Returns FingerprintsSDFileIO object.
#
sub WriteFingerprintsString {
  my $This = shift;
  my($FingerprintsString, $CompoundString) = @_;

  # Start with a clean slate: compound string, fingerprints object, and fingerprints
  # string for the previous compound are cleared...
  $This->_InitializeWriteCompoundDataString();

  # Keep fingerprints string and its corresponding fingerprints object around...
  $This->{FingerprintsString} = $FingerprintsString;
  $This->_GenerateFingerprintsObject();

  # Attach fingerprints string to the compound string as fingerprints field data...
  $This->{CompoundString} = $CompoundString;
  $This->_AddFingerprintsDataToCompoundString();

  # And send it to the file...
  $This->_WriteCompoundDataString();

  return $This;
}
|
|
919
|
|
# Clear data for the current compound before writing a new one: compound string,
# fingerprints object, and fingerprints string are all set to undef...
#
# Returns FingerprintsSDFileIO object.
#
sub _InitializeWriteCompoundDataString {
  my($This) = @_;

  @{$This}{qw(CompoundString FingerprintsObject FingerprintsString)} = (undef, undef, undef);

  return $This;
}
|
|
932
|
|
# Write compound data string...
#
# The current compound string followed by a new line is printed to the file handle
# and the count of compounds is incremented by one.
#
# Returns FingerprintsSDFileIO object.
#
sub _WriteCompoundDataString {
  my($This) = @_;

  # Take care of any processing needed before the first compound is written...
  $This->_ProcessFirstCompoundDataStringWrite() if $This->{FirstCompoundDataIO};

  $This->{CompoundNum} += 1;

  print {$This->{FileHandle}} "$This->{CompoundString}\n";

  return $This;
}
|
|
950
|
|
# Process first compound data string write...
#
# Only the FirstCompoundDataIO flag is turned off here, so that this processing is
# skipped by _WriteCompoundDataString for all subsequent compounds.
#
# Returns FingerprintsSDFileIO object.
#
sub _ProcessFirstCompoundDataStringWrite {
  my($This) = @_;

  $This->{FirstCompoundDataIO} = 0;

  return $This;
}
|
|
961
|
|
# Get ready for writing fingerprints SD file...
#
# The following steps are performed:
#   o Make sure an existing file doesn't get overwritten unless Overwrite is set
#   o Set up FingerprintsBitVectorStringMode, FingerprintsVectorStringMode, and
#     ValidFingerprintsStringMode flags using FingerprintsStringMode value
#   o Set up default values for string format parameters used by the mode
#
# Returns FingerprintsSDFileIO object.
#
sub _PrepareForWritingFingerprintsSDFileData {
  my($This) = @_;

  # Existing file is left alone unless it's okay to overwrite it...
  my $SDFile = $This->{Name};
  if (!$This->{Overwrite} && -e $SDFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, already exist. Use overwrite option...";
  }

  # Setup FingerprintsStringMode status: at most one of the two modes is turned on and
  # any other value of FingerprintsStringMode leaves both of them turned off...
  my $BitVectorStringMode = 0;
  my $VectorStringMode = 0;

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    $BitVectorStringMode = 1;
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    $VectorStringMode = 1;
  }

  $This->{FingerprintsBitVectorStringMode} = $BitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $VectorStringMode;
  $This->{ValidFingerprintsStringMode} = ($BitVectorStringMode || $VectorStringMode) ? 1 : 0;

  # Setup default values for any unspecified string format parameters...
  if ($BitVectorStringMode) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
  }
  elsif ($VectorStringMode) {
    $This->_SetDefaultVectorStringFormat();
  }

  return $This;
}
|
|
1000
|
|
# Set default value for bit string format...
#
# An already specified BitStringFormat is left alone; otherwise, the default value
# provided by FingerprintsStringUtil is used.
#
# Returns FingerprintsSDFileIO object.
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}
|
|
1012
|
|
# Set default value for bits order...
#
# An already specified BitsOrder is left alone; otherwise, the default value
# provided by FingerprintsStringUtil is used.
#
# Returns FingerprintsSDFileIO object.
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  $This->{BitsOrder} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();

  return $This;
}
|
|
1024
|
|
# Set default value for vector string format...
#
# The default value is retrieved from FingerprintsStringUtil using the current fingerprints
# object. Nothing is done when VectorStringFormat has already been specified or no
# fingerprints object is available.
#
# Returns FingerprintsSDFileIO object.
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  if ($This->{VectorStringFormat} || !$This->{FingerprintsObject}) {
    return $This;
  }

  $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});

  return $This;
}
|
|
1036
|
|
# Add fingerprints data to compound string...
#
# The fingerprints string is added to the end of the compound string as value of the data
# field FingerprintsFieldLabel, ahead of the $$$$ compound terminator. Any fingerprints data
# already present in the compound string for that field label is replaced after issuing a
# warning.
#
# Returns FingerprintsSDFileIO object.
#
sub _AddFingerprintsDataToCompoundString {
  my($This) = @_;

  # Get rid of any existing fingerprints data...
  if ($This->_IsFingerprintsDataPresentInCompoundString()) {
    carp "Warning: ${ClassName}->_AddFingerprintsDataToCompoundString: The compound string already contains fingerprints data corresponding to fingerprints field label $This->{FingerprintsFieldLabel}; It has been replaced with new fingerprints data...";
    $This->{CompoundString} = SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
  }

  # Take off the compound terminator; it's put back after the fingerprints data...
  (my $CmpdString = $This->{CompoundString}) =~ s/\$\$\$\$$//;

  $This->{CompoundString} = "${CmpdString}> <$This->{FingerprintsFieldLabel}>\n$This->{FingerprintsString}\n\n\$\$\$\$";

  return $This;
}
|
|
1057
|
|
# Is fingerprints data already present in compound string?
#
# Returns 1 when the compound string contains the tag <FingerprintsFieldLabel> for the
# current fingerprints field label; otherwise, returns 0. An empty compound string or an
# empty fingerprints field label always results in 0.
#
sub _IsFingerprintsDataPresentInCompoundString {
  my($This) = @_;

  if (TextUtil::IsEmpty($This->{CompoundString}) || TextUtil::IsEmpty($This->{FingerprintsFieldLabel})) {
    return 0;
  }

  my $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  # Match the field label literally: \Q...\E keeps regex metacharacters in the label, such as
  # parentheses, dots, or brackets, from being interpreted as part of the pattern, which
  # would cause missed or false matches, or an invalid regular expression...
  return ($This->{CompoundString} =~ /<\Q$FingerprintsFieldLabel\E>/) ? 1 : 0;
}
|
|
1072
|
|
# Generate fingerprints object using current fingerprints string...
#
# The current fingerprints object is cleared first. The fingerprints string is then parsed
# as fingerprints bit-vector or fingerprints vector string depending on which one of
# FingerprintsBitVectorStringMode and FingerprintsVectorStringMode flags is set; the
# bit-vector mode is checked first.
#
# Returns FingerprintsSDFileIO object after parsing fingerprints string, or right away
# for an empty fingerprints string; undef is returned when none of the two modes is set.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  # Nothing to parse...
  return $This unless $This->{FingerprintsString};

  # No way to tell how to parse fingerprints string...
  if (!$This->{FingerprintsBitVectorStringMode} && !$This->{FingerprintsVectorStringMode}) {
    return undef;
  }

  $This->{FingerprintsObject} = $This->{FingerprintsBitVectorStringMode}
    ? Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString})
    : Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});

  return $This;
}
|
|
1096
|
|
# Generate fingerprints string using current fingerprints object...
#
# The current fingerprints string is set to an empty string first. The fingerprints string
# is then generated using BitStringFormat and BitsOrder during fingerprints bit-vector
# string mode, or VectorStringFormat during fingerprints vector string mode. It stays
# empty without a fingerprints object or when none of the two modes is set.
#
# Returns FingerprintsSDFileIO object.
#
sub _GenerateFingerprintsString {
  my($This) = @_;

  $This->{FingerprintsString} = '';

  # Nothing to generate fingerprints string from...
  return $This unless $This->{FingerprintsObject};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{BitStringFormat}, $This->{BitsOrder});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{VectorStringFormat});
  }

  return $This;
}
|
|
1117
|
|
# Is it a fingerprints SD file?
#
# This can be invoked either as a method on a FingerprintsSDFileIO object with file name as
# its parameter, or as a function with file name as the first parameter.
#
# Returns status of the check performed by FileUtil::CheckFileType for sdf and sd file
# extensions.
#
sub IsFingerprintsSDFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # File name follows the object during invocation as a method...
  my $InvokedAsMethod = ((@_ == 2) && (_IsFingerprintsSDFileIO($FirstParameter))) ? 1 : 0;
  my $FileName = $InvokedAsMethod ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "sdf sd");

  return $Status;
}
|
|
1135
|
|
# Is it a FingerprintsSDFileIO object?
#
# Returns 1 for a blessed reference which is an object of this class or a class derived
# from it; otherwise, returns 0.
#
sub _IsFingerprintsSDFileIO {
  my($Object) = @_;

  # isa can't be invoked on anything other than an object...
  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
|
|
1142
|
|
1143 1;
|
|
1144
|
|
1145 __END__
|
|
1146
|
|
1147 =head1 NAME
|
|
1148
|
|
1149 FingerprintsSDFileIO
|
|
1150
|
|
1151 =head1 SYNOPSIS
|
|
1152
|
|
1153 use FileIO::FingerprintsSDFileIO;
|
|
1154
|
|
1155 use FileIO::FingerprintsSDFileIO qw(:all);
|
|
1156
|
|
1157 =head1 DESCRIPTION
|
|
1158
|
|
1159 B<FingerprintsSDFileIO> class provides the following methods:
|
|
1160
|
|
1161 new, GetCompoundString, GetFingerprints, GetFingerprintsString,
|
|
1162 IsFingerprintsDataValid, IsFingerprintsFileDataValid, IsFingerprintsSDFile, Next,
|
|
1163 Read, SetBitStringFormat, SetBitsOrder, SetCompoundIDMode, SetCompoundString,
|
|
1164 SetDetailLevel, SetFingerprints, SetFingerprintsString, SetFingerprintsStringMode,
|
|
1165 SetVectorStringFormat, WriteFingerprints, WriteFingerprintsString
|
|
1166
|
|
1167 The following methods can also be used as functions:
|
|
1168
|
|
1169 IsFingerprintsSDFile
|
|
1170
|
|
1171 B<FingerprintsSDFileIO> class is derived from I<FileIO> class and uses its methods to support
|
|
1172 generic file related functionality.
|
|
1173
|
|
1174 The fingerprints SD file format with B<.sdf> or B<.sd> file extensions supports two types of
|
|
1175 fingerprints string data: fingerprints bit-vectors and fingerprints vector strings. The fingerprints
|
|
1176 string data is treated as value of a fingerprints data field label in a SD file.
|
|
1177
|
|
1178 Example of SD file format containing fingerprints string data:
|
|
1179
|
|
1180 ... ...
|
|
1181 ... ...
|
|
1182 $$$$
|
|
1183 ... ...
|
|
1184 ... ...
|
|
1185 ... ...
|
|
1186 41 44 0 0 0 0 0 0 0 0999 V2000
|
|
1187 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1188 ... ...
|
|
1189 2 3 1 0 0 0 0
|
|
1190 ... ...
|
|
1191 M END
|
|
1192 > <CmpdID>
|
|
1193 Test
|
|
1194
|
|
1195 > <PathLengthFingerprints>
|
|
1196 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLengt
|
|
1197 h1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a49913991a66
|
|
1198 03130b0a19e8051c89184414953800cc2151082844a201042800130860308e8204d4028
|
|
1199 00831048940e44281c00060449a5000ac80c894114e006321264401600846c050164462
|
|
1200 08190410805000304a10205b0100e04c0038ba0fad0209c0ca8b1200012268b61c0026a
|
|
1201 aa0660a11014a011d46
|
|
1202
|
|
1203 $$$$
|
|
1204 ... ...
|
|
1205 ... ...
|
|
1206
|
|
1207 The current release of MayaChemTools supports the following types of fingerprint
|
|
1208 bit-vector and vector strings:
|
|
1209
|
|
1210 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi
|
|
1211 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT
|
|
1212 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X
|
|
1213 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A
|
|
1214 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2
|
|
1215 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B...
|
|
1216
|
|
1217 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS
|
|
1218 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2
|
|
1219 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1
|
|
1220 O.X1.BO2;2 4 14 3 10 1 1 1 3 2
|
|
1221
|
|
1222 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume
|
|
1223 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F
|
|
1224 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1
|
|
1225
|
|
1226 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN
|
|
1227 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C
|
|
1228 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N
|
|
1229 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8
|
|
1230 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1
|
|
1231 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0...
|
|
1232
|
|
1233 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs
|
|
1234 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN
|
|
1235 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3
|
|
1236 .024 -2.270
|
|
1237
|
|
1238 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
|
|
1239 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435
|
|
1240 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1
|
|
1241 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1242 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1243
|
|
1244 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi
|
|
1245 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391
|
|
1246 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414
|
|
1247 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103
|
|
1248 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338
|
|
1249 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303...
|
|
1250
|
|
1251 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes
|
|
1252 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524
|
|
1253 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649
|
|
1254 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...;
|
|
1255 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2
|
|
1256 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1
|
|
1257
|
|
1258 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp
|
|
1259 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100
|
|
1260 0000000001010000000110000011000000000000100000000000000000000000100001
|
|
1261 1000000110000000000000000000000000010011000000000000000000000000010000
|
|
1262 0000000000000000000000000010000000000000000001000000000000000000000000
|
|
1263 0000000000010000100001000000000000101000000000000000100000000000000...
|
|
1264
|
|
1265 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu
|
|
1266 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8
|
|
1267 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567
|
|
1268 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012
|
|
1269 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455
|
|
1270 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404...
|
|
1271
|
|
1272 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp
|
|
1273 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184
|
|
1274 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450
|
|
1275 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430
|
|
1276 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134
|
|
1277 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566...
|
|
1278
|
|
1279 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
|
|
1280 0000000000000000000000000000000001001000010010000000010010000000011100
|
|
1281 0100101010111100011011000100110110000011011110100110111111111111011111
|
|
1282 11111111111110111000
|
|
1283
|
|
1284 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
|
|
1285 1110011111100101111111000111101100110000000000000011100010000000000000
|
|
1286 0000000000000000000000000000000000000000000000101000000000000000000000
|
|
1287 0000000000000000000000000000000000000000000000000000000000000000000000
|
|
1288 0000000000000000000000000000000000000011000000000000000000000000000000
|
|
1289 0000000000000000000000000000000000000000
|
|
1290
|
|
1291 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
|
|
1292 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1293 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
|
|
1294 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
|
|
1295 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
|
|
1296 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
|
|
1297
|
|
1298 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
|
|
1299 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
|
|
1300 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
|
|
1301 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1302 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
|
|
1303 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
|
|
1304
|
|
1305 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
|
|
1306 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
|
|
1307 0100010101011000101001011100110001000010001001101000001001001001001000
|
|
1308 0010110100000111001001000001001010100100100000000011000000101001011100
|
|
1309 0010000001000101010100000100111100110111011011011000000010110111001101
|
|
1310 0101100011000000010001000011000010100011101100001000001000100000000...
|
|
1311
|
|
1312 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
|
|
1313 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
|
|
1314 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
|
|
1315 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
|
|
1316 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
|
|
1317 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
|
|
1318
|
|
1319 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt
|
|
1320 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1
|
|
1321 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N
|
|
1322 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1
|
|
1323 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR
|
|
1324 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ...
|
|
1325
|
|
1326 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
|
|
1327 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1
|
|
1328 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3.
|
|
1329 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...;
|
|
1330 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1
|
|
1331 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1...
|
|
1332
|
|
1333 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi
|
|
1334 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar
|
|
1335 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H
|
|
1336 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...;
|
|
1337 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4
|
|
1338 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ...
|
|
1339
|
|
1340 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
|
|
1341 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-
|
|
1342 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO
|
|
1343 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...;
|
|
1344 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
|
|
1345
|
|
1346 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica
|
|
1347 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC
|
|
1348 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC-
|
|
1349 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...;
|
|
1350 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2
|
|
1351
|
|
1352 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
|
|
1353 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1
|
|
1354 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1
|
|
1355 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1
|
|
1356 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....;
|
|
1357 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2
|
|
1358 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8...
|
|
1359
|
|
1360 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1
|
|
1361 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C
|
|
1362 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3-
|
|
1363 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2
|
|
1364 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C.
|
|
1365 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7...
|
|
1366
|
|
1367 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
|
|
1368 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H
|
|
1369 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2-
|
|
1370 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H
|
|
1371 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...;
|
|
1372 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10
|
|
1373 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
|
|
1374
|
|
1375 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
|
|
1376 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0
|
|
1377 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1
|
|
1378 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0
|
|
1379 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0
|
|
1380 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18...
|
|
1381
|
|
1382 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize:
|
|
1383 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1-
|
|
1384 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1
|
|
1385 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1-
|
|
1386 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...;
|
|
1387 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23
|
|
1388 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1
|
|
1389 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ...
|
|
1390
|
|
1391 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD
|
|
1392 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106
|
|
1393 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0
|
|
1394 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26
|
|
1395 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0
|
|
1396 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ...
|
|
1397
|
|
1398 =head2 METHODS
|
|
1399
|
|
1400 =over 4
|
|
1401
|
|
1402 =item B<new>
|
|
1403
|
|
1404 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO(%IOParameters);
|
|
1405
|
|
1406 Using specified I<IOParameters> names and values hash, B<new> method creates a new
|
|
1407 object and returns a reference to a newly created B<FingerprintsSDFileIO> object. By default,
|
|
1408 the following properties are initialized during I<Read> mode:
|
|
1409
|
|
1410 Name = '';
|
|
1411 Mode = 'Read';
|
|
1412 Status = 0;
|
|
1413 FingerprintsStringMode = 'AutoDetect';
|
|
1414 FingerprintsFieldLabel = 'AutoDetect';
|
|
1415 CompoundIDMode = 'LabelPrefix';
|
|
1416 CompoundIDFieldLabel = undef;
|
|
1417 CompoundIDPrefix = 'Cmpd';
|
|
1418 ValidateData = 1;
|
|
1419 DetailLevel = 1;
|
|
1420
|
|
1421 During I<Write> mode, the following properties get initialized by default:
|
|
1422
|
|
1423 FingerprintsStringMode = undef;
|
|
1424
|
|
1425 BitStringFormat = HexadecimalString;
|
|
1426 BitsOrder = Ascending;
|
|
1427
|
|
1428 VectorStringFormat = NumericalValuesString or ValuesString;
|
|
1429
|
|
1430 Examples:
|
|
1431
|
|
1432 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO(
|
|
1433 'Name' => 'Sample.sdf',
|
|
1434 'Mode' => 'Read');
|
|
1435
|
|
1436 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO(
|
|
1437 'Name' => 'Sample.sdf',
|
|
1438 'Mode' => 'Read',
|
|
1439 'FingerprintsStringMode' =>
|
|
1440 'AutoDetect',
|
|
1441 'FingerprintsFieldLabel' =>
|
|
1442 'Fingerprints',
|
|
1443 'CompoundIDMode' =>
|
|
1444 'DataField',
|
|
1445 'CompoundIDFieldLabel' =>
|
|
1446 'CompoundID');
|
|
1447
|
|
1448 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO(
|
|
1449 'Name' => 'Sample.sdf',
|
|
1450 'Mode' => 'Write',
|
|
1451 'FingerprintsStringMode' =>
|
|
1452 'FingerprintsBitVectorString',
|
|
1453 'Overwrite' => 1,
|
|
1454 'BitStringFormat' => 'HexadecimalString',
|
|
1455 'BitsOrder' => 'Ascending');
|
|
1456
|
|
1457 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO(
|
|
1458 'Name' => 'Sample.sd',
|
|
1459 'Mode' => 'Write',
|
|
1460 'FingerprintsStringMode' =>
|
|
1461 'FingerprintsVectorString',
|
|
1462 'Overwrite' => 1,
|
|
1463 'VectorStringFormat' => 'IDsAndValuesString',
|
|
1464 'FingerprintsFieldLabel' => 'Fingerprints');
|
|
1465
|
|
1466 =item B<GetCompoundString>
|
|
1467
|
|
1468 $CompoundString = $FingerprintsSDFileIO->GetCompoundString();
|
|
1469
|
|
1470 Returns B<CompoundString> for current compound.
|
|
1471
|
|
1472 =item B<GetFingerprints>
|
|
1473
|
|
1474 $FingerprintsObject = $FingerprintsSDFileIO->GetFingerprints();
|
|
1475
|
|
1476 Returns B<FingerprintsObject> generated for current compound using fingerprints bit-vector
|
|
1477 or vector string data. The fingerprints object corresponds to any of the supported fingerprints
|
|
1478 such as PathLengthFingerprints, ExtendedConnectivity, and so on.
|
|
1479
|
|
1480 =item B<GetFingerprintsString>
|
|
1481
|
|
1482 $FingerprintsString = $FingerprintsSDFileIO->GetFingerprintsString();
|
|
1483
|
|
1484 Returns B<FingerprintsString> for current compound.
|
|
1485
|
|
1486 =item B<IsFingerprintsDataValid>
|
|
1487
|
|
1488 $Status = $FingerprintsSDFileIO->IsFingerprintsDataValid();
|
|
1489
|
|
1490 Returns 1 or 0 based on whether B<FingerprintsObject> is valid.
|
|
1491
|
|
1492 =item B<IsFingerprintsFileDataValid>
|
|
1493
|
|
1494 $Status = $FingerprintsSDFileIO->IsFingerprintsFileDataValid();
|
|
1495
|
|
1496 Returns 1 or 0 based on whether fingerprints file contains valid fingerprints data.
|
|
1497
|
|
1498 =item B<IsFingerprintsSDFile>
|
|
1499
|
|
1500 $Status = $FingerprintsSDFileIO->IsFingerprintsSDFile($FileName);
|
|
1501 $Status = FileIO::FingerprintsSDFileIO::IsFingerprintsSDFile($FileName);
|
|
1502
|
|
1503 Returns 1 or 0 based on whether I<FileName> is a SD file.
|
|
1504
|
|
1505 =item B<Next or Read>
|
|
1506
|
|
1507 $FingerprintsSDFileIO = $FingerprintsSDFileIO->Next();
|
|
1508 $FingerprintsSDFileIO = $FingerprintsSDFileIO->Read();
|
|
1509
|
|
1510 Reads next available compound fingerprints in SD file, processes the data, generates appropriate
|
|
1511 fingerprints object, and returns B<FingerprintsSDFileIO>. The generated fingerprints object is available
|
|
1512 using method B<GetFingerprints>.
|
|
1513
|
|
1514 =item B<SetBitStringFormat>
|
|
1515
|
|
1516 $FingerprintsSDFileIO->SetBitStringFormat($Format);
|
|
1517
|
|
1518 Sets bit string I<Format> for fingerprints bit-vector string data in a SD file and returns B<FingerprintsSDFileIO>.
|
|
1519 Possible values for B<BitStringFormat>: I<BinaryString or HexadecimalString>.
|
|
1520
|
|
1521 =item B<SetBitsOrder>
|
|
1522
|
|
1523 $FingerprintsSDFileIO->SetBitsOrder($BitsOrder);
|
|
1524
|
|
1525 Sets I<BitsOrder> for fingerprints bit-vector string data in SD file and returns B<FingerprintsSDFileIO>.
|
|
1526 Possible values for B<BitsOrder>: I<Ascending or Descending>.
|
|
1527
|
|
1528 =item B<SetCompoundIDMode>
|
|
1529
|
|
1530 $FingerprintsSDFileIO->SetCompoundIDMode($Mode);
|
|
1531
|
|
1532 Sets compound ID I<Mode> for fingerprints bit-vector string data in a SD file and returns B<FingerprintsSDFileIO>.
|
|
1533 Possible values for B<CompoundIDMode>: I<DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix>.
|
|
1534
|
|
1535 =item B<SetCompoundString>
|
|
1536
|
|
1537 $FingerprintsSDFileIO->SetCompoundString($CompoundString);
|
|
1538
|
|
1539 Sets I<CompoundString> and returns B<FingerprintsSDFileIO>.
|
|
1540
|
|
1541 =item B<SetDetailLevel>
|
|
1542
|
|
1543 $FingerprintsSDFileIO->SetDetailLevel($Level);
|
|
1544
|
|
1545 Sets details I<Level> for generating diagnostics messages during SD file processing and returns
|
|
1546 B<FingerprintsSDFileIO>. Possible values: I<Positive integers>.
|
|
1547
|
|
1548 =item B<SetFingerprints>
|
|
1549
|
|
1550 $FingerprintsSDFileIO->SetFingerprints($FingerprintsObject);
|
|
1551
|
|
1552 Sets I<FingerprintsObject> for current data line and returns B<FingerprintsSDFileIO>.
|
|
1553
|
|
1554 =item B<SetFingerprintsString>
|
|
1555
|
|
1556 $FingerprintsSDFileIO->SetFingerprintsString($FingerprintsString);
|
|
1557
|
|
1558 Sets I<FingerprintsString> for current data line and returns B<FingerprintsSDFileIO>.
|
|
1559
|
|
1560 =item B<SetFingerprintsStringMode>
|
|
1561
|
|
1562 $FingerprintsSDFileIO->SetFingerprintsStringMode($Mode);
|
|
1563
|
|
1564 Sets I<FingerprintsStringMode> for SD file and returns B<FingerprintsSDFileIO>.
|
|
1565 Possible values: I<AutoDetect, FingerprintsBitVectorString, or FingerprintsVectorString>
|
|
1566
|
|
1567 =item B<SetVectorStringFormat>
|
|
1568
|
|
1569 $FingerprintsSDFileIO->SetVectorStringFormat($Format);
|
|
1570
|
|
1571 Sets I<VectorStringFormat> for SD file and returns B<FingerprintsSDFileIO>. Possible values:
|
|
1572 I<IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString>.
|
|
1573
|
|
1574 =item B<WriteFingerprints>
|
|
1575
|
|
1576 $FingerprintsSDFileIO->WriteFingerprints($FingerprintsObject,
|
|
1577 $CompoundString);
|
|
1578
|
|
1579 Writes fingerprints string generated from I<FingerprintsObject> object and other data including
|
|
1580 I<CompoundString> to SD file and returns B<FingerprintsSDFileIO>.
|
|
1581
|
|
1582 =item B<WriteFingerprintsString>
|
|
1583
|
|
1584 $FingerprintsSDFileIO->WriteFingerprintsString($FingerprintsString,
|
|
1585 $CompoundString);
|
|
1586
|
|
1587 Writes I<FingerprintsString> and other data including I<CompoundString> to SD file and returns
|
|
1588 B<FingerprintsSDFileIO>.
|
|
1589
|
|
1590 Caveats:
|
|
1591
|
|
1592 o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat
|
|
1593 values are ignored during writing of fingerprints and it's written to the file
|
|
1594 as it is.
|
|
1595 o Any existing data for the fingerprints field label in CompoundString is replaced after a warning
|
|
1596
|
|
1597
|
|
1598 =back
|
|
1599
|
|
1600 =head1 AUTHOR
|
|
1601
|
|
1602 Manish Sud <msud@san.rr.com>
|
|
1603
|
|
1604 =head1 SEE ALSO
|
|
1605
|
|
1606 FingerprintsTextFileIO.pm, FingerprintsFPFileIO.pm, SDFileIO.pm
|
|
1607
|
|
1608 =head1 COPYRIGHT
|
|
1609
|
|
1610 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
1611
|
|
1612 This file is part of MayaChemTools.
|
|
1613
|
|
1614 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
1615 the terms of the GNU Lesser General Public License as published by the Free
|
|
1616 Software Foundation; either version 3 of the License, or (at your option)
|
|
1617 any later version.
|
|
1618
|
|
1619 =cut
|