0
|
1 package FileIO::FingerprintsTextFileIO;
|
|
2 #
|
|
3 # $RCSfile: FingerprintsTextFileIO.pm,v $
|
|
4 # $Date: 2015/02/28 20:48:43 $
|
|
5 # $Revision: 1.19 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability or fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Scalar::Util ();
|
|
33 use TextUtil ();
|
|
34 use FileUtil ();
|
|
35 use Fingerprints::FingerprintsStringUtil ();
|
|
36 use FileIO::FileIO;
|
|
37
|
|
38 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
|
|
39
|
|
40 @ISA = qw(FileIO::FileIO Exporter);
|
|
41 @EXPORT = qw();
|
|
42 @EXPORT_OK = qw(IsFingerprintsTextFile);
|
|
43
|
|
44 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
|
|
45
|
|
46 # Setup class variables...
|
|
47 my($ClassName);
|
|
48 _InitializeClass();
|
|
49
|
|
# Class constructor.
#
# Arguments: class name (or an existing object) followed by a list of
# property name and value pairs.
#
# The object is created by the parent class constructor, populated with the
# default FingerprintsTextFileIO data, and then updated using the specified
# names and values. Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Bless into the class of the invocant so that new may be called on either
  # a class name or an existing object...
  my $Object = bless $Class->SUPER::new(), (ref($Class) || $Class);

  $Object->_InitializeFingerprintsTextFileIO();
  $Object->_InitializeFingerprintsTextFileIOProperties(%NamesAndValues);

  return $Object;
}
|
|
63
|
|
# Initialize object data shared by read and write modes, then delegate to
# the read- and write-specific initializers. Returns the object.
#
sub _InitializeFingerprintsTextFileIO {
  my($This) = @_;

  # Values that are unknown until properties are set or data is processed:
  #
  # FingerprintsStringMode - Fingerprints string data format during read/write.
  #
  #   For file read: AutoDetect, FingerprintsBitVectorString or
  #   FingerprintsVectorString. The value AutoDetect is filled in later
  #   during initialization of read properties when nothing is specified.
  #
  #   For file write: FingerprintsBitVectorString or FingerprintsVectorString.
  #   No default is provided.
  #
  # FingerprintsObject - For file read: fingerprints bit-vector or vector
  #   object for current fingerprints string. For file write: fingerprints
  #   bit-vector or vector object, or any supported fingerprints object such
  #   as PathLengthFingerprints, ExtendedConnectivity, and so on.
  #
  # FingerprintsString - Fingerprints string for current line during read/write.
  #
  # DataLine - Text line data during read/write.
  #
  for my $Key (qw(FingerprintsStringMode FingerprintsObject FingerprintsString DataLine)) {
    $This->{$Key} = undef;
  }

  # First data line read/write is still pending...
  $This->{FirstDataLineIO} = 1;

  # Current fingerprints string data line number during read/write...
  $This->{LineNum} = 0;

  # Words in current text line and text file column labels during read/write...
  @{$This->{DataLineWords}} = ();
  @{$This->{DataColLabels}} = ();

  # Text file delimiter during read/write...
  $This->{Delim} = '';

  # Parameters specific to read followed by parameters specific to write...
  $This->_InitializeFingerprintsTextFileIORead();
  $This->_InitializeFingerprintsTextFileIOWrite();

  return $This;
}
|
|
126
|
|
# Initialize class data: record the package name in the file-level
# $ClassName variable, which is used as a prefix in error and warning
# messages.
#
sub _InitializeClass {
  return $ClassName = __PACKAGE__;
}
|
|
133
|
|
# Initialize object data used for reading a fingerprints text file.
# Returns the object.
#
sub _InitializeFingerprintsTextFileIORead {
  my($This) = @_;

  my %ReadDefaults = (
    # Column specification mode used to identify compound ID and fingerprints
    # string data columns:
    #
    #   ColNum   - A valid column number
    #   ColLabel - A valid column name
    #
    ColMode => 'ColNum',

    # Column used to retrieve fingerprints string data. AutoDetect implies
    # use of the first column containing the word Fingerprints in its column
    # label. Otherwise, a valid column number or column name must be
    # specified based on the value of ColMode.
    #
    FingerprintsCol => 'AutoDetect',

    # Column used to retrieve compound IDs. AutoDetect implies use of the
    # first column containing the word CompoundID in its column label, or
    # assignment of sequentially generated compound IDs. Otherwise, a valid
    # column number or column name must be specified based on the value of
    # ColMode.
    #
    CompoundIDCol => 'AutoDetect',

    # Prefix string used during sequential generation of compound IDs. The
    # default value, Cmpd, generates compound IDs like Cmpd<Number>.
    #
    CompoundIDPrefix => 'Cmpd',

    # Input delimiter for fingerprints CSV text file. Possible values: comma,
    # semicolon or tab. This option is ignored for TSV text file and tab is
    # used as the delimiter.
    #
    InDelim => 'comma',

    # Validation of the data in FingerprintsCol before generation of
    # fingerprints objects is turned on by default...
    #
    ValidateData => 1,

    # Level of detail to print during validation of data for invalid or
    # missing data...
    DetailLevel => 1,

    # Number of missing and invalid fingerprints string data lines...
    NumOfLinesWithMissingData => 0,
    NumOfLinesWithInvalidData => 0,

    # Compound ID for current fingerprints string...
    CompoundID => undef,

    # Status of data in fingerprints text file...
    ValidFileData => 0,
    ValidCompoundIDCol => 0,
    ValidFingerprintsCol => 0,
    ValidFingerprintsStringMode => 0,
  );

  # keys and values traverse the hash in the same order...
  @{$This}{keys %ReadDefaults} = values %ReadDefaults;

  return $This;
}
|
|
199
|
|
# Initialize object data used for writing a fingerprints text file.
# Returns the object.
#
sub _InitializeFingerprintsTextFileIOWrite {
  my($This) = @_;

  my %WriteDefaults = (
    # Fingerprints bit vector string format. Possible values: BinaryString
    # or HexadecimalString [Default].
    #
    # Default BitStringFormat is set during first write using
    # Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat.
    #
    BitStringFormat => undef,

    # Bits order in fingerprints bit vector string:
    #
    #   Ascending  - First bit in each byte as the lowest bit [Default]
    #   Descending - First bit in each byte as the highest bit
    #
    # Default BitsOrder is set during first write using
    # Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder.
    #
    BitsOrder => undef,

    # Fingerprints vector string format. Possible values: IDsAndValuesString,
    # IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString,
    # ValuesString.
    #
    # Default VectorStringFormat is set during first write using
    # Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat. For
    # fingerprints vector object containing vector NumericalValues, it
    # corresponds to IDsAndValuesString; otherwise, it's set to ValuesString.
    #
    VectorStringFormat => undef,

    # Delimiter for output fingerprints CSV/TSV file. Possible values: comma,
    # tab, semicolon. This option is ignored for TSV text file and tab is
    # used as the delimiter.
    #
    OutDelim => 'comma',

    # Quotes around column values for output fingerprints CSV/TSV text file...
    OutQuote => 1,

    # Overwriting existing file...
    Overwrite => 0,
  );

  # keys and values traverse the hash in the same order...
  @{$This}{keys %WriteDefaults} = values %WriteDefaults;

  return $This;
}
|
|
245
|
|
# Initialize object values using the names and values specified during
# object instantiation.
#
# Each name/value pair is applied through the corresponding Set<Name>
# method. All other property names and values along with all
# Set/Get<PropertyName> methods are implemented on-demand using
# ObjectProperty class.
#
# Croaks when no file name is specified or when the file doesn't appear to
# be a fingerprints text file. Returns the object.
#
sub _InitializeFingerprintsTextFileIOProperties {
  my($This, %NamesAndValues) = @_;

  for my $PropertyName (keys %NamesAndValues) {
    my $SetterName = "Set${PropertyName}";
    $This->$SetterName($NamesAndValues{$PropertyName});
  }

  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name..."
    unless exists $NamesAndValues{Name};

  # Make sure it's a fingerprints file...
  my $FileName = $NamesAndValues{Name};
  croak "Error: ${ClassName}->New: Object can't be instantiated: File, $FileName, doesn't appear to be fingerprints format..."
    unless $This->IsFingerprintsTextFile($FileName);

  # Finish setting up using the initializer for the current file mode...
  if ($This->GetMode() =~ /^Read$/i) {
    $This->_InitializeFingerprintsTextFileIOReadProperties(%NamesAndValues);
    return $This;
  }

  if ($This->GetMode() =~ /^(Write|Append)$/i) {
    $This->_InitializeFingerprintsTextFileIOWriteProperties(%NamesAndValues);
  }

  return $This;
}
|
|
278
|
|
# Initialize object properties for reading fingerprints text file: default
# FingerprintsStringMode to AutoDetect when it hasn't been set and get ready
# for reading the text file data. Returns the object.
#
sub _InitializeFingerprintsTextFileIOReadProperties {
  my($This, %NamesAndValues) = @_;

  $This->{FingerprintsStringMode} ||= 'AutoDetect';

  $This->_PrepareForReadingFingerprintsTextFileData();

  return $This;
}
|
|
293
|
|
# Initialize object properties for writing fingerprints text file.
#
# Croaks when:
#
#   o FingerprintsStringMode is not specified during object instantiation
#   o FingerprintsStringMode is not FingerprintsBitVectorString or
#     FingerprintsVectorString
#   o DataColLabels is not specified during object instantiation
#   o OutDelim is semicolon and OutQuote is turned off
#
# Otherwise, gets ready for writing the text file data and returns the
# object.
#
sub _InitializeFingerprintsTextFileIOWriteProperties {
  my($This, %NamesAndValues) = @_;

  # Check FingerprintsStringMode value...
  if (!exists $NamesAndValues{FingerprintsStringMode}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode...";
  }

  # AutoDetect is accepted by SetFingerprintsStringMode but is only
  # meaningful for reading...
  if ($This->{FingerprintsStringMode} !~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  if (!exists $NamesAndValues{DataColLabels}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying DataColLabels...";
  }

  # Fingerprints strings contain semicolons and must be quoted when semicolon
  # is also the column delimiter...
  if ($This->{OutDelim} =~ /semicolon/i && !$This->{OutQuote}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: The value specified, $This->{OutQuote}, using \"OutQuote\" is not allowed with semicolon value of \"OutDelim\": Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
  }

  $This->_PrepareForWritingFingerprintsTextFileData();

  return $This;
}
|
|
320
|
|
# Set FingerprintsStringMode.
#
# Supported values (case insensitive):
#
#   AutoDetect - automatically detect format of fingerprints string
#   FingerprintsBitVectorString - Bit vector fingerprints string format
#   FingerprintsVectorString - Vector fingerprints string format
#
# Croaks on any other value. Returns the object.
#
sub SetFingerprintsStringMode {
  my($This, $Value) = @_;

  unless ($Value =~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  $This->{FingerprintsStringMode} = $Value;

  return $This;
}
|
|
338
|
|
# Set ColMode.
#
# Supported values (case insensitive): ColNum or ColLabel. Croaks on any
# other value. Returns the object.
#
sub SetColMode {
  my($This, $Value) = @_;

  unless ($Value =~ /^(ColNum|ColLabel)$/i) {
    croak "Error: ${ClassName}->SetColMode: ColMode value, $Value, is not valid; Supported values: ColNum or ColLabel...";
  }

  $This->{ColMode} = $Value;

  return $This;
}
|
|
352
|
|
# Set InDelim.
#
# Supported values (case insensitive): comma, semicolon, or tab. Croaks on
# any other value. Returns the object.
#
sub SetInDelim {
  my($This, $Value) = @_;

  unless ($Value =~ /^(comma|semicolon|tab)$/i) {
    croak "Error: ${ClassName}->SetInDelim: InDelim value, $Value, is not valid; Supported values: comma, semicolon, or tab...";
  }

  $This->{InDelim} = $Value;

  return $This;
}
|
|
366
|
|
# Set DetailLevel.
#
# The value must satisfy TextUtil::IsPositiveInteger; croaks otherwise.
# Returns the object.
#
sub SetDetailLevel {
  my($This, $Value) = @_;

  unless (TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0...";
  }

  $This->{DetailLevel} = $Value;

  return $This;
}
|
|
380
|
|
# Set BitStringFormat.
#
# Supported values (case insensitive): BinaryString or HexadecimalString.
# Croaks on any other value. Returns the object.
#
sub SetBitStringFormat {
  my($This, $Value) = @_;

  unless ($Value =~ /^(BinaryString|HexadecimalString)$/i) {
    croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";
  }

  $This->{BitStringFormat} = $Value;

  return $This;
}
|
|
394
|
|
# Set BitsOrder.
#
# Supported values (case insensitive):
#
#   Ascending - First bit in each byte as the lowest bit
#   Descending - First bit in each byte as the highest bit
#
# Croaks on any other value. Returns the object.
#
sub SetBitsOrder {
  my($This, $Value) = @_;

  if ($Value !~ /^(Ascending|Descending)$/i) {
    # Report the property actually being set...
    croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
  }

  $This->{BitsOrder} = $Value;

  return $This;
}
|
|
411
|
|
# Set VectorStringFormat.
#
# Supported values (case insensitive): IDsAndValuesString,
# IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or
# ValuesString. Croaks on any other value. Returns the object.
#
sub SetVectorStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
    # Report the property actually being set...
    croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
  }

  $This->{VectorStringFormat} = $Value;

  return $This;
}
|
|
427
|
|
# Set OutDelim.
#
# Supported values (case insensitive): comma, tab or semicolon. Croaks on
# any other value. Returns the object.
#
sub SetOutDelim {
  my($This, $Value) = @_;

  unless ($Value =~ /^(comma|tab|semicolon)$/i) {
    croak "Error: ${ClassName}->SetOutDelim: OutDelim value, $Value, is not valid; Supported values: comma, tab or semicolon...";
  }

  $This->{OutDelim} = $Value;

  return $This;
}
|
|
441
|
|
# Set DataColLabels.
#
# Set output data column labels using:
#    o List of column labels
#    o Reference to a list of column labels
#
# When no values are specified, a warning is issued and the current column
# labels are left untouched. Returns the object.
#
sub SetDataColLabels {
  my($This, @Values) = @_;

  if (!@Values) {
    carp "Warning: ${ClassName}->SetDataColLabels: No data column labels specified...";
    return $This;
  }

  @{$This->{DataColLabels}} = ();

  # An array reference as the first value supplies all labels; anything else
  # is treated as a plain list of labels...
  my $FirstValue = $Values[0];
  push @{$This->{DataColLabels}}, (ref($FirstValue) =~ /^ARRAY/) ? @{$FirstValue} : @Values;

  return $This;
}
|
|
473
|
|
# Get data column labels in list context or number of data column labels in
# scalar context...
#
sub GetDataColLabels {
  my($This) = @_;
  my $ColLabelsRef = $This->{DataColLabels};

  return @{$ColLabelsRef} if wantarray;

  return scalar @{$ColLabelsRef};
}
|
|
481
|
|
# Get words in current data line in list context or number of words in
# current data line in scalar context...
#
sub GetDataLineWords {
  my($This) = @_;
  my $LineWordsRef = $This->{DataLineWords};

  return @{$LineWordsRef} if wantarray;

  return scalar @{$LineWordsRef};
}
|
|
489
|
|
# Set DataLineWords.
#
# Set data line words using:
#    o List of line words
#    o Reference to a list of line words
#
# When no values are specified, a warning is issued and the current line
# words are left untouched. Returns the object.
#
sub SetDataLineWords {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetDataLineWords: No line words specified...";
    return $This;
  }

  @{$This->{DataLineWords}} = ();

  # An array reference as the first value supplies all words; anything else
  # is treated as a plain list of words...
  my $FirstValue = $Values[0];
  push @{$This->{DataLineWords}}, (ref($FirstValue) =~ /^ARRAY/) ? @{$FirstValue} : @Values;

  return $This;
}
|
|
521
|
|
# Get fingerprints object for current data line. It's whatever is stored as
# FingerprintsObject: a fingerprints bit-vector or vector object, or any of
# the supported fingerprints objects such as PathLengthFingerprints,
# ExtendedConnectivity, and so on.
#
sub GetFingerprints {
  my $This = shift;

  return $This->{FingerprintsObject};
}
|
|
531
|
|
# Set fingerprints object for current data line. The specified object is
# stored as is without any validation. Returns the object.
#
sub SetFingerprints {
  my $This = shift;
  my($FingerprintsObject) = @_;

  $This->{FingerprintsObject} = $FingerprintsObject;

  return $This;
}
|
|
541
|
|
# Get fingerprints string for current data line. The string 'None' is
# returned when no fingerprints string is available (any false value)...
#
sub GetFingerprintsString {
  my $This = shift;

  return $This->{FingerprintsString} || 'None';
}
|
|
549
|
|
# Set fingerprints string for current data line. The specified string is
# stored as is without any validation. Returns the object.
#
sub SetFingerprintsString {
  my $This = shift;
  my($FingerprintsString) = @_;

  $This->{FingerprintsString} = $FingerprintsString;

  return $This;
}
|
|
559
|
|
# Does fingerprints text file contain valid data? Returns 1 when the
# ValidFileData status is true; otherwise, returns 0.
#
sub IsFingerprintsFileDataValid {
  my $This = shift;

  if ($This->{ValidFileData}) {
    return 1;
  }

  return 0;
}
|
|
567
|
|
# Does current data line contain valid fingerprints object data? Returns 1
# when a fingerprints object is defined for the current line; otherwise,
# returns 0.
#
sub IsFingerprintsDataValid {
  my $This = shift;

  if (defined $This->{FingerprintsObject}) {
    return 1;
  }

  return 0;
}
|
|
575
|
|
# Read next available fingerprints line, process it and generate appropriate
# fingerprints objects.
#
# Returns undef when no data line could be read; otherwise, returns the
# object. For a text file with invalid data, or for a data line failing
# validation, the object is returned without fingerprints data for the line.
#
sub Read {
  my($This) = @_;

  # Read data line...
  $This->_ReadDataLine() or return undef;

  # No need to process text file with invalid data...
  unless ($This->{ValidFileData}) {
    $This->{NumOfLinesWithMissingData} += 1 if $This->{ValidateData};
    return $This;
  }

  # Perform data validation...
  return $This if $This->{ValidateData} && !$This->_ValidateReadDataLine();

  # Setup fingerprints string after checking again to handle problematic data
  # for non-validated data lines...
  #
  my $FingerprintsColNum = $This->{FingerprintsColNum};
  if ($FingerprintsColNum <= $#{$This->{DataLineWords}}) {
    $This->{FingerprintsString} = $This->{DataLineWords}[$FingerprintsColNum];
  }

  # Generate fingerprints object followed by its compound ID...
  $This->_GenerateFingerprintsObject();
  $This->_GenerateCompoundID();

  return $This;
}
|
|
617
|
|
# Read next available fingerprints line, process it and generate appropriate
# fingerprints objects. It simply returns whatever Read returns...
#
sub Next {
  my $This = shift;

  return $This->Read();
}
|
|
626
|
|
# Read next fingerprints data line and split it into words using the current
# delimiter.
#
# Returns 0 when the line retrieved from the file handle is false; otherwise,
# the line number is incremented and 1 is returned.
#
sub _ReadDataLine {
  my($This) = @_;

  # Column label line is handled before the first data line...
  $This->_ProcessFirstDataLineRead() if $This->{FirstDataLineIO};

  # Initialize data for current line...
  $This->_InitializeReadDataLine();

  # Get next data line...
  my $Line = TextUtil::GetTextLine($This->{FileHandle});
  $This->{DataLine} = $Line;
  return 0 unless $Line;

  # Get line words...
  $This->{LineNum} += 1;
  @{$This->{DataLineWords}} = TextUtil::SplitWords($Line, $This->{Delim});

  return 1;
}
|
|
651
|
|
# Initialize data line for reading: clear compound ID, text line, line words,
# fingerprints object and fingerprints string corresponding to the previous
# line. Returns the object.
#
sub _InitializeReadDataLine {
  my($This) = @_;

  for my $Key (qw(CompoundID DataLine FingerprintsObject FingerprintsString)) {
    $This->{$Key} = undef;
  }

  @{$This->{DataLineWords}} = ();

  return $This;
}
|
|
667
|
|
# Validate fingerprints string data line.
#
# A line is flagged as missing data when it has no word in the fingerprints
# column. It's flagged as invalid data when the fingerprints string values
# are not valid or when its type and description don't match, ignoring case,
# the type and description of the first fingerprints string.
#
# Counts of lines with missing or invalid data are updated and warnings are
# issued for DetailLevel of 2 or higher; the data line itself is included in
# the warning for DetailLevel of 3 or higher.
#
# Returns 1 for a valid data line; otherwise, returns 0.
#
sub _ValidateReadDataLine {
  my($This) = @_;
  my($FingerprintsColNum, $FingerprintsString, $InvalidFingerprintsData, $FingerprintsType, $FingerprintsDescription);

  $FingerprintsColNum = $This->{FingerprintsColNum};

  # Check for missing data...
  if ($FingerprintsColNum > $#{$This->{DataLineWords}}) {
    $This->{NumOfLinesWithMissingData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data: $This->{DataLine}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data...";
    }
    return 0;
  }

  # Check for invalid data...
  $FingerprintsString = $This->{DataLineWords}[$FingerprintsColNum];
  $InvalidFingerprintsData = 1;

  if (Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($FingerprintsString)) {
    ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($FingerprintsString);

    # Type and description come from the file: quote them so that any regex
    # metacharacters are matched literally...
    if ($This->{FirstFingerprintsStringType} =~ /^\Q$FingerprintsType\E$/i && $This->{FirstFingerprintsStringDescription} =~ /^\Q$FingerprintsDescription\E$/i) {
      $InvalidFingerprintsData = 0;
    }
  }

  if ($InvalidFingerprintsData) {
    $This->{NumOfLinesWithInvalidData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data: $This->{DataLine}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data...";
    }
    return 0;
  }

  return 1;
}
|
|
715
|
|
# Setup compound ID for current fingerprints string.
#
# A compound ID of the form <CompoundIDPrefix><LineNum - 1> is generated when
# sequential compound IDs are in use or the current data line has no word in
# the compound ID column; otherwise, the word in the compound ID column is
# used. The compound ID is also set as the ID of the current fingerprints
# object, when there is one. Returns the object.
#
sub _GenerateCompoundID {
  my($This) = @_;

  my $GenerateSequentialID = $This->{UseSequentialCompoundIDs} || ($This->{CompoundIDColNum} > $#{$This->{DataLineWords}});

  $This->{CompoundID} = $GenerateSequentialID
    ? $This->{CompoundIDPrefix} . ($This->{LineNum} - 1)
    : $This->{DataLineWords}[$This->{CompoundIDColNum}];

  # Set fingerprints ID...
  $This->{FingerprintsObject}->SetID($This->{CompoundID}) if $This->{FingerprintsObject};

  return $This;
}
|
|
742
|
|
# Process first read: count and skip over the column label line so that the
# next line retrieved from the file handle is a data line, and mark the
# first data line processing as done. Returns the object.
#
sub _ProcessFirstDataLineRead {
  my $This = shift;

  # Skip column label line; its content is not needed here...
  $This->{LineNum} += 1;
  TextUtil::GetTextLine($This->{FileHandle});

  $This->{FirstDataLineIO} = 0;

  return $This;
}
|
|
756
|
|
# Get ready for reading fingerprints text file: retrieve column information,
# validate compound ID and fingerprints columns along with fingerprints
# string mode, and set overall status of the text file data. Returns the
# object.
#
sub _PrepareForReadingFingerprintsTextFileData {
  my($This) = @_;

  # Retrieve text file columns information...
  $This->_RetrieveTextFileColData();

  # Validate columns information...
  $This->_ValidateReadCompoundIDCol();
  $This->_ValidateReadFingerprintsCol();

  # Fingerprints string mode can only be validated for a valid fingerprints
  # column...
  $This->_ValidateReadFingerprintsStringMode() if $This->{ValidFingerprintsCol};

  # Text file data is valid only when all three validations succeed...
  my $AllValid = $This->{ValidCompoundIDCol} && $This->{ValidFingerprintsCol} && $This->{ValidFingerprintsStringMode};
  $This->{ValidFileData} = $AllValid ? 1 : 0;

  return $This;
}
|
|
779
|
|
# Retrieve information about columns in the text file.
#
# The delimiter is set up using the file extension and InDelim: tab for TSV
# files; otherwise, semicolon or comma based on the value of InDelim. The
# column label line is read from the file and used to set up the list of
# column labels along with a map of column labels to column numbers starting
# from 0.
#
# Croaks when the file doesn't exist or can't be opened. Returns the object.
#
sub _RetrieveTextFileColData {
  my($This) = @_;
  my($TextFile, $FileExt, $InDelim, $Line, $ColNum, $TextFileHandle, @ColLabels);

  @{$This->{DataColLabels}} = ();
  %{$This->{DataColLabelToNumMap}} = ();

  $TextFile = $This->{Name};

  if (!(-e $TextFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $TextFile, doesn't exist...";
  }

  # Only the file extension is needed to pick the delimiter...
  (undef, undef, $FileExt) = FileUtil::ParseFileName($TextFile);

  $InDelim = ($FileExt =~ /^tsv$/i) ? "\t" : ($This->{InDelim} =~ /semicolon/i ? ";" : ",");
  $This->{Delim} = $InDelim;

  # Three-argument open with a lexical file handle: the file name is never
  # interpreted as an open mode and no global file handle is shared with
  # other code...
  if (!open $TextFileHandle, '<', $TextFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input text file $TextFile: $! ...";
  }

  # Get column label line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  close $TextFileHandle;

  @ColLabels = TextUtil::SplitWords($Line, $InDelim);

  # Set text file columns info...
  push @{$This->{DataColLabels}}, @ColLabels;

  for $ColNum (0 .. $#ColLabels) {
    $This->{DataColLabelToNumMap}{$ColLabels[$ColNum]} = $ColNum;
  }

  return $This;
}
|
|
822
|
|
# Validate compound ID column information.
#
# For CompoundIDCol value of AutoDetect, the first column containing the
# word CompoundID in its label is used; sequential generation of compound
# IDs is turned on when no such column exists. Otherwise, CompoundIDCol must
# be a column number between 1 and the number of columns for ColMode value
# of ColNum, or an existing column label for ColMode value of ColLabel.
#
# On success, ValidCompoundIDCol, CompoundIDColNum (starting from 0) and
# UseSequentialCompoundIDs are set and 1 is returned. On failure, a warning
# is issued, ValidCompoundIDCol stays 0 and 0 is returned.
#
sub _ValidateReadCompoundIDCol {
  my($This) = @_;
  my($CompoundIDCol, $CompoundIDColNum, $UseSequentialCompoundIDs, $ColLabelsRef, $FirstMatchingColNum);

  $This->{ValidCompoundIDCol} = 0;
  $This->{CompoundIDColNum} = undef;
  $This->{UseSequentialCompoundIDs} = 0;

  $CompoundIDCol = $This->{CompoundIDCol};

  $UseSequentialCompoundIDs = 0;
  $CompoundIDColNum = '';

  if ($CompoundIDCol =~ /^AutoDetect$/i) {
    # First column containing the word CompoundID in its label or sequential
    # generation. Column positions are searched directly so that the first
    # column is picked even when column labels are duplicated...
    $ColLabelsRef = $This->{DataColLabels} || [];
    ($FirstMatchingColNum) = grep { $ColLabelsRef->[$_] =~ /CompoundID/i } 0 .. $#{$ColLabelsRef};

    if (defined $FirstMatchingColNum) {
      $CompoundIDColNum = $FirstMatchingColNum;
    }
    else {
      $UseSequentialCompoundIDs = 1;
    }
  }
  elsif ($This->{ColMode} =~ /^ColNum$/i) {
    # Is it a valid column number? Column numbers start from 1: anything
    # smaller would turn into a negative index counting from the last
    # column...
    if ($CompoundIDCol < 1 || $CompoundIDCol > scalar @{$This->{DataColLabels}}) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundIDCol: Column number, $CompoundIDCol, specified using CompoundIDCol doesn't exist...";
      return 0;
    }
    $CompoundIDColNum = $CompoundIDCol - 1;
  }
  elsif ($This->{ColMode} =~ /^ColLabel$/i) {
    # Does this column exist?
    if (!exists $This->{DataColLabelToNumMap}{$CompoundIDCol}) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundIDCol: Column name, $CompoundIDCol, specified using CompoundIDCol doesn't exist...";
      return 0;
    }
    $CompoundIDColNum = $This->{DataColLabelToNumMap}{$CompoundIDCol};
  }

  $This->{ValidCompoundIDCol} = 1;
  $This->{CompoundIDColNum} = $CompoundIDColNum;
  $This->{UseSequentialCompoundIDs} = $UseSequentialCompoundIDs;

  return 1;
}
|
|
881
|
|
# Validate fingerprints string column information.
#
# For FingerprintsCol value of AutoDetect, the first column containing the
# word Fingerprints in its label is used. Otherwise, FingerprintsCol must be
# a column number between 1 and the number of columns for ColMode value of
# ColNum, or an existing column label for ColMode value of ColLabel.
#
# On success, ValidFingerprintsCol and FingerprintsColNum (starting from 0)
# are set and 1 is returned. On failure, a warning is issued,
# ValidFingerprintsCol stays 0 and 0 is returned.
#
sub _ValidateReadFingerprintsCol {
  my($This) = @_;
  my($FingerprintsColNum, $FingerprintsCol, $ColLabelsRef, $FirstMatchingColNum);

  $This->{ValidFingerprintsCol} = 0;
  $This->{FingerprintsColNum} = undef;

  $FingerprintsColNum = undef;
  $FingerprintsCol = $This->{FingerprintsCol};

  if ($FingerprintsCol =~ /^AutoDetect$/i) {
    # First column containing the word Fingerprints in its label. Column
    # positions are searched directly so that the first column is picked
    # even when column labels are duplicated...
    $ColLabelsRef = $This->{DataColLabels} || [];
    ($FirstMatchingColNum) = grep { $ColLabelsRef->[$_] =~ /Fingerprints/i } 0 .. $#{$ColLabelsRef};

    if (!defined $FirstMatchingColNum) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column label containing \"Fingerprints\" string in its name doesn't exist...";
      return 0;
    }
    $FingerprintsColNum = $FirstMatchingColNum;
  }
  elsif ($This->{ColMode} =~ /^ColNum$/i) {
    # Is it a valid column number? Column numbers start from 1: anything
    # smaller would turn into a negative index counting from the last
    # column...
    if ($FingerprintsCol < 1 || $FingerprintsCol > scalar @{$This->{DataColLabels}}) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column number, $FingerprintsCol, specified using FingerprintsCol doesn't exist...";
      return 0;
    }
    $FingerprintsColNum = $FingerprintsCol - 1;
  }
  elsif ($This->{ColMode} =~ /^ColLabel$/i) {
    # Does this column exist?
    if (!exists $This->{DataColLabelToNumMap}{$FingerprintsCol}) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column label, $FingerprintsCol, specified using FingerprintsCol doesn't exist...";
      return 0;
    }
    $FingerprintsColNum = $This->{DataColLabelToNumMap}{$FingerprintsCol};
  }

  $This->{ValidFingerprintsCol} = 1;
  $This->{FingerprintsColNum} = $FingerprintsColNum;

  return 1;
}
|
|
935
|
|
# Validate fingerprints string mode information...
#
# Opens the text file specified by Name, skips the column label line, and retrieves
# the first fingerprints data line. The line is split using Delim, and the type and
# description of the fingerprints string in column FingerprintsColNum are compared
# against FingerprintsStringMode:
#
#   o FingerprintsBitVectorString: Type must be FingerprintsBitVector
#   o FingerprintsVectorString: Type must be FingerprintsVector
#   o Any other value is treated as AutoDetect: Type must be one of the two
#
# On success, ValidFingerprintsStringMode is set to 1; FingerprintsBitVectorStringMode,
# FingerprintsVectorStringMode, FirstFingerprintsStringType and
# FirstFingerprintsStringDescription are updated; and 1 is returned. Otherwise, a
# warning is issued and 0 is returned with all of these values left in their reset
# state. An exception is thrown when the text file can't be opened.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;
  my($FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType, $FirstFingerprintsStringDescription, $TextFile, $TextFileHandle, $Line, $FingerprintsColNum, $InDelim, $FingerprintsType, $FingerprintsDescription, @LineWords);

  $This->{ValidFingerprintsStringMode} = 0;

  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;

  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  $FingerprintsBitVectorStringMode = 0;
  $FingerprintsVectorStringMode = 0;

  $FirstFingerprintsStringType = '';
  $FirstFingerprintsStringDescription = '';

  $TextFile = $This->{Name};

  # Use three-argument open along with a lexical file handle: the file name is always
  # treated as a plain file to read and never as an open mode or a command pipe...
  if (!open $TextFileHandle, '<', $TextFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input text file $TextFile: $! ...";
  }

  # Skip column label line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  # Get first fingerprints data line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  close $TextFileHandle;

  # Get first fingerprints type and description...
  $InDelim = $This->{Delim};
  @LineWords = TextUtil::SplitWords($Line, $InDelim);

  $FingerprintsColNum = $This->{FingerprintsColNum};

  ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($LineWords[$FingerprintsColNum]);

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsBitVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsBitVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsBitVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  else {
    # AutoDetect mode...
    if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
      $FingerprintsBitVectorStringMode = 1;
    }
    elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
      $FingerprintsVectorStringMode = 1;
    }
    else {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintsBitVector or FingerprintsVector...";
      return 0;
    }
    # The type is stored as found in the text file...
    $FirstFingerprintsStringType = $FingerprintsType;
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;

  $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
  $This->{FirstFingerprintsStringDescription} = $FirstFingerprintsStringDescription;

  return 1;
}
|
|
1022
|
|
# Write fingerprints to text file as a new data line...
#
# The fingerprints string is generated from the specified fingerprints, fingerprints-bit
# vector, or fingerprints vector object and appended as the last word of the data line
# after the specified data column values. Returns FingerprintsTextFileIO object.
#
sub WriteFingerprints {
  my $This = shift;
  my $FingerprintsObject = shift;
  my @DataColValues = @_;

  # Start with an empty data line: it also clears previous fingerprints data...
  $This->_InitializeWriteDataLine();

  # Generate fingerprints string for the specified object...
  $This->{FingerprintsObject} = $FingerprintsObject;
  $This->_GenerateFingerprintsString();

  # Data column values first and fingerprints string at the end...
  $This->SetDataLineWords(@DataColValues);
  push @{$This->{DataLineWords}}, $This->{FingerprintsString};

  $This->_WriteDataLine();

  return $This;
}
|
|
1047
|
|
# Write specified fingerprints string to text file as a new data line...
#
# The fingerprints string is appended as the last word of the data line after the
# specified data column values. A fingerprints object is also generated from the
# string and made available as FingerprintsObject. Returns FingerprintsTextFileIO
# object.
#
# Note:
#   o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat values
#     are ignored during writing of fingerprints string: it's written to the file as
#     it is.
#
sub WriteFingerprintsString {
  my $This = shift;
  my $FingerprintsString = shift;
  my @DataColValues = @_;

  # Start with an empty data line: it also clears previous fingerprints data...
  $This->_InitializeWriteDataLine();

  # Keep fingerprints string and its corresponding object around...
  $This->{FingerprintsString} = $FingerprintsString;
  $This->_GenerateFingerprintsObject();

  # Data column values first and fingerprints string at the end...
  $This->SetDataLineWords(@DataColValues);
  push @{$This->{DataLineWords}}, $FingerprintsString;

  $This->_WriteDataLine();

  return $This;
}
|
|
1076
|
|
# Initialize data line for writing...
#
# Resets DataLine, FingerprintsObject and FingerprintsString to undefined values and
# empties DataLineWords. Returns FingerprintsTextFileIO object.
#
sub _InitializeWriteDataLine {
  my($This) = @_;

  for my $Key (qw(DataLine FingerprintsObject FingerprintsString)) {
    $This->{$Key} = undef;
  }

  # Empty out the existing array instead of replacing it by a new one...
  @{$This->{DataLineWords}} = ();

  return $This;
}
|
|
1090
|
|
# Write fingerprints data line...
#
# Joins DataLineWords using Delim and OutQuote, writes out the resulting line to
# FileHandle, increments LineNum, and keeps the line around as DataLine. During the
# first data line IO, _ProcessFirstDataLineWrite is invoked before writing the
# data line. Returns FingerprintsTextFileIO object.
#
sub _WriteDataLine {
  my($This) = @_;

  # Anything to take care of before the first data line?
  $This->_ProcessFirstDataLineWrite() if $This->{FirstDataLineIO};

  # Write out line words...
  my $Line = TextUtil::JoinWords(\@{$This->{DataLineWords}}, $This->{Delim}, $This->{OutQuote});

  $This->{LineNum} += 1;
  print {$This->{FileHandle}} "$Line\n";

  $This->{DataLine} = $Line;

  return $This;
}
|
|
1112
|
|
# Process first write...
#
# Turns off FirstDataLineIO and, during Write mode, writes out the column label line
# generated by joining DataColLabels using Delim and OutQuote. LineNum is incremented
# for the column label line. Returns FingerprintsTextFileIO object.
#
sub _ProcessFirstDataLineWrite {
  my($This) = @_;

  $This->{FirstDataLineIO} = 0;

  # Column label line is written only during Write mode...
  return $This unless $This->GetMode() =~ /^Write$/i;

  my $ColLabelsLine = TextUtil::JoinWords(\@{$This->{DataColLabels}}, $This->{Delim}, $This->{OutQuote});

  $This->{LineNum} += 1;
  print {$This->{FileHandle}} "$ColLabelsLine\n";

  return $This;
}
|
|
1132
|
|
# Get ready for writing fingerprints text file...
#
# The following steps are performed:
#
#   o Throw an exception when the text file already exists and Overwrite is not set
#   o Set Delim: Tab for files with tsv extension; otherwise, semicolon or comma
#     based on OutDelim value
#   o Set FingerprintsBitVectorStringMode, FingerprintsVectorStringMode and
#     ValidFingerprintsStringMode using FingerprintsStringMode value
#   o Set default values for formats corresponding to the fingerprints string mode
#
# Returns FingerprintsTextFileIO object.
#
sub _PrepareForWritingFingerprintsTextFileData {
  my($This) = @_;

  my $TextFile = $This->{Name};
  if (!$This->{Overwrite} && -e $TextFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $TextFile, already exist. Use overwrite option...";
  }

  # Set up delimiter for writing file: file extension takes precedence over OutDelim...
  my(undef, undef, $FileExt) = FileUtil::ParseFileName($TextFile);

  if ($FileExt =~ /^tsv$/i) {
    $This->{Delim} = "\t";
  }
  elsif ($This->{OutDelim} =~ /semicolon/i) {
    $This->{Delim} = "\;";
  }
  else {
    $This->{Delim} = "\,";
  }

  # Setup FingerprintsStringMode status...
  my $StringMode = $This->{FingerprintsStringMode};
  my $BitVectorStringMode = ($StringMode =~ /^FingerprintsBitVectorString$/i) ? 1 : 0;
  my $VectorStringMode = (!$BitVectorStringMode && $StringMode =~ /^FingerprintsVectorString$/i) ? 1 : 0;

  $This->{FingerprintsBitVectorStringMode} = $BitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $VectorStringMode;
  $This->{ValidFingerprintsStringMode} = ($BitVectorStringMode || $VectorStringMode) ? 1 : 0;

  # Setup default format values for the current mode...
  if ($BitVectorStringMode) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
  }
  elsif ($VectorStringMode) {
    $This->_SetDefaultVectorStringFormat();
  }

  return $This;
}
|
|
1179
|
|
# Set default value for bit string format...
#
# BitStringFormat is set to the default value provided by FingerprintsStringUtil only
# when it doesn't already have a true value. Returns FingerprintsTextFileIO object.
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}
|
|
1191
|
|
# Set default value for bits order...
#
# BitsOrder is set to the default value provided by FingerprintsStringUtil only when
# it doesn't already have a true value. Returns FingerprintsTextFileIO object.
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  $This->{BitsOrder} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();

  return $This;
}
|
|
1203
|
|
# Set default value for vector string format...
#
# VectorStringFormat is set to the default value provided by FingerprintsStringUtil
# for the current FingerprintsObject. Nothing is changed when VectorStringFormat
# already has a true value or no FingerprintsObject is available. Returns
# FingerprintsTextFileIO object.
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  return $This if $This->{VectorStringFormat};
  return $This unless $This->{FingerprintsObject};

  $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});

  return $This;
}
|
|
1215
|
|
# Generate fingerprints object using current fingerprints string...
#
# FingerprintsObject is reset and then generated by parsing FingerprintsString as
# a fingerprints bit-vector or fingerprints vector string based on the current
# fingerprints string mode.
#
# Returns FingerprintsTextFileIO object when FingerprintsString is empty or has
# been parsed. Returns undef when neither FingerprintsBitVectorStringMode nor
# FingerprintsVectorStringMode is set.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  # Nothing to parse...
  return $This if !$This->{FingerprintsString};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});
    return $This;
  }

  # Unknown fingerprints string mode: explicit undef is returned in all contexts...
  return undef;
}
|
|
1239
|
|
# Generate fingerprints string using current fingerprints object...
#
# FingerprintsString is reset to an empty string and then generated from
# FingerprintsObject using BitStringFormat and BitsOrder during fingerprints
# bit-vector string mode or VectorStringFormat during fingerprints vector string
# mode. It stays empty when no FingerprintsObject is available or no fingerprints
# string mode is set. Returns FingerprintsTextFileIO object.
#
sub _GenerateFingerprintsString {
  my($This) = @_;

  $This->{FingerprintsString} = '';

  # Nothing to generate the string from...
  return $This unless $This->{FingerprintsObject};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{BitStringFormat}, $This->{BitsOrder});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{VectorStringFormat});
  }

  return $This;
}
|
|
1260
|
|
# Is it a fingerprints file?
#
# This can be invoked as a method on FingerprintsTextFileIO object with a file name
# or as a function with a file name as its first parameter. The file name is checked
# using FileUtil::CheckFileType against csv and tsv file extensions and the resulting
# status is returned.
#
sub IsFingerprintsTextFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # Two parameters with FingerprintsTextFileIO object as the first one imply a
  # method invocation; otherwise, the first parameter is the file name...
  my $InvokedAsMethod = ((@_ == 2) && (_IsFingerprintsTextFileIO($FirstParameter))) ? 1 : 0;
  my $FileName = $InvokedAsMethod ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "csv tsv");

  return $Status;
}
|
|
1278
|
|
# Is it a FingerprintsTextFileIO object?
#
# Returns 1 for a blessed reference which is an instance of this class or a class
# derived from it; otherwise, returns 0.
#
sub _IsFingerprintsTextFileIO {
  my($Object) = @_;

  # Anything other than a blessed reference can't be an object...
  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
|
|
1285
|
|
1286 1;
|
|
1287
|
|
1288 __END__
|
|
1289
|
|
1290 =head1 NAME
|
|
1291
|
|
1292 FingerprintsTextFileIO
|
|
1293
|
|
1294 =head1 SYNOPSIS
|
|
1295
|
|
1296 use FileIO::FingerprintsTextFileIO;
|
|
1297
|
|
1298 use FileIO::FingerprintsTextFileIO qw(:all);
|
|
1299
|
|
1300 =head1 DESCRIPTION
|
|
1301
|
|
1302 B<FingerprintsTextFileIO> class provides the following methods:
|
|
1303
|
|
1304 new, GetDataColLabels, GetDataLineWords, GetFingerprints, GetFingerprintsString,
|
|
1305 IsFingerprintsDataValid, IsFingerprintsFileDataValid, IsFingerprintsTextFile,
|
|
1306 Next, Read, SetBitStringFormat, SetBitsOrder, SetColMode, SetDataColLabels,
|
|
1307 SetDataLineWords, SetDetailLevel, SetFingerprints, SetFingerprintsString,
|
|
1308 SetFingerprintsStringMode, SetInDelim, SetOutDelim, SetVectorStringFormat,
|
|
1309 WriteFingerprints, WriteFingerprintsString
|
|
1310
|
|
1311 The following methods can also be used as functions:
|
|
1312
|
|
1313 IsFingerprintsTextFile
|
|
1314
|
|
1315 B<FingerprintsTextFileIO> class is derived from I<FileIO> class and uses its methods to support
|
|
1316 generic file related functionality.
|
|
1317
|
|
1318 The fingerprints CSV/TSV text file format with B<.csv> or B<.tsv> file extensions supports two
|
|
1319 types of fingerprints string data: fingerprints bit-vectors and fingerprints vector strings. The
|
|
1320 fingerprints string data is treated as column value in a text file.
|
|
1321
|
|
1322 Example of text file format containing fingerprints string data:
|
|
1323
|
|
1324 "CompoundID","PathLengthFingerprints"
|
|
1325 "Cmpd1","FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes
|
|
1326 :MinLength1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a4
|
|
1327 9913991a6603130b0a19e8051c89184414953800cc2151082844a20104280013086030
|
|
1328 8e8204d402800831048940e44281c00060449a5000ac80c894114e006321264401..."
|
|
1329 ... ...
|
|
1330 ... ...
|
|
1331
|
|
1332 The current release of MayaChemTools supports the following types of fingerprint
|
|
1333 bit-vector and vector strings:
|
|
1334
|
|
1335 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi
|
|
1336 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT
|
|
1337 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X
|
|
1338 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A
|
|
1339 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2
|
|
1340 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B...
|
|
1341
|
|
1342 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS
|
|
1343 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2
|
|
1344 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1
|
|
1345 O.X1.BO2;2 4 14 3 10 1 1 1 3 2
|
|
1346
|
|
1347 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume
|
|
1348 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F
|
|
1349 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1
|
|
1350
|
|
1351 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN
|
|
1352 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C
|
|
1353 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N
|
|
1354 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8
|
|
1355 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1
|
|
1356 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0...
|
|
1357
|
|
1358 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs
|
|
1359 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN
|
|
1360 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3
|
|
1361 .024 -2.270
|
|
1362
|
|
1363 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
|
|
1364 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435
|
|
1365 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1
|
|
1366 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1367 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1368
|
|
1369 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi
|
|
1370 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391
|
|
1371 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414
|
|
1372 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103
|
|
1373 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338
|
|
1374 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303...
|
|
1375
|
|
1376 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes
|
|
1377 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524
|
|
1378 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649
|
|
1379 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...;
|
|
1380 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2
|
|
1381 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1
|
|
1382
|
|
1383 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp
|
|
1384 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100
|
|
1385 0000000001010000000110000011000000000000100000000000000000000000100001
|
|
1386 1000000110000000000000000000000000010011000000000000000000000000010000
|
|
1387 0000000000000000000000000010000000000000000001000000000000000000000000
|
|
1388 0000000000010000100001000000000000101000000000000000100000000000000...
|
|
1389
|
|
1390 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu
|
|
1391 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8
|
|
1392 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567
|
|
1393 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012
|
|
1394 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455
|
|
1395 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404...
|
|
1396
|
|
1397 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp
|
|
1398 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184
|
|
1399 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450
|
|
1400 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430
|
|
1401 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134
|
|
1402 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566...
|
|
1403
|
|
1404 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
|
|
1405 0000000000000000000000000000000001001000010010000000010010000000011100
|
|
1406 0100101010111100011011000100110110000011011110100110111111111111011111
|
|
1407 11111111111110111000
|
|
1408
|
|
1409 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
|
|
1410 1110011111100101111111000111101100110000000000000011100010000000000000
|
|
1411 0000000000000000000000000000000000000000000000101000000000000000000000
|
|
1412 0000000000000000000000000000000000000000000000000000000000000000000000
|
|
1413 0000000000000000000000000000000000000011000000000000000000000000000000
|
|
1414 0000000000000000000000000000000000000000
|
|
1415
|
|
1416 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
|
|
1417 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1418 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
|
|
1419 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
|
|
1420 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
|
|
1421 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
|
|
1422
|
|
1423 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
|
|
1424 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
|
|
1425 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
|
|
1426 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
1427 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
|
|
1428 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
|
|
1429
|
|
1430 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
|
|
1431 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
|
|
1432 0100010101011000101001011100110001000010001001101000001001001001001000
|
|
1433 0010110100000111001001000001001010100100100000000011000000101001011100
|
|
1434 0010000001000101010100000100111100110111011011011000000010110111001101
|
|
1435 0101100011000000010001000011000010100011101100001000001000100000000...
|
|
1436
|
|
1437 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
|
|
1438 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
|
|
1439 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
|
|
1440 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
|
|
1441 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
|
|
1442 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
|
|
1443
|
|
1444 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt
|
|
1445 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1
|
|
1446 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N
|
|
1447 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1
|
|
1448 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR
|
|
1449 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ...
|
|
1450
|
|
1451 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
|
|
1452 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1
|
|
1453 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3.
|
|
1454 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...;
|
|
1455 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1
|
|
1456 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1...
|
|
1457
|
|
1458 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi
|
|
1459 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar
|
|
1460 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H
|
|
1461 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...;
|
|
1462 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4
|
|
1463 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ...
|
|
1464
|
|
1465 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
|
|
1466 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-
|
|
1467 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO
|
|
1468 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...;
|
|
1469 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
|
|
1470
|
|
1471 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica
|
|
1472 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC
|
|
1473 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC-
|
|
1474 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...;
|
|
1475 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2
|
|
1476
|
|
1477 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
|
|
1478 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1
|
|
1479 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1
|
|
1480 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1
|
|
1481 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....;
|
|
1482 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2
|
|
1483 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8...
|
|
1484
|
|
1485 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1
|
|
1486 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C
|
|
1487 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3-
|
|
1488 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2
|
|
1489 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C.
|
|
1490 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7...
|
|
1491
|
|
1492 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
|
|
1493 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H
|
|
1494 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2-
|
|
1495 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H
|
|
1496 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...;
|
|
1497 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10
|
|
1498 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
|
|
1499
|
|
1500 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
|
|
1501 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0
|
|
1502 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1
|
|
1503 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0
|
|
1504 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0
|
|
1505 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18...
|
|
1506
|
|
1507 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize:
|
|
1508 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1-
|
|
1509 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1
|
|
1510 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1-
|
|
1511 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...;
|
|
1512 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23
|
|
1513 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1
|
|
1514 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ...
|
|
1515
|
|
1516 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD
|
|
1517 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106
|
|
1518 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0
|
|
1519 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26
|
|
1520 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0
|
|
1521 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ...
|
|
1522
|
|
1523
|
|
1524 =head2 METHODS
|
|
1525
|
|
1526 =over 4
|
|
1527
|
|
1528 =item B<new>
|
|
1529
|
|
1530 $NewFingerprintsTextFileIO = new FileIO::FingerprintsTextFileIO(%IOParameters);
|
|
1531
|
|
1532 Using specified I<IOParameters> names and values hash, B<new> method creates a new
|
|
1533 object and returns a reference to a newly created B<FingerprintsTextFileIO> object. By default,
|
|
1534 the following properties are initialized during I<Read> mode:
|
|
1535
|
|
1536 Name = '';
|
|
1537 Mode = 'Read';
|
|
1538 Status = 0;
|
|
1539 FingerprintsStringMode = 'AutoDetect';
|
|
1540 FingerprintsCol = 'AutoDetect';
|
|
1541 ColMode = 'ColNum';
|
|
1542 CompoundIDCol = 'AutoDetect';
|
|
1543 CompoundIDPrefix = 'Cmpd';
|
|
1544 InDelim = 'Comma';
|
|
1545 ValidateData = 1;
|
|
1546 DetailLevel = 1;
|
|
1547
|
|
1548 During I<Write> mode, the following properties get initialized by default:
|
|
1549
|
|
1550 FingerprintsStringMode = undef;
|
|
1551
|
|
1552 BitStringFormat = HexadecimalString;
|
|
1553 BitsOrder = Ascending;
|
|
1554
|
|
1555 VectorStringFormat = NumericalValuesString or ValuesString;
|
|
1556 OutDelim = 'Comma';
|
|
1557 OutQuote = 1;
|
|
1558
|
|
1559 Examples:
|
|
1560
|
|
1561 $NewFingerprintsTextFileIO = new FileIO::FingerprintsTextFileIO(
|
|
1562 'Name' => 'Sample.csv',
|
|
1563 'Mode' => 'Read');
|
|
1564
|
|
1565 $NewFingerprintsTextFileIO = new FileIO::FingerprintsTextFileIO(
|
|
1566 'Name' => 'Sample.csv',
|
|
1567 'Mode' => 'Read',
|
|
1568 'FingerprintsStringMode' =>
|
|
1569 'AutoDetect',
|
|
1570 'ColMode' => 'ColLabel',
|
|
1571 'FingerprintsCol' => 'Fingerprints',
|
|
1572 'CompoundIDCol' => 'CompoundID',
|
|
1573 'InDelim' => 'Comma');
|
|
1574
|
|
1575 $NewFingerprintsTextFileIO = new FileIO::FingerprintsTextFileIO(
|
|
1576 'Name' => 'Sample.csv',
|
|
1577 'Mode' => 'Write',
|
|
1578 'FingerprintsStringMode' =>
|
|
1579 'FingerprintsBitVectorString',
|
|
1580 'Overwrite' => 1,
|
|
1581 'BitStringFormat' => 'HexadecimalString',
|
|
1582 'BitsOrder' => 'Ascending');
|
|
1583
|
|
1584 $NewFingerprintsTextFileIO = new FileIO::FingerprintsTextFileIO(
|
|
1585 'Name' => 'Sample.tsv',
|
|
1586 'Mode' => 'Write',
|
|
1587 'FingerprintsStringMode' =>
|
|
1588 'FingerprintsVectorString',
|
|
1589 'Overwrite' => 1,
|
|
1590 'VectorStringFormat' => 'IDsAndValuesString',
|
|
1591 'OutDelim' => 'Tab',
|
|
1592 'OutQuote' => 0);
|
|
1593
|
|
1594 =item B<GetDataColLabels>
|
|
1595
|
|
1596 @ColLabels = $FingerprintsTextFileIO->GetDataColLabels();
|
|
1597 $NumOfColLabels = $FingerprintsTextFileIO->GetDataColLabels();
|
|
1598
|
|
1599 Returns an array of B<ColLabels> from first line in text file. In scalar context, it returns
|
|
1600 number of column labels.
|
|
1601
|
|
1602 =item B<GetDataLineWords>
|
|
1603
|
|
1604 @DataWords = $FingerprintsTextFileIO->GetDataLineWords();
|
|
1605 $NumOfDataWords = $FingerprintsTextFileIO->GetDataLineWords();
|
|
1606
|
|
1607 Returns an array of B<DataWords> in current data line. In scalar context, it returns
|
|
1608 number of data words.
|
|
1609
|
|
1610 =item B<GetFingerprints>
|
|
1611
|
|
1612 $FingerprintsObject = $FingerprintsTextFileIO->GetFingerprints();
|
|
1613
|
|
1614 Returns B<FingerprintsObject> generated for current data line using fingerprints bit-vector
|
|
1615 or vector string data. The fingerprints object corresponds to any of the supported fingerprints
|
|
1616 such as PathLengthFingerprints, ExtendedConnectivity, and so on.
|
|
1617
|
|
1618 =item B<GetFingerprintsString>
|
|
1619
|
|
1620 $FingerprintsString = $FingerprintsTextFileIO->GetFingerprintsString();
|
|
1621
|
|
1622 Returns B<FingerprintsString> for current data line.
|
|
1623
|
|
1624 =item B<IsFingerprintsDataValid>
|
|
1625
|
|
1626 $Status = $FingerprintsTextFileIO->IsFingerprintsDataValid();
|
|
1627
|
|
1628 Returns 1 or 0 based on whether B<FingerprintsObject> is valid.
|
|
1629
|
|
1630 =item B<IsFingerprintsFileDataValid>
|
|
1631
|
|
1632 $Status = $FingerprintsTextFileIO->IsFingerprintsFileDataValid();
|
|
1633
|
|
1634 Returns 1 or 0 based on whether text file contains valid fingerprints data.
|
|
1635
|
|
1636 =item B<IsFingerprintsTextFile>
|
|
1637
|
|
1638 $Status = $FingerprintsTextFileIO->IsFingerprintsTextFile($FileName);
|
|
1639 $Status = FileIO::FingerprintsTextFileIO::IsFingerprintsTextFile($FileName);
|
|
1640
|
|
1641 Returns 1 or 0 based on whether I<FileName> is a fingerprints text file.
|
|
1642
|
|
1643 =item B<Next or Read>
|
|
1644
|
|
1645 $FingerprintsTextFileIO = $FingerprintsTextFileIO->Next();
|
|
1646 $FingerprintsTextFileIO = $FingerprintsTextFileIO->Read();
|
|
1647
|
|
1648 Reads next available fingerprints line in text file, processes the data, generates appropriate
|
|
1649 fingerprints object, and returns B<FingerprintsTextFileIO>. The generated fingerprints object
|
|
1650 is available using method B<GetFingerprints>.
|
|
1651
|
|
1652 =item B<SetBitStringFormat>
|
|
1653
|
|
1654 $FingerprintsTextFileIO->SetBitStringFormat($Format);
|
|
1655
|
|
1656 Sets bit string I<Format> for fingerprints bit-vector string data in a text file and returns
|
|
1657 B<FingerprintsTextFileIO>. Possible values for B<BitStringFormat>: I<BinaryString or HexadecimalString>.
|
|
1658
|
|
1659 =item B<SetBitsOrder>
|
|
1660
|
|
1661 $FingerprintsTextFileIO->SetBitsOrder($BitsOrder);
|
|
1662
|
|
1663 Sets I<BitsOrder> for fingerprints bit-vector string data in a text file and returns B<FingerprintsTextFileIO>.
|
|
1664 Possible values for B<BitsOrder>: I<Ascending or Descending>.
|
|
1665
|
|
1666 =item B<SetColMode>
|
|
1667
|
|
1668 $FingerprintsTextFileIO->SetColMode($ColMode);
|
|
1669
|
|
1670 Sets I<ColMode> for a text file and returns B<FingerprintsTextFileIO>. Possible values for B<ColMode>:
|
|
1671 I<ColNum or ColLabel>.
|
|
1672
|
|
1673 =item B<SetDataColLabels>
|
|
1674
|
|
1675 $FingerprintsTextFileIO->SetDataColLabels(@ColLabels);
|
|
1676 $FingerprintsTextFileIO->SetDataColLabels(\@ColLabels);
|
|
1677
|
|
1678 Sets I<ColLabels> for a text file using an array or a reference to an array containing column labels
|
|
1679 and returns B<FingerprintsTextFileIO>.
|
|
1680
|
|
1681 =item B<SetDataLineWords>
|
|
1682
|
|
1683 $FingerprintsTextFileIO->SetDataLineWords(@LineWords);
|
|
1684 $FingerprintsTextFileIO->SetDataLineWords(\@LineWords);
|
|
1685
|
|
1686 Sets I<DataLineWords> for a text file using an array or a reference to an array containing data words
|
|
1687 and returns B<FingerprintsTextFileIO>.
|
|
1688
|
|
1689 =item B<SetDetailLevel>
|
|
1690
|
|
1691 $FingerprintsTextFileIO->SetDetailLevel($Level);
|
|
1692
|
|
1693 Sets detail I<Level> for generating diagnostic messages during text file processing and returns
|
|
1694 B<FingerprintsTextFileIO>. Possible values: I<Positive integers>.
|
|
1695
|
|
1696 =item B<SetFingerprints>
|
|
1697
|
|
1698 $FingerprintsTextFileIO->SetFingerprints($FingerprintsObject);
|
|
1699
|
|
1700 Sets I<FingerprintsObject> for current data line and returns B<FingerprintsTextFileIO>.
|
|
1701
|
|
1702 =item B<SetFingerprintsString>
|
|
1703
|
|
1704 $FingerprintsTextFileIO->SetFingerprintsString($FingerprintsString);
|
|
1705
|
|
1706 Sets I<FingerprintsString> for current data line and returns B<FingerprintsTextFileIO>.
|
|
1707
|
|
1708 =item B<SetFingerprintsStringMode>
|
|
1709
|
|
1710 $FingerprintsTextFileIO->SetFingerprintsStringMode($Mode);
|
|
1711
|
|
1712 Sets I<FingerprintsStringMode> for text file and returns B<FingerprintsTextFileIO>.
|
|
1713 Possible values: I<AutoDetect, FingerprintsBitVectorString, or FingerprintsVectorString>.
|
|
1714
|
|
1715 =item B<SetInDelim>
|
|
1716
|
|
1717 $FingerprintsTextFileIO->SetInDelim($InDelim);
|
|
1718
|
|
1719 Sets I<InDelim> for text file and returns B<FingerprintsTextFileIO>. Possible values: I<comma,
|
|
1720 semicolon, tab>.
|
|
1721
|
|
1722 =item B<SetOutDelim>
|
|
1723
|
|
1724 $FingerprintsTextFileIO->SetOutDelim($OutDelim);
|
|
1725
|
|
1726 Sets I<OutDelim> for text file and returns B<FingerprintsTextFileIO>. Possible values: I<comma,
|
|
1727 semicolon, tab>.
|
|
1728
|
|
1729 =item B<SetVectorStringFormat>
|
|
1730
|
|
1731 $FingerprintsTextFileIO->SetVectorStringFormat($Format);
|
|
1732
|
|
1733 Sets I<VectorStringFormat> for text file and returns B<FingerprintsTextFileIO>. Possible values:
|
|
1734 I<IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString>.
|
|
1735
|
|
1736 =item B<WriteFingerprints>
|
|
1737
|
|
1738 $FingerprintsTextFileIO->WriteFingerprints($FingerprintsObject,
|
|
1739 @DataColValues);
|
|
1740
|
|
1741 Writes fingerprints string generated from I<FingerprintsObject> object and other data including
|
|
1742 I<DataColValues> to text file and returns B<FingerprintsTextFileIO>.
|
|
1743
|
|
1744 =item B<WriteFingerprintsString>
|
|
1745
|
|
1746 $FingerprintsTextFileIO->WriteFingerprintsString($FingerprintsString,
|
|
1747 @DataColValues);
|
|
1748
|
|
1749 Writes I<FingerprintsString> and other data including I<DataColValues> to text file and returns
|
|
1750 B<FingerprintsTextFileIO>.
|
|
1751
|
|
1752 Caveats:
|
|
1753
|
|
1754 o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat
|
|
1755 values are ignored during writing; the supplied fingerprints string is written to the file
|
|
1756 as it is.
|
|
1757
|
|
1758 =back
|
|
1759
|
|
1760 =head1 AUTHOR
|
|
1761
|
|
1762 Manish Sud <msud@san.rr.com>
|
|
1763
|
|
1764 =head1 SEE ALSO
|
|
1765
|
|
1766 FingerprintsSDFileIO.pm, FingerprintsFPFileIO.pm
|
|
1767
|
|
1768 =head1 COPYRIGHT
|
|
1769
|
|
1770 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
1771
|
|
1772 This file is part of MayaChemTools.
|
|
1773
|
|
1774 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
1775 the terms of the GNU Lesser General Public License as published by the Free
|
|
1776 Software Foundation; either version 3 of the License, or (at your option)
|
|
1777 any later version.
|
|
1778
|
|
1779 =cut
|