Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/FileIO/FingerprintsTextFileIO.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 package FileIO::FingerprintsTextFileIO; | |
2 # | |
3 # $RCSfile: FingerprintsTextFileIO.pm,v $ | |
4 # $Date: 2015/02/28 20:48:43 $ | |
5 # $Revision: 1.19 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability of fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use Scalar::Util (); | |
33 use TextUtil (); | |
34 use FileUtil (); | |
35 use Fingerprints::FingerprintsStringUtil (); | |
36 use FileIO::FileIO; | |
37 | |
38 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
39 | |
# Base classes followed by Exporter settings: nothing is exported by default and
# IsFingerprintsTextFile is exported on request or through the "all" tag...
@ISA = ('FileIO::FileIO', 'Exporter');

@EXPORT = ();
@EXPORT_OK = ('IsFingerprintsTextFile');
%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Class name interpolated into error and warning messages; its value is assigned
# by _InitializeClass...
my($ClassName);
_InitializeClass();
49 | |
# Class constructor...
#
# Creates the object using the parent class constructor, sets up default values
# for object data and then applies the property names and values specified by
# the caller. Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # $Class may be a class name or an existing object; the object returned by the
  # parent constructor is blessed into the corresponding class...
  my $This = bless $Class->SUPER::new(), (ref($Class) || $Class);

  # Defaults first, caller specified values afterwards...
  $This->_InitializeFingerprintsTextFileIO();
  $This->_InitializeFingerprintsTextFileIOProperties(%NamesAndValues);

  return $This;
}
63 | |
# Initialize object data shared by read and write, and then the data specific
# to reading and to writing. Returns the object.
#
sub _InitializeFingerprintsTextFileIO {
  my($This) = @_;
  my(%Defaults);

  %Defaults = (
    # Fingerprints string data format during read/write...
    #
    # For file read:
    #
    # AutoDetect - automatically detect format of fingerprints string
    # FingerprintsBitVectorString - Bit vector fingerprints string format
    # FingerprintsVectorString - Vector fingerprints string format
    #
    # Default value: AutoDetect
    #
    # For file write:
    #
    # FingerprintsBitVectorString - Bit vector fingerprints string format
    # FingerprintsVectorString - Vector fingerprints string format
    #
    # Default value: undef
    #
    FingerprintsStringMode => undef,

    # For file read:
    #
    # o Fingerprints bit-vector and vector object for current fingerprints string
    #
    # For file write:
    #
    # o Fingerprints bit-vector and vector object for current fingerprints string
    # o Any supported fingerprints object: PathLengthFingerprints, ExtendedConnectivity, and so on.
    #
    FingerprintsObject => undef,

    # Fingerprints string for current line during read/write...
    FingerprintsString => undef,

    # Flag indicating that first data line hasn't been read/written yet...
    FirstDataLineIO => 1,

    # Current fingerprints string data line number during read/write...
    LineNum => 0,

    # Text line data during read/write...
    DataLine => undef,

    # Text file delimiter during read/write...
    Delim => '',
  );
  @{$This}{keys %Defaults} = values %Defaults;

  # Words in the current text line and text file column labels during read/write...
  @{$This->{DataLineWords}} = ();
  @{$This->{DataColLabels}} = ();

  # Parameters used only for read and only for write...
  $This->_InitializeFingerprintsTextFileIORead();
  $This->_InitializeFingerprintsTextFileIOWrite();

  return $This;
}
126 | |
# Initialize class data: record the name of this package for use in messages...
#
sub _InitializeClass {
  return $ClassName = __PACKAGE__;
}
133 | |
# Set default values for object data used during reading of fingerprints text
# file. Returns the object.
#
sub _InitializeFingerprintsTextFileIORead {
  my($This) = @_;
  my(%ReadDefaults, $Property);

  %ReadDefaults = (
    # Column ID specification for identification of compound ID or fingerprints string
    # data column...
    #
    # ColNum - A valid column number
    # ColLabel - A valid column name
    #
    ColMode => 'ColNum',

    # Fingerprints column to use for retrieving fingerprints string data...
    #
    # Value of AutoDetect implies use first column containing the word Fingerprints in its
    # column label to retrieve fingerprints string data. Otherwise, a valid column number
    # or column name must be specified based on the value of ColMode.
    #
    FingerprintsCol => 'AutoDetect',

    # Compound ID column to use for retrieving compound IDs for fingerprints...
    #
    # Value of AutoDetect implies use first column containing the word CompoundID in its column
    # label to retrieve compound IDs or assign sequentially generated compound IDs. Otherwise,
    # a valid column number or column name must be specified based on the value of ColMode.
    #
    CompoundIDCol => 'AutoDetect',

    # A prefix string used for generating compound IDs like LabelPrefixString<Number> during
    # sequential generation of compound IDs. Default value, Cmpd, generates compound IDs
    # which look like Cmpd<Number>.
    #
    CompoundIDPrefix => 'Cmpd',

    # Input delimiter for fingerprints CSV text file. Possible values: comma, semicolon or tab. This
    # option is ignored for TSV text file and tab is used as the delimiter.
    #
    InDelim => 'comma',

    # Flag controlling validation of fingerprints data corresponding to FingerprintsCol
    # before generation of fingerprints objects; validation is turned on by default...
    #
    ValidateData => 1,

    # Level of detail to print during validation of data for invalid or missing data...
    DetailLevel => 1,

    # Number of missing and invalid fingerprints string data lines...
    NumOfLinesWithMissingData => 0,
    NumOfLinesWithInvalidData => 0,

    # Compound ID for current fingerprints string...
    CompoundID => undef,

    # Status of data in fingerprints text file...
    ValidFileData => 0,
    ValidCompoundIDCol => 0,
    ValidFingerprintsCol => 0,
    ValidFingerprintsStringMode => 0,
  );

  for $Property (keys %ReadDefaults) {
    $This->{$Property} = $ReadDefaults{$Property};
  }

  return $This;
}
199 | |
# Set default values for object data used during writing of fingerprints text
# file. Returns the object.
#
sub _InitializeFingerprintsTextFileIOWrite {
  my($This) = @_;
  my(%WriteDefaults, $Property);

  %WriteDefaults = (
    # Fingerprints bit vector string format...
    #
    # Possible values: BinaryString or HexadecimalString [Default]
    #
    # Default BitStringFormat is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat.
    #
    BitStringFormat => undef,

    # Bits order in fingerprints bit vector string...
    #
    # Ascending - First bit in each byte as the lowest bit [Default]
    # Descending - First bit in each byte as the highest bit
    #
    # Default BitsOrder is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder.
    #
    BitsOrder => undef,

    # Fingerprints vector string format...
    #
    # Possible values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString
    #
    # Default VectorStringFormat is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat.
    # For fingerprints vector object containing vector NumericalValues, it corresponds to IDsAndValuesString; otherwise,
    # it's set to ValuesString.
    #
    VectorStringFormat => undef,

    # Delimiter for output fingerprints CSV/TSV file. Possible values: comma, tab, semicolon. This
    # option is ignored for TSV text file and tab is used as the delimiter.
    #
    OutDelim => 'comma',

    # Quotes around column values for output fingerprints CSV/TSV text file...
    OutQuote => 1,

    # Overwriting existing file...
    Overwrite => 0,
  );

  for $Property (keys %WriteDefaults) {
    $This->{$Property} = $WriteDefaults{$Property};
  }

  return $This;
}
245 | |
# Initialize object values using the property names and values specified during
# object instantiation, and get the object ready for read or write based on the
# value returned by GetMode. Croaks when Name is not specified or when the file
# doesn't pass the IsFingerprintsTextFile check. Returns the object.
#
sub _InitializeFingerprintsTextFileIOProperties {
  my($This, %NamesAndValues) = @_;
  my($Name, $MethodName);

  # Each property is set through its Set<PropertyName> method. All other property
  # names and values along with all Set/Get<PropertyName> methods are implemented
  # on-demand using ObjectProperty class.
  for $Name (keys %NamesAndValues) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # File name is required...
  exists $NamesAndValues{Name}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";

  # Make sure it's a fingerprints file...
  $Name = $NamesAndValues{Name};
  $This->IsFingerprintsTextFile($Name)
    or croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be fingerprints format...";

  # Mode specific initialization; any other mode needs no additional setup here...
  if ($This->GetMode() =~ /^Read$/i) {
    $This->_InitializeFingerprintsTextFileIOReadProperties(%NamesAndValues);
  }
  elsif ($This->GetMode() =~ /^(Write|Append)$/i) {
    $This->_InitializeFingerprintsTextFileIOWriteProperties(%NamesAndValues);
  }

  return $This;
}
278 | |
# Initialize object properties for reading fingerprints text file: default the
# fingerprints string mode to AutoDetect when it hasn't been set and collect
# information about the text file data. Returns the object.
#
sub _InitializeFingerprintsTextFileIOReadProperties {
  my($This, %NamesAndValues) = @_;

  # Use AutoDetect unless a value has already been set...
  $This->{FingerprintsStringMode} ||= 'AutoDetect';

  $This->_PrepareForReadingFingerprintsTextFileData();

  return $This;
}
293 | |
# Initialize object properties for writing fingerprints text file...
#
# Croaks unless FingerprintsStringMode and DataColLabels were specified during
# object instantiation, FingerprintsStringMode is one of the two values supported
# for write/append, and quoting is turned on for semicolon output delimiter.
# Returns the object.
#
sub _InitializeFingerprintsTextFileIOWriteProperties {
  my($This, %NamesAndValues) = @_;

  # Check FingerprintsStringMode value...
  if (!exists $NamesAndValues{FingerprintsStringMode}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode...";
  }

  # AutoDetect is accepted by SetFingerprintsStringMode but makes no sense for write...
  if ($This->{FingerprintsStringMode} !~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  if (!exists $NamesAndValues{DataColLabels}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying DataColLabels...";
  }

  # Fingerprints strings contain semicolons; unquoted values can't be used with
  # semicolon as column delimiter...
  if ($This->{OutDelim} =~ /semicolon/i && !$This->{OutQuote}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: The value specified, $This->{OutQuote}, using \"OutQuote\" is not allowed with semicolon value of \"OutDelim\": Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
  }

  $This->_PrepareForWritingFingerprintsTextFileData();

  return $This;
}
320 | |
# Set FingerprintsStringMode...
#
# Supported values, matched without regard to case:
#
# AutoDetect - automatically detect format of fingerprints string
# FingerprintsBitVectorString - Bit vector fingerprints string format
# FingerprintsVectorString - Vector fingerprints string format
#
# Croaks for any other value. Returns the object.
#
sub SetFingerprintsStringMode {
  my($This, $Value) = @_;

  unless ($Value =~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString...";
  }
  $This->{FingerprintsStringMode} = $Value;

  return $This;
}
338 | |
# Set ColMode, which controls whether columns are identified by number or by
# label. Supported values, matched without regard to case: ColNum or ColLabel.
# Croaks for any other value. Returns the object.
#
sub SetColMode {
  my($This, $Value) = @_;

  $Value =~ /^(ColNum|ColLabel)$/i
    or croak "Error: ${ClassName}->SetColMode: ColMode value, $Value, is not valid; Supported values: ColNum or ColLabel...";

  $This->{ColMode} = $Value;

  return $This;
}
352 | |
# Set InDelim, the input delimiter name. Supported values, matched without regard
# to case: comma, semicolon, or tab. Croaks for any other value. Returns the
# object.
#
sub SetInDelim {
  my($This, $Value) = @_;

  $Value =~ /^(comma|semicolon|tab)$/i
    or croak "Error: ${ClassName}->SetInDelim: InDelim value, $Value, is not valid; Supported values: comma, semicolon, or tab...";

  $This->{InDelim} = $Value;

  return $This;
}
366 | |
# Set DetailLevel, the level of detail used for warnings during data validation.
# Croaks unless TextUtil::IsPositiveInteger accepts the value. Returns the object.
#
sub SetDetailLevel {
  my($This, $Value) = @_;

  TextUtil::IsPositiveInteger($Value)
    or croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0...";

  $This->{DetailLevel} = $Value;

  return $This;
}
380 | |
# Set BitStringFormat, the format of fingerprints bit vector strings. Supported
# values, matched without regard to case: BinaryString or HexadecimalString.
# Croaks for any other value. Returns the object.
#
sub SetBitStringFormat {
  my($This, $Value) = @_;

  $Value =~ /^(BinaryString|HexadecimalString)$/i
    or croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";

  $This->{BitStringFormat} = $Value;

  return $This;
}
394 | |
# Set BitsOrder, the order of bits in fingerprints bit vector strings...
#
# Supported values, matched without regard to case:
#
# Ascending - First bit in each byte as the lowest bit
# Descending - First bit in each byte as the highest bit
#
# Croaks for any other value. Returns the object.
#
sub SetBitsOrder {
  my($This, $Value) = @_;

  if ($Value !~ /^(Ascending|Descending)$/i) {
    # The message names BitsOrder, the property actually being set...
    croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
  }

  $This->{BitsOrder} = $Value;

  return $This;
}
411 | |
# Set VectorStringFormat, the format of fingerprints vector strings...
#
# Supported values, matched without regard to case: IDsAndValuesString,
# IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString
#
# Croaks for any other value. Returns the object.
#
sub SetVectorStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
    # The message names VectorStringFormat, the property actually being set...
    croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
  }

  $This->{VectorStringFormat} = $Value;

  return $This;
}
427 | |
# Set OutDelim, the output delimiter name. Supported values, matched without
# regard to case: comma, tab or semicolon. Croaks for any other value. Returns
# the object.
#
sub SetOutDelim {
  my($This, $Value) = @_;

  $Value =~ /^(comma|tab|semicolon)$/i
    or croak "Error: ${ClassName}->SetOutDelim: OutDelim value, $Value, is not valid; Supported values: comma, tab or semicolon...";

  $This->{OutDelim} = $Value;

  return $This;
}
441 | |
# Set DataColLabels...
#
# Set output data column labels using:
# o List of column labels
# o Reference to a list of column labels
#
# Any existing labels are replaced. When no values are specified, a warning is
# issued and the existing labels are left unchanged. Returns the object.
#
sub SetDataColLabels {
  my($This, @Values) = @_;
  my($FirstValue, $TypeOfFirstValue);

  if (!@Values) {
    # The warning names this method...
    carp "Warning: ${ClassName}->SetDataColLabels: No data column labels specified...";
    return $This;
  }

  @{$This->{DataColLabels}} = ();

  $FirstValue = $Values[0];
  $TypeOfFirstValue = ref $FirstValue;

  if ($TypeOfFirstValue =~ /^ARRAY/) {
    # Initialize using array reference; only the first value is used...
    push @{$This->{DataColLabels}}, @{$FirstValue};
  }
  else {
    # It's a list of values...
    push @{$This->{DataColLabels}}, @Values;
  }

  return $This;
}
473 | |
# Get column labels in list context or number of column labels in scalar
# context...
#
sub GetDataColLabels {
  my($This) = @_;

  if (wantarray) {
    return @{$This->{DataColLabels}};
  }
  return scalar @{$This->{DataColLabels}};
}
481 | |
# Get words in current data line in list context or number of words in scalar
# context...
#
sub GetDataLineWords {
  my($This) = @_;

  if (wantarray) {
    return @{$This->{DataLineWords}};
  }
  return scalar @{$This->{DataLineWords}};
}
489 | |
# Set DataLineWords...
#
# Set data line words using:
# o List of line words
# o Reference to a list of line words
#
# Any existing words are replaced. When no values are specified, a warning is
# issued and the existing words are left unchanged. Returns the object.
#
sub SetDataLineWords {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetDataLineWords: No line words specified...";
    return $This;
  }

  # Existing words are cleared before the new ones are retrieved...
  @{$This->{DataLineWords}} = ();

  # An array reference as first value supplies all the words; otherwise, the
  # values themselves are the words...
  push @{$This->{DataLineWords}}, (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  return $This;
}
521 | |
# Get fingerprints object stored for current data line; it's undef when no object
# is available...
#
sub GetFingerprints {
  my $This = shift;

  my $FingerprintsObject = $This->{FingerprintsObject};

  return $FingerprintsObject;
}
531 | |
# Store fingerprints object for current data line. Returns the object on which
# the method is invoked.
#
sub SetFingerprints {
  my $This = shift;
  my $FingerprintsObject = shift;

  $This->{FingerprintsObject} = $FingerprintsObject;

  return $This;
}
541 | |
# Get fingerprints string for current data line; the string None is returned
# when the stored value is false, which includes undef and empty string...
#
sub GetFingerprintsString {
  my($This) = @_;

  return $This->{FingerprintsString} || 'None';
}
549 | |
# Store fingerprints string for current data line. Returns the object.
#
sub SetFingerprintsString {
  my $This = shift;
  my $FingerprintsString = shift;

  $This->{FingerprintsString} = $FingerprintsString;

  return $This;
}
559 | |
# Does fingerprints text file contain valid data? Returns 1 when ValidFileData
# flag is true and 0 otherwise.
#
sub IsFingerprintsFileDataValid {
  my($This) = @_;

  if ($This->{ValidFileData}) {
    return 1;
  }
  return 0;
}
567 | |
# Does current data line contain valid fingerprints object data? Returns 1 when
# a fingerprints object is defined for the current line and 0 otherwise.
#
sub IsFingerprintsDataValid {
  my($This) = @_;

  if (defined $This->{FingerprintsObject}) {
    return 1;
  }
  return 0;
}
575 | |
# Read next available fingerprints line, process it and generate appropriate
# fingerprints objects...
#
# Returns undef when no data line could be read; otherwise, returns the object.
# The object is returned even for lines which are skipped: use
# IsFingerprintsDataValid to find out whether fingerprints were generated.
#
sub Read {
  my($This) = @_;

  # Read data line...
  $This->_ReadDataLine() or return undef;

  # No need to process data lines of a text file with invalid data; the line is
  # counted as missing data during validation...
  unless ($This->{ValidFileData}) {
    $This->{NumOfLinesWithMissingData} += 1 if $This->{ValidateData};
    return $This;
  }

  # Perform data validation and skip the line on failure...
  if ($This->{ValidateData} && !$This->_ValidateReadDataLine()) {
    return $This;
  }

  # Setup fingerprints string after checking again to handle problematic data for
  # non-validated data lines...
  #
  my $FingerprintsColNum = $This->{FingerprintsColNum};
  if ($FingerprintsColNum <= $#{$This->{DataLineWords}}) {
    $This->{FingerprintsString} = $This->{DataLineWords}[$FingerprintsColNum];
  }

  # Generate fingerprints object followed by compound ID for fingerprints string...
  $This->_GenerateFingerprintsObject();
  $This->_GenerateCompoundID();

  return $This;
}
617 | |
# Synonym for Read: read next available fingerprints line, process it and
# generate appropriate fingerprints objects...
#
sub Next {
  my $This = shift;

  return $This->Read();
}
626 | |
# Read next fingerprints data line and split it into words using the text file
# delimiter. Returns 1 after reading a line and 0 when the value returned by
# TextUtil::GetTextLine is false.
#
sub _ReadDataLine {
  my($This) = @_;
  my($Line);

  # Column label line is handled before reading the first data line...
  $This->_ProcessFirstDataLineRead() if $This->{FirstDataLineIO};

  # Initialize data for current line...
  $This->_InitializeReadDataLine();

  # Get next data line...
  $Line = TextUtil::GetTextLine($This->{FileHandle});
  $This->{DataLine} = $Line;
  return 0 unless $Line;

  # Get line words...
  $This->{LineNum} += 1;
  @{$This->{DataLineWords}} = TextUtil::SplitWords($Line, $This->{Delim});

  return 1;
}
651 | |
# Clear data corresponding to previous data line before reading next line.
# Returns the object.
#
sub _InitializeReadDataLine {
  my($This) = @_;
  my($Property);

  for $Property (qw(CompoundID DataLine FingerprintsObject FingerprintsString)) {
    $This->{$Property} = undef;
  }
  @{$This->{DataLineWords}} = ();

  return $This;
}
667 | |
# Validate fingerprints string data line...
#
# A data line is valid when it contains data for the fingerprints column, the
# fingerprints string values are valid and fingerprints string type and
# description match the ones recorded in FirstFingerprintsStringType and
# FirstFingerprintsStringDescription.
#
# Returns 1 for valid data line. Otherwise, the count of lines with missing or
# invalid data is incremented, a warning is issued for DetailLevel >= 2, with the
# data line included for DetailLevel >= 3, and 0 is returned.
#
sub _ValidateReadDataLine {
  my($This) = @_;
  my($InvalidFingerprintsData, $FingerprintsColNum, $FingerprintsString, $FingerprintsType, $FingerprintsDescription);

  $FingerprintsColNum = $This->{FingerprintsColNum};

  # Check for missing data...
  if ($FingerprintsColNum > $#{$This->{DataLineWords}}) {
    $This->{NumOfLinesWithMissingData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data: $This->{DataLine}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data...";
    }
    return 0;
  }

  # Check for invalid data...
  $FingerprintsString = $This->{DataLineWords}[$FingerprintsColNum];
  $InvalidFingerprintsData = 1;

  if (Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($FingerprintsString)) {
    ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($FingerprintsString);

    # Type and description come from file data: quote them so that any regular
    # expression metacharacters are matched literally...
    if ($This->{FirstFingerprintsStringType} =~ /^\Q$FingerprintsType\E$/i && $This->{FirstFingerprintsStringDescription} =~ /^\Q$FingerprintsDescription\E$/i) {
      $InvalidFingerprintsData = 0;
    }
  }

  if ($InvalidFingerprintsData) {
    $This->{NumOfLinesWithInvalidData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data: $This->{DataLine}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data...";
    }
    return 0;
  }

  return 1;
}
715 | |
# Setup compound ID for current fingerprints string and assign it to the
# fingerprints object, if any...
#
# The ID corresponds to CompoundIDPrefix followed by LineNum - 1 during sequential
# generation of compound IDs or when the data line has no compound ID column;
# otherwise, it's the value in compound ID column. Returns the object.
#
sub _GenerateCompoundID {
  my($This) = @_;
  my($CompoundID, $UseGeneratedID);

  $UseGeneratedID = ($This->{UseSequentialCompoundIDs} || ($This->{CompoundIDColNum} > $#{$This->{DataLineWords}})) ? 1 : 0;

  # LineNum includes column label line...
  $CompoundID = $UseGeneratedID
    ? "$This->{CompoundIDPrefix}" . ($This->{LineNum} - 1)
    : $This->{DataLineWords}[$This->{CompoundIDColNum}];

  $This->{CompoundID} = $CompoundID;

  # Set fingerprints ID...
  $This->{FingerprintsObject}->SetID($This->{CompoundID}) if $This->{FingerprintsObject};

  return $This;
}
742 | |
# Process first read: skip over column label line, count it as a line and mark
# first data line processing as done. Returns the object.
#
sub _ProcessFirstDataLineRead {
  my($This) = @_;

  # Column label line is read and discarded...
  TextUtil::GetTextLine($This->{FileHandle});
  $This->{LineNum} += 1;

  $This->{FirstDataLineIO} = 0;

  return $This;
}
756 | |
# Get ready for reading fingerprints text file: retrieve column information,
# validate compound ID and fingerprints columns along with fingerprints string
# mode, and set ValidFileData status. Returns the object.
#
sub _PrepareForReadingFingerprintsTextFileData {
  my($This) = @_;
  my($ValidFileData);

  # Retrieve text file columns information....
  $This->_RetrieveTextFileColData();

  # Validate columns information...
  $This->_ValidateReadCompoundIDCol();
  $This->_ValidateReadFingerprintsCol();

  # Fingerprints string mode can be validated only for a valid fingerprints column...
  $This->_ValidateReadFingerprintsStringMode() if $This->{ValidFingerprintsCol};

  # Text file data is valid only when all the checks have been successful...
  $ValidFileData = 0;
  if ($This->{ValidCompoundIDCol} && $This->{ValidFingerprintsCol} && $This->{ValidFingerprintsStringMode}) {
    $ValidFileData = 1;
  }
  $This->{ValidFileData} = $ValidFileData;

  return $This;
}
779 | |
# Retrieve information about columns in fingerprints text file...
#
# Sets up text file delimiter, column labels from the first text line and a map
# from column label to column number starting from 0. For column labels which
# appear more than once, the map contains the number of the last such column.
#
# Tab is used as delimiter for files with tsv extension; otherwise, the delimiter
# corresponds to InDelim. Croaks when the file doesn't exist or can't be opened.
# Returns the object.
#
sub _RetrieveTextFileColData {
  my($This) = @_;
  my($TextFile, $TextFileHandle, $FileDir, $FileName, $FileExt, $InDelim, $Line, $ColLabel, $ColNum, @ColLabels);

  @{$This->{DataColLabels}} = ();
  %{$This->{DataColLabelToNumMap}} = ();

  $TextFile = $This->{Name};

  if (!(-e $TextFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $TextFile, doesn't exist...";
  }

  $FileDir = ""; $FileName = ""; $FileExt = "";
  ($FileDir, $FileName, $FileExt) = FileUtil::ParseFileName($TextFile);

  $InDelim = ($FileExt =~ /^tsv$/i) ? "\t" : ($This->{InDelim} =~ /semicolon/i ? "\;" : "\,");
  $This->{Delim} = $InDelim;

  # Three argument open with an explicit read mode: the file name is used as is,
  # exactly like in the existence check above, instead of being interpreted for
  # mode characters and surrounding whitespace...
  if (!open $TextFileHandle, '<', $TextFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input text file $TextFile: $! ...";
  }

  # Get column label line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  close $TextFileHandle;

  @ColLabels = TextUtil::SplitWords($Line, $InDelim);

  # Set text file columns info....
  push @{$This->{DataColLabels}}, @ColLabels;

  for $ColNum (0 .. $#ColLabels) {
    $ColLabel = $ColLabels[$ColNum];
    $This->{DataColLabelToNumMap}{$ColLabel} = $ColNum;
  }

  return $This;
}
822 | |
# Validate compound ID column information...
#
# For CompoundIDCol value of AutoDetect, the first column containing the word
# CompoundID in its label is used; sequential generation of compound IDs is used
# when there is no such column. Otherwise, CompoundIDCol must be a column number
# between 1 and the number of columns or an existing column label based on the
# value of ColMode.
#
# Sets ValidCompoundIDCol, CompoundIDColNum, a column number starting from 0,
# and UseSequentialCompoundIDs. Returns 1 for valid column information;
# otherwise, issues a warning and returns 0.
#
sub _ValidateReadCompoundIDCol {
  my($This) = @_;
  my($CompoundIDCol, $CompoundIDColNum, $UseSequentialCompoundIDs, $ColNum);

  $This->{ValidCompoundIDCol} = 0;
  $This->{CompoundIDColNum} = undef;
  $This->{UseSequentialCompoundIDs} = 0;

  $CompoundIDCol = $This->{CompoundIDCol};

  $UseSequentialCompoundIDs = 0;
  $CompoundIDColNum = '';

  if ($CompoundIDCol =~ /^AutoDetect$/i) {
    # First column containing the word CompoundID in its label or sequential generation.
    # Columns are checked by position: a lookup by label would find the last column
    # for labels which appear more than once...

    $UseSequentialCompoundIDs = 1;
    COLNUM: for $ColNum (0 .. $#{$This->{DataColLabels}}) {
      if ($This->{DataColLabels}[$ColNum] =~ /CompoundID/i) {
        $CompoundIDColNum = $ColNum;
        $UseSequentialCompoundIDs = 0;
        last COLNUM;
      }
    }
  }
  else {
    if ($This->{ColMode} =~ /^ColNum$/i) {
      # Is it a valid column number? Column numbers start from 1: smaller values
      # would turn into negative indices which count from the last column...
      if ($CompoundIDCol < 1 || $CompoundIDCol > scalar @{$This->{DataColLabels}}) {
        carp "Warning: ${ClassName}->_ValidateReadCompoundIDCol: Column number, $CompoundIDCol, specified using CompoundIDCol doesn't exist...";
        return 0;
      }
      $CompoundIDColNum = $CompoundIDCol - 1;
    }
    elsif ($This->{ColMode} =~ /^ColLabel$/i) {
      # Does this column exist?
      if (!exists $This->{DataColLabelToNumMap}{$CompoundIDCol}) {
        carp "Warning: ${ClassName}->_ValidateReadCompoundIDCol: Column name, $CompoundIDCol, specified using CompoundIDCol doesn't exist...";
        return 0;
      }
      $CompoundIDColNum = $This->{DataColLabelToNumMap}{$CompoundIDCol};
    }
  }

  $This->{ValidCompoundIDCol} = 1;
  $This->{CompoundIDColNum} = $CompoundIDColNum;
  $This->{UseSequentialCompoundIDs} = $UseSequentialCompoundIDs;

  return 1;
}
881 | |
# Validate fingerprints string column information...
#
# For FingerprintsCol value of AutoDetect, the first column containing the word
# Fingerprints in its label is used. Otherwise, FingerprintsCol must be a column
# number between 1 and the number of columns or an existing column label based on
# the value of ColMode.
#
# Sets ValidFingerprintsCol and FingerprintsColNum, a column number starting
# from 0. Returns 1 for valid column information; otherwise, issues a warning
# and returns 0.
#
sub _ValidateReadFingerprintsCol {
  my($This) = @_;
  my($FingerprintsColNum, $FingerprintsCol, $ColNum);

  $This->{ValidFingerprintsCol} = 0;
  $This->{FingerprintsColNum} = undef;

  $FingerprintsColNum = undef;
  $FingerprintsCol = $This->{FingerprintsCol};

  if ($FingerprintsCol =~ /^AutoDetect$/i) {
    # First column containing the word Fingerprints in its label. Columns are checked
    # by position: a lookup by label would find the last column for labels which
    # appear more than once...

    COLNUM: for $ColNum (0 .. $#{$This->{DataColLabels}}) {
      if ($This->{DataColLabels}[$ColNum] =~ /Fingerprints/i) {
        $FingerprintsColNum = $ColNum;
        last COLNUM;
      }
    }
    if (!defined $FingerprintsColNum) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column label containing \"Fingerprints\" string in its name doesn't exist...";
      return 0;
    }
  }
  else {
    if ($This->{ColMode} =~ /^ColNum$/i) {
      # Is it a valid column number? Column numbers start from 1: smaller values
      # would turn into negative indices which count from the last column...
      if ($FingerprintsCol < 1 || $FingerprintsCol > scalar @{$This->{DataColLabels}}) {
        carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column number, $FingerprintsCol, specified using FingerprintsCol doesn't exist...";
        return 0;
      }
      $FingerprintsColNum = $FingerprintsCol - 1;
    }
    elsif ($This->{ColMode} =~ /^ColLabel$/i) {
      # Does this column exist?
      if (!exists $This->{DataColLabelToNumMap}{$FingerprintsCol}) {
        carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column label, $FingerprintsCol, specified using FingerprintsCol doesn't exist...";
        return 0;
      }
      $FingerprintsColNum = $This->{DataColLabelToNumMap}{$FingerprintsCol};
    }
  }

  $This->{ValidFingerprintsCol} = 1;
  $This->{FingerprintsColNum} = $FingerprintsColNum;

  return 1;
}
935 | |
# Validate fingerprints string mode information...
#
# Reads the first data line from the text file, extracts the fingerprints string from the
# column identified by FingerprintsColNum, and checks its type against the value of
# FingerprintsStringMode:
#
#   o FingerprintsBitVectorString: first fingerprints string type must be FingerprintsBitVector
#   o FingerprintsVectorString: first fingerprints string type must be FingerprintsVector
#   o Any other value (AutoDetect): type must be one of the two supported types
#
# On success, ValidFingerprintsStringMode is set to 1 along with FingerprintsBitVectorStringMode,
# FingerprintsVectorStringMode, FirstFingerprintsStringType, and FirstFingerprintsStringDescription,
# and 1 is returned. Otherwise, a warning is issued using carp and 0 is returned. It croaks
# when the text file can't be opened.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;
  my($FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType, $FirstFingerprintsStringDescription, $TextFile, $TextFileHandle, $Line, $FingerprintsColNum, $InDelim, $FingerprintsType, $FingerprintsDescription, @LineWords);

  $This->{ValidFingerprintsStringMode} = 0;

  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;

  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  $FingerprintsBitVectorStringMode = 0;
  $FingerprintsVectorStringMode = 0;

  $FirstFingerprintsStringType = '';
  $FirstFingerprintsStringDescription = '';

  $TextFile = $This->{Name};

  # Three-argument open keeps characters such as > or | in the file name from being
  # interpreted as an open mode; the lexical handle avoids a global bareword handle. It's
  # a glob reference just like \*TEXTFILE and is passed to TextUtil::GetTextLine the same way...
  if (!open $TextFileHandle, "<", $TextFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input text file $TextFile: $! ...";
  }

  # Skip column label line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  # Read first fingerprints data line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  close $TextFileHandle;

  # No data line to look at: file contains nothing after the column label line...
  if (!defined($Line) || $Line !~ /\S/) {
    carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprints data line is missing or empty in text file, $TextFile...";
    return 0;
  }

  # Get first fingerprints type and description...
  $InDelim = $This->{Delim};
  @LineWords = TextUtil::SplitWords($Line, $InDelim);

  $FingerprintsColNum = $This->{FingerprintsColNum};

  # First data line may have fewer columns than the column label line...
  if (!defined $LineWords[$FingerprintsColNum]) {
    carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprints data line in text file, $TextFile, doesn't contain any data for fingerprints column...";
    return 0;
  }

  ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($LineWords[$FingerprintsColNum]);

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsBitVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsBitVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsBitVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  else {
    # AutoDetect mode...
    if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
      $FingerprintsBitVectorStringMode = 1;
    }
    elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
      $FingerprintsVectorStringMode = 1;
    }
    else {
      # Supported type names match the ones tested above...
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintsBitVector or FingerprintsVector...";
      return 0;
    }
    $FirstFingerprintsStringType = $FingerprintsType;
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;

  $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
  $This->{FirstFingerprintsStringDescription} = $FirstFingerprintsStringDescription;

  return 1;
}
1022 | |
# Write a data line made up of specified data column values followed by the fingerprints
# string generated from specified fingerprints, fingerprints-bit vector, or fingerprints
# vector object...
#
# Returns the FingerprintsTextFileIO object.
#
sub WriteFingerprints {
  my($This, $FingerprintsObject, @DataColValues) = @_;

  # Reset per line data, keep track of the object, and turn it into a string...
  $This->_InitializeWriteDataLine();
  $This->{FingerprintsObject} = $FingerprintsObject;
  $This->_GenerateFingerprintsString();

  # Data column values come first; fingerprints string goes into the last column...
  $This->SetDataLineWords(@DataColValues);
  push @{$This->{DataLineWords}}, $This->{FingerprintsString};

  $This->_WriteDataLine();

  return $This;
}
1047 | |
# Write a data line made up of specified data column values followed by specified
# fingerprints string...
#
# Note:
#   o The fingerprints string is written to the file as it is; BitStringFormat, BitsOrder,
#     and VectorStringFormat values are not applied to it.
#   o A fingerprints object is also generated from the string and kept in the
#     FingerprintsTextFileIO object.
#
# Returns the FingerprintsTextFileIO object.
#
sub WriteFingerprintsString {
  my($This, $FingerprintsString, @DataColValues) = @_;

  # Reset per line data, keep track of the string, and generate the object from it...
  $This->_InitializeWriteDataLine();
  $This->{FingerprintsString} = $FingerprintsString;
  $This->_GenerateFingerprintsObject();

  # Data column values come first; fingerprints string goes into the last column...
  $This->SetDataLineWords(@DataColValues);
  push @{$This->{DataLineWords}}, $FingerprintsString;

  $This->_WriteDataLine();

  return $This;
}
1076 | |
# Initialize data line for writing...
#
# Clears data line, data line words, fingerprints object, and fingerprints string left over
# from the previous line, and returns the FingerprintsTextFileIO object.
#
sub _InitializeWriteDataLine {
  my($This) = @_;

  # Empty out the existing word list in place instead of replacing the array reference...
  @{$This->{DataLineWords}} = ();

  for my $Key (qw(DataLine FingerprintsObject FingerprintsString)) {
    $This->{$Key} = undef;
  }

  return $This;
}
1090 | |
# Write fingerprints data line...
#
# Joins current data line words using Delim and OutQuote values, writes out the line to
# FileHandle, increments LineNum, and keeps the line in DataLine. During first write,
# _ProcessFirstDataLineWrite is invoked before writing the line.
#
# Returns the FingerprintsTextFileIO object.
#
sub _WriteDataLine {
  my($This) = @_;

  $This->_ProcessFirstDataLineWrite() if $This->{FirstDataLineIO};

  # Write out line words...
  my $DataLine = TextUtil::JoinWords(\@{$This->{DataLineWords}}, $This->{Delim}, $This->{OutQuote});

  $This->{LineNum} += 1;

  my $OutHandle = $This->{FileHandle};
  print $OutHandle "$DataLine\n";

  $This->{DataLine} = $DataLine;

  return $This;
}
1112 | |
# Process first write...
#
# Turns off FirstDataLineIO and, during Write mode, writes out column label line to
# FileHandle and increments LineNum.
#
# Returns the FingerprintsTextFileIO object.
#
sub _ProcessFirstDataLineWrite {
  my($This) = @_;

  $This->{FirstDataLineIO} = 0;

  # Column label line is written out only during Write mode...
  if ($This->GetMode() !~ /^Write$/i) {
    return $This;
  }

  my $ColLabelsLine = TextUtil::JoinWords(\@{$This->{DataColLabels}}, $This->{Delim}, $This->{OutQuote});

  $This->{LineNum} += 1;

  my $OutHandle = $This->{FileHandle};
  print $OutHandle "$ColLabelsLine\n";

  return $This;
}
1132 | |
# Get ready for writing fingerprints text file...
#
# Croaks when the file already exists and Overwrite is not set; picks the delimiter; sets
# up fingerprints string mode status; and fills in default format values for the mode.
#
# Returns the FingerprintsTextFileIO object.
#
sub _PrepareForWritingFingerprintsTextFileData {
  my($This) = @_;

  my $TextFile = $This->{Name};
  if (!$This->{Overwrite} && -e $TextFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $TextFile, already exist. Use overwrite option...";
  }

  # Set up delimiter for writing file: tsv file extension always implies tab; otherwise,
  # it's semicolon or comma based on OutDelim...
  my($FileDir, $FileName, $FileExt) = FileUtil::ParseFileName($TextFile);

  if ($FileExt =~ /^tsv$/i) {
    $This->{Delim} = "\t";
  }
  elsif ($This->{OutDelim} =~ /semicolon/i) {
    $This->{Delim} = ";";
  }
  else {
    $This->{Delim} = ",";
  }

  # Setup FingerprintsStringMode status; at most one of the two mode flags is turned on...
  my $BitVectorStringMode = 0;
  my $VectorStringMode = 0;

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    $BitVectorStringMode = 1;
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    $VectorStringMode = 1;
  }

  $This->{FingerprintsBitVectorStringMode} = $BitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $VectorStringMode;
  $This->{ValidFingerprintsStringMode} = ($BitVectorStringMode || $VectorStringMode) ? 1 : 0;

  # Fill in default format values corresponding to the mode...
  if ($BitVectorStringMode) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
  }
  elsif ($VectorStringMode) {
    $This->_SetDefaultVectorStringFormat();
  }

  return $This;
}
1179 | |
# Set default value for bit string format...
#
# BitStringFormat is left alone when it already has a true value; otherwise, it's set to the
# value returned by Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat.
#
# Returns the FingerprintsTextFileIO object.
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}
1191 | |
# Set default value for bits order...
#
# BitsOrder is left alone when it already has a true value; otherwise, it's set to the
# value returned by Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder.
#
# Returns the FingerprintsTextFileIO object.
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  $This->{BitsOrder} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();

  return $This;
}
1203 | |
# Set default value for vector string format...
#
# Default value is retrieved using the current fingerprints object, so nothing is done when
# VectorStringFormat already has a true value or no fingerprints object is available.
#
# Returns the FingerprintsTextFileIO object.
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  return $This if $This->{VectorStringFormat};
  return $This if !$This->{FingerprintsObject};

  $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});

  return $This;
}
1215 | |
# Generate fingerprints object using current fingerprints string...
#
# FingerprintsObject is always reset first. Without a fingerprints string, the
# FingerprintsTextFileIO object is returned right away. Otherwise, the string is parsed
# according to the active mode and the FingerprintsTextFileIO object is returned; undef is
# returned when neither bit-vector nor vector string mode is active.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  my $FingerprintsString = $This->{FingerprintsString};
  return $This if !$FingerprintsString;

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($FingerprintsString);
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($FingerprintsString);
    return $This;
  }

  # No valid mode to parse the string with...
  return undef;
}
1239 | |
# Generate fingerprints string using current fingerprints object...
#
# FingerprintsString is always reset to an empty string first. It stays empty when there is
# no fingerprints object or neither bit-vector nor vector string mode is active. Otherwise,
# it's generated using BitStringFormat and BitsOrder, or VectorStringFormat.
#
# Returns the FingerprintsTextFileIO object.
#
sub _GenerateFingerprintsString {
  my($This) = @_;

  $This->{FingerprintsString} = '';

  my $FingerprintsObject = $This->{FingerprintsObject};
  return $This if !$FingerprintsObject;

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($FingerprintsObject, $This->{BitStringFormat}, $This->{BitsOrder});
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($FingerprintsObject, $This->{VectorStringFormat});
  }

  return $This;
}
1260 | |
# Is it a fingerprints file?
#
# It can be used both as a method and a function: when two parameters are passed and the
# first one is a FingerprintsTextFileIO object, the second parameter is the file name;
# otherwise, the first parameter is the file name.
#
# Returns the status from FileUtil::CheckFileType for csv and tsv file extensions.
#
sub IsFingerprintsTextFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  my $InvokedAsMethod = ((@_ == 2) && (_IsFingerprintsTextFileIO($FirstParameter)));
  my $FileName = $InvokedAsMethod ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "csv tsv");

  return $Status;
}
1278 | |
# Is it a FingerprintsTextFileIO object?
#
# Returns 1 for a blessed reference which is an instance of this class or a class derived
# from it; otherwise, returns 0.
#
sub _IsFingerprintsTextFileIO {
  my($Object) = @_;

  if (!Scalar::Util::blessed($Object)) {
    return 0;
  }

  return $Object->isa($ClassName) ? 1 : 0;
}
1285 | |
1286 1; | |
1287 | |
1288 __END__ | |
1289 | |
1290 =head1 NAME | |
1291 | |
1292 FingerprintsTextFileIO | |
1293 | |
1294 =head1 SYNOPSIS | |
1295 | |
1296 use FileIO::FingerprintsTextFileIO; | |
1297 | |
1298 use FileIO::FingerprintsTextFileIO qw(:all); | |
1299 | |
1300 =head1 DESCRIPTION | |
1301 | |
1302 B<FingerprintsTextFileIO> class provides the following methods: | |
1303 | |
1304 new, GetDataColLabels, GetDataLineWords, GetFingerprints, GetFingerprintsString, | |
1305 IsFingerprintsDataValid, IsFingerprintsFileDataValid, IsFingerprintsTextFile, | |
1306 Next, Read, SetBitStringFormat, SetBitsOrder, SetColMode, SetDataColLabels, | |
1307 SetDataLineWords, SetDetailLevel, SetFingerprints, SetFingerprintsString, | |
1308 SetFingerprintsStringMode, SetInDelim, SetOutDelim, SetVectorStringFormat, | |
1309 WriteFingerprints, WriteFingerprintsString | |
1310 | |
1311 The following methods can also be used as functions: | |
1312 | |
1313 IsFingerprintsTextFile | |
1314 | |
1315 B<FingerprintsTextFileIO> class is derived from I<FileIO> class and uses its methods to support | |
1316 generic file related functionality. | |
1317 | |
1318 The fingerprints CSV/TSV text file format with B<.csv> or B<.tsv> file extensions supports two | |
1319 types of fingerprints string data: fingerprints bit-vectors and fingerprints vector strings. The | |
1320 fingerprints string data is treated as column value in a text file. | |
1321 | |
1322 Example of text file format containing fingerprints string data: | |
1323 | |
1324 "CompoundID","PathLengthFingerprints" | |
1325 "Cmpd1","FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes | |
1326 :MinLength1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a4 | |
1327 9913991a6603130b0a19e8051c89184414953800cc2151082844a20104280013086030 | |
1328 8e8204d402800831048940e44281c00060449a5000ac80c894114e006321264401..." | |
1329 ... ... | |
1330 ... ... | |
1331 | |
1332 The current release of MayaChemTools supports the following types of fingerprint | |
1333 bit-vector and vector strings: | |
1334 | |
1335 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi | |
1336 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT | |
1337 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X | |
1338 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A | |
1339 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2 | |
1340 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B... | |
1341 | |
1342 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS | |
1343 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2 | |
1344 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1 | |
1345 O.X1.BO2;2 4 14 3 10 1 1 1 3 2 | |
1346 | |
1347 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume | |
1348 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F | |
1349 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1 | |
1350 | |
1351 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN | |
1352 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C | |
1353 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N | |
1354 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8 | |
1355 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1 | |
1356 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0... | |
1357 | |
1358 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs | |
1359 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN | |
1360 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3 | |
1361 .024 -2.270 | |
1362 | |
1363 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues; | |
1364 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435 | |
1365 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1 | |
1366 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1367 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1368 | |
1369 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi | |
1370 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 | |
1371 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 | |
1372 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 | |
1373 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 | |
1374 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... | |
1375 | |
1376 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes | |
1377 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 | |
1378 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
1379 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; | |
1380 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 | |
1381 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 | |
1382 | |
1383 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
1384 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 | |
1385 0000000001010000000110000011000000000000100000000000000000000000100001 | |
1386 1000000110000000000000000000000000010011000000000000000000000000010000 | |
1387 0000000000000000000000000010000000000000000001000000000000000000000000 | |
1388 0000000000010000100001000000000000101000000000000000100000000000000... | |
1389 | |
1390 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu | |
1391 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8 | |
1392 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567 | |
1393 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012 | |
1394 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455 | |
1395 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404... | |
1396 | |
1397 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp | |
1398 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184 | |
1399 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450 | |
1400 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430 | |
1401 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134 | |
1402 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566... | |
1403 | |
1404 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000 | |
1405 0000000000000000000000000000000001001000010010000000010010000000011100 | |
1406 0100101010111100011011000100110110000011011110100110111111111111011111 | |
1407 11111111111110111000 | |
1408 | |
1409 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011 | |
1410 1110011111100101111111000111101100110000000000000011100010000000000000 | |
1411 0000000000000000000000000000000000000000000000101000000000000000000000 | |
1412 0000000000000000000000000000000000000000000000000000000000000000000000 | |
1413 0000000000000000000000000000000000000011000000000000000000000000000000 | |
1414 0000000000000000000000000000000000000000 | |
1415 | |
1416 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri | |
1417 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1418 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 | |
1419 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0 | |
1420 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1 | |
1421 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1 | |
1422 | |
1423 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri | |
1424 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0 | |
1425 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0 | |
1426 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
1427 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0 | |
1428 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... | |
1429 | |
1430 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng | |
1431 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110 | |
1432 0100010101011000101001011100110001000010001001101000001001001001001000 | |
1433 0010110100000111001001000001001010100100100000000011000000101001011100 | |
1434 0010000001000101010100000100111100110111011011011000000010110111001101 | |
1435 0101100011000000010001000011000010100011101100001000001000100000000... | |
1436 | |
1437 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength | |
1438 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2 | |
1439 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X | |
1440 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1 | |
1441 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO | |
1442 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C.... | |
1443 | |
1444 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt | |
1445 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1 | |
1446 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N | |
1447 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1 | |
1448 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR | |
1449 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ... | |
1450 | |
1451 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD | |
1452 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1 | |
1453 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3. | |
1454 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...; | |
1455 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1 | |
1456 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1... | |
1457 | |
1458 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi | |
1459 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar | |
1460 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H | |
1461 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...; | |
1462 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4 | |
1463 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ... | |
1464 | |
1465 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3 | |
1466 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4- | |
1467 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO | |
1468 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...; | |
1469 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1 | |
1470 | |
1471 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica | |
1472 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC | |
1473 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC- | |
1474 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...; | |
1475 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2 | |
1476 | |
1477 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M | |
1478 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1 | |
1479 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1 | |
1480 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1 | |
1481 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....; | |
1482 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2 | |
1483 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8... | |
1484 | |
1485 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1 | |
1486 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C | |
1487 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3- | |
1488 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2 | |
1489 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C. | |
1490 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7... | |
1491 | |
1492 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min | |
1493 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H | |
1494 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2- | |
1495 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H | |
1496 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...; | |
1497 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10 | |
1498 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1 | |
1499 | |
1500 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist | |
1501 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0 | |
1502 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1 | |
1503 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0 | |
1504 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0 | |
1505 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18... | |
1506 | |
1507 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize: | |
1508 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1- | |
1509 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1 | |
1510 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1- | |
1511 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...; | |
1512 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23 | |
1513 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1 | |
1514 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ... | |
1515 | |
1516 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD | |
1517 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106 | |
1518 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0 | |
1519 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26 | |
1520 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0 | |
1521 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ... | |
1522 | |
1523 | |
1524 =head2 METHODS | |
1525 | |
1526 =over 4 | |
1527 | |
1528 =item B<new> | |
1529 | |
1530 $NewFingerprintsTextFileIO = new FileIO::FingerprintsTextFileIO(%IOParameters); | |
1531 | |
1532 Using specified I<IOParameters> names and values hash, B<new> method creates a new | |
1533 object and returns a reference to a newly created B<FingerprintsTextFileIO> object. By default, | |
1534 the following properties are initialized during I<Read> mode: | |
1535 | |
1536 Name = ''; | |
1537 Mode = 'Read'; | |
1538 Status = 0; | |
1539 FingerprintsStringMode = 'AutoDetect'; | |
1540 FingerprintsCol = 'AutoDetect'; | |
1541 ColMode = 'ColNum'; | |
1542 CompoundIDCol = 'AutoDetect'; | |
1543 CompoundIDPrefix = 'Cmpd'; | |
1544 InDelim = 'Comma'; | |
1545 ValidateData = 1; | |
1546 DetailLevel = 1; | |
1547 | |
1548 During I<Write> mode, the following properties are initialized by default: | |
1549 | |
1550 FingerprintsStringMode = undef; | |
1551 | |
1552 BitStringFormat = HexadecimalString; | |
1553 BitsOrder = Ascending; | |
1554 | |
1555 VectorStringFormat = NumericalValuesString or ValuesString; | |
1556 OutDelim = 'Comma'; | |
1557 OutQuote = 1; | |
1558 | |
1559 Examples: | |
1560 | |
1561 $NewFingerprintsTextFileIO = new FileIO::FingerprintsTextFileIO( | |
1562 'Name' => 'Sample.csv', | |
1563 'Mode' => 'Read'); | |
1564 | |
1565 $NewFingerprintsTextFileIO = new FileIO::FingerprintsTextFileIO( | |
1566 'Name' => 'Sample.csv', | |
1567 'Mode' => 'Read', | |
1568 'FingerprintsStringMode' => | |
1569 'AutoDetect', | |
1570 'ColMode' => 'ColLabel', | |
1571 'FingerprintsCol' => 'Fingerprints', | |
1572 'CompoundIDCol' => 'CompoundID', | |
1573 'InDelim' => 'Comma'); | |
1574 | |
1575 $NewFingerprintsTextFileIO = new FileIO::FingerprintsTextFileIO( | |
1576 'Name' => 'Sample.csv', | |
1577 'Mode' => 'Write', | |
1578 'FingerprintsStringMode' => | |
1579 'FingerprintsBitVectorString', | |
1580 'Overwrite' => 1, | |
1581 'BitStringFormat' => 'HexadecimalString', | |
1582 'BitsOrder' => 'Ascending'); | |
1583 | |
1584 $NewFingerprintsTextFileIO = new FileIO::FingerprintsTextFileIO( | |
1585 'Name' => 'Sample.tsv', | |
1586 'Mode' => 'Write', | |
1587 'FingerprintsStringMode' => | |
1588 'FingerprintsVectorString', | |
1589 'Overwrite' => 1, | |
1590 'VectorStringFormat' => 'IDsAndValuesString', | |
1591 'OutDelim' => 'Tab', | |
1592 'OutQuote' => 0); | |
1593 | |
1594 =item B<GetDataColLabels> | |
1595 | |
1596 @ColLabels = $FingerprintsTextFileIO->GetDataColLabels(); | |
1597 $NumOfColLabels = $FingerprintsTextFileIO->GetDataColLabels(); | |
1598 | |
1599 Returns an array of B<ColLabels> from first line in text file. In scalar context, it returns | |
1600 number of column labels. | |
1601 | |
1602 =item B<GetDataLineWords> | |
1603 | |
1604 @DataWords = $FingerprintsTextFileIO->GetDataLineWords(); | |
1605 $NumOfDataWords = $FingerprintsTextFileIO->GetDataLineWords(); | |
1606 | |
1607 Returns an array of B<DataWords> in current data line. In scalar context, it returns | |
1608 number of data words. | |
1609 | |
1610 =item B<GetFingerprints> | |
1611 | |
1612 $FingerprintsObject = $FingerprintsTextFileIO->GetFingerprints(); | |
1613 | |
1614 Returns B<FingerprintsObject> generated for current data line using fingerprints bit-vector | |
1615 or vector string data. The fingerprints object corresponds to any of the supported fingerprints | |
1616 such as PathLengthFingerprints, ExtendedConnectivity, and so on. | |
1617 | |
1618 =item B<GetFingerprintsString> | |
1619 | |
1620 $FingerprintsString = $FingerprintsTextFileIO->GetFingerprintsString(); | |
1621 | |
1622 Returns B<FingerprintsString> for current data line. | |
1623 | |
1624 =item B<IsFingerprintsDataValid> | |
1625 | |
1626 $Status = $FingerprintsTextFileIO->IsFingerprintsDataValid(); | |
1627 | |
1628 Returns 1 or 0 based on whether B<FingerprintsObject> is valid. | |
1629 | |
1630 =item B<IsFingerprintsFileDataValid> | |
1631 | |
1632 $Status = $FingerprintsTextFileIO->IsFingerprintsFileDataValid(); | |
1633 | |
1634 Returns 1 or 0 based on whether text file contains valid fingerprints data. | |
1635 | |
1636 =item B<IsFingerprintsTextFile> | |
1637 | |
1638 $Status = $FingerprintsTextFileIO->IsFingerprintsTextFile($FileName); | |
1639 $Status = FileIO::FingerprintsTextFileIO::IsFingerprintsTextFile($FileName); | |
1640 | |
1641 Returns 1 or 0 based on whether I<FileName> is a fingerprints text file. | |
1642 | |
1643 =item B<Next or Read> | |
1644 | |
1645 $FingerprintsTextFileIO = $FingerprintsTextFileIO->Next(); | |
1646 $FingerprintsTextFileIO = $FingerprintsTextFileIO->Read(); | |
1647 | |
1648 Reads next available fingerprints line in text file, processes the data, generates appropriate | |
1649 fingerprints object, and returns B<FingerprintsTextFileIO>. The generated fingerprints object | |
1650 is available using method B<GetFingerprints>. | |
1651 | |
1652 =item B<SetBitStringFormat> | |
1653 | |
1654 $FingerprintsTextFileIO->SetBitStringFormat($Format); | |
1655 | |
1656 Sets bit string I<Format> for fingerprints bit-vector string data in a text file and returns | |
1657 B<FingerprintsTextFileIO>. Possible values for B<BitStringFormat>: I<BinaryString or HexadecimalString>. | |
1658 | |
1659 =item B<SetBitsOrder> | |
1660 | |
1661 $FingerprintsTextFileIO->SetBitsOrder($BitsOrder); | |
1662 | |
1663 Sets I<BitsOrder> for fingerprints bit-vector string data in a text file and returns B<FingerprintsTextFileIO>. | |
1664 Possible values for B<BitsOrder>: I<Ascending or Descending>. | |
1665 | |
1666 =item B<SetColMode> | |
1667 | |
1668 $FingerprintsTextFileIO->SetColMode($ColMode); | |
1669 | |
1670 Sets I<ColMode> for a text file and returns B<FingerprintsTextFileIO>. Possible values for B<ColMode>: | |
1671 I<ColNum or ColLabel>. | |
1672 | |
1673 =item B<SetDataColLabels> | |
1674 | |
1675 $FingerprintsTextFileIO->SetDataColLabels(@ColLabels); | |
1676 $FingerprintsTextFileIO->SetDataColLabels(\@ColLabels); | |
1677 | |
1678 Sets I<ColLabels> for a text file using an array or a reference to an array containing column labels | |
1679 and returns B<FingerprintsTextFileIO>. | |
1680 | |
1681 =item B<SetDataLineWords> | |
1682 | |
1683 $FingerprintsTextFileIO->SetDataLineWords(@LineWords); | |
1684 $FingerprintsTextFileIO->SetDataLineWords(\@LineWords); | |
1685 | |
1686 Sets I<DataLineWords> for a text file using an array or a reference to an array containing data words | |
1687 and returns B<FingerprintsTextFileIO>. | |
1688 | |
1689 =item B<SetDetailLevel> | |
1690 | |
1691 $FingerprintsTextFileIO->SetDetailLevel($Level); | |
1692 | |
1693 Sets details I<Level> for generating diagnostics messages during text file processing and returns | |
1694 B<FingerprintsTextFileIO>. Possible values: I<Positive integers>. | |
1695 | |
1696 =item B<SetFingerprints> | |
1697 | |
1698 $FingerprintsTextFileIO->SetFingerprints($FingerprintsObject); | |
1699 | |
1700 Sets I<FingerprintsObject> for current data line and returns B<FingerprintsTextFileIO>. | |
1701 | |
1702 =item B<SetFingerprintsString> | |
1703 | |
1704 $FingerprintsTextFileIO->SetFingerprintsString($FingerprintsString); | |
1705 | |
1706 Sets I<FingerprintsString> for current data line and returns B<FingerprintsTextFileIO>. | |
1707 | |
1708 =item B<SetFingerprintsStringMode> | |
1709 | |
1710 $FingerprintsTextFileIO->SetFingerprintsStringMode($Mode); | |
1711 | |
1712 Sets I<FingerprintsStringMode> for text file and returns B<FingerprintsTextFileIO>. | |
1713 Possible values: I<AutoDetect, FingerprintsBitVectorString, or FingerprintsVectorString>. | |
1714 | |
1715 =item B<SetInDelim> | |
1716 | |
1717 $FingerprintsTextFileIO->SetInDelim($InDelim); | |
1718 | |
1719 Sets I<InDelim> for text file and returns B<FingerprintsTextFileIO>. Possible values: I<comma, | |
1720 semicolon, tab>. | |
1721 | |
1722 =item B<SetOutDelim> | |
1723 | |
1724 $FingerprintsTextFileIO->SetOutDelim($OutDelim); | |
1725 | |
1726 Sets I<OutDelim> for text file and returns B<FingerprintsTextFileIO>. Possible values: I<comma, | |
1727 semicolon, tab>. | |
1728 | |
1729 =item B<SetVectorStringFormat> | |
1730 | |
1731 $FingerprintsTextFileIO->SetVectorStringFormat($Format); | |
1732 | |
1733 Sets I<VectorStringFormat> for text file and returns B<FingerprintsTextFileIO>. Possible values: | |
1734 I<IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString>. | |
1735 | |
1736 =item B<WriteFingerprints> | |
1737 | |
1738 $FingerprintsTextFileIO->WriteFingerprints($FingerprintsObject, | |
1739 @DataColValues); | |
1740 | |
1741 Writes fingerprints string generated from I<FingerprintsObject> object and other data including | |
1742 I<DataColValues> to text file and returns B<FingerprintsTextFileIO>. | |
1743 | |
1744 =item B<WriteFingerprintsString> | |
1745 | |
1746 $FingerprintsTextFileIO->WriteFingerprintsString($FingerprintsString, | |
1747 @DataColValues); | |
1748 | |
1749 Writes I<FingerprintsString> and other data including I<DataColValues> to text file and returns | |
1750 B<FingerprintsTextFileIO>. | |
1751 | |
1752 Caveats: | |
1753 | |
1754 o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat | |
1755 values are ignored while writing the fingerprints string; the string is written | |
1756 to the file as is. | |
1757 | |
1758 =back | |
1759 | |
1760 =head1 AUTHOR | |
1761 | |
1762 Manish Sud <msud@san.rr.com> | |
1763 | |
1764 =head1 SEE ALSO | |
1765 | |
1766 FingerprintsSDFileIO.pm, FingerprintsFPFileIO.pm | |
1767 | |
1768 =head1 COPYRIGHT | |
1769 | |
1770 Copyright (C) 2015 Manish Sud. All rights reserved. | |
1771 | |
1772 This file is part of MayaChemTools. | |
1773 | |
1774 MayaChemTools is free software; you can redistribute it and/or modify it under | |
1775 the terms of the GNU Lesser General Public License as published by the Free | |
1776 Software Foundation; either version 3 of the License, or (at your option) | |
1777 any later version. | |
1778 | |
1779 =cut |