package FileIO::FingerprintsTextFileIO;
#
# $RCSfile: FingerprintsTextFileIO.pm,v $
# $Date: 2015/02/28 20:48:43 $
# $Revision: 1.19 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Scalar::Util ();
use TextUtil ();
use FileUtil ();
use Fingerprints::FingerprintsStringUtil ();
use FileIO::FileIO;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(FileIO::FileIO Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(IsFingerprintsTextFile);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Class-level state: name of this class as used in error and warning messages...
my($ClassName);
_InitializeClass();

# Constructor...
#
# Builds the base object through the parent class constructor, re-blesses it
# into this class (or into the class of the invoking object), installs default
# values and then applies the caller supplied property names and values.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  my $Self = bless $Class->SUPER::new(), (ref($Class) || $Class);

  # Defaults first, caller specified properties next...
  $Self->_InitializeFingerprintsTextFileIO();
  $Self->_InitializeFingerprintsTextFileIOProperties(%NamesAndValues);

  return $Self;
}

# Initialize object data...
#
sub _InitializeFingerprintsTextFileIO {
  my($This) = @_;

  # Scalar defaults shared by read and write...
  #
  # FingerprintsStringMode - fingerprints string data format during read/write.
  #
  #   For file read:
  #     AutoDetect - automatically detect format of fingerprints string
  #     FingerprintsBitVectorString - Bit vector fingerprints string format
  #     FingerprintsVectorString - Vector fingerprints string format
  #     Default value: AutoDetect
  #
  #   For file write:
  #     FingerprintsBitVectorString - Bit vector fingerprints string format
  #     FingerprintsVectorString - Vector fingerprints string format
  #     Default value: undef
  #
  # FingerprintsObject - object for the current fingerprints string.
  #
  #   For file read:
  #     o Fingerprints bit-vector and vector object
  #   For file write:
  #     o Fingerprints bit-vector and vector object
  #     o Any supported fingerprints object: PathLengthFingerprints,
  #       ExtendedConnectivity, and so on.
  #
  # FingerprintsString - fingerprints string for the current line.
  # FirstDataLineIO - true until the first data line has been read/written.
  # LineNum - current fingerprints string data line number.
  # DataLine - text of the current line.
  # Delim - text file delimiter.
  #
  my %ScalarDefaults = (
    FingerprintsStringMode => undef,
    FingerprintsObject => undef,
    FingerprintsString => undef,
    FirstDataLineIO => 1,
    LineNum => 0,
    DataLine => undef,
    Delim => '',
  );
  @{$This}{keys %ScalarDefaults} = values %ScalarDefaults;

  # Words of the current data line and the text file column labels...
  @{$This->{DataLineWords}} = ();
  @{$This->{DataColLabels}} = ();

  # Read specific defaults followed by write specific defaults...
  $This->_InitializeFingerprintsTextFileIORead();
  $This->_InitializeFingerprintsTextFileIOWrite();

  return $This;
}

# Initialize class data: record the package name for use in messages...
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}

# Initialize object data for reading fingerprints text file...
#
sub _InitializeFingerprintsTextFileIORead {
  my($This) = @_;

  my %ReadDefaults = (
    # How CompoundIDCol and FingerprintsCol values are interpreted:
    #
    #   ColNum - A valid column number
    #   ColLabel - A valid column name
    #
    ColMode => 'ColNum',

    # Column holding fingerprints string data. AutoDetect means the first column
    # containing the word Fingerprints in its label; otherwise, a column number or
    # column name according to ColMode.
    #
    FingerprintsCol => 'AutoDetect',

    # Column holding compound IDs. AutoDetect means the first column containing the
    # word CompoundID in its label or, when there is none, sequentially generated
    # compound IDs; otherwise, a column number or column name according to ColMode.
    #
    CompoundIDCol => 'AutoDetect',

    # Prefix for sequentially generated compound IDs: <CompoundIDPrefix><Number>.
    # The default value, Cmpd, generates IDs which look like Cmpd<Number>.
    #
    CompoundIDPrefix => 'Cmpd',

    # Input delimiter for fingerprints CSV text file. Possible values: comma,
    # semicolon or tab. Ignored for TSV text file where tab is always used.
    #
    InDelim => 'comma',

    # Validate fingerprints data in each line before generating fingerprints
    # objects. Validation is turned on by default.
    #
    ValidateData => 1,

    # Level of detail to print during validation for invalid or missing data...
    DetailLevel => 1,

    # Number of lines with missing and invalid fingerprints string data...
    NumOfLinesWithMissingData => 0,
    NumOfLinesWithInvalidData => 0,

    # Compound ID for current fingerprints string...
    CompoundID => undef,

    # Status of data in fingerprints text file...
    ValidFileData => 0,
    ValidCompoundIDCol => 0,
    ValidFingerprintsCol => 0,
    ValidFingerprintsStringMode => 0,
  );
  @{$This}{keys %ReadDefaults} = values %ReadDefaults;

  return $This;
}

# Initialize object data for writing fingerprints text file...
#
sub _InitializeFingerprintsTextFileIOWrite {
  my($This) = @_;

  # Formatting options which stay undefined here; as noted in the original
  # documentation, their defaults are set during the first write using
  # Fingerprints::FingerprintsStringUtil:
  #
  #   BitStringFormat - BinaryString or HexadecimalString [Default]
  #     (GetDefaultBitStringFormat)
  #
  #   BitsOrder - bits order in fingerprints bit vector string (GetDefaultBitsOrder)
  #     Ascending - First bit in each byte as the lowest bit [Default]
  #     Descending - First bit in each byte as the highest bit
  #
  #   VectorStringFormat - IDsAndValuesString, IDsAndValuesPairsString,
  #     ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString
  #     (GetDefaultVectorStringFormat). For fingerprints vector object containing
  #     vector NumericalValues, it corresponds to IDsAndValuesString; otherwise,
  #     it's set to ValuesString.
  #
  $This->{BitStringFormat} = $This->{BitsOrder} = $This->{VectorStringFormat} = undef;

  # Delimiter for output fingerprints CSV/TSV file. Possible values: comma, tab,
  # semicolon. Ignored for TSV text file where tab is always used.
  $This->{OutDelim} = 'comma';

  # Quotes around column values for output fingerprints CSV/TSV text file...
  $This->{OutQuote} = 1;

  # Overwriting existing file...
  $This->{Overwrite} = 0;

  return $This;
}

# Initialize object values...
#
# Applies each specified property through its Set<PropertyName> method, makes sure
# a fingerprints text file name was specified and then performs the mode specific
# initialization for Read or Write/Append.
#
sub _InitializeFingerprintsTextFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # All other property names and values along with all Set/Get<PropertyName> methods
  # are implemented on-demand using ObjectProperty class.
  for my $PropertyName (keys %NamesAndValues) {
    my $SetterName = "Set${PropertyName}";
    $This->$SetterName($NamesAndValues{$PropertyName});
  }

  exists $NamesAndValues{Name}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";

  # Make sure it's a fingerprints file...
  my $Name = $NamesAndValues{Name};
  $This->IsFingerprintsTextFile($Name)
    or croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be fingerprints format...";

  # Mode specific setup...
  if ($This->GetMode() =~ /^Read$/i) {
    $This->_InitializeFingerprintsTextFileIOReadProperties(%NamesAndValues);
  }
  elsif ($This->GetMode() =~ /^(Write|Append)$/i) {
    $This->_InitializeFingerprintsTextFileIOWriteProperties(%NamesAndValues);
  }

  return $This;
}

# Initialize object properties for reading fingerprints text file...
#
sub _InitializeFingerprintsTextFileIOReadProperties {
  my($This, %NamesAndValues) = @_;

  # Fall back to AutoDetect when no FingerprintsStringMode was specified...
  $This->{FingerprintsStringMode} ||= 'AutoDetect';

  $This->_PrepareForReadingFingerprintsTextFileData();

  return $This;
}

# Initialize object properties for writing fingerprints text file...
#
# Checks the properties required for Write/Append mode before any output is
# set up: FingerprintsStringMode and DataColLabels must be specified explicitly,
# and unquoted output is rejected for semicolon delimiter.
#
sub _InitializeFingerprintsTextFileIOWriteProperties {
  my($This, %NamesAndValues) = @_;

  # FingerprintsStringMode must be specified and be one of the writable modes...
  exists $NamesAndValues{FingerprintsStringMode}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode...";

  unless ($This->{FingerprintsStringMode} =~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  # Column labels are needed for the column label line...
  exists $NamesAndValues{DataColLabels}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying DataColLabels...";

  # Fingerprints strings contain semicolons; they must be quoted for semicolon delimiter...
  my $SemicolonDelim = ($This->{OutDelim} =~ /semicolon/i);
  if ($SemicolonDelim && !$This->{OutQuote}) {
    croak "Error: ${ClassName}->: Object can't be instantiated: The value specified, $This->{OutQuote}, using \"OutQuote\" is not allowed with semicolon value of \"OutDelim\": Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
  }

  $This->_PrepareForWritingFingerprintsTextFileData();

  return $This;
}

# Set FingerprintsStringMode...
#
# Supported values (case insensitive):
#
#   AutoDetect - automatically detect format of fingerprints string
#   FingerprintsBitVectorString - Bit vector fingerprints string format
#   FingerprintsVectorString - Vector fingerprints string format
#
sub SetFingerprintsStringMode {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString..."
    unless $Value =~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i;

  $This->{FingerprintsStringMode} = $Value;

  return $This;
}

# Set ColMode...
#
# Supported values (case insensitive): ColNum or ColLabel.
#
sub SetColMode {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetColMode: ColMode value, $Value, is not valid; Supported values: ColNum or ColLabel..."
    unless $Value =~ /^(ColNum|ColLabel)$/i;

  $This->{ColMode} = $Value;

  return $This;
}

# Set InDelim...
#
# Supported values (case insensitive): comma, semicolon, or tab.
#
sub SetInDelim {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetInDelim: InDelim value, $Value, is not valid; Supported values: comma, semicolon, or tab..."
    unless $Value =~ /^(comma|semicolon|tab)$/i;

  $This->{InDelim} = $Value;

  return $This;
}

# Set DetailLevel...
#
# Supported values: any positive integer as determined by TextUtil::IsPositiveInteger.
#
sub SetDetailLevel {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0..."
    unless TextUtil::IsPositiveInteger($Value);

  $This->{DetailLevel} = $Value;

  return $This;
}

# Set BitStringFormat...
#
# Supported values (case insensitive): BinaryString or HexadecimalString.
# Croaks on any other value; returns the object on success.
#
sub SetBitStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(BinaryString|HexadecimalString)$/i) {
    croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";
  }

  $This->{BitStringFormat} = $Value;

  return $This;
}

# Set BitsOrder...
#
# Supported values (case insensitive):
#
#   Ascending - First bit in each byte as the lowest bit
#   Descending - First bit in each byte as the highest bit
#
# Croaks on any other value; returns the object on success.
#
sub SetBitsOrder {
  my($This, $Value) = @_;

  if ($Value !~ /^(Ascending|Descending)$/i) {
    # Name the property actually being set in the error message...
    croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
  }

  $This->{BitsOrder} = $Value;

  return $This;
}

# Set VectorStringFormat...
#
# Supported values (case insensitive): IDsAndValuesString, IDsAndValuesPairsString,
# ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString.
#
# Croaks on any other value; returns the object on success.
#
sub SetVectorStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
    # Name the property actually being set in the error message...
    croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
  }

  $This->{VectorStringFormat} = $Value;

  return $This;
}

# Set OutDelim...
#
# Supported values (case insensitive): comma, tab or semicolon.
# Croaks on any other value; returns the object on success.
#
sub SetOutDelim {
  my($This, $Value) = @_;

  if ($Value !~ /^(comma|tab|semicolon)$/i) {
    croak "Error: ${ClassName}->SetOutDelim: OutDelim value, $Value, is not valid; Supported values: comma, tab or semicolon...";
  }

  $This->{OutDelim} = $Value;

  return $This;
}

# Set DataColLabels...
#
# Set output data column labels using:
#    o List of column labels
#    o Reference to a list of column labels
#
# Any previously set labels are replaced. An empty argument list leaves the
# current labels untouched and generates a warning.
#
sub SetDataColLabels {
  my($This, @Values) = @_;
  my($FirstValue, $TypeOfFirstValue);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetDataColLabels: No data column labels specified...";
    return $This;
  }

  @{$This->{DataColLabels}} = ();

  $FirstValue = $Values[0];
  $TypeOfFirstValue = ref $FirstValue;

  if ($TypeOfFirstValue =~ /^ARRAY/) {
    # Initialize using array reference...
    push @{$This->{DataColLabels}}, @{$FirstValue};
  }
  else {
    # It's a list of values...
    push @{$This->{DataColLabels}}, @Values;
  }

  return $This;
}

# Get column labels (list context) or number of column labels (scalar context)
# in first text line...
#
sub GetDataColLabels {
  my($This) = @_;

  return wantarray ? @{$This->{DataColLabels}} : scalar @{$This->{DataColLabels}};
}

# Get words (list context) or number of words (scalar context) in current data line...
#
sub GetDataLineWords {
  my($This) = @_;

  return wantarray ? @{$This->{DataLineWords}} : scalar @{$This->{DataLineWords}};
}

# Set DataLineWords...
#
# Set data line words using:
#    o List of line words
#    o Reference to a list of line words
#
# Any previously set words are replaced. An empty argument list leaves the
# current words untouched and generates a warning.
#
sub SetDataLineWords {
  my($This, @Values) = @_;
  my($FirstValue, $TypeOfFirstValue);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetDataLineWords: No line words specified...";
    return $This;
  }

  @{$This->{DataLineWords}} = ();

  $FirstValue = $Values[0];
  $TypeOfFirstValue = ref $FirstValue;

  if ($TypeOfFirstValue =~ /^ARRAY/) {
    # Initialize using array reference...
    push @{$This->{DataLineWords}}, @{$FirstValue};
  }
  else {
    # It's a list of values...
    push @{$This->{DataLineWords}}, @Values;
  }

  return $This;
}

# Get fingerprints object for current data line using fingerprints, fingerprints bit-vector
# fingerprints vector object. Fingerprints object correspond to any of supported fingerprints
# objects such as PathLengthFingerprints, ExtendedConnectivity, and so on.
#
sub GetFingerprints {
  my($This) = @_;

  return $This->{FingerprintsObject};
}

# Set fingerprints object for current data line...
#
sub SetFingerprints {
  my($This, $FingerprintsObject) = @_;

  $This->{FingerprintsObject} = $FingerprintsObject;

  return $This;
}

# Get fingerprints string for current data line; the string 'None' is returned
# when no (or a false valued) fingerprints string is available...
#
sub GetFingerprintsString {
  my($This) = @_;

  return $This->{FingerprintsString} ? $This->{FingerprintsString} : 'None';
}

# Set fingerprints string for current data line...
#
sub SetFingerprintsString {
  my($This, $FingerprintsString) = @_;

  $This->{FingerprintsString} = $FingerprintsString;

  return $This;
}

# Does fingerprints text file contain valid data? Returns 1 or 0.
#
sub IsFingerprintsFileDataValid {
  my($This) = @_;

  return $This->{ValidFileData} ? 1 : 0;
}

# Does current data line contain valid fingerprints object data? Returns 1 or 0.
#
sub IsFingerprintsDataValid {
  my($This) = @_;

  return defined $This->{FingerprintsObject} ? 1 : 0;
}

# Read next available fingerprints line, process it and generate appropriate fingerprints
# objects...
#
# Returns undef when no further data line is available; otherwise, returns the object
# itself, even for lines which are skipped due to invalid file data or failed validation.
# Use IsFingerprintsDataValid to find out whether a fingerprints object was generated
# for the current line.
#
sub Read {
  my($This) = @_;

  # Read data line...
  if (!$This->_ReadDataLine()) {
    # Explicit undef is kept for callers relying on the existing return value...
    return undef;
  }

  # No need to process invalid text file with invalid data...
  if (!$This->{ValidFileData}) {
    if ($This->{ValidateData}) {
      $This->{NumOfLinesWithMissingData} += 1;
    }
    return $This;
  }

  # Perform data validation...
  if ($This->{ValidateData}) {
    if (!$This->_ValidateReadDataLine()) {
      return $This;
    }
  }

  # Setup fingerprints string after checking again to handle problematic data for
  # non-validated data lines...
  #
  if ($This->{FingerprintsColNum} <= $#{$This->{DataLineWords}}) {
    $This->{FingerprintsString} = $This->{DataLineWords}[$This->{FingerprintsColNum}];
  }

  # Generate fingerprints object...
  $This->_GenerateFingerprintsObject();

  # Setup fingerprints compound ID for fingerprints string...
  $This->_GenerateCompoundID();

  return $This;
}

# Read next available fingerprints line, process it and generate appropriate fingerprints
# objects. It's just a different name for Read...
#
sub Next {
  my($This) = @_;

  return $This->Read();
}

# Read fingerprints data line...
#
# Returns 1 after reading and splitting a data line and 0 when no line is available.
#
sub _ReadDataLine {
  my($This) = @_;

  if ($This->{FirstDataLineIO}) {
    $This->_ProcessFirstDataLineRead();
  }

  # Initialize data for current line...
  $This->_InitializeReadDataLine();

  # Get next data line...
  $This->{DataLine} = TextUtil::GetTextLine($This->{FileHandle});
  if (!$This->{DataLine}) {
    # Any false value (undef, empty string or '0') is treated as end of data...
    return 0;
  }

  # Get line words...
  $This->{LineNum} += 1;
  @{$This->{DataLineWords}} = TextUtil::SplitWords($This->{DataLine}, $This->{Delim});

  return 1;
}

# Initialize data line for reading: reset all per-line data...
#
sub _InitializeReadDataLine {
  my($This) = @_;

  $This->{CompoundID} = undef;

  $This->{DataLine} = undef;
  @{$This->{DataLineWords}} = ();

  $This->{FingerprintsObject} = undef;
  $This->{FingerprintsString} = undef;

  return $This;
}

# Validate fingerprints string data line...
#
# Returns 1 for a valid line. Returns 0 and updates NumOfLinesWithMissingData or
# NumOfLinesWithInvalidData for a line which has no fingerprints column data, fails
# the fingerprints string values check, or has fingerprints type or description
# different from the first fingerprints string in the file. Warnings are generated
# for DetailLevel >= 2; the data line itself is included for DetailLevel >= 3.
#
sub _ValidateReadDataLine {
  my($This) = @_;

  # Check for missing data...
  if ($This->{FingerprintsColNum} > $#{$This->{DataLineWords}}) {
    # Missing data...
    $This->{NumOfLinesWithMissingData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data: $This->{DataLine}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data...";
    }
    return 0;
  }

  # Check for invalid data...
  my($InvalidFingerprintsData, $FingerprintsColNum, $FingerprintsType, $FingerprintsDescription);

  $InvalidFingerprintsData = 0;
  $FingerprintsColNum = $This->{FingerprintsColNum};

  if (Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($This->{DataLineWords}[$FingerprintsColNum])) {
    ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($This->{DataLineWords}[$FingerprintsColNum]);

    # Type and description come from file data: quote them using \Q...\E so that any
    # regex metacharacters are compared literally instead of being interpreted as a
    # pattern...
    if ($This->{FirstFingerprintsStringType} !~ /^\Q$FingerprintsType\E$/i || $This->{FirstFingerprintsStringDescription} !~ /^\Q$FingerprintsDescription\E$/i) {
      $InvalidFingerprintsData = 1;
    }
  }
  else {
    $InvalidFingerprintsData = 1;
  }

  if ($InvalidFingerprintsData) {
    $This->{NumOfLinesWithInvalidData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data: $This->{DataLine}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data...";
    }
    return 0;
  }

  return 1;
}

# Setup fingerprints compound ID for fingerprints string...
sub _GenerateCompoundID {
  my($This) = @_;

  # Sequential IDs are used on request or when the current line is too short to
  # contain the compound ID column...
  my $UseSequentialID = $This->{UseSequentialCompoundIDs}
    || ($This->{CompoundIDColNum} > $#{$This->{DataLineWords}});

  # Sequential number is the line number without counting the column label line...
  $This->{CompoundID} = $UseSequentialID
    ? $This->{CompoundIDPrefix} . ($This->{LineNum} - 1)
    : $This->{DataLineWords}[$This->{CompoundIDColNum}];

  # Set fingerprints ID...
  $This->{FingerprintsObject}->SetID($This->{CompoundID}) if $This->{FingerprintsObject};

  return $This;
}

# Process first read: skip over the column label line and count it...
#
sub _ProcessFirstDataLineRead {
  my($This) = @_;

  $This->{FirstDataLineIO} = 0;

  # Skip column label line...
  $This->{LineNum}++;
  TextUtil::GetTextLine($This->{FileHandle});

  return $This;
}

# Get ready for reading fingerprints text file...
#
# File data is marked valid only when compound ID column, fingerprints column and
# fingerprints string mode are all valid.
#
sub _PrepareForReadingFingerprintsTextFileData {
  my($This) = @_;

  # Retrieve text file columns information....
  $This->_RetrieveTextFileColData();

  # Validate columns information...
  $This->_ValidateReadCompoundIDCol();
  $This->_ValidateReadFingerprintsCol();

  # Fingerprints string mode can only be checked against a valid fingerprints column...
  $This->_ValidateReadFingerprintsStringMode() if $This->{ValidFingerprintsCol};

  # Set status of text file data...
  my $AllValid = $This->{ValidCompoundIDCol}
    && $This->{ValidFingerprintsCol}
    && $This->{ValidFingerprintsStringMode};
  $This->{ValidFileData} = $AllValid ? 1 : 0;

  return $This;
}

# Retrieve information about columns and fingerprints string...
#
# Reads the column label line of the text file, sets up the delimiter to use
# (tab for TSV files; otherwise based on InDelim) and records column labels
# along with a label to column number (zero based) map.
#
# Croaks when the file doesn't exist or can't be opened.
#
sub _RetrieveTextFileColData {
  my($This) = @_;
  my($TextFile, $FileDir, $FileName, $FileExt, $InDelim, $Line, $ColLabel, $ColNum, $TextFileHandle, @ColLabels);

  @{$This->{DataColLabels}} = ();
  %{$This->{DataColLabelToNumMap}} = ();

  $TextFile = $This->{Name};

  if (!(-e $TextFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $TextFile, doesn't exist...";
  }

  $FileDir = ""; $FileName = ""; $FileExt = "";
  ($FileDir, $FileName, $FileExt) = FileUtil::ParseFileName($TextFile);

  $InDelim = ($FileExt =~ /^tsv$/i) ? "\t" : ($This->{InDelim} =~ /semicolon/i ? "\;" : "\,");
  $This->{Delim} = $InDelim;

  # Three-argument open with a lexical file handle: the file name is never
  # interpreted as an open mode or command, and the handle can't clash with any
  # other TEXTFILE handle in the program...
  if (!open $TextFileHandle, '<', $TextFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input text file $TextFile: $! ...";
  }

  # Get column label line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  close $TextFileHandle;

  @ColLabels = TextUtil::SplitWords($Line, $InDelim);

  # Set text file columns info....
  push @{$This->{DataColLabels}}, @ColLabels;

  for $ColNum (0 .. $#ColLabels) {
    $ColLabel = $ColLabels[$ColNum];
    $This->{DataColLabelToNumMap}{$ColLabel} = $ColNum;
  }

  return $This;
}

# Validate compound ID column information...
#
# Returns 1 and sets ValidCompoundIDCol, CompoundIDColNum (zero based) and
# UseSequentialCompoundIDs for a usable CompoundIDCol specification; otherwise,
# generates a warning and returns 0.
#
sub _ValidateReadCompoundIDCol {
  my($This) = @_;
  my($CompoundIDCol, $CompoundIDColNum, $UseSequentialCompoundIDs, $ColFound, $ColNum);

  $This->{ValidCompoundIDCol} = 0;
  $This->{CompoundIDColNum} = undef;
  $This->{UseSequentialCompoundIDs} = 0;

  $CompoundIDCol = $This->{CompoundIDCol};

  $UseSequentialCompoundIDs = 0;
  $CompoundIDColNum = '';

  if ($CompoundIDCol =~ /^AutoDetect$/i) {
    # First column containing the word CompoundID in its label or sequential generation...
    #
    # Columns are scanned by position instead of looking up the label in the label
    # to number map: for duplicate column labels, the map holds the last column.

    $ColFound = 0;
    COLNUM: for my $Index (0 .. $#{$This->{DataColLabels}}) {
      if ($This->{DataColLabels}[$Index] =~ /CompoundID/i) {
        $ColFound = 1;
        $ColNum = $Index;
        last COLNUM;
      }
    }
    if ($ColFound) {
      $CompoundIDColNum = $ColNum;
    }
    else {
      $UseSequentialCompoundIDs = 1;
    }
  }
  else {
    if ($This->{ColMode} =~ /^ColNum$/i) {
      # Is it a valid column number? It must be a positive integer: zero or a non-numeric
      # value would otherwise map to index -1 and silently pick up the last column...
      if (!TextUtil::IsPositiveInteger($CompoundIDCol) || $CompoundIDCol > scalar @{$This->{DataColLabels}}) {
        carp "Warning: ${ClassName}->_ValidateReadCompoundIDCol: Column number, $CompoundIDCol, specified using CompoundIDCol doesn't exist...";
        return 0;
      }
      $CompoundIDColNum = $CompoundIDCol - 1;
    }
    elsif ($This->{ColMode} =~ /^ColLabel$/i) {
      # Does this column exist?
      if (!exists $This->{DataColLabelToNumMap}{$CompoundIDCol}) {
        carp "Warning: ${ClassName}->_ValidateReadCompoundIDCol: Column name, $CompoundIDCol, specified using CompoundIDCol doesn't exist...";
        return 0;
      }
      $CompoundIDColNum = $This->{DataColLabelToNumMap}{$CompoundIDCol};
    }
  }

  $This->{ValidCompoundIDCol} = 1;
  $This->{CompoundIDColNum} = $CompoundIDColNum;
  $This->{UseSequentialCompoundIDs} = $UseSequentialCompoundIDs;

  return 1;
}

# Validate fingerprints string column information...
#
# Returns 1 and sets ValidFingerprintsCol and FingerprintsColNum (zero based) for
# a usable FingerprintsCol specification; otherwise, generates a warning and
# returns 0.
#
sub _ValidateReadFingerprintsCol {
  my($This) = @_;
  my($FingerprintsColNum, $FingerprintsCol, $ColFound, $ColNum);

  $This->{ValidFingerprintsCol} = 0;
  $This->{FingerprintsColNum} = undef;

  $FingerprintsColNum = undef;
  $FingerprintsCol = $This->{FingerprintsCol};

  if ($FingerprintsCol =~ /^AutoDetect$/i) {
    # First column containing the word Fingerprints in its label...
    #
    # Columns are scanned by position instead of looking up the label in the label
    # to number map: for duplicate column labels, the map holds the last column.

    $ColFound = 0;
    COLNUM: for my $Index (0 .. $#{$This->{DataColLabels}}) {
      if ($This->{DataColLabels}[$Index] =~ /Fingerprints/i) {
        $ColFound = 1;
        $ColNum = $Index;
        last COLNUM;
      }
    }
    if (!$ColFound) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column label containing \"Fingerprints\" string in its name doesn't exist...";
      return 0;
    }
    $FingerprintsColNum = $ColNum;
  }
  else {
    if ($This->{ColMode} =~ /^ColNum$/i) {
      # Is it a valid column number? It must be a positive integer: zero or a non-numeric
      # value would otherwise map to index -1 and silently pick up the last column...
      if (!TextUtil::IsPositiveInteger($FingerprintsCol) || $FingerprintsCol > scalar @{$This->{DataColLabels}}) {
        carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column number, $FingerprintsCol, specified using FingerprintsCol doesn't exist...";
        return 0;
      }
      $FingerprintsColNum = $FingerprintsCol - 1;
    }
    elsif ($This->{ColMode} =~ /^ColLabel$/i) {
      # Does this column exist?
      if (!exists $This->{DataColLabelToNumMap}{$FingerprintsCol}) {
        carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column label, $FingerprintsCol, specified using FingerprintsCol doesn't exist...";
        return 0;
      }
      $FingerprintsColNum = $This->{DataColLabelToNumMap}{$FingerprintsCol};
    }
  }

  $This->{ValidFingerprintsCol} = 1;
  $This->{FingerprintsColNum} = $FingerprintsColNum;

  return 1;
}

# Validate fingerprints string mode information...
#
# Uses the fingerprints string in the first data line to check it against the
# specified FingerprintsStringMode, or to detect the mode for AutoDetect.
#
# Returns 1 and sets ValidFingerprintsStringMode, FingerprintsBitVectorStringMode,
# FingerprintsVectorStringMode, FirstFingerprintsStringType and
# FirstFingerprintsStringDescription on success; otherwise, generates a warning
# and returns 0. Croaks when the file can't be opened.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;
  my($FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType, $FirstFingerprintsStringDescription, $TextFile, $TextFileHandle, $Line, $FingerprintsColNum, $InDelim, $FirstFingerprintsString, $FingerprintsType, $FingerprintsDescription, @LineWords);

  $This->{ValidFingerprintsStringMode} = 0;

  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;

  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  $FingerprintsBitVectorStringMode = 0;
  $FingerprintsVectorStringMode = 0;

  $FirstFingerprintsStringType = '';
  $FirstFingerprintsStringDescription = '';

  $TextFile = $This->{Name};

  # Three-argument open with a lexical file handle: the file name is never
  # interpreted as an open mode or command...
  if (!open $TextFileHandle, '<', $TextFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input text file $TextFile: $! ...";
  }

  # Skip column label line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  # Get first fingerprints data line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  close $TextFileHandle;

  # Get first fingerprints type and description...
  $InDelim = $This->{Delim};
  @LineWords = defined($Line) ? TextUtil::SplitWords($Line, $InDelim) : ();

  $FingerprintsColNum = $This->{FingerprintsColNum};

  # A file without any data line or a first data line without the fingerprints
  # column can't be used to determine fingerprints string type...
  $FirstFingerprintsString = $LineWords[$FingerprintsColNum];
  if (!defined $FirstFingerprintsString) {
    carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First data line in file, $TextFile, doesn't contain any fingerprints string data...";
    return 0;
  }

  ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($FirstFingerprintsString);

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsBitVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsBitVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsBitVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  else {
    # AutoDetect mode...
    if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
      $FingerprintsBitVectorStringMode = 1;
    }
    elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
      $FingerprintsVectorStringMode = 1;
    }
    else {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintBitVector or FingerprintsVector...";
      return 0;
    }
    $FirstFingerprintsStringType = $FingerprintsType;
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;

  $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
  $This->{FirstFingerprintsStringDescription} = $FirstFingerprintsStringDescription;

  return 1;
}

# Write fingerprints string generated from specified fingerprints, fingerprints-bit vector, or
# fingerprints vector object and other data to text file...
#
# The data column values are written first, followed by the generated fingerprints
# string as the last column. Returns the object itself.
#
sub WriteFingerprints {
  my($This, $FingerprintsObject, @DataColValues) = @_;

  # Initialize data for current line...
  $This->_InitializeWriteDataLine();

  # Set fingerprints object...
  $This->{FingerprintsObject} = $FingerprintsObject;

  # Generate fingerprints string...
  $This->_GenerateFingerprintsString();

  # Set data line words...
  $This->SetDataLineWords(@DataColValues);
  push @{$This->{DataLineWords}}, $This->{FingerprintsString};

  # Write data line..
  $This->_WriteDataLine();

  return $This;
}

# Write fingerprints string and other data to text file...
#
# Note:
#   o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat values
#     are ignored during writing of fingerprints and it's written to the file as it is.
#   o A fingerprints object is still generated from the string, by calling
#     _GenerateFingerprintsObject, before the line is written.
#
# Parameters:
#   $FingerprintsString - fingerprints string appended as the last word of the line
#   @DataColValues      - data column values handed to SetDataLineWords
#
# Returns the object itself.
#
sub WriteFingerprintsString {
  my($This, $FingerprintsString, @DataColValues) = @_;

  # Initialize data for current line...
  $This->_InitializeWriteDataLine();

  # Set fingerprints string...
  $This->{FingerprintsString} = $FingerprintsString;

  # Generate fingerprints object...
  $This->_GenerateFingerprintsObject();

  # Set data line words; the fingerprints string always goes last...
  $This->SetDataLineWords(@DataColValues);
  push @{$This->{DataLineWords}}, $FingerprintsString;

  # Write data line..
  $This->_WriteDataLine();

  return $This;
}

# Initialize data line for writing...
#
# Clears the data line, its words, and the fingerprints object and string left
# over from the previous line. Returns the object itself.
#
sub _InitializeWriteDataLine {
  my($This) = @_;

  $This->{DataLine} = undef;
  @{$This->{DataLineWords}} = ();

  $This->{FingerprintsObject} = undef;
  $This->{FingerprintsString} = undef;

  return $This;
}

# Write fingerprints data line...
#
# Joins the current data line words using the delimiter and quote settings, prints
# the line to the file handle, and records it as the current data line. The first
# call goes through _ProcessFirstDataLineWrite before the data line is written.
#
# Returns the object itself.
#
sub _WriteDataLine {
  my($This) = @_;
  my($FileHandle, $Line);

  if ($This->{FirstDataLineIO}) {
    $This->_ProcessFirstDataLineWrite();
  }

  # Write out line words...
  $Line = TextUtil::JoinWords(\@{$This->{DataLineWords}}, $This->{Delim}, $This->{OutQuote});

  $This->{LineNum} += 1;
  $FileHandle = $This->{FileHandle};
  print $FileHandle "$Line\n";

  $This->{DataLine} = $Line;

  return $This;
}

# Process first write...
#
# Turns off the first data line flag and, only when the mode matches Write, writes
# out the column label line. Returns the object itself.
#
sub _ProcessFirstDataLineWrite {
  my($This) = @_;
  my($Line, $FileHandle);

  $This->{FirstDataLineIO} = 0;

  if ($This->GetMode() =~ /^Write$/i) {
    # Write out column label line...
    $Line = TextUtil::JoinWords(\@{$This->{DataColLabels}}, $This->{Delim}, $This->{OutQuote});

    $This->{LineNum} += 1;
    $FileHandle = $This->{FileHandle};
    print $FileHandle "$Line\n";
  }

  return $This;
}

# Get ready for writing fingerprints text file...
#
# o Croaks when the file already exists and Overwrite is not set.
# o Sets Delim: tab for a tsv file extension; otherwise semicolon or comma based
#   on OutDelim.
# o Sets FingerprintsBitVectorStringMode, FingerprintsVectorStringMode and
#   ValidFingerprintsStringMode from FingerprintsStringMode, and fills in default
#   string format values for the selected mode.
#
# FingerprintsStringMode is initialized to undef by the constructor; an undefined
# value is treated as an empty string here, so it simply matches no mode instead of
# triggering uninitialized value warnings. OutDelim is handled the same way.
#
# Returns the object itself.
#
sub _PrepareForWritingFingerprintsTextFileData {
  my($This) = @_;
  my($TextFile, $FileDir, $FileName, $FileExt, $OutDelim, $OutDelimName, $FingerprintsStringMode);

  $TextFile = $This->{Name};
  if (!$This->{Overwrite}) {
    if (-e $TextFile) {
      croak "Error: ${ClassName}->New: Object can't be instantiated: File, $TextFile, already exist. Use overwrite option...";
    }
  }

  # Set up delimiter for writing file...

  $FileDir = ""; $FileName = ""; $FileExt = "";
  ($FileDir, $FileName, $FileExt) = FileUtil::ParseFileName($TextFile);

  $OutDelimName = defined($This->{OutDelim}) ? $This->{OutDelim} : '';
  $OutDelim = ($FileExt =~ /^tsv$/i) ? "\t" : ($OutDelimName =~ /semicolon/i ? "\;" : "\,");
  $This->{Delim} = $OutDelim;

  # Setup FingerprintsStringMode status...

  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;
  $This->{ValidFingerprintsStringMode} = 0;

  $FingerprintsStringMode = defined($This->{FingerprintsStringMode}) ? $This->{FingerprintsStringMode} : '';

  if ($FingerprintsStringMode =~ /^FingerprintsBitVectorString$/i) {
    $This->{FingerprintsBitVectorStringMode} = 1;
  }
  elsif ($FingerprintsStringMode =~ /^FingerprintsVectorString$/i) {
    $This->{FingerprintsVectorStringMode} = 1;
  }

  $This->{ValidFingerprintsStringMode} = ($This->{FingerprintsBitVectorStringMode} || $This->{FingerprintsVectorStringMode}) ? 1 : 0;

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    $This->_SetDefaultVectorStringFormat();
  }

  return $This;
}

# Set default value for bit string format...
#
# An already specified (true) BitStringFormat is kept as it is; otherwise the value
# returned by Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat is
# used. Returns the object itself.
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}

# Set default value for bits order...
#
# An already specified (true) BitsOrder is kept as it is; otherwise the value
# returned by Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder is used.
# Returns the object itself.
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  $This->{BitsOrder} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();

  return $This;
}

# Set default value for vector string format...
#
# The default is looked up using the current fingerprints object, so nothing is
# changed when VectorStringFormat is already set or no fingerprints object is
# available. Returns the object itself.
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  return $This if $This->{VectorStringFormat};
  return $This unless $This->{FingerprintsObject};

  $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});

  return $This;
}

# Generate fingerprints object using current fingerprints string...
#
# The fingerprints object is always reset to undef first. An empty fingerprints
# string leaves it that way and returns the object itself. Otherwise the string is
# parsed as a bit-vector or vector string depending on which string mode is on.
#
# Returns the object itself, or undef when the string is not empty and neither
# string mode is on.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  return $This unless $This->{FingerprintsString};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});
    return $This;
  }

  # No usable fingerprints string mode...
  return undef;
}

# Generate fingerprints string using current fingerprints object...
#
# The fingerprints string is always reset to an empty string first and stays empty
# when there is no fingerprints object or neither string mode is on. In bit-vector
# mode BitStringFormat and BitsOrder are passed along; in vector mode
# VectorStringFormat is passed along.
#
# Returns the object itself.
#
sub _GenerateFingerprintsString {
  my($This) = @_;

  $This->{FingerprintsString} = '';

  return $This unless $This->{FingerprintsObject};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{BitStringFormat}, $This->{BitsOrder});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{VectorStringFormat});
  }

  return $This;
}

# Is it a fingerprints file?
#
# Works both as a function, IsFingerprintsTextFile($FileName), and as an object
# method, $Object->IsFingerprintsTextFile($FileName). Only the file extension is
# checked, against csv and tsv.
#
# Returns the status reported by FileUtil::CheckFileType.
#
sub IsFingerprintsTextFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # With two arguments and an object up front, the file name is the second one...
  my $CalledOnObject = (@_ == 2) && _IsFingerprintsTextFileIO($FirstParameter);
  my $FileName = $CalledOnObject ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $FileTypeStatus = FileUtil::CheckFileType($FileName, "csv tsv");

  return $FileTypeStatus;
}

# Is it a FingerprintsTextFileIO object?
#
# Returns 1 for a blessed reference which is an instance of this class or one
# derived from it; 0 for anything else.
#
sub _IsFingerprintsTextFileIO {
  my($Object) = @_;

  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
