Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/FileIO/FingerprintsFPFileIO.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:23:18 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4816e4a8ae95 |
|---|---|
| 1 package FileIO::FingerprintsFPFileIO; | |
| 2 # | |
| 3 # $RCSfile: FingerprintsFPFileIO.pm,v $ | |
| 4 # $Date: 2015/02/28 20:48:43 $ | |
| 5 # $Revision: 1.19 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability of fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use Carp; | |
| 31 use Exporter; | |
| 32 use Scalar::Util (); | |
| 33 use TextUtil (); | |
| 34 use FileUtil (); | |
| 35 use TimeUtil (); | |
| 36 use Fingerprints::FingerprintsStringUtil (); | |
| 37 use PackageInfo (); | |
| 38 use FileIO::FileIO; | |
| 39 | |
| 40 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
| 41 | |
| 42 @ISA = qw(FileIO::FileIO Exporter); | |
| 43 @EXPORT = qw(); | |
| 44 @EXPORT_OK = qw(IsFingerprintsFPFile); | |
| 45 | |
| 46 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); | |
| 47 | |
| 48 # Setup class variables: $ClassName is assigned by _InitializeClass() and is used as the prefix of error and warning messages... | |
| 49 my($ClassName); | |
| 50 _InitializeClass(); | |
| 51 | |
| 52 # Class constructor: takes the class followed by property name/value pairs and returns the new object... | |
| 53 sub new { | |
| 54 my($Class, %NamesAndValues) = @_; | |
| 55 | |
| 56 # Build the base object through SUPER::new, re-bless it into this class, then set defaults and apply the supplied properties... | |
| 57 my $This = $Class->SUPER::new(); | |
| 58 bless $This, ref($Class) || $Class; | |
| 59 $This->_InitializeFingerprintsFPFileIO(); | |
| 60 | |
| 61 $This->_InitializeFingerprintsFPFileIOProperties(%NamesAndValues); | |
| 62 | |
| 63 return $This; | |
| 64 } | |
| 65 | |
| 66 # Initialize object data: reset the shared read/write state to its defaults, then the read-only and write-only state; returns the object... | |
| 67 # | |
| 68 sub _InitializeFingerprintsFPFileIO { | |
| 69 my($This) = @_; | |
| 70 | |
| 71 # Fingerprints string data format during read/write... | |
| 72 # | |
| 73 # For file read: | |
| 74 # | |
| 75 # AutoDetect - automatically detect format of fingerprints string | |
| 76 # FingerprintsBitVectorString - Bit vector fingerprints string format | |
| 77 # FingerprintsVectorString - Vector fingerprints string format | |
| 78 # | |
| 79 # Default value: AutoDetect | |
| 80 # | |
| 81 # For file write: | |
| 82 # | |
| 83 # FingerprintsBitVectorString - Bit vector fingerprints string format | |
| 84 # FingerprintsVectorString - Vector fingerprints string format | |
| 85 # | |
| 86 # Default value: undef | |
| 87 # | |
| 88 $This->{FingerprintsStringMode} = undef; | |
| 89 | |
| 90 # For file read: | |
| 91 # | |
| 92 # o Fingerprints bit-vector and vector object for current fingerprints string | |
| 93 # | |
| 94 # For file write: | |
| 95 # | |
| 96 # o Fingerprints bit-vector and vector object for current fingerprints string | |
| 97 # o Any supported fingerprints object: PathLengthFingerprints, ExtendedConnectivity, and so on. | |
| 98 # | |
| 99 $This->{FingerprintsObject} = undef; | |
| 100 | |
| 101 # Fingerprints string for current line during read/write... | |
| 102 $This->{FingerprintsString} = undef; | |
| 103 | |
| 104 # Partial fingerprints string corresponding to what's on the current line | |
| 105 # during read/write... | |
| 106 $This->{PartialFingerprintsString} = undef; | |
| 107 | |
| 108 # Required header data keys and values during read/write... | |
| 109 @{$This->{RequiredHeaderDataKeys}} = (); | |
| 110 %{$This->{RequiredHeaderDataKeysAndValues}} = (); | |
| 111 | |
| 112 # First data line read/write... | |
| 113 $This->{FirstDataLineIO} = 1; | |
| 114 | |
| 115 # Current fingerprints string data line number during read/write... | |
| 116 $This->{LineNum} = 0; | |
| 117 | |
| 118 # FP line data during read/write... | |
| 119 $This->{DataLine} = undef; | |
| 120 | |
| 121 # Initialize parameters for read... | |
| 122 $This->_InitializeFingerprintsFPFileIORead(); | |
| 123 | |
| 124 # Initialize parameters for write... | |
| 125 $This->_InitializeFingerprintsFPFileIOWrite(); | |
| 126 | |
| 127 return $This; | |
| 128 } | |
| 129 | |
| 130 # Initialize class variables; invoked once at file scope right after $ClassName is declared... | |
| 131 sub _InitializeClass { | |
| 132 # Class name, used as the prefix of error and warning messages... | |
| 133 $ClassName = __PACKAGE__; | |
| 134 | |
| 135 } | |
| 136 | |
| 137 # Initialize object data for reading fingerprints FP file: header data, validation settings, counters and status flags; returns the object... | |
| 138 # | |
| 139 sub _InitializeFingerprintsFPFileIORead { | |
| 140 my($This) = @_; | |
| 141 | |
| 142 # Header data keys and values... | |
| 143 # | |
| 144 @{$This->{HeaderDataKeys}} = (); | |
| 145 %{$This->{HeaderDataKeysAndValues}} = (); | |
| 146 %{$This->{CannonicalHeaderDataKeysAndValues}} = (); | |
| 147 | |
| 148 # By default, data validation is turned on: Read validates each data line before | |
| 149 # generating a fingerprints object for it... | |
| 150 # | |
| 151 $This->{ValidateData} = 1; | |
| 152 | |
| 153 # Level of detail to print during validation of data for invalid or missing data... | |
| 154 $This->{DetailLevel} = 1; | |
| 155 | |
| 156 # Number of missing and invalid fingerprints string data lines... | |
| 157 $This->{NumOfLinesWithMissingData} = 0; | |
| 158 $This->{NumOfLinesWithInvalidData} = 0; | |
| 159 | |
| 160 # Compound ID for current fingerprints string... | |
| 161 $This->{CompoundID} = undef; | |
| 162 | |
| 163 # Status of data in fingerprints FP file; all flags start out false until the header data has been checked... | |
| 164 $This->{ValidFileData} = 0; | |
| 165 $This->{ValidRequiredHeaderDataKeys} = 0; | |
| 166 $This->{ValidFingerprintsStringMode} = 0; | |
| 167 | |
| 168 return $This; | |
| 169 } | |
| 170 | |
| 171 # Initialize object data for writing fingerprints FP file: the three string format settings start undefined and Overwrite starts at 0; returns the object... | |
| 172 # | |
| 173 sub _InitializeFingerprintsFPFileIOWrite { | |
| 174 my($This) = @_; | |
| 175 | |
| 176 # Fingerprints bit vector string format... | |
| 177 # | |
| 178 # Possible values: BinaryString or HexadecimalString [Default] | |
| 179 # | |
| 180 # Default BitStringFormat is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat. | |
| 181 # | |
| 182 $This->{BitStringFormat} = undef; | |
| 183 | |
| 184 # Bits order in fingerprints bit vector string... | |
| 185 # | |
| 186 # Ascending - First bit in each byte as the lowest bit [Default] | |
| 187 # Descending - First bit in each byte as the highest bit | |
| 188 # | |
| 189 # Default BitsOrder is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder. | |
| 190 # | |
| 191 $This->{BitsOrder} = undef; | |
| 192 | |
| 193 # Fingerprints vector string format... | |
| 194 # | |
| 195 # Possible values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString | |
| 196 # | |
| 197 # Default VectorStringFormat is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat. | |
| 198 # For fingerprints vector object containing vector NumericalValues, it corresponds to IDsAndValuesString; otherwise, | |
| 199 # it's set to ValuesString. | |
| 200 # | |
| 201 $This->{VectorStringFormat} = undef; | |
| 202 | |
| 203 # Overwriting existing file is off by default... | |
| 204 $This->{Overwrite} = 0; | |
| 205 | |
| 206 return $This; | |
| 207 } | |
| 208 | |
| 209 # Initialize object values: call Set<Name>(Value) for every supplied pair, croak unless Name was supplied and passes IsFingerprintsFPFile, then run the read or write specific setup chosen by GetMode()... | |
| 210 sub _InitializeFingerprintsFPFileIOProperties { | |
| 211 my($This, %NamesAndValues) = @_; | |
| 212 | |
| 213 # All other property names and values along with all Set/Get<PropertyName> methods | |
| 214 # are implemented on-demand using ObjectProperty class. | |
| 215 | |
| 216 my($Name, $Value, $MethodName); | |
| 217 while (($Name, $Value) = each %NamesAndValues) { | |
| 218 $MethodName = "Set${Name}"; | |
| 219 $This->$MethodName($Value); | |
| 220 } | |
| 221 | |
| 222 if (!exists $NamesAndValues{Name}) { | |
| 223 croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name..."; | |
| 224 } | |
| 225 | |
| 226 # Make sure it's a fingerprints file; IsFingerprintsFPFile is not defined in the visible part of this file... | |
| 227 $Name = $NamesAndValues{Name}; | |
| 228 if (!$This->IsFingerprintsFPFile($Name)) { | |
| 229 croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be fingerprints format..."; | |
| 230 } | |
| 231 | |
| 232 if ($This->GetMode() =~ /^Read$/i) { | |
| 233 $This->_InitializeFingerprintsFPFileIOReadProperties(%NamesAndValues); | |
| 234 } | |
| 235 elsif ($This->GetMode() =~ /^(Write|Append)$/i) { | |
| 236 $This->_InitializeFingerprintsFPFileIOWriteProperties(%NamesAndValues); | |
| 237 } | |
| 238 | |
| 239 return $This; | |
| 240 } | |
| 241 | |
| 242 # Initialize object properties for reading fingerprints FP file: default the string mode and prepare the header data; returns the object... | |
| 243 # | |
| 244 sub _InitializeFingerprintsFPFileIOReadProperties { | |
| 245 my($This, %NamesAndValues) = @_; | |
| 246 | |
| 247 # Set default value for FingerprintsStringMode when it holds no true value... | |
| 248 if (!$This->{FingerprintsStringMode}) { | |
| 249 $This->{FingerprintsStringMode} = 'AutoDetect'; | |
| 250 } | |
| 251 | |
| 252 $This->_PrepareForReadingFingerprintsFPFileData(); | |
| 253 | |
| 254 return $This; | |
| 255 } | |
| 256 | |
| 257 # Initialize object properties for writing fingerprints FP file: croaks unless FingerprintsStringMode was supplied and names a write format, then prepares for writing; returns the object... | |
| 258 # | |
| 259 sub _InitializeFingerprintsFPFileIOWriteProperties { | |
| 260 my($This, %NamesAndValues) = @_; | |
| 261 | |
| 262 # Check FingerprintsStringMode value: it must be supplied explicitly and AutoDetect is not accepted for write/append... | |
| 263 if (!exists $NamesAndValues{FingerprintsStringMode}) { | |
| 264 croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode..."; | |
| 265 } | |
| 266 | |
| 267 if ($This->{FingerprintsStringMode} !~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) { | |
| 268 croak "Error: ${ClassName}->New: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString..."; | |
| 269 } | |
| 270 | |
| 271 $This->_PrepareForWritingFingerprintsFPFileData(); | |
| 272 | |
| 273 return $This; | |
| 274 } | |
| 275 | |
| 276 # Set FingerprintsStringMode: croaks unless the value matches one of the names listed below (case-insensitive); returns the object... | |
| 277 # | |
| 278 sub SetFingerprintsStringMode { | |
| 279 my($This, $Value) = @_; | |
| 280 | |
| 281 # AutoDetect - automatically detect format of fingerprints string | |
| 282 # FingerprintsBitVectorString - Bit vector fingerprints string format | |
| 283 # FingerprintsVectorString - Vector fingerprints string format | |
| 284 | |
| 285 if ($Value !~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) { | |
| 286 croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString..."; | |
| 287 } | |
| 288 | |
| 289 $This->{FingerprintsStringMode} = $Value; | |
| 290 | |
| 291 return $This; | |
| 292 } | |
| 293 | |
| 294 # Set DetailLevel: croaks unless TextUtil::IsPositiveInteger accepts the value; returns the object... | |
| 295 # | |
| 296 sub SetDetailLevel { | |
| 297 my($This, $Value) = @_; | |
| 298 | |
| 299 if (!TextUtil::IsPositiveInteger($Value)) { | |
| 300 croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0..."; | |
| 301 } | |
| 302 | |
| 303 $This->{DetailLevel} = $Value; | |
| 304 | |
| 305 return $This; | |
| 306 } | |
| 307 | |
| 308 # Set BitStringFormat: croaks unless the value is BinaryString or HexadecimalString (case-insensitive); returns the object... | |
| 309 # | |
| 310 sub SetBitStringFormat { | |
| 311 my($This, $Value) = @_; | |
| 312 | |
| 313 if ($Value !~ /^(BinaryString|HexadecimalString)$/i) { | |
| 314 croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString..."; | |
| 315 } | |
| 316 | |
| 317 $This->{BitStringFormat} = $Value; | |
| 318 | |
| 319 return $This; | |
| 320 } | |
| 321 | |
| 322 # Set BitsOrder: croaks unless the value is Ascending or Descending (case-insensitive); returns the object... | |
| 323 # | |
| 324 sub SetBitsOrder { | |
| 325 my($This, $Value) = @_; | |
| 326 | |
| 327 # Ascending - First bit in each byte as the lowest bit | |
| 328 # Descending - First bit in each byte as the highest bit | |
| 329 # | |
| 330 if ($Value !~ /^(Ascending|Descending)$/i) { | |
| 331 croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending..."; | |
| 332 } | |
| 333 | |
| 334 $This->{BitsOrder} = $Value; | |
| 335 | |
| 336 return $This; | |
| 337 } | |
| 338 | |
| 339 # Set compound ID: space characters are removed from the value, with a warning, before it is stored; returns the object... | |
| 340 # | |
| 341 sub SetCompoundID { | |
| 342 my($This, $Value) = @_; | |
| 343 | |
| 344 if ($Value =~ / /) { | |
| 345 $Value =~ s/ //g; | |
| 346 carp "Warning: ${ClassName}->SetCompoundID: Spaces are not allowed in compound ID; They have been removed..."; | |
| 347 } | |
| 348 | |
| 349 $This->{CompoundID} = $Value; | |
| 350 | |
| 351 return $This; | |
| 352 } | |
| 353 | |
| 354 # Set VectorStringFormat: croaks unless the value matches one of the names listed below (case-insensitive); returns the object... | |
| 355 # | |
| 356 sub SetVectorStringFormat { | |
| 357 my($This, $Value) = @_; | |
| 358 | |
| 359 # Possible values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString | |
| 360 | |
| 361 if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) { | |
| 362 croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString..."; | |
| 363 } | |
| 364 | |
| 365 $This->{VectorStringFormat} = $Value; | |
| 366 | |
| 367 return $This; | |
| 368 } | |
| 369 | |
| 370 # Get header data keys: the list of keys in list context, the number of keys in scalar context... | |
| 371 # | |
| 372 sub GetHeaderDataKeys { | |
| 373 my($This) = @_; | |
| 374 | |
| 375 return wantarray ? @{$This->{HeaderDataKeys}} : scalar @{$This->{HeaderDataKeys}}; | |
| 376 } | |
| 377 | |
| 378 # Set header data keys: not supported; always croaks, so the trailing return is never reached... | |
| 379 # | |
| 380 sub SetHeaderDataKeys { | |
| 381 my($This, @Keys) = @_; | |
| 382 | |
| 383 croak "Error: ${ClassName}->SetHeaderDataKeys: Can't set HeaderDataKeys: Not allowed..."; | |
| 384 | |
| 385 return $This; | |
| 386 } | |
| 387 | |
| 388 # Get header data keys and values hash, returned as a key/value list... | |
| 389 # | |
| 390 sub GetHeaderDataKeysAndValues { | |
| 391 my($This) = @_; | |
| 392 | |
| 393 return %{$This->{HeaderDataKeysAndValues}}; | |
| 394 } | |
| 395 | |
| 396 # Set header data keys and values hash: not supported; always croaks, so the trailing return is never reached... | |
| 397 # | |
| 398 sub SetHeaderDataKeysAndValues { | |
| 399 my($This, %KeysAndValues) = @_; | |
| 400 | |
| 401 croak "Error: ${ClassName}->SetHeaderDataKeysAndValues: Can't set HeaderDataKeysAndValues: Not allowed..."; | |
| 402 | |
| 403 return $This; | |
| 404 } | |
| 405 | |
| 406 # Get required header data keys: the list of required keys in list context, the number of required keys in scalar context... | |
| 407 # | |
| 408 sub GetRequiredHeaderDataKeys { | |
| 409 my($This) = @_; | |
| 410 | |
| 411 return wantarray ? @{$This->{RequiredHeaderDataKeys}} : scalar @{$This->{RequiredHeaderDataKeys}}; | |
| 412 } | |
| 413 | |
| 414 # Set required header data keys: not supported; always croaks, so the trailing return is never reached... | |
| 415 # | |
| 416 sub SetRequiredHeaderDataKeys { | |
| 417 my($This, @Keys) = @_; | |
| 418 | |
| 419 croak "Error: ${ClassName}->SetRequiredHeaderDataKeys: Can't set RequiredHeaderDataKeys: Not allowed..."; | |
| 420 | |
| 421 return $This; | |
| 422 } | |
| 423 | |
| 424 # Get required header data keys and values hash, returned as a key/value list... | |
| 425 # | |
| 426 sub GetRequiredHeaderDataKeysAndValues { | |
| 427 my($This) = @_; | |
| 428 | |
| 429 return %{$This->{RequiredHeaderDataKeysAndValues}}; | |
| 430 } | |
| 431 | |
| 432 # Set required header data keys and values hash: not supported; always croaks, so the trailing return is never reached... | |
| 433 # | |
| 434 sub SetRequiredHeaderDataKeysAndValues { | |
| 435 my($This, %KeysAndValues) = @_; | |
| 436 | |
| 437 croak "Error: ${ClassName}->SetRequiredHeaderDataKeysAndValues: Can't set RequiredHeaderDataKeysAndValues: Not allowed..."; | |
| 438 | |
| 439 return $This; | |
| 440 } | |
| 441 | |
| 442 # Get fingerprints object for current data line; the stored value is returned as is and may be undef... | |
| 443 # | |
| 444 sub GetFingerprints { | |
| 445 my($This) = @_; | |
| 446 | |
| 447 return $This->{FingerprintsObject}; | |
| 448 } | |
| 449 | |
| 450 # Set fingerprints object for current data line; the value is stored without any checks and the object is returned... | |
| 451 # | |
| 452 sub SetFingerprints { | |
| 453 my($This, $FingerprintsObject) = @_; | |
| 454 | |
| 455 $This->{FingerprintsObject} = $FingerprintsObject; | |
| 456 | |
| 457 return $This; | |
| 458 } | |
| 459 | |
| 460 # Get fingerprints string for current data line; the string 'None' is returned when no true value is stored... | |
| 461 # | |
| 462 sub GetFingerprintsString { | |
| 463 my($This) = @_; | |
| 464 | |
| 465 return $This->{FingerprintsString} ? $This->{FingerprintsString} : 'None'; | |
| 466 } | |
| 467 | |
| 468 # Set fingerprints string for current data line; the value is stored without any checks and the object is returned... | |
| 469 # | |
| 470 sub SetFingerprintsString { | |
| 471 my($This, $FingerprintsString) = @_; | |
| 472 | |
| 473 $This->{FingerprintsString} = $FingerprintsString; | |
| 474 | |
| 475 return $This; | |
| 476 } | |
| 477 | |
| 478 # Get partial fingerprints string for current data line; the string 'None' is returned when no true value is stored... | |
| 479 # | |
| 480 sub GetPartialFingerprintsString { | |
| 481 my($This) = @_; | |
| 482 | |
| 483 return $This->{PartialFingerprintsString} ? $This->{PartialFingerprintsString} : 'None'; | |
| 484 } | |
| 485 | |
| 486 # Set partial fingerprints string for current data line; the value is stored without any checks and the object is returned... | |
| 487 # | |
| 488 sub SetPartialFingerprintsString { | |
| 489 my($This, $PartialFingerprintsString) = @_; | |
| 490 | |
| 491 $This->{PartialFingerprintsString} = $PartialFingerprintsString; | |
| 492 | |
| 493 return $This; | |
| 494 } | |
| 495 | |
| 496 # Does fingerprints FP file contain valid data? Returns 1 when the ValidFileData flag is true and 0 otherwise... | |
| 497 # | |
| 498 sub IsFingerprintsFileDataValid { | |
| 499 my($This) = @_; | |
| 500 | |
| 501 return $This->{ValidFileData} ? 1 : 0; | |
| 502 } | |
| 503 | |
| 504 # Does current data line contain valid fingerprints object data? Returns 1 when a fingerprints object is defined and 0 otherwise... | |
| 505 # | |
| 506 sub IsFingerprintsDataValid { | |
| 507 my($This) = @_; | |
| 508 | |
| 509 return defined $This->{FingerprintsObject} ? 1 : 0; | |
| 510 } | |
| 511 | |
| 512 # Check presence of a header data key; the key is lower-cased before lookup, so the check is case-insensitive. Returns 1 or 0... | |
| 513 # | |
| 514 sub IsHeaderDataKeyPresent { | |
| 515 my($This, $Key) = @_; | |
| 516 my($CannonicalKey); | |
| 517 | |
| 518 $CannonicalKey = lc $Key; | |
| 519 | |
| 520 return exists $This->{CannonicalHeaderDataKeysAndValues}{$CannonicalKey} ? 1 : 0; | |
| 521 } | |
| 522 | |
| 523 # Get value of header data key; the key is lower-cased before lookup and undef is returned when the key is absent... | |
| 524 # | |
| 525 sub GetHeaderDataKeyValue { | |
| 526 my($This, $Key) = @_; | |
| 527 my($CannonicalKey); | |
| 528 | |
| 529 $CannonicalKey = lc $Key; | |
| 530 | |
| 531 return exists $This->{CannonicalHeaderDataKeysAndValues}{$CannonicalKey} ? $This->{CannonicalHeaderDataKeysAndValues}{$CannonicalKey} : undef; | |
| 532 } | |
| 533 | |
| 534 # | |
| 535 # Read next available fingerprints line, process it and generate appropriate fingerprints | |
| 536 # objects... Returns undef when no data line is available; otherwise returns the object, whether or not a fingerprints object was generated. | |
| 537 # | |
| 538 sub Read { | |
| 539 my($This) = @_; | |
| 540 | |
| 541 # Read data line... | |
| 542 if (!$This->_ReadDataLine()) { | |
| 543 return undef; | |
| 544 } | |
| 545 | |
| 546 # File-level data is not valid: skip processing; with validation on, count the line as missing data... | |
| 547 if (!$This->{ValidFileData}) { | |
| 548 if ($This->{ValidateData}) { | |
| 549 $This->{NumOfLinesWithMissingData} += 1; | |
| 550 } | |
| 551 return $This; | |
| 552 } | |
| 553 | |
| 554 # Perform data validation... | |
| 555 if ($This->{ValidateData}) { | |
| 556 if (!$This->_ValidateReadDataLine()) { | |
| 557 return $This; | |
| 558 } | |
| 559 } | |
| 560 | |
| 561 # Check again to handle problematic data for non-validated data lines... | |
| 562 if (!$This->{FingerprintsString}) { | |
| 563 return $This; | |
| 564 } | |
| 565 | |
| 566 # Generate fingerprints object... | |
| 567 $This->_GenerateFingerprintsObject(); | |
| 568 | |
| 569 # Setup fingerprints compound ID for fingerprints string... | |
| 570 $This->_GenerateCompoundID(); | |
| 571 | |
| 572 return $This; | |
| 573 } | |
| 574 | |
| 575 # Read next available fingerprints line, process it and generate appropriate fingerprints | |
| 576 # objects... This is an alias: it simply returns the result of Read(). | |
| 577 # | |
| 578 sub Next { | |
| 579 my($This) = @_; | |
| 580 | |
| 581 return $This->Read(); | |
| 582 } | |
| 583 | |
| 584 # Read fingerprints data line: returns 1 after a line has been read and processed, 0 when no line is available... | |
| 585 # | |
| 586 sub _ReadDataLine { | |
| 587 my($This) = @_; | |
| 588 | |
| 589 # Initialize data for current line... | |
| 590 $This->_InitializeReadDataLine(); | |
| 591 | |
| 592 if ($This->{FirstDataLineIO}) { | |
| 593 # Get first data line... | |
| 594 $This->_ProcessFirstDataLineRead(); | |
| 595 } | |
| 596 else { | |
| 597 # Get next data line... | |
| 598 $This->{LineNum} += 1; | |
| 599 $This->{DataLine} = TextUtil::GetTextLine($This->{FileHandle}); | |
| 600 } | |
| 601 | |
| 602 # Is it end of file? Any false DataLine value is treated as end of file here... | |
| 603 if (!$This->{DataLine}) { | |
| 604 return 0; | |
| 605 } | |
| 606 | |
| 607 # Process data line to retrieve compound ID and fingerprints string information... | |
| 608 $This->_ProcessDataLineRead(); | |
| 609 | |
| 610 return 1; | |
| 611 } | |
| 612 | |
| 613 # Process data line to retrieve compound ID and fingerprints string information: the text before the first run of spaces is the compound ID and the rest is the partial fingerprints string; nothing is set when the line contains no space... | |
| 614 # | |
| 615 sub _ProcessDataLineRead { | |
| 616 my($This) = @_; | |
| 617 my($CompoundID, $PartialFingerprintsString); | |
| 618 | |
| 619 ($CompoundID, $PartialFingerprintsString) = $This->{DataLine} =~ /^(.*?)[ ]+(.*?)$/; | |
| 620 | |
| 621 if (!(defined($CompoundID) && defined($PartialFingerprintsString))) { | |
| 622 return $This; | |
| 623 } | |
| 624 | |
| 625 $This->{CompoundID} = $CompoundID; | |
| 626 $This->{PartialFingerprintsString} = $PartialFingerprintsString; | |
| 627 | |
| 628 # Set up fingerprints string; the helper called here is not defined in the visible part of this file... | |
| 629 $This->_GenerateFingerprintsStringFromPartialFingerprintsString(); | |
| 630 | |
| 631 return $This; | |
| 632 } | |
| 633 | |
| 634 # Initialize data line for reading: clear the compound ID, data line, fingerprints object and both fingerprints strings left from the previous line; returns the object... | |
| 635 # | |
| 636 sub _InitializeReadDataLine { | |
| 637 my($This) = @_; | |
| 638 | |
| 639 $This->{CompoundID} = undef; | |
| 640 $This->{DataLine} = undef; | |
| 641 | |
| 642 $This->{FingerprintsObject} = undef; | |
| 643 | |
| 644 $This->{FingerprintsString} = undef; | |
| 645 $This->{PartialFingerprintsString} = undef; | |
| 646 | |
| 647 return $This; | |
| 648 } | |
| 649 | |
| 650 # Validate fingerprints string data line: returns 1 when the line is valid; otherwise increments the missing or invalid data counter and returns 0. Warnings are issued at DetailLevel >= 2 and include the line text at DetailLevel >= 3... | |
| 651 # | |
| 652 sub _ValidateReadDataLine { | |
| 653 my($This) = @_; | |
| 654 | |
| 655 # Check for missing data: both the compound ID and the partial fingerprints string must hold true values... | |
| 656 if (!($This->{CompoundID} && $This->{PartialFingerprintsString})) { | |
| 657 # Missing data... | |
| 658 $This->{NumOfLinesWithMissingData} += 1; | |
| 659 if ($This->{DetailLevel} >= 3) { | |
| 660 carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data: $This->{DataLine}..."; | |
| 661 } | |
| 662 elsif ($This->{DetailLevel} >= 2) { | |
| 663 carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data..."; | |
| 664 } | |
| 665 return 0; | |
| 666 } | |
| 667 | |
| 668 # Check for invalid data: a missing fingerprints string also counts as invalid... | |
| 669 my($InvalidFingerprintsData); | |
| 670 | |
| 671 $InvalidFingerprintsData = 0; | |
| 672 if ($This->{FingerprintsString}) { | |
| 673 $InvalidFingerprintsData = Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($This->{FingerprintsString}) ? 0 : 1; | |
| 674 } | |
| 675 else { | |
| 676 $InvalidFingerprintsData = 1; | |
| 677 } | |
| 678 | |
| 679 if ($InvalidFingerprintsData) { | |
| 680 $This->{NumOfLinesWithInvalidData} += 1; | |
| 681 if ($This->{DetailLevel} >= 3) { | |
| 682 carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data: $This->{DataLine}..."; | |
| 683 } | |
| 684 elsif ($This->{DetailLevel} >= 2) { | |
| 685 carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data..."; | |
| 686 } | |
| 687 return 0; | |
| 688 } | |
| 689 | |
| 690 return 1; | |
| 691 } | |
| 692 | |
| 693 # Setup fingerprints compound ID for fingerprints string: pass the current compound ID to the fingerprints object's SetID when an object exists; returns the object... | |
| 694 sub _GenerateCompoundID { | |
| 695 my($This) = @_; | |
| 696 | |
| 697 # Set fingerprints ID; nothing is done when no fingerprints object is available... | |
| 698 if ($This->{FingerprintsObject}) { | |
| 699 $This->{FingerprintsObject}->SetID($This->{CompoundID}); | |
| 700 } | |
| 701 | |
| 702 return $This; | |
| 703 } | |
| 704 | |
| 705 # Process first read: clear the FirstDataLineIO flag and store the first line not starting with '#' as the current data line, counting every line read; returns the object... | |
| 706 # | |
| 707 sub _ProcessFirstDataLineRead { | |
| 708 my($This) = @_; | |
| 709 my($Line); | |
| 710 | |
| 711 $This->{FirstDataLineIO} = 0; | |
| 712 | |
| 713 # Skip over header data lines and collect first data line; DataLine is left untouched when no such line is found... | |
| 714 | |
| 715 LINE: while ($Line = TextUtil::GetTextLine($This->{FileHandle})) { | |
| 716 $This->{LineNum} += 1; | |
| 717 | |
| 718 # Is it a header data line? | |
| 719 if ($Line =~ /^#/) { | |
| 720 next LINE; | |
| 721 } | |
| 722 $This->{DataLine} = $Line; | |
| 723 last LINE; | |
| 724 } | |
| 725 | |
| 726 return $This; | |
| 727 } | |
| 728 | |
| 729 # Get ready for reading fingerprints FP file: load and validate the header data, then set the ValidFileData flag; returns the object... | |
| 730 # | |
| 731 sub _PrepareForReadingFingerprintsFPFileData { | |
| 732 my($This) = @_; | |
| 733 | |
| 734 # Retrieve FP file data headers information.... | |
| 735 $This->_RetrieveFPFileDataHeaders(); | |
| 736 | |
| 737 # Validate header data keys and values information... | |
| 738 $This->_ValidateReadHeaderDataKeysAndValues(); | |
| 739 | |
| 740 # Validate fingerprints string mode information, only when the required header data keys are valid... | |
| 741 if ($This->{ValidRequiredHeaderDataKeys}) { | |
| 742 $This->_ValidateReadFingerprintsStringMode(); | |
| 743 } | |
| 744 | |
| 745 # Set status of FP file data: valid only when both the required header keys and the string mode are valid... | |
| 746 $This->{ValidFileData} = ($This->{ValidRequiredHeaderDataKeys} && $This->{ValidFingerprintsStringMode}) ? 1 : 0; | |
| 747 | |
| 748 return $This; | |
| 749 } | |
| 750 | |
| 751 # Retrieve information about fingerprints data header in FP file: read the leading '#' lines of file Name and collect their "Key = Value" pairs (separated by ';') into HeaderDataKeys, HeaderDataKeysAndValues and the lower-cased CannonicalHeaderDataKeysAndValues. A header line with an empty key or value is ignored as a whole, with a warning. Croaks when the file is missing or can't be opened; returns the object... | |
| 752 # | |
| 753 sub _RetrieveFPFileDataHeaders { | |
| 754 my($This) = @_; | |
| 755 my($FPFile, $Line, $Index, $KeyValuePair, $Key, $Value, $KeyValueDelimiter, $KeyValuePairDelimiter, @LineKeyValuePairs); | |
| 756 | |
| 757 $FPFile = $This->{Name}; | |
| 758 | |
| 759 if (!(-e $FPFile)) { | |
| 760 croak "Error: ${ClassName}->_RetrieveFPFileDataHeaders: File, $FPFile, doesn't exist..."; | |
| 761 } | |
| 762 | |
| 763 if (!open FPFILE, '<', $FPFile) { | |
| 764 croak "Error: ${ClassName}->_RetrieveFPFileDataHeaders: Couldn't open input FP file $FPFile: $! ..."; | |
| 765 } | |
| 766 | |
| 767 # Process header key/value pair data... | |
| 768 # | |
| 769 $KeyValueDelimiter = '='; | |
| 770 $KeyValuePairDelimiter = ';'; | |
| 771 | |
| 772 @{$This->{HeaderDataKeys}} = (); | |
| 773 %{$This->{HeaderDataKeysAndValues}} = (); | |
| 774 %{$This->{CannonicalHeaderDataKeysAndValues}} = (); | |
| 775 | |
| 776 LINE: while ($Line = TextUtil::GetTextLine(\*FPFILE)) { | |
| 777 # Is it a key/value pairs line? The first line not starting with '#' ends the header block... | |
| 778 if ($Line !~ /^#/) { | |
| 779 last LINE; | |
| 780 } | |
| 781 | |
| 782 # Take out starting hash mark before processing key/value pairs... | |
| 783 $Line =~ s/^#//; | |
| 784 if (TextUtil::IsEmpty($Line)) { | |
| 785 next LINE; | |
| 786 } | |
| 787 | |
| 788 @LineKeyValuePairs = (); | |
| 789 | |
| 790 for $KeyValuePair (split "$KeyValuePairDelimiter", $Line) { | |
| 791 ($Key, $Value) = split "$KeyValueDelimiter", $KeyValuePair; | |
| 792 | |
| 793 $Key = defined($Key) ? TextUtil::RemoveLeadingAndTrailingWhiteSpaces($Key) : ''; | |
| 794 $Value = defined($Value) ? TextUtil::RemoveLeadingAndTrailingWhiteSpaces($Value) : ''; | |
| 795 | |
| 796 if (TextUtil::IsEmpty($Key) || TextUtil::IsEmpty($Value)) { | |
| 797 carp "Warning: ${ClassName}->_RetrieveFPFileDataHeaders: Data header line containing \"Key = Value\" pairs is not valid: It must contain even number of \"Key = Value\" pairs with valid values. Ignoring data header line: \"$Line\"..."; | |
| 798 next LINE; | |
| 799 } | |
| 800 # Keys are recorded in the loop below, only after the whole line has been accepted, so an ignored line leaves no stray keys... | |
| 801 push @LineKeyValuePairs, ($Key, $Value); | |
| 802 } | |
| 803 | |
| 804 for ($Index = 0; $Index < $#LineKeyValuePairs; $Index += 2) { | |
| 805 $Key = $LineKeyValuePairs[$Index]; $Value = $LineKeyValuePairs[$Index + 1]; | |
| 806 push @{$This->{HeaderDataKeys}}, $Key; | |
| 807 $This->{HeaderDataKeysAndValues}{$Key} = $Value; | |
| 808 $This->{CannonicalHeaderDataKeysAndValues}{lc($Key)} = $Value; | |
| 809 } | |
| 810 } | |
| 811 close FPFILE; | |
| 812 | |
| 813 return $This; | |
| 814 } | |
| 815 | |
| 816 # Validate header data and keys: returns 1 and sets ValidRequiredHeaderDataKeys when FingerprintsStringType and all keys required for it are present with valid values; warns and returns 0 when FingerprintsStringType is missing or not valid, or when a required key value is rejected; croaks when any other required key is missing... | |
| 817 # | |
| 818 sub _ValidateReadHeaderDataKeysAndValues { | |
| 819 my($This) = @_; | |
| 820 my($FingerprintsStringType, $Key, $Value, @RequiredHeaderDataKeys); | |
| 821 | |
| 822 $This->{ValidRequiredHeaderDataKeys} = 0; | |
| 823 @{$This->{RequiredHeaderDataKeys}} = (); | |
| 824 | |
| 825 # Is FingerprintsStringType key present? | |
| 826 if (!$This->IsHeaderDataKeyPresent('FingerprintsStringType')) { | |
| 827 carp "Warning: ${ClassName}->_ValidateReadHeaderDataKeysAndValues: FingerprintsStringType data header key is missing in fingerprints file..."; | |
| 828 return 0; | |
| 829 } | |
| 830 $FingerprintsStringType = $This->GetHeaderDataKeyValue('FingerprintsStringType'); | |
| 831 | |
| 832 # Are all required data header keys present? The list of required keys comes from _GetRequiredHeaderDataKeys, which is not defined in the visible part of this file... | |
| 833 # | |
| 834 @RequiredHeaderDataKeys = (); | |
| 835 | |
| 836 if ($FingerprintsStringType =~ /^(FingerprintsBitVector|FingerprintsVector)$/i) { | |
| 837 push @RequiredHeaderDataKeys, $This->_GetRequiredHeaderDataKeys($FingerprintsStringType); | |
| 838 } | |
| 839 else { | |
| 840 carp "Warning: ${ClassName}->_ValidateReadHeaderDataKeysAndValues: FingerprintsStringType data header key value, $FingerprintsStringType, is not valid. Supported values: FingerprintsBitVector or FingerprintsVector..."; | |
| 841 return 0; | |
| 842 } | |
| 843 | |
| 844 for $Key (@RequiredHeaderDataKeys) { | |
| 845 if (!$This->IsHeaderDataKeyPresent($Key)) { | |
| 846 croak "Error: ${ClassName}->_ValidateReadHeaderDataKeysAndValues: Required data header key, $Key, is missing in fingerprints file..."; | |
| 847 } | |
| 848 } | |
| 849 | |
| 850 push @{$This->{RequiredHeaderDataKeys}}, @RequiredHeaderDataKeys; | |
| 851 | |
| 852 # Are all required data header key values valid? | |
| 853 # | |
| 854 if (!$This->_ValidateRequiredHeaderDataKeyValues()) { | |
| 855 return 0; | |
| 856 } | |
| 857 | |
| 858 # Process required header key values... | |
| 859 # | |
| 860 $This->_ProcessRequiredHeaderDataKeyValues(); | |
| 861 | |
| 862 $This->{ValidRequiredHeaderDataKeys} = 1; | |
| 863 | |
| 864 return 1; | |
| 865 } | |
| 866 | |
| 867 # Validate data header key values.... | |
| 868 # | |
# Check the value of every key listed in $This->{RequiredHeaderDataKeys}, in
# order. The first key that is unknown, or whose value fails its check, causes
# a warning through carp and a return value of 0; 1 is returned once all keys
# have passed.
#
sub _ValidateRequiredHeaderDataKeyValues {
  my($This) = @_;

  for my $Key (@{$This->{RequiredHeaderDataKeys}}) {
    my $Value = $This->GetHeaderDataKeyValue($Key);
    my $Problem;

    # Each branch either accepts the value and moves on to the next key or
    # records the text of the warning to be issued below...
    if ($Key =~ /^FingerprintsStringType$/i) {
      next if ($Value =~ /^(FingerprintsBitVector|FingerprintsVector)$/i);
      $Problem = "Required $Key data header key value, $Value, is not valid. Supported values: FingerprintsBitVector or FingerprintsVector...";
    }
    elsif ($Key =~ /^Size$/i) {
      next if (TextUtil::IsPositiveInteger($Value));
      $Problem = "Required $Key data header key value, $Value, is not valid. Supported values: > 0...";
    }
    elsif ($Key =~ /^BitStringFormat$/i) {
      next if ($Value =~ /^(BinaryString|HexadecimalString)$/i);
      $Problem = "Required $Key data header key value, $Value, is not valid. Supported values: BinaryString or HexadecimalString ...";
    }
    elsif ($Key =~ /^BitsOrder$/i) {
      next if ($Value =~ /^(Ascending|Descending)$/i);
      $Problem = "Required $Key data header key value, $Value, is not valid. Supported values: Ascending or Descending...";
    }
    elsif ($Key =~ /^VectorStringFormat$/i) {
      next if ($Value =~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i);
      $Problem = "Required $Key data header key value, $Value, is not valid. Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString ...";
    }
    elsif ($Key =~ /^VectorValuesType$/i) {
      next if ($Value =~ /^(OrderedNumericalValues|NumericalValues|AlphaNumericalValues)$/i);
      $Problem = "Required $Key data header key value, $Value, is not valid. Supported values: OrderedNumericalValues, NumericalValues or AlphaNumericalValues...";
    }
    elsif ($Key =~ /^Description$/i) {
      next if (!TextUtil::IsEmpty($Value));
      $Problem = "Required $Key data header key value is not valid. Supported value: A no-empty text string...";
    }
    else {
      # Key is not one of the known required keys...
      $Problem = "Required $Key data header key is not not supported...";
    }

    carp "Warning: ${ClassName}->_ValidateRequiredHeaderDataKeyValues: $Problem";
    return 0;
  }

  return 1;
}
| 932 | |
| 933 # Process required header key values for access during complete fingerprints | |
| 934 # string generation from a partial fingerprints string specified on fingerprints | |
| 935 # line... | |
| 936 # | |
# Cache the value of each required header key in
# $This->{RequiredHeaderDataKeysAndValues} and build the prefix strings that are
# later combined with a partial fingerprints string. Returns $This.
#
sub _ProcessRequiredHeaderDataKeyValues {
  my($This) = @_;

  # Empty the cache in place so that the same hash keeps being used...
  %{$This->{RequiredHeaderDataKeysAndValues}} = ();

  for my $HeaderKey (@{$This->{RequiredHeaderDataKeys}}) {
    $This->{RequiredHeaderDataKeysAndValues}{$HeaderKey} = $This->GetHeaderDataKeyValue($HeaderKey);
  }

  # All prefixes start out empty; only the ones matching the fingerprints
  # string type are filled in below...
  for my $PrefixName (qw(FingerprintsBitVectorStringPrefix FingerprintsVectorStringPrefix1 FingerprintsVectorStringPrefix2)) {
    $This->{$PrefixName} = '';
  }

  my $StringType = $This->{RequiredHeaderDataKeysAndValues}{FingerprintsStringType};

  if ($StringType =~ /^FingerprintsBitVector$/i) {
    $This->{FingerprintsBitVectorStringPrefix} = $This->_GenerateFingerprintsPrefixUsingKeys(qw(FingerprintsStringType Description Size BitStringFormat BitsOrder));
  }
  elsif ($StringType =~ /^FingerprintsVector$/i) {
    # Vector strings need two prefixes: number of values read from the data
    # line goes in between them...
    $This->{FingerprintsVectorStringPrefix1} = $This->_GenerateFingerprintsPrefixUsingKeys(qw(FingerprintsStringType Description));
    $This->{FingerprintsVectorStringPrefix2} = $This->_GenerateFingerprintsPrefixUsingKeys(qw(VectorValuesType VectorStringFormat));
  }

  return $This;
}
| 967 | |
| 968 # Generate fingerprints prefix using header keys data... | |
| 969 # | |
# Look up the cached value of each specified key, in the order given, and join
# the values using the fingerprints string delimiter. Returns the joined string.
#
sub _GenerateFingerprintsPrefixUsingKeys {
  my($This, @Keys) = @_;

  my $Separator = Fingerprints::FingerprintsStringUtil::GetFingeprintsStringDelimiter();

  return join($Separator, map { $This->{RequiredHeaderDataKeysAndValues}{$_} } @Keys);
}
| 983 | |
| 984 # Get required header data keys... | |
| 985 # | |
# Return the list of required data header keys for the specified fingerprints
# string type: FingerprintsBitVector or FingerprintsVector (case insensitive).
#
# The type has to match in its entirety; values which merely contain one of
# the type names, e.g. a FingerprintsStringMode value like
# FingerprintsVectorString, are rejected. For any unsupported type, a warning
# is issued via carp and an empty list is returned.
#
sub _GetRequiredHeaderDataKeys {
  my($This, $FingerprintsStringType) = @_;
  my(@RequiredKeys);

  @RequiredKeys = ();

  # Anchor both ends to stay consistent with the validation performed on
  # FingerprintsStringType everywhere else in this class...
  if ($FingerprintsStringType =~ /^FingerprintsBitVector$/i) {
    push @RequiredKeys, qw(FingerprintsStringType Description Size BitStringFormat BitsOrder);
  }
  elsif ($FingerprintsStringType =~ /^FingerprintsVector$/i) {
    push @RequiredKeys, qw(FingerprintsStringType Description VectorStringFormat VectorValuesType);
  }
  else {
    carp "Warning: ${ClassName}->_GetRequiredHeaderDataKeys: FingerprintsStringType value, $FingerprintsStringType, is not valid. Supported values: FingerprintsBitVector or FingerprintsVector...";
  }

  return @RequiredKeys;
}
| 1004 | |
| 1005 # Validate fingerprints string mode information... | |
| 1006 # | |
# Make sure the FingerprintsStringType found in the file header is compatible
# with the FingerprintsStringMode requested for reading and record the outcome
# in the object. Returns 1 on success; on a mismatch or an unsupported type a
# warning is issued via carp, 0 is returned and the mode related object data
# is left in its reset state.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;

  # Reset everything this method is responsible for...
  $This->{ValidFingerprintsStringMode} = 0;
  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;
  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  my $HeaderType = $This->GetHeaderDataKeyValue('FingerprintsStringType');
  my $HeaderDescription = $This->GetHeaderDataKeyValue('Description');

  my($BitVectorMode, $VectorMode, $FirstType) = (0, 0, '');
  my $RequestedMode = $This->{FingerprintsStringMode};

  if ($RequestedMode =~ /^FingerprintsBitVectorString$/i) {
    if ($HeaderType !~ /^FingerprintsBitVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: Fingerprints string data type, $HeaderType, doesn't correspond to, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    ($BitVectorMode, $FirstType) = (1, 'FingerprintsBitVector');
  }
  elsif ($RequestedMode =~ /^FingerprintsVectorString$/i) {
    if ($HeaderType !~ /^FingerprintsVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: Fingerprints string data type, $HeaderType, doesn't correspond to, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    ($VectorMode, $FirstType) = (1, 'FingerprintsVector');
  }
  elsif ($HeaderType =~ /^FingerprintsBitVector$/i) {
    # AutoDetect mode: type is taken from the header as it is...
    ($BitVectorMode, $FirstType) = (1, $HeaderType);
  }
  elsif ($HeaderType =~ /^FingerprintsVector$/i) {
    # AutoDetect mode: type is taken from the header as it is...
    ($VectorMode, $FirstType) = (1, $HeaderType);
  }
  else {
    carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: Fingerprints string data type, $HeaderType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintBitVector or FingerprintsVector...";
    return 0;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  $This->{FingerprintsBitVectorStringMode} = $BitVectorMode;
  $This->{FingerprintsVectorStringMode} = $VectorMode;

  $This->{FirstFingerprintsStringType} = $FirstType;
  $This->{FirstFingerprintsStringDescription} = $HeaderDescription;

  return 1;
}
| 1071 | |
| 1072 # Write fingerprints string generated from specified fingerprints - fingerprints-bit vector or | |
| 1073 # fingerprints vector - object and other data to FP file... | |
| 1074 # | |
# Write one data line for the specified fingerprints object and compound ID.
# The fingerprints string is generated from the object, reduced to its partial
# form and then written out. Returns $This.
#
sub WriteFingerprints {
  my $This = shift;
  my($FingerprintsObject, $CompoundID) = @_;

  # Clear data left over from the previous line before attaching new data...
  $This->_InitializeWriteDataLine();
  $This->{FingerprintsObject} = $FingerprintsObject;
  $This->SetCompoundID($CompoundID);

  # Complete fingerprints string first; partial string is derived from it...
  $This->_GenerateFingerprintsString();
  $This->_GeneratePartialFingerprintsStringFromFingerprintsString();

  $This->_WriteDataLine();

  return $This;
}
| 1096 | |
| 1097 # Write fingerprints string and other data to FP file... | |
| 1098 # | |
| 1099 # Notes: | |
| 1100 # o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat values | |
| 1101 # are ignored during writing of fingerprints and it's written to the file as it is. | |
| 1102 # o FingerprintsString is a regular fingerprints string as opposed to a partial fingerprints | |
| 1103 # string. | |
| 1104 # | |
# Write one data line for the specified complete fingerprints string and
# compound ID. A fingerprints object is generated from the string, the string
# is reduced to its partial form and then written out. Returns $This.
#
sub WriteFingerprintsString {
  my $This = shift;
  my($FingerprintsString, $CompoundID) = @_;

  # Clear data left over from the previous line before attaching new data...
  $This->_InitializeWriteDataLine();
  $This->{FingerprintsString} = $FingerprintsString;
  $This->SetCompoundID($CompoundID);

  # Object and partial string are both derived from the complete string...
  $This->_GenerateFingerprintsObject();
  $This->_GeneratePartialFingerprintsStringFromFingerprintsString();

  $This->_WriteDataLine();

  return $This;
}
| 1126 | |
| 1127 # Initialize data line for writing... | |
| 1128 # | |
# Set all per-line data - data line, compound ID, fingerprints object, complete
# and partial fingerprints strings - to undef. Returns $This.
#
sub _InitializeWriteDataLine {
  my($This) = @_;

  # Assigning an empty list to the slice leaves each key present with an
  # undefined value...
  @{$This}{qw(DataLine CompoundID FingerprintsObject FingerprintsString PartialFingerprintsString)} = ();

  return $This;
}
| 1142 | |
| 1143 # Write fingerprints data line... | |
| 1144 # | |
# Write the current compound ID and partial fingerprints string, separated by
# a space, as one line to the file handle. Header lines are written first when
# this is the first data line. Line count and last data line kept in the object
# are updated. Returns $This.
#
sub _WriteDataLine {
  my($This) = @_;

  $This->_ProcessFirstDataLineWrite() if ($This->{FirstDataLineIO});

  my $Line = join(' ', $This->{CompoundID}, $This->{PartialFingerprintsString});

  $This->{LineNum} += 1;
  print {$This->{FileHandle}} "$Line\n";

  # Keep the line around without its trailing new line...
  $This->{DataLine} = $Line;

  return $This;
}
| 1164 | |
| 1165 # Process first write... | |
| 1166 # | |
# Clear the first data line flag and, for files opened in Write mode, write
# out the header lines. Nothing is written for any other mode, e.g. append.
# Returns $This.
#
sub _ProcessFirstDataLineWrite {
  my($This) = @_;

  $This->{FirstDataLineIO} = 0;

  # Header is skipped unless the mode is Write...
  return $This unless ($This->GetMode() =~ /^Write$/i);

  $This->_WritePackageAndTimeStampHeaderKeys();
  $This->_WriteRequiredHeaderDataKeys();

  return $This;
}
| 1181 | |
| 1182 # Write out package and time stamp information... | |
| 1183 # | |
# Write the package, release date and time stamp header lines to the file
# handle, incrementing the line count kept in the object for each line
# written. Returns $This.
#
sub _WritePackageAndTimeStampHeaderKeys {
  my($This) = @_;

  my $Out = $This->{FileHandle};

  # Every header line goes through here to keep the line count in step...
  my $WriteHeaderLine = sub {
    my($Text) = @_;

    $This->{LineNum} += 1;
    print $Out "$Text\n";
  };

  # Package information...
  $WriteHeaderLine->("# Package = " . PackageInfo::GetPackageName() . " " . PackageInfo::GetVersionNumber());
  $WriteHeaderLine->("# Release Date = " . PackageInfo::GetReleaseDate());

  # Timestamp information, separated from package information by an empty
  # header line...
  $WriteHeaderLine->("#");
  $WriteHeaderLine->("# TimeStamp = " . TimeUtil::FPFileTimeStamp());

  return $This;
}
| 1209 | |
| 1210 # Write out required header data keys... | |
| 1211 # | |
# Generate the required header data keys and values for the current
# fingerprints string and write them out as "# key = value" header lines. An
# empty header line is written before the keys, after the
# FingerprintsStringType key and after the last key. Returns $This.
#
sub _WriteRequiredHeaderDataKeys {
  my($This) = @_;

  my $Out = $This->{FileHandle};

  $This->_GenerateWriteRequiredHeaderDataKeys();

  # Every header line goes through here to keep the line count in step...
  my $WriteHeaderLine = sub {
    my($Text) = @_;

    $This->{LineNum} += 1;
    print $Out "$Text\n";
  };

  $WriteHeaderLine->("#");

  for my $HeaderKey (@{$This->{RequiredHeaderDataKeys}}) {
    my $HeaderValue = $This->{RequiredHeaderDataKeysAndValues}{$HeaderKey};

    $WriteHeaderLine->("# $HeaderKey = $HeaderValue");

    if ($HeaderKey =~ /^FingerprintsStringType$/i) {
      $WriteHeaderLine->("#");
    }
  }

  $WriteHeaderLine->("#");

  return $This;
}
| 1240 | |
# Generate required header data keys and values using the current fingerprints
# string. The string has to start with the type name matching the active
# fingerprints string mode; otherwise, croak is called. Returns $This.
#
sub _GenerateWriteRequiredHeaderDataKeys {
  my($This) = @_;

  my $UseBitVectorString = ($This->{FingerprintsBitVectorStringMode} && ($This->{FingerprintsString} =~ /^FingerprintsBitVector/i)) ? 1 : 0;
  my $UseVectorString = (!$UseBitVectorString && $This->{FingerprintsVectorStringMode} && ($This->{FingerprintsString} =~ /^FingerprintsVector/i)) ? 1 : 0;

  if (!($UseBitVectorString || $UseVectorString)) {
    croak "Error: ${ClassName}->_GenerateWriteRequiredHeaderDataKeys: Required header data keys can't be generated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, doesn't correspond to type of first FingerprintsString: $This->{FingerprintsString}...";
  }

  if ($UseBitVectorString) {
    $This->_GenerateWriteRequiredHeaderDataKeysForBitVectorString();
  }
  else {
    $This->_GenerateWriteRequiredHeaderDataKeysForVectorString();
  }

  return $This;
}
| 1256 | |
| 1257 # Generate required data header keys and values for writing fingerprints bit vector string... | |
| 1258 # | |
# Set up the required header data keys for a fingerprints bit vector string
# and assign to each key the corresponding value retrieved from the current
# fingerprints string. croak is called for a required key which has no
# corresponding value. Returns $This.
#
sub _GenerateWriteRequiredHeaderDataKeysForBitVectorString {
  my($This) = @_;

  @{$This->{RequiredHeaderDataKeys}} = $This->_GetRequiredHeaderDataKeys('FingerprintsBitVector');

  my($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString});

  # Key name pattern along with the value to be stored for a matching key...
  my @KeyPatternsAndValues = (
    [qr/^FingerprintsStringType$/i, $VectorType],
    [qr/^Description$/i, $Description],
    [qr/^Size$/i, $Size],
    [qr/^BitStringFormat$/i, $BitStringFormat],
    [qr/^BitsOrder$/i, $BitsOrder],
  );

  %{$This->{RequiredHeaderDataKeysAndValues}} = ();

  for my $Key (@{$This->{RequiredHeaderDataKeys}}) {
    my $KnownKey = 0;

    ENTRY: for my $Entry (@KeyPatternsAndValues) {
      my($KeyPattern, $KeyValue) = @{$Entry};

      next ENTRY unless ($Key =~ $KeyPattern);

      $This->{RequiredHeaderDataKeysAndValues}{$Key} = $KeyValue;
      $KnownKey = 1;
      last ENTRY;
    }

    if (!$KnownKey) {
      croak "Error: ${ClassName}->_GenerateWriteRequiredHeaderDataKeysForBitVectorString: Required header data key, $Key, value can't be generated: It's not a known key ...";
    }
  }

  return $This;
}
| 1298 | |
| 1299 # Generate required data header keys and values for writing fingerprints vector string... | |
| 1300 # | |
# Set up the required header data keys for a fingerprints vector string and
# assign to each key the corresponding value retrieved from the current
# fingerprints string. croak is called for a required key which has no
# corresponding value. Returns $This.
#
sub _GenerateWriteRequiredHeaderDataKeysForVectorString {
  my($This) = @_;

  @{$This->{RequiredHeaderDataKeys}} = $This->_GetRequiredHeaderDataKeys('FingerprintsVector');

  # Number of values is part of each data line and not of the header; it's
  # retrieved here only to get to the values following it...
  my($VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString});

  # Key name pattern along with the value to be stored for a matching key...
  my @KeyPatternsAndValues = (
    [qr/^FingerprintsStringType$/i, $VectorType],
    [qr/^Description$/i, $Description],
    [qr/^VectorValuesType$/i, $VectorValuesType],
    [qr/^VectorStringFormat$/i, $VectorStringFormat],
  );

  %{$This->{RequiredHeaderDataKeysAndValues}} = ();

  for my $Key (@{$This->{RequiredHeaderDataKeys}}) {
    my $KnownKey = 0;

    ENTRY: for my $Entry (@KeyPatternsAndValues) {
      my($KeyPattern, $KeyValue) = @{$Entry};

      next ENTRY unless ($Key =~ $KeyPattern);

      $This->{RequiredHeaderDataKeysAndValues}{$Key} = $KeyValue;
      $KnownKey = 1;
      last ENTRY;
    }

    if (!$KnownKey) {
      croak "Error: ${ClassName}->_GenerateWriteRequiredHeaderDataKeysForVectorString: Required header data key, $Key, value can't be generated: It's not a known key ...";
    }
  }

  return $This;
}
| 1336 | |
| 1337 | |
| 1338 # Get ready for writing fingerprints FP file... | |
| 1339 # | |
# Get the object ready for writing: refuse to go on, via croak, when the file
# exists and overwriting hasn't been requested; set the fingerprints string
# mode flags using FingerprintsStringMode; and set defaults for the format
# options relevant to the active mode. Returns $This.
#
sub _PrepareForWritingFingerprintsFPFileData {
  my($This) = @_;

  my $FPFile = $This->{Name};
  if (!$This->{Overwrite} && -e $FPFile) {
    croak "Error: ${ClassName}->_PrepareForWritingFingerprintsFPFileData: File, $FPFile, already exist. Use overwrite option...";
  }

  # Setup FingerprintsStringMode status; any other mode value leaves all
  # three flags turned off...
  my $RequestedMode = $This->{FingerprintsStringMode};
  my $BitVectorMode = ($RequestedMode =~ /^FingerprintsBitVectorString$/i) ? 1 : 0;
  my $VectorMode = (!$BitVectorMode && $RequestedMode =~ /^FingerprintsVectorString$/i) ? 1 : 0;

  $This->{FingerprintsBitVectorStringMode} = $BitVectorMode;
  $This->{FingerprintsVectorStringMode} = $VectorMode;
  $This->{ValidFingerprintsStringMode} = ($BitVectorMode || $VectorMode) ? 1 : 0;

  if ($BitVectorMode) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
  }
  elsif ($VectorMode) {
    $This->_SetDefaultVectorStringFormat();
  }

  return $This;
}
| 1376 | |
| 1377 # Set default value for bit string format... | |
| 1378 # | |
# Use the default bit string format unless a format has already been set to a
# true value. Returns $This.
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}
| 1388 | |
| 1389 # Set default value for bits order... | |
| 1390 # | |
# Use the default bits order unless an order has already been set to a true
# value. Returns $This.
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  $This->{BitsOrder} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();

  return $This;
}
| 1400 | |
| 1401 # Set default value for vector string format... | |
| 1402 # | |
# Use the default vector string format for the current fingerprints object
# unless a format has already been set to a true value. Nothing is done when
# there is no fingerprints object. Returns $This.
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  # Default is looked up using the object; without it, leave format alone...
  if ($This->{FingerprintsObject}) {
    $This->{VectorStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});
  }

  return $This;
}
| 1412 | |
| 1413 # Generate fingerprints object using current fingerprints string... | |
| 1414 # | |
# Parse the current fingerprints string into a fingerprints object using the
# parser corresponding to the active fingerprints string mode.
#
# Returns $This when the string is empty - object stays undefined - or after
# parsing; undef is returned when neither bit vector nor vector mode is active.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  return $This unless ($This->{FingerprintsString});

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});
    return $This;
  }

  # No valid mode to pick a parser...
  return undef;
}
| 1436 | |
| 1437 # Generate fingerprints string using current fingerprints object... | |
| 1438 # | |
# Generate the complete fingerprints string from the current fingerprints
# object using the format options corresponding to the active fingerprints
# string mode. The string is left empty when there is no object or no active
# mode. Returns $This.
#
sub _GenerateFingerprintsString {
  my($This) = @_;

  $This->{FingerprintsString} = '';

  my $Object = $This->{FingerprintsObject};
  return $This unless ($Object);

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($Object, $This->{BitStringFormat}, $This->{BitsOrder});
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($Object, $This->{VectorStringFormat});
  }

  return $This;
}
| 1457 | |
| 1458 # Generate fingerprints string using partial fingerprints string and header keys data... | |
| 1459 # | |
| 1460 # Notes: | |
| 1461 # o FP file fingerprints data line only contain partial fingerprints data which | |
| 1462 # can't be used directly to create fingerprints bit-vector or vector objects | |
| 1463 # using functions available in FingerprintsStringUtil.pm module | |
| 1464 # | |
# Rebuild the complete fingerprints string from the current partial
# fingerprints string and the prefixes generated from header data keys.
#
# The complete string is left empty when the partial string is undefined or
# empty, no fingerprints string mode is active or, in vector mode, the partial
# string doesn't contain the number of values and non-empty vector data
# separated by the fingerprints string delimiter. Returns $This.
#
sub _GenerateFingerprintsStringFromPartialFingerprintsString {
  my($This) = @_;
  my($FPStringDelim, $PartialString);

  $This->{FingerprintsString} = '';

  # Check length instead of truth: a partial string of "0" is data...
  $PartialString = $This->{PartialFingerprintsString};
  if (!(defined($PartialString) && length($PartialString))) {
    return $This;
  }

  $FPStringDelim = Fingerprints::FingerprintsStringUtil::GetFingeprintsStringDelimiter();

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = $This->{FingerprintsBitVectorStringPrefix} . $FPStringDelim . $PartialString;
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    my($NumOfValues, $VectorStringData);

    # Delimiter is a literal string and not a pattern; quote it. Number of
    # values ends at the first delimiter and the rest is vector data...
    ($NumOfValues, $VectorStringData) = $PartialString =~ /^(.*?)\Q$FPStringDelim\E(.*?)$/;

    # Check length instead of truth: vector data for a single value of 0 is
    # "0" and it's valid...
    if (!(defined($NumOfValues) && defined($VectorStringData) && length($VectorStringData))) {
      return $This;
    }

    $This->{FingerprintsString} = $This->{FingerprintsVectorStringPrefix1} . $FPStringDelim . $NumOfValues . $FPStringDelim . $This->{FingerprintsVectorStringPrefix2} . $FPStringDelim . $VectorStringData;
  }

  return $This;
}
| 1493 | |
| 1494 # Generate partial fingerprints string using fingerprints string and header keys data... | |
| 1495 # | |
| 1496 # Notes: | |
| 1497 # o FP file fingerprints data line only contain partial fingerprints data which | |
| 1498 # can't be used directly to create fingerprints bit-vector or vector objects | |
| 1499 # using functions available in FingerprintsStringUtil.pm module | |
| 1500 # | |
# Reduce the current complete fingerprints string to the partial form written
# on a data line: values already covered by header data keys are dropped. The
# partial string is left empty when there is no complete string or no active
# fingerprints string mode. Returns $This.
#
sub _GeneratePartialFingerprintsStringFromFingerprintsString {
  my($This) = @_;

  $This->{PartialFingerprintsString} = '';

  return $This unless ($This->{FingerprintsString});

  if ($This->{FingerprintsBitVectorStringMode}) {
    # Bit vector string is the sixth value; the five values before it go into
    # header...
    my @StringValues = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString});

    $This->{PartialFingerprintsString} = $StringValues[5];
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    my $Delim = Fingerprints::FingerprintsStringUtil::GetFingeprintsStringDelimiter();

    # Only number of values and up to two vector strings are kept...
    my(undef, undef, $NumOfValues, undef, undef, $VectorString1, $VectorString2) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringValues($This->{FingerprintsString});

    my $VectorString = $VectorString1;
    if (!TextUtil::IsEmpty($VectorString2)) {
      $VectorString = "${VectorString1}${Delim}${VectorString2}";
    }

    $This->{PartialFingerprintsString} = $NumOfValues . $Delim . $VectorString;
  }

  return $This;
}
| 1529 | |
| 1530 # Is it a fingerprints file? | |
# Check the file type of the specified file name against the fpf and fp file
# extensions. It may be called as a function with a file name or as a method
# with a file name. Returns the status from FileUtil::CheckFileType.
#
sub IsFingerprintsFPFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # File name is the second parameter only when the first one is an object of
  # this class...
  my $CalledWithObject = ((@_ == 2) && (_IsFingerprintsFPFileIO($FirstParameter))) ? 1 : 0;
  my $FileName = $CalledWithObject ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "fpf fp");

  return $Status;
}
| 1547 | |
| 1548 # Is it a FingerprintsFPFileIO object? | |
# Return 1 when the specified value is a blessed reference which isa
# $ClassName; otherwise, return 0.
#
sub _IsFingerprintsFPFileIO {
  my($Object) = @_;

  # Anything not blessed can't be an object of this class...
  return 0 unless (Scalar::Util::blessed($Object));

  return $Object->isa($ClassName) ? 1 : 0;
}
| 1554 | |
| 1555 1; | |
| 1556 | |
| 1557 __END__ | |
| 1558 | |
| 1559 =head1 NAME | |
| 1560 | |
| 1561 FingerprintsFPFileIO | |
| 1562 | |
| 1563 =head1 SYNOPSIS | |
| 1564 | |
| 1565 use FileIO::FingerprintsFPFileIO; | |
| 1566 | |
| 1567 use FileIO::FingerprintsFPFileIO qw(:all); | |
| 1568 | |
| 1569 =head1 DESCRIPTION | |
| 1570 | |
| 1571 B<FingerprintsFPFileIO> class provides the following methods: | |
| 1572 | |
| 1573 new, GetFingerprints, GetFingerprintsString, GetHeaderDataKeyValue, | |
| 1574 GetHeaderDataKeys, GetHeaderDataKeysAndValues, GetPartialFingerprintsString, | |
| 1575 GetRequiredHeaderDataKeys, GetRequiredHeaderDataKeysAndValues, | |
| 1576 IsFingerprintsDataValid, IsFingerprintsFPFile, IsFingerprintsFileDataValid, | |
| 1577 IsHeaderDataKeyPresent, Next, Read, SetBitStringFormat, SetBitsOrder, | |
| 1578 SetCompoundID, SetDetailLevel, SetFingerprints, SetFingerprintsString, | |
| 1579 SetFingerprintsStringMode, SetPartialFingerprintsString, SetVectorStringFormat, | |
| 1580 WriteFingerprints, WriteFingerprintsString | |
| 1581 | |
| 1582 The following methods can also be used as functions: | |
| 1583 | |
| 1584 IsFingerprintsFPFile | |
| 1585 | |
| 1586 B<FingerprintsFPFileIO> class is derived from I<FileIO> class and uses its methods to support | |
| 1587 generic file related functionality. | |
| 1588 | |
| 1589 The MayaChemTools fingerprints file (FP) format with B<.fpf> or B<.fp> file extensions supports | |
| 1590 two types of fingerprints data: fingerprints bit-vectors and fingerprints vectors. | |
| 1591 | |
| 1592 Example of FP file format containing fingerprints bit-vector string data: | |
| 1593 | |
| 1594 # | |
| 1595 # Package = MayaChemTools 7.4 | |
| 1596 # ReleaseDate = Oct 21, 2010 | |
| 1597 # | |
| 1598 # TimeStamp = Mon Mar 7 15:14:01 2011 | |
| 1599 # | |
| 1600 # FingerprintsStringType = FingerprintsBitVector | |
| 1601 # | |
| 1602 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... | |
| 1603 # Size = 1024 | |
| 1604 # BitStringFormat = HexadecimalString | |
| 1605 # BitsOrder = Ascending | |
| 1606 # | |
| 1607 Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510... | |
| 1608 Cmpd2 000000249400840040100042011001001980410c000000001010088001120... | |
| 1609 ... ... | |
| 1610 ... .. | |
| 1611 | |
| 1612 Example of FP file format containing fingerprints vector string data: | |
| 1613 | |
| 1614 # | |
| 1615 # Package = MayaChemTools 7.4 | |
| 1616 # ReleaseDate = Oct 21, 2010 | |
| 1617 # | |
| 1618 # TimeStamp = Mon Mar 7 15:14:01 2011 | |
| 1619 # | |
| 1620 # FingerprintsStringType = FingerprintsVector | |
| 1621 # | |
| 1622 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... | |
| 1623 # VectorStringFormat = IDsAndValuesString | |
| 1624 # VectorValuesType = NumericalValues | |
| 1625 # | |
| 1626 Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C: | |
| 1627 N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...; | |
| 1628 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2 | |
| 1629 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ... | |
| 1630 Cmpd2 103;C N O C=N C=O CC CN CO CC=O CCC CCN CCO CNC N=CN NC=O NCN O=C | |
| 1631 O C CC=O CCCC CCCN CCCO CCNC CNC=N CNC=O CNCN CCCC=O CCCCC CCCCN CC...; | |
| 1632 15 4 4 1 2 13 5 2 2 15 5 3 2 2 1 1 1 2 17 7 6 5 1 1 1 2 15 8 5 7 2 2 2 2 | |
| 1633 1 2 1 1 3 15 7 6 8 3 4 4 3 2 2 1 2 3 14 2 4 7 4 4 4 4 1 1 1 2 1 1 1 ... | |
| 1634 ... ... | |
| 1635 ... ... | |
| 1636 | |
| 1637 B<FP> file data format consists of two main sections: header section and fingerprints string | |
| 1638 data section. The header section lines start with # and the first line not starting with # represents | |
| 1639 the start of fingerprints string data section. The header section contains both the required and | |
| 1640 optional information which is specified as key = value pairs. The required information | |
| 1641 describes fingerprints bit-vector and vector strings and is used to generate fingerprints objects; | |
| 1642 the optional information is ignored during generation of fingerprints objects. | |
| 1643 | |
| 1644 The key = value data specification in the header section and its processing follows these | |
| 1645 rules: | |
| 1646 | |
| 1647 o Leading and trailing spaces for key = value pairs are ignored | |
| 1648 o Key and value strings may contain spaces | |
| 1649 o Multiple key = value pairs on a single line are delimited by semicolons | |
| 1650 | |
| 1651 The default optional header data section key = value pairs are: | |
| 1652 | |
| 1653 # Package = MayaChemTools 7.4 | |
| 1654 # ReleaseDate = Oct 21, 2010 | |
| 1655 | |
| 1656 The B<FingerprintsStringType> key is required data header key for both fingerprints bit-vector | |
| 1657 and vector strings. Possible key values: I<FingerprintsBitVector or FingerprintsVector>. | |
| 1658 For example: | |
| 1659 | |
| 1660 # FingerprintsStringType = FingerprintsBitVector | |
| 1661 | |
| 1662 The required data header keys for fingerprints bit-vector string are: B<Description, Size, | |
| 1663 BitStringFormat, and BitsOrder>. Possible values for B<BitStringFormat>: I<HexadecimalString | |
| 1664 or BinaryString>. Possible values for B<BitsOrder>: I<Ascending or Descending>. The B<Description> | |
| 1665 key contains information about various parameters used to generate fingerprints bit-vector | |
| 1666 string. The B<Size> corresponds to number of fingerprints bits and is always less than or equal | |
| 1667 to number of bits in bit-vector string which might contain extra bits at the end to round off the | |
| 1668 size to make it multiple of 8. For example: | |
| 1669 | |
| 1670 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... | |
| 1671 # Size = 1024 | |
| 1672 # BitStringFormat = HexadecimalString | |
| 1673 # BitsOrder = Ascending | |
| 1674 | |
| 1675 The required data header keys for fingerprints vector string are: B<Description, VectorStringFormat, | |
| 1676 and VectorValuesType>. Possible values for B<VectorStringFormat>: I<IDsAndValuesString, | |
| 1677 IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString or ValuesString>. | |
| 1678 Possible values for B<VectorValuesType>: I<NumericalValues, OrderedNumericalValues or | |
| 1679 AlphaNumericalValues>. The B<Description> key contains information about various parameters used | |
| 1680 to generate fingerprints vector string. For example: | |
| 1681 | |
| 1682 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:... | |
| 1683 # VectorStringFormat = IDsAndValuesString | |
| 1684 # VectorValuesType = NumericalValues | |
| 1685 | |
| 1686 The fingerprints data section for fingerprints bit-vector string contains data in the following | |
| 1687 format: | |
| 1688 | |
| 1689 ... ... | |
| 1690 CmpdID FingerprintsPartialBitVectorString | |
| 1691 ... ... | |
| 1692 | |
| 1693 For example: | |
| 1694 | |
| 1695 ... ... | |
| 1696 Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510... | |
| 1697 ... ... | |
| 1698 | |
| 1699 The fingerprints data section for fingerprints vector string contains data in the following | |
| 1700 format: | |
| 1701 | |
| 1702 ... ... | |
| 1703 CmpdID Size;FingerprintsPartialVectorString | |
| 1704 ... ... | |
| 1705 | |
| 1706 For example: | |
| 1707 | |
| 1708 ... ... | |
| 1709 Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C: | |
| 1710 N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...; | |
| 1711 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2 | |
| 1712 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ... | |
| 1713 ... ... | |
| 1714 | |
| 1715 Unlike fingerprints bit-vector string, I<Size> is specified for each partial fingerprints vector string: | |
| 1716 It may change from molecule to molecule for same type of fingerprints. | |
| 1717 | |
| 1718 Value IDs are optional for fingerprints vector string containing I<OrderedNumericalValues or | |
| 1719 AlphaNumericalValues>; however, they must be present for I<NumericalValues>. Due to | |
| 1720 various possible values for B<VectorStringFormat>, the fingerprints data section for fingerprints | |
| 1721 vector string supports the following types of data formats: | |
| 1722 | |
| 1723 CmpdID Size;ID1 ID2 ID3...;Value1 Value2 Value3... | |
| 1724 CmpdID Size;ID1 Value1 ID2 Value2 ID3 Value3... ... | |
| 1725 CmpdID Size;ValuesAndIDsString: Value1 Value2 Value3...;ID1 ID2 ID3... | |
| 1726 CmpdID Size;ValuesAndIDsPairsString: Value1 ID1 Value2 ID2 Value3 ID3... ... | |
| 1727 CmpdID Size;Value1 Value2 Value3 ... | |
| 1728 | |
| 1729 However, all the fingerprints vector string data present in FP file must correspond to only | |
| 1730 one of the formats shown above; multiple data formats in the same file are not allowed. | |
| 1731 | |
| 1732 The current release of MayaChemTools supports the following types of fingerprint | |
| 1733 bit-vector and vector strings: | |
| 1734 | |
| 1735 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi | |
| 1736 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT | |
| 1737 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X | |
| 1738 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A | |
| 1739 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2 | |
| 1740 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B... | |
| 1741 | |
| 1742 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS | |
| 1743 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2 | |
| 1744 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1 | |
| 1745 O.X1.BO2;2 4 14 3 10 1 1 1 3 2 | |
| 1746 | |
| 1747 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume | |
| 1748 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F | |
| 1749 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1 | |
| 1750 | |
| 1751 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN | |
| 1752 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C | |
| 1753 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N | |
| 1754 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8 | |
| 1755 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1 | |
| 1756 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0... | |
| 1757 | |
| 1758 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs | |
| 1759 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN | |
| 1760 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3 | |
| 1761 .024 -2.270 | |
| 1762 | |
| 1763 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues; | |
| 1764 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435 | |
| 1765 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1 | |
| 1766 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 1767 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 1768 | |
| 1769 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi | |
| 1770 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 | |
| 1771 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 | |
| 1772 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 | |
| 1773 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 | |
| 1774 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... | |
| 1775 | |
| 1776 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes | |
| 1777 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 | |
| 1778 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
| 1779 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; | |
| 1780 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 | |
| 1781 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 | |
| 1782 | |
| 1783 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
| 1784 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 | |
| 1785 0000000001010000000110000011000000000000100000000000000000000000100001 | |
| 1786 1000000110000000000000000000000000010011000000000000000000000000010000 | |
| 1787 0000000000000000000000000010000000000000000001000000000000000000000000 | |
| 1788 0000000000010000100001000000000000101000000000000000100000000000000... | |
| 1789 | |
| 1790 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu | |
| 1791 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8 | |
| 1792 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567 | |
| 1793 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012 | |
| 1794 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455 | |
| 1795 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404... | |
| 1796 | |
| 1797 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp | |
| 1798 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184 | |
| 1799 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450 | |
| 1800 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430 | |
| 1801 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134 | |
| 1802 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566... | |
| 1803 | |
| 1804 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000 | |
| 1805 0000000000000000000000000000000001001000010010000000010010000000011100 | |
| 1806 0100101010111100011011000100110110000011011110100110111111111111011111 | |
| 1807 11111111111110111000 | |
| 1808 | |
| 1809 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011 | |
| 1810 1110011111100101111111000111101100110000000000000011100010000000000000 | |
| 1811 0000000000000000000000000000000000000000000000101000000000000000000000 | |
| 1812 0000000000000000000000000000000000000000000000000000000000000000000000 | |
| 1813 0000000000000000000000000000000000000011000000000000000000000000000000 | |
| 1814 0000000000000000000000000000000000000000 | |
| 1815 | |
| 1816 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri | |
| 1817 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 1818 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 | |
| 1819 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0 | |
| 1820 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1 | |
| 1821 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1 | |
| 1822 | |
| 1823 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri | |
| 1824 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0 | |
| 1825 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0 | |
| 1826 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 1827 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0 | |
| 1828 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... | |
| 1829 | |
| 1830 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng | |
| 1831 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110 | |
| 1832 0100010101011000101001011100110001000010001001101000001001001001001000 | |
| 1833 0010110100000111001001000001001010100100100000000011000000101001011100 | |
| 1834 0010000001000101010100000100111100110111011011011000000010110111001101 | |
| 1835 0101100011000000010001000011000010100011101100001000001000100000000... | |
| 1836 | |
| 1837 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength | |
| 1838 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2 | |
| 1839 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X | |
| 1840 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1 | |
| 1841 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO | |
| 1842 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C.... | |
| 1843 | |
| 1844 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt | |
| 1845 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1 | |
| 1846 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N | |
| 1847 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1 | |
| 1848 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR | |
| 1849 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ... | |
| 1850 | |
| 1851 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD | |
| 1852 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1 | |
| 1853 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3. | |
| 1854 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...; | |
| 1855 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1 | |
| 1856 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1... | |
| 1857 | |
| 1858 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi | |
| 1859 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar | |
| 1860 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H | |
| 1861 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...; | |
| 1862 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4 | |
| 1863 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ... | |
| 1864 | |
| 1865 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3 | |
| 1866 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4- | |
| 1867 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO | |
| 1868 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...; | |
| 1869 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1 | |
| 1870 | |
| 1871 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica | |
| 1872 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC | |
| 1873 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC- | |
| 1874 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...; | |
| 1875 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2 | |
| 1876 | |
| 1877 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M | |
| 1878 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1 | |
| 1879 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1 | |
| 1880 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1 | |
| 1881 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....; | |
| 1882 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2 | |
| 1883 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8... | |
| 1884 | |
| 1885 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1 | |
| 1886 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C | |
| 1887 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3- | |
| 1888 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2 | |
| 1889 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C. | |
| 1890 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7... | |
| 1891 | |
| 1892 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min | |
| 1893 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H | |
| 1894 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2- | |
| 1895 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H | |
| 1896 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...; | |
| 1897 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10 | |
| 1898 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1 | |
| 1899 | |
| 1900 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist | |
| 1901 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0 | |
| 1902 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1 | |
| 1903 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0 | |
| 1904 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0 | |
| 1905 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18... | |
| 1906 | |
| 1907 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize: | |
| 1908 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1- | |
| 1909 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1 | |
| 1910 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1- | |
| 1911 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...; | |
| 1912 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23 | |
| 1913 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1 | |
| 1914 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ... | |
| 1915 | |
| 1916 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD | |
| 1917 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106 | |
| 1918 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0 | |
| 1919 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26 | |
| 1920 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0 | |
| 1921 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ... | |
| 1922 | |
| 1923 =head2 METHODS | |
| 1924 | |
| 1925 =over 4 | |
| 1926 | |
| 1927 =item B<new> | |
| 1928 | |
| 1929 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO(%IOParameters); | |
| 1930 | |
| 1931 Using specified I<IOParameters> names and values hash, B<new> method creates a new | |
| 1932 object and returns a reference to a newly created B<FingerprintsFPFileIO> object. By default, | |
| 1933 the following properties are initialized during I<Read> mode: | |
| 1934 | |
| 1935 Name = ''; | |
| 1936 Mode = 'Read'; | |
| 1937 Status = 0; | |
| 1938 FingerprintsStringMode = 'AutoDetect'; | |
| 1939 ValidateData = 1; | |
| 1940 DetailLevel = 1; | |
| 1941 | |
| 1942 During I<Write> mode, the following properties get initialized by default: | |
| 1943 | |
| 1944 FingerprintsStringMode = undef; | |
| 1945 | |
| 1946 BitStringFormat = HexadecimalString; | |
| 1947 BitsOrder = Ascending; | |
| 1948 | |
| 1949 VectorStringFormat = NumericalValuesString or ValuesString; | |
| 1950 | |
| 1951 Examples: | |
| 1952 | |
| 1953 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO( | |
| 1954 'Name' => 'Sample.fpf', | |
| 1955 'Mode' => 'Read', | |
| 1956 'FingerprintsStringMode' => | |
| 1957 'AutoDetect'); | |
| 1958 | |
| 1959 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO( | |
| 1960 'Name' => 'Sample.fpf', | |
| 1961 'Mode' => 'Write', | |
| 1962 'FingerprintsStringMode' => | |
| 1963 'FingerprintsBitVectorString', | |
| 1964 'Overwrite' => 1, | |
| 1965 'BitStringFormat' => 'HexadecimalString', | |
| 1966 'BitsOrder' => 'Ascending'); | |
| 1967 | |
| 1968 $NewFingerprintsFPFileIO = new FileIO::FingerprintsFPFileIO( | |
| 1969 'Name' => 'Sample.fp', | |
| 1970 'Mode' => 'Write', | |
| 1971 'FingerprintsStringMode' => | |
| 1972 'FingerprintsVectorString', | |
| 1973 'Overwrite' => 1, | |
| 1974 'VectorStringFormat' => 'IDsAndValuesString'); | |
| 1975 | |
| 1976 =item B<GetFingerprints> | |
| 1977 | |
| 1978 $FingerprintsObject = $FingerprintsFPFileIO->GetFingerprints(); | |
| 1979 | |
| 1980 Returns B<FingerprintsObject> generated for current data line using fingerprints bit-vector | |
| 1981 or vector string data. The fingerprints object corresponds to any of the supported fingerprints | |
| 1982 such as PathLengthFingerprints, ExtendedConnectivity, and so on. | |
| 1983 | |
| 1984 =item B<GetFingerprintsString> | |
| 1985 | |
| 1986 $FingerprintsString = $FingerprintsFPFileIO->GetFingerprintsString(); | |
| 1987 | |
| 1988 Returns B<FingerprintsString> for current data line. | |
| 1989 | |
| 1990 =item B<GetHeaderDataKeyValue> | |
| 1991 | |
| 1992 $KeyValue = $FingerprintsFPFileIO->GetHeaderDataKeyValue($Key); | |
| 1993 | |
| 1994 Returns B<KeyValue> of a data header I<Key>. | |
| 1995 | |
| 1996 =item B<GetHeaderDataKeys> | |
| 1997 | |
| 1998 @Keys = $FingerprintsFPFileIO->GetHeaderDataKeys(); | |
| 1999 $NumOfKeys = $FingerprintsFPFileIO->GetHeaderDataKeys(); | |
| 2000 | |
| 2001 Returns an array of data header B<Keys> retrieved from data header section of fingerprints | |
| 2002 file. In scalar context, it returns number of keys. | |
| 2003 | |
| 2004 =item B<GetHeaderDataKeysAndValues> | |
| 2005 | |
| 2006 %KeysAndValues = $FingerprintsFPFileIO->GetHeaderDataKeysAndValues(); | |
| 2007 | |
| 2008 Returns a hash of data header keys and values retrieved from data header section of fingerprints | |
| 2009 file. | |
| 2010 | |
| 2011 =item B<GetPartialFingerprintsString> | |
| 2012 | |
| 2013 $FingerprintsString = $FingerprintsFPFileIO->GetPartialFingerprintsString(); | |
| 2014 | |
| 2015 Returns partial B<FingerprintsString> for current data line. It corresponds to the fingerprints string | |
| 2016 present in a line. | |
| 2017 | |
| 2018 =item B<GetRequiredHeaderDataKeys> | |
| 2019 | |
| 2020 @Keys = $FingerprintsFPFileIO->GetRequiredHeaderDataKeys(); | |
| 2021 $NumOfKeys = $FingerprintsFPFileIO->GetRequiredHeaderDataKeys(); | |
| 2022 | |
| 2023 Returns an array of required data header B<Keys> for a fingerprints file containing bit-vector or | |
| 2024 vector strings data. In scalar context, it returns number of keys. | |
| 2025 | |
| 2026 =item B<GetRequiredHeaderDataKeysAndValues> | |
| 2027 | |
| 2028 %KeysAndValues = $FingerprintsFPFileIO-> | |
| 2029 GetRequiredHeaderDataKeysAndValues(); | |
| 2030 | |
| 2031 Returns a hash of required data header keys and values for a fingerprints file containing bit-vector or | |
| 2032 vector strings data. | |
| 2033 | |
| 2034 =item B<IsFingerprintsDataValid> | |
| 2035 | |
| 2036 $Status = $FingerprintsFPFileIO->IsFingerprintsDataValid(); | |
| 2037 | |
| 2038 Returns 1 or 0 based on whether B<FingerprintsObject> is valid. | |
| 2039 | |
| 2040 =item B<IsFingerprintsFPFile> | |
| 2041 | |
| 2042 $Status = $FingerprintsFPFileIO->IsFingerprintsFPFile($FileName); | |
| 2043 $Status = FileIO::FingerprintsFPFileIO::IsFingerprintsFPFile($FileName); | |
| 2044 | |
| 2045 Returns 1 or 0 based on whether I<FileName> is a FP file. | |
| 2046 | |
| 2047 =item B<IsFingerprintsFileDataValid> | |
| 2048 | |
| 2049 $Status = $FingerprintsFPFileIO->IsFingerprintsFileDataValid(); | |
| 2050 | |
| 2051 Returns 1 or 0 based on whether fingerprints file contains valid fingerprints data. | |
| 2052 | |
| 2053 =item B<IsHeaderDataKeyPresent> | |
| 2054 | |
| 2055 $Status = $FingerprintsFPFileIO->IsHeaderDataKeyPresent($Key); | |
| 2056 | |
| 2057 Returns 1 or 0 based on whether data header I<Key> is present in data header | |
| 2058 section of a FP file. | |
| 2059 | |
| 2060 =item B<Next or Read> | |
| 2061 | |
| 2062 $FingerprintsFPFileIO = $FingerprintsFPFileIO->Next(); | |
| 2063 $FingerprintsFPFileIO = $FingerprintsFPFileIO->Read(); | |
| 2064 | |
| 2065 Reads next available fingerprints line in FP file, processes the data, generates appropriate fingerprints | |
| 2066 object, and returns B<FingerprintsFPFileIO>. The generated fingerprints object is available using | |
| 2067 method B<GetFingerprints>. | |
| 2068 | |
| 2069 =item B<SetBitStringFormat> | |
| 2070 | |
| 2071 $FingerprintsFPFileIO->SetBitStringFormat($Format); | |
| 2072 | |
| 2073 Sets bit string I<Format> for fingerprints bit-vector string data in a FP file and returns B<FingerprintsFPFileIO>. | |
| 2074 Possible values for B<BitStringFormat>: I<BinaryString or HexadecimalString>. | |
| 2075 | |
| 2076 =item B<SetBitsOrder> | |
| 2077 | |
| 2078 $FingerprintsFPFileIO->SetBitsOrder($BitsOrder); | |
| 2079 | |
| 2080 Sets I<BitsOrder> for fingerprints bit-vector string data in a FP file and returns B<FingerprintsFPFileIO>. | |
| 2081 Possible values for B<BitsOrder>: I<Ascending or Descending>. | |
| 2082 | |
| 2083 =item B<SetCompoundID> | |
| 2084 | |
| 2085 $FingerprintsFPFileIO->SetCompoundID($ID); | |
| 2086 | |
| 2087 Sets compound ID for current data line and returns B<FingerprintsFPFileIO>. Spaces are not allowed | |
| 2088 in compound IDs. | |
| 2089 | |
| 2090 =item B<SetDetailLevel> | |
| 2091 | |
| 2092 $FingerprintsFPFileIO->SetDetailLevel($Level); | |
| 2093 | |
| 2094 Sets details I<Level> for generating diagnostics messages during FP file processing and returns | |
| 2095 B<FingerprintsFPFileIO>. Possible values: I<Positive integers>. | |
| 2096 | |
| 2097 =item B<SetFingerprints> | |
| 2098 | |
| 2099 $FingerprintsFPFileIO->SetFingerprints($FingerprintsObject); | |
| 2100 | |
| 2101 Sets I<FingerprintsObject> for current data line and returns B<FingerprintsFPFileIO>. | |
| 2102 | |
| 2103 =item B<SetFingerprintsString> | |
| 2104 | |
| 2105 $FingerprintsFPFileIO->SetFingerprintsString($FingerprintsString); | |
| 2106 | |
| 2107 Sets I<FingerprintsString> for current data line and returns B<FingerprintsFPFileIO>. | |
| 2108 | |
| 2109 =item B<SetFingerprintsStringMode> | |
| 2110 | |
| 2111 $FingerprintsFPFileIO->SetFingerprintsStringMode($Mode); | |
| 2112 | |
| 2113 Sets I<FingerprintsStringMode> for FP file and returns B<FingerprintsFPFileIO>. | |
| 2114 Possible values: I<AutoDetect, FingerprintsBitVectorString, or FingerprintsVectorString> | |
| 2115 | |
| 2116 =item B<SetPartialFingerprintsString> | |
| 2117 | |
| 2118 $FingerprintsFPFileIO->SetPartialFingerprintsString($PartialString); | |
| 2119 | |
| 2120 Sets I<PartialFingerprintsString> for current data line and returns B<FingerprintsFPFileIO>. | |
| 2121 | |
| 2122 =item B<SetVectorStringFormat> | |
| 2123 | |
| 2124 $FingerprintsFPFileIO->SetVectorStringFormat($Format); | |
| 2125 | |
| 2126 Sets I<VectorStringFormat> for FP file and returns B<FingerprintsFPFileIO>. Possible values: | |
| 2127 I<IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString>. | |
| 2128 | |
| 2129 =item B<WriteFingerprints> | |
| 2130 | |
| 2131 $FingerprintsFPFileIO->WriteFingerprints($FingerprintsObject, | |
| 2132 $CompoundID); | |
| 2133 | |
| 2134 Writes fingerprints string generated from I<FingerprintsObject> object and other data including | |
| 2135 I<CompoundID> to FP file and returns B<FingerprintsFPFileIO>. | |
| 2136 | |
| 2137 =item B<WriteFingerprintsString> | |
| 2138 | |
| 2139 $FingerprintsFPFileIO->WriteFingerprintsString($FingerprintsString, | |
| 2140 $CompoundID); | |
| 2141 | |
| 2142 Writes I<FingerprintsString> and other data including I<CompoundID> to FP file and returns | |
| 2143 B<FingerprintsFPFileIO>. | |
| 2144 | |
| 2145 Caveats: | |
| 2146 | |
| 2147 o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat | |
| 2148 values are ignored during writing of the fingerprints string, which is | |
| 2149 written to the file as it is. | |
| 2150 o FingerprintsString is a regular fingerprints string as opposed to a | |
| 2151 partial fingerprints string. | |
| 2152 | |
| 2153 =back | |
| 2154 | |
| 2155 =head1 AUTHOR | |
| 2156 | |
| 2157 Manish Sud <msud@san.rr.com> | |
| 2158 | |
| 2159 =head1 SEE ALSO | |
| 2160 | |
| 2161 FingerprintsSDFileIO.pm, FingerprintsTextFileIO.pm | |
| 2162 | |
| 2163 =head1 COPYRIGHT | |
| 2164 | |
| 2165 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 2166 | |
| 2167 This file is part of MayaChemTools. | |
| 2168 | |
| 2169 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 2170 the terms of the GNU Lesser General Public License as published by the Free | |
| 2171 Software Foundation; either version 3 of the License, or (at your option) | |
| 2172 any later version. | |
| 2173 | |
| 2174 =cut |
