Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/FileIO/FingerprintsSDFileIO.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:23:18 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4816e4a8ae95 |
|---|---|
| 1 package FileIO::FingerprintsSDFileIO; | |
| 2 # | |
| 3 # $RCSfile: FingerprintsSDFileIO.pm,v $ | |
| 4 # $Date: 2015/02/28 20:48:43 $ | |
| 5 # $Revision: 1.18 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability or fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use Carp; | |
| 31 use Exporter; | |
| 32 use Scalar::Util (); | |
| 33 use TextUtil (); | |
| 34 use FileUtil (); | |
| 35 use SDFileUtil (); | |
| 36 use Fingerprints::FingerprintsStringUtil (); | |
| 37 use FileIO::FileIO; | |
| 38 | |
# Package-level configuration: inheritance chain and Exporter lists.
# Nothing is exported by default; IsFingerprintsSDFile is available on request
# and through the ":all" tag.
our @ISA = qw(FileIO::FileIO Exporter);
our @EXPORT = qw();
our @EXPORT_OK = qw(IsFingerprintsSDFile);

our %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# File-scoped class name used as a prefix in error and warning messages; it is
# filled in by _InitializeClass as soon as the module is loaded.
my $ClassName;
_InitializeClass();
| 50 | |
# Class constructor.
#
# Builds the object through the parent class constructor, reblesses it into the
# invoking class (or the class of the invoking object), resets all object data
# to defaults and finally applies the caller supplied property names and values.
#
# Parameters: $Class - class name or existing object; %NamesAndValues - property
# name/value pairs handed to _InitializeFingerprintsSDFileIOProperties.
# Returns: the new object.
#
sub new {
  my $Class = shift;
  my %NamesAndValues = @_;

  my $ObjectClass = ref($Class) || $Class;
  my $This = bless $Class->SUPER::new(), $ObjectClass;

  # Defaults first, then the user specified values...
  $This->_InitializeFingerprintsSDFileIO();
  $This->_InitializeFingerprintsSDFileIOProperties(%NamesAndValues);

  return $This;
}
| 64 | |
# Reset object data shared by read and write modes to default values and then
# initialize the read and write specific data.
#
# Properties initialized to undef:
#
#   FingerprintsStringMode - Fingerprints string data format during read/write.
#       Read: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString.
#       Write: FingerprintsBitVectorString or FingerprintsVectorString.
#   FingerprintsObject - Fingerprints bit-vector/vector object for the current
#       fingerprints string; during write it may also be any supported
#       fingerprints object such as PathLengthFingerprints or ExtendedConnectivity.
#   FingerprintsFieldLabel - SD file data field label holding the fingerprints string.
#       Read: AutoDetect implies usage of a data field containing the word
#       Fingerprints in its label; otherwise, a valid data field name must be specified.
#       Write: data field label used for writing the fingerprints string.
#   FingerprintsString - Fingerprints string for the current compound.
#   CompoundString - Compound data string for the current compound.
#
# Other properties:
#
#   FirstCompoundDataIO - set to 1 until the first compound data string is read/written.
#   CompoundNum - number of the current compound; starts at 0.
#
# Returns: the object.
#
sub _InitializeFingerprintsSDFileIO {
  my $This = shift;

  # Assigning an empty list to the slice sets each of these properties to undef...
  @{$This}{qw(FingerprintsStringMode FingerprintsObject FingerprintsFieldLabel FingerprintsString CompoundString)} = ();

  $This->{FirstCompoundDataIO} = 1;
  $This->{CompoundNum} = 0;

  # Mode specific parameters: read first, write second...
  $This->_InitializeFingerprintsSDFileIORead();
  $This->_InitializeFingerprintsSDFileIOWrite();

  return $This;
}
| 134 | |
# Initialize class data: record the package name in the file-scoped $ClassName,
# which is used as a prefix in error and warning messages.
#
# Returns: the class name.
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;

  return $ClassName;
}
| 141 | |
# Reset object data used for reading fingerprints SD file to default values.
#
#   CompoundIDMode - how compound IDs are generated. Possible values: DataField,
#       MolName, LabelPrefix, or MolNameOrLabelPrefix. Default: LabelPrefix.
#   CompoundIDFieldLabel - data field label whose value is used as compound ID
#       during DataField value of CompoundIDMode. Default: undef.
#   CompoundIDPrefix - prefix for sequential compound IDs, which look like
#       Cmpd<Number> for the default value of Cmpd.
#   ValidateData - flag controlling validation of fingerprints data; set to 1.
#   DetailLevel - level of detail to print about invalid or missing data; set to 1.
#   NumOfCmpdsWithMissingData, NumOfCmpdsWithInvalidData - counters; set to 0.
#   CompoundID - compound ID for the current fingerprints string. Default: undef.
#   DataFieldLabelsAndValues - data field labels and values map for the current
#       compound; emptied.
#   ValidFileData, ValidCompoundIDField, ValidFingerprintsField,
#   ValidFingerprintsStringMode - status flags for SD file data; set to 0.
#
# Returns: the object.
#
sub _InitializeFingerprintsSDFileIORead {
  my $This = shift;

  my %ReadDefaults = (
    CompoundIDMode => 'LabelPrefix',
    CompoundIDFieldLabel => undef,
    CompoundIDPrefix => 'Cmpd',
    ValidateData => 1,
    DetailLevel => 1,
    NumOfCmpdsWithMissingData => 0,
    NumOfCmpdsWithInvalidData => 0,
    CompoundID => undef,
    ValidFileData => 0,
    ValidCompoundIDField => 0,
    ValidFingerprintsField => 0,
    ValidFingerprintsStringMode => 0,
  );

  my($Property);
  for $Property (keys %ReadDefaults) {
    $This->{$Property} = $ReadDefaults{$Property};
  }

  # Empty the map in place; it's created on first use...
  %{$This->{DataFieldLabelsAndValues}} = ();

  return $This;
}
| 201 | |
# Reset object data used for writing fingerprints SD file to default values.
#
#   BitStringFormat - fingerprints bit vector string format. Possible values:
#       BinaryString or HexadecimalString. Initialized to undef.
#   BitsOrder - bits order in fingerprints bit vector string:
#       Ascending - First bit in each byte as the lowest bit
#       Descending - First bit in each byte as the highest bit
#       Initialized to undef.
#   VectorStringFormat - fingerprints vector string format. Possible values:
#       IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString,
#       ValuesAndIDsPairsString, ValuesString. Initialized to undef.
#   Overwrite - flag for overwriting an existing file; set to 0.
#
# Returns: the object.
#
sub _InitializeFingerprintsSDFileIOWrite {
  my $This = shift;

  my($Property);
  for $Property (qw(BitStringFormat BitsOrder VectorStringFormat)) {
    $This->{$Property} = undef;
  }

  $This->{Overwrite} = 0;

  return $This;
}
| 239 | |
# Apply caller specified property names and values to the object.
#
# Each name/value pair is handed to the corresponding Set<Name> method. After
# that, the presence of the Name property is checked, the named file is checked
# using IsFingerprintsSDFile, and mode specific initialization is performed for
# Read or Write/Append values of mode retrieved using GetMode.
#
# Parameters: %NamesAndValues - property name/value pairs.
# Returns: the object. Croaks for a missing Name or a file which doesn't appear
# to be in fingerprints format.
#
sub _InitializeFingerprintsSDFileIOProperties {
  my $This = shift;
  my %NamesAndValues = @_;

  my($PropertyName);
  for $PropertyName (keys %NamesAndValues) {
    my $MethodName = "Set${PropertyName}";
    $This->$MethodName($NamesAndValues{$PropertyName});
  }

  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name..."
    unless exists $NamesAndValues{Name};

  # Make sure it's a fingerprints file...
  my $Name = $NamesAndValues{Name};
  croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be fingerprints format..."
    unless $This->IsFingerprintsSDFile($Name);

  if ($This->GetMode() =~ /^Read$/i) {
    $This->_InitializeFingerprintsSDFileIOReadProperties(%NamesAndValues);
  }
  elsif ($This->GetMode() =~ /^(Write|Append)$/i) {
    $This->_InitializeFingerprintsSDFileIOWriteProperties(%NamesAndValues);
  }

  return $This;
}
| 272 | |
# Finish setting up object properties for reading fingerprints SD file.
#
# FingerprintsStringMode and FingerprintsFieldLabel fall back to AutoDetect when
# no true value has been set. A DataField value of CompoundIDMode without a
# CompoundIDFieldLabel is a fatal error. The SD file is then examined using
# _PrepareForReadingFingerprintsSDFileData.
#
# Parameters: %NamesAndValues - property name/value pairs (not used here).
# Returns: the object.
#
sub _InitializeFingerprintsSDFileIOReadProperties {
  my $This = shift;
  my %NamesAndValues = @_;

  $This->{FingerprintsStringMode} ||= 'AutoDetect';
  $This->{FingerprintsFieldLabel} ||= 'AutoDetect';

  # Check compound ID data field...
  my $DataFieldMode = ($This->{CompoundIDMode} =~ /^DataField$/i);
  if ($DataFieldMode && !defined($This->{CompoundIDFieldLabel})) {
    croak "Error: ${ClassName}->: Object can't be instantiated: Compound ID data field lable must be specifed using \"CompoundIDFieldLabel\" during \"DataField\" value of \"CompoundIDMode\"...";
  }

  $This->_PrepareForReadingFingerprintsSDFileData();

  return $This;
}
| 297 | |
# Finish setting up object properties for writing fingerprints SD file.
#
# FingerprintsStringMode must have been explicitly specified and must be either
# FingerprintsBitVectorString or FingerprintsVectorString. FingerprintsFieldLabel
# falls back to Fingerprints when no true value has been set. The object is then
# readied using _PrepareForWritingFingerprintsSDFileData.
#
# Parameters: %NamesAndValues - property name/value pairs specified by the caller.
# Returns: the object.
#
sub _InitializeFingerprintsSDFileIOWriteProperties {
  my $This = shift;
  my %NamesAndValues = @_;

  # Check FingerprintsStringMode value...
  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode..."
    unless exists $NamesAndValues{FingerprintsStringMode};

  unless ($This->{FingerprintsStringMode} =~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  $This->{FingerprintsFieldLabel} ||= 'Fingerprints';

  $This->_PrepareForWritingFingerprintsSDFileData();

  return $This;
}
| 321 | |
# Set FingerprintsStringMode.
#
# Supported values (case insensitive):
#
#   AutoDetect - automatically detect format of fingerprints string
#   FingerprintsBitVectorString - Bit vector fingerprints string format
#   FingerprintsVectorString - Vector fingerprints string format
#
# Parameters: $Value - new mode.
# Returns: the object. Croaks for any other value.
#
sub SetFingerprintsStringMode {
  my $This = shift;
  my $Value = shift;

  unless ($Value =~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  $This->{FingerprintsStringMode} = $Value;

  return $This;
}
| 339 | |
# Set CompoundIDMode.
#
# Supported values (case insensitive): DataField, MolName, LabelPrefix, or
# MolNameOrLabelPrefix.
#
# Parameters: $Value - new compound ID mode.
# Returns: the object. Croaks for any other value.
#
sub SetCompoundIDMode {
  my($This, $Value) = @_;

  if ($Value !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
    # Report this method's own name so the failing call can be located...
    croak "Error: ${ClassName}->SetCompoundIDMode: CompoundIDMode value, $Value, is not valid; Supported values: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix...";
  }

  $This->{CompoundIDMode} = $Value;

  return $This;
}
| 353 | |
# Set DetailLevel.
#
# Parameters: $Value - new level; must satisfy TextUtil::IsPositiveInteger.
# Returns: the object. Croaks for any other value.
#
sub SetDetailLevel {
  my $This = shift;
  my $Value = shift;

  croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0..."
    unless TextUtil::IsPositiveInteger($Value);

  $This->{DetailLevel} = $Value;

  return $This;
}
| 367 | |
# Set BitStringFormat.
#
# Supported values (case insensitive): BinaryString or HexadecimalString.
#
# Parameters: $Value - new bit string format.
# Returns: the object. Croaks for any other value.
#
sub SetBitStringFormat {
  my $This = shift;
  my $Value = shift;

  unless ($Value =~ /^(BinaryString|HexadecimalString)$/i) {
    croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";
  }

  $This->{BitStringFormat} = $Value;

  return $This;
}
| 381 | |
# Set BitsOrder.
#
# Supported values (case insensitive):
#
#   Ascending - First bit in each byte as the lowest bit
#   Descending - First bit in each byte as the highest bit
#
# Parameters: $Value - new bits order.
# Returns: the object. Croaks for any other value.
#
sub SetBitsOrder {
  my($This, $Value) = @_;

  if ($Value !~ /^(Ascending|Descending)$/i) {
    # Name the property actually being set in the error message...
    croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
  }

  $This->{BitsOrder} = $Value;

  return $This;
}
| 398 | |
# Set VectorStringFormat.
#
# Supported values (case insensitive): IDsAndValuesString, IDsAndValuesPairsString,
# ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString.
#
# Parameters: $Value - new vector string format.
# Returns: the object. Croaks for any other value.
#
sub SetVectorStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
    # Name the property actually being set in the error message...
    croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
  }

  $This->{VectorStringFormat} = $Value;

  return $This;
}
| 414 | |
# Get compound string for current compound with optional removal of fingerprints data.
#
# Parameters: $RemoveFingerprintsData - optional flag; a missing or false value
# returns the compound string as is.
# Returns: the stored compound string or, when removal is requested and
# _IsFingerprintsDataPresentInCompoundString reports fingerprints data, the result
# of SDFileUtil::RemoveCmpdDataHeaderLabelAndValue for the fingerprints field label.
#
sub GetCompoundString {
  my($This, $RemoveFingerprintsData) = @_;

  return $This->{CompoundString} unless $RemoveFingerprintsData;
  return $This->{CompoundString} unless $This->_IsFingerprintsDataPresentInCompoundString();

  return SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
}
| 428 | |
# Set compound string for current compound.
#
# Parameters: $CompoundString - compound data string to store.
# Returns: the object.
#
sub SetCompoundString {
  my $This = shift;

  ($This->{CompoundString}) = @_;

  return $This;
}
| 438 | |
# Get fingerprints object for current compound: a fingerprints bit-vector or
# vector object, or any of the supported fingerprints objects such as
# PathLengthFingerprints, ExtendedConnectivity, and so on.
#
# Returns: value of the FingerprintsObject property.
#
sub GetFingerprints {
  my $This = shift;

  my $FingerprintsObject = $This->{FingerprintsObject};

  return $FingerprintsObject;
}
| 448 | |
# Set fingerprints object for current compound.
#
# Parameters: $FingerprintsObject - object to store; it's not examined here.
# Returns: the object.
#
sub SetFingerprints {
  my $This = shift;

  ($This->{FingerprintsObject}) = @_;

  return $This;
}
| 458 | |
# Get fingerprints string for current compound.
#
# Returns: the stored fingerprints string, or the string None when no true
# value is stored.
#
sub GetFingerprintsString {
  my $This = shift;

  return $This->{FingerprintsString} || 'None';
}
| 466 | |
# Set fingerprints string for current compound.
#
# Parameters: $FingerprintsString - string to store; it's not validated here.
# Returns: the object.
#
sub SetFingerprintsString {
  my $This = shift;

  ($This->{FingerprintsString}) = @_;

  return $This;
}
| 476 | |
# Does fingerprints SD file contain valid data?
#
# Returns: 1 when the ValidFileData property is true; otherwise, 0.
#
sub IsFingerprintsFileDataValid {
  my $This = shift;

  return 1 if $This->{ValidFileData};

  return 0;
}
| 484 | |
# Does current compound contain valid fingerprints object data?
#
# Returns: 1 when the FingerprintsObject property is defined; otherwise, 0.
#
sub IsFingerprintsDataValid {
  my $This = shift;

  if (defined $This->{FingerprintsObject}) {
    return 1;
  }

  return 0;
}
| 492 | |
# Read next available compound data string, process it and generate appropriate
# fingerprints objects.
#
# Returns: undef when no compound data string could be read; otherwise, the
# object. The object is also returned, without generating fingerprints object
# and compound ID, when SD file data is not valid or validation of the current
# compound data fails.
#
sub Read {
  my $This = shift;

  # Read compound data string...
  return undef unless $This->_ReadCompoundDataString();

  # No need to process SD file with invalid data; the compound is counted as
  # missing data during validation...
  if (!$This->{ValidFileData}) {
    $This->{NumOfCmpdsWithMissingData} += 1 if $This->{ValidateData};
    return $This;
  }

  # Perform data validation...
  if ($This->{ValidateData} && !$This->_ValidateReadCompoundDataString()) {
    return $This;
  }

  # Check again for presence of the fingerprints field to handle problematic
  # data for non-validated compound string data...
  my $FieldLabel = $This->{FingerprintsFieldLabel};
  $This->{FingerprintsString} = $This->{DataFieldLabelsAndValues}{$FieldLabel}
    if exists $This->{DataFieldLabelsAndValues}{$FieldLabel};

  # Generate fingerprints object followed by compound ID for fingerprints string...
  $This->_GenerateFingerprintsObject();
  $This->_GenerateCompoundID();

  return $This;
}
| 536 | |
# Read next available compound data string; a synonym for Read.
#
# Returns: whatever Read returns.
#
sub Next {
  my $This = shift;

  return $This->Read();
}
| 545 | |
# Read compound data string for the next compound from the file handle and set
# up its data field labels and values map.
#
# Returns: 0 when SDFileUtil::ReadCmpdString doesn't return a true value;
# otherwise, 1 after incrementing CompoundNum.
#
sub _ReadCompoundDataString {
  my $This = shift;

  $This->_ProcessFirstCompoundDataStringRead() if $This->{FirstCompoundDataIO};

  # Initialize data for current compound data string...
  $This->_InitializeReadCompoundDataString();

  # Get next compound data string...
  $This->{CompoundString} = SDFileUtil::ReadCmpdString($This->{FileHandle});
  return 0 unless $This->{CompoundString};

  $This->{CompoundNum} += 1;

  # Set up data field labels and values...
  my @CmpdLines = split "\n", $This->{CompoundString};
  %{$This->{DataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

  return 1;
}
| 573 | |
# Clear per compound data before reading the next compound data string:
# CompoundID, CompoundString, FingerprintsObject and FingerprintsString are set
# to undef and the data field labels and values map is emptied.
#
# Returns: the object.
#
sub _InitializeReadCompoundDataString {
  my $This = shift;

  # Assigning an empty list to the slice sets each of these properties to undef...
  @{$This}{qw(CompoundID CompoundString FingerprintsObject FingerprintsString)} = ();

  %{$This->{DataFieldLabelsAndValues}} = ();

  return $This;
}
| 589 | |
# Validate compound data string containing fingerprints data.
#
# The current compound is counted as missing data when its data fields don't
# contain the fingerprints field label. It's counted as invalid data when the
# fingerprints string values are not valid, its type or description can't be
# determined, or they differ (ignoring case) from FirstFingerprintsStringType
# and FirstFingerprintsStringDescription.
#
# Warnings are issued for DetailLevel >= 2; DetailLevel >= 3 additionally
# includes the compound string in the warning.
#
# Returns: 1 for valid data; otherwise, 0.
#
sub _ValidateReadCompoundDataString {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  # Check for missing data...
  if (!exists $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
    $This->{NumOfCmpdsWithMissingData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data: $This->{CompoundString}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data...";
    }
    return 0;
  }

  # Check for invalid data...
  my($FingerprintsString, $ValidFingerprintsData, $FingerprintsType, $FingerprintsDescription);

  $FingerprintsString = $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel};
  $ValidFingerprintsData = 0;

  if (Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($FingerprintsString)) {
    ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($FingerprintsString);
    if (defined($FingerprintsType) && defined($FingerprintsDescription)) {
      # Compare as plain strings: type and description are data, not patterns, and
      # any regular expression metacharacters in them must match literally...
      if (lc($This->{FirstFingerprintsStringType}) eq lc($FingerprintsType) && lc($This->{FirstFingerprintsStringDescription}) eq lc($FingerprintsDescription)) {
        $ValidFingerprintsData = 1;
      }
    }
  }

  if (!$ValidFingerprintsData) {
    $This->{NumOfCmpdsWithInvalidData} += 1;
    if ($This->{DetailLevel} >= 3) {
      # Show the offending compound data, matching the missing data warning above...
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data: $This->{CompoundString}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data...";
    }
    return 0;
  }

  return 1;
}
| 644 | |
# Set up compound ID for the current fingerprints string according to CompoundIDMode:
#
#   LabelPrefix - CompoundIDPrefix followed by CompoundNum
#   DataField - value of data field CompoundIDFieldLabel; empty string when the
#       data field doesn't exist
#   MolNameOrLabelPrefix - first line of compound string, unless
#       TextUtil::IsNotEmpty reports it as empty, in which case the LabelPrefix
#       form is used
#   MolName - first line of compound string
#
# Any other mode leads to an empty compound ID.
#
# Returns: the object.
#
sub _GenerateCompoundID {
  my $This = shift;

  my $Mode = $This->{CompoundIDMode};
  my $CompoundID = '';

  if ($Mode =~ /^LabelPrefix$/i) {
    $CompoundID = "$This->{CompoundIDPrefix}$This->{CompoundNum}";
  }
  elsif ($Mode =~ /^DataField$/i) {
    my $SpecifiedDataField = $This->{CompoundIDFieldLabel};
    if (exists $This->{DataFieldLabelsAndValues}{$SpecifiedDataField}) {
      $CompoundID = $This->{DataFieldLabelsAndValues}{$SpecifiedDataField};
    }
    else {
      $CompoundID = '';
    }
  }
  elsif ($Mode =~ /^MolNameOrLabelPrefix$/i) {
    my($MolName) = split "\n", $This->{CompoundString};
    if (TextUtil::IsNotEmpty($MolName)) {
      $CompoundID = $MolName;
    }
    else {
      $CompoundID = "$This->{CompoundIDPrefix}$This->{CompoundNum}";
    }
  }
  elsif ($Mode =~ /^MolName$/i) {
    my($MolName) = split "\n", $This->{CompoundString};
    $CompoundID = $MolName;
  }

  $This->{CompoundID} = $CompoundID;

  return $This;
}
| 673 | |
# Process first compound data string read: all that's done is to clear the
# FirstCompoundDataIO flag.
#
# Returns: the object.
#
sub _ProcessFirstCompoundDataStringRead {
  my $This = shift;

  $This->{FirstCompoundDataIO} = 0;

  return $This;
}
| 684 | |
# Get ready for reading fingerprints SD file: retrieve data fields information,
# validate compound ID and fingerprints fields, and validate fingerprints string
# mode once a valid fingerprints field is available.
#
# ValidFileData is set to 1 only when ValidCompoundIDField, ValidFingerprintsField
# and ValidFingerprintsStringMode are all true; otherwise, it's set to 0.
#
# Returns: the object.
#
sub _PrepareForReadingFingerprintsSDFileData {
  my $This = shift;

  # Retrieve SD file data fields information...
  $This->_RetrieveSDFileDataFields();

  # Validate compound and fingerprints field information...
  $This->_ValidateReadCompoundIDField();
  $This->_ValidateReadFingerprintsField();

  # Fingerprints string mode can only be checked against a valid fingerprints field...
  $This->_ValidateReadFingerprintsStringMode() if $This->{ValidFingerprintsField};

  # Set status of SD file data...
  if ($This->{ValidCompoundIDField} && $This->{ValidFingerprintsField} && $This->{ValidFingerprintsStringMode}) {
    $This->{ValidFileData} = 1;
  }
  else {
    $This->{ValidFileData} = 0;
  }

  return $This;
}
| 707 | |
# Retrieve data field labels and values for the first compound in SD file and
# store them as FirstDataFieldLabelsAndValues.
#
# The file named by the Name property is opened separately from the object's own
# file handle, its first compound string is read using SDFileUtil::ReadCmpdString,
# and the file is closed again.
#
# Returns: the object. Croaks when the file doesn't exist or can't be opened.
#
sub _RetrieveSDFileDataFields {
  my($This) = @_;
  my($SDFile, $SDFileHandle, $CmpdString, @CmpdLines);

  $SDFile = $This->{Name};

  if (!(-e $SDFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, doesn't exist...";
  }

  # Three-argument open with a lexical handle: the file name is used verbatim,
  # instead of being scanned for mode characters and surrounding whitespace, and
  # no package-wide bareword handle is clobbered...
  if (!open $SDFileHandle, '<', $SDFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input SD file $SDFile: $! ...";
  }
  $CmpdString = SDFileUtil::ReadCmpdString($SDFileHandle);
  close $SDFileHandle;

  # Set up data field labels and values for first compound string data; a file
  # without any compound data provides no lines...
  @CmpdLines = defined($CmpdString) ? split("\n", $CmpdString) : ();

  %{$This->{FirstDataFieldLabelsAndValues}} = ();
  %{$This->{FirstDataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

  return $This;
}
| 734 | |
# Validate compound ID field information.
#
# For DataField value of CompoundIDMode, the data field named by
# CompoundIDFieldLabel must exist in FirstDataFieldLabelsAndValues; a warning is
# issued when it doesn't. No check is performed for other modes.
#
# Returns: 1 after setting ValidCompoundIDField to 1; otherwise, 0 with
# ValidCompoundIDField set to 0.
#
sub _ValidateReadCompoundIDField {
  my $This = shift;

  $This->{ValidCompoundIDField} = 0;

  if ($This->{CompoundIDMode} =~ /^DataField$/i) {
    my $SpecifiedDataField = $This->{CompoundIDFieldLabel};
    unless (exists $This->{FirstDataFieldLabelsAndValues}{$SpecifiedDataField}) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundIDField: Compound ID data field, $SpecifiedDataField, specified using \"CompoundIDField\" in \"DataField\" \"CompoundIDMode\" doesn't exist...";
      return 0;
    }
  }

  $This->{ValidCompoundIDField} = 1;

  return 1;
}
| 755 | |
# Validate fingerprints string field information.
#
# For an explicitly specified FingerprintsFieldLabel, the data field must exist in
# FirstDataFieldLabelsAndValues. For AutoDetect, the data field labels are examined
# in sorted order and the first one containing the word Fingerprints (ignoring
# case) is used. A warning is issued when no usable data field is found.
#
# Returns: 1 after setting ValidFingerprintsField to 1 and FingerprintsFieldLabel
# to the data field label in use; otherwise, 0 with ValidFingerprintsField set to 0.
#
sub _ValidateReadFingerprintsField {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  $This->{ValidFingerprintsField} = 0;

  $FingerprintsFieldLabel = '';

  if ($This->{FingerprintsFieldLabel} !~ /^AutoDetect$/i) {
    $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
    if (! exists $This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Fingerprints data field value, $FingerprintsFieldLabel, specified using \"FingerprintsFieldLabel\" doesn't exist...";
      return 0;
    }
  }
  else {
    # Make sure default fingerprints field does exist...
    my($FingerprintsFieldFound, $DataFieldLabel);
    $FingerprintsFieldFound = 0;

    # Hash keys come back in no particular order; sorting them makes the choice
    # repeatable when more than one data field label contains Fingerprints...
    DATAFIELDLABEL: for $DataFieldLabel (sort keys %{$This->{FirstDataFieldLabelsAndValues}}) {
      if ($DataFieldLabel =~ /Fingerprints/i) {
        $FingerprintsFieldFound = 1;
        $FingerprintsFieldLabel = $DataFieldLabel;
        last DATAFIELDLABEL;
      }
    }
    if (!$FingerprintsFieldFound) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Data field label containing \"Fingerprints\" string in its name doesn't exist...";
      return 0;
    }
  }

  $This->{ValidFingerprintsField} = 1;
  $This->{FingerprintsFieldLabel} = $FingerprintsFieldLabel;

  return 1;
}
| 796 | |
# Validate fingerprints string mode information using the fingerprints string
# available in the fingerprints data field of the first compound...
#
# Uses:
#   o $This->{FingerprintsFieldLabel} to look up the fingerprints string in
#     $This->{FirstDataFieldLabelsAndValues}
#   o $This->{FingerprintsStringMode}: FingerprintsBitVectorString,
#     FingerprintsVectorString, or anything else which is treated as AutoDetect
#
# Sets:
#   o ValidFingerprintsStringMode, FingerprintsBitVectorStringMode,
#     FingerprintsVectorStringMode, FirstFingerprintsStringType and
#     FirstFingerprintsStringDescription
#
# Returns 1 for a valid mode. Otherwise, a warning is issued using carp and 0 is
# returned with all the properties listed above left at their reset values.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;
  my($FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType, $FirstFingerprintsStringDescription, $FingerprintsFieldLabel, $FingerprintsType, $FingerprintsDescription);

  # Reset object state first so that any early return leaves it cleared...
  $This->{ValidFingerprintsStringMode} = 0;

  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;

  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  $FingerprintsBitVectorStringMode = 0;
  $FingerprintsVectorStringMode = 0;

  $FirstFingerprintsStringType = '';
  $FirstFingerprintsStringDescription = '';

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel});

  # What the helper returns for a missing or unparsable value is not visible here;
  # normalize an undefined type so pattern matches and warning messages below
  # don't operate on an uninitialized value...
  if (!defined $FingerprintsType) {
    $FingerprintsType = '';
  }

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    # Explicit bit-vector mode: data type in the file must agree...
    if ($FingerprintsType !~ /^FingerprintsBitVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsBitVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsBitVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    # Explicit vector mode: data type in the file must agree...
    if ($FingerprintsType !~ /^FingerprintsVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  else {
    # AutoDetect mode: go with whatever data type the first fingerprints string has...
    if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
      $FingerprintsBitVectorStringMode = 1;
    }
    elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
      $FingerprintsVectorStringMode = 1;
    }
    else {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintsBitVector or FingerprintsVector...";
      return 0;
    }
    $FirstFingerprintsStringType = $FingerprintsType;
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;

  $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
  $This->{FirstFingerprintsStringDescription} = $FirstFingerprintsStringDescription;

  return 1;
}
| 865 | |
# Generate a fingerprints string from the specified fingerprints, fingerprints
# bit-vector, or fingerprints vector object, attach it to the compound string,
# and write the result to SD file...
#
# Returns the FingerprintsSDFileIO object.
#
sub WriteFingerprints {
  my $This = shift;
  my($FingerprintsObject, $CompoundString) = @_;

  # Clear out data left over from any previous compound...
  $This->_InitializeWriteCompoundDataString();

  # Turn fingerprints object into its string form...
  $This->{FingerprintsObject} = $FingerprintsObject;
  $This->_GenerateFingerprintsString();

  # Attach fingerprints data to the compound string...
  $This->{CompoundString} = $CompoundString;
  $This->_AddFingerprintsDataToCompoundString();

  # Send it to the file...
  $This->_WriteCompoundDataString();

  return $This;
}
| 890 | |
# Attach the specified fingerprints string to the compound string and write
# the result to SD file...
#
# Note:
#   o The fingerprints string is written to the file as it is: BitStringFormat,
#     BitsOrder, and VectorStringFormat values are not applied to it.
#   o A fingerprints object is also generated from the string by calling
#     _GenerateFingerprintsObject; its return value is not checked.
#   o CompoundString is expected to contain no fingerprints data.
#
# Returns the FingerprintsSDFileIO object.
#
sub WriteFingerprintsString {
  my $This = shift;
  my($FingerprintsString, $CompoundString) = @_;

  # Clear out data left over from any previous compound...
  $This->_InitializeWriteCompoundDataString();

  # Keep the string and build corresponding fingerprints object...
  $This->{FingerprintsString} = $FingerprintsString;
  $This->_GenerateFingerprintsObject();

  # Attach fingerprints data to the compound string...
  $This->{CompoundString} = $CompoundString;
  $This->_AddFingerprintsDataToCompoundString();

  # Send it to the file...
  $This->_WriteCompoundDataString();

  return $This;
}
| 919 | |
# Reset per-compound data before writing the next compound: CompoundString,
# FingerprintsObject, and FingerprintsString are all set to undef...
#
# Returns the FingerprintsSDFileIO object.
#
sub _InitializeWriteCompoundDataString {
  my($This) = @_;

  @{$This}{qw(CompoundString FingerprintsObject FingerprintsString)} = (undef) x 3;

  return $This;
}
| 932 | |
# Write compound data string to the output file...
#
# For the first compound, _ProcessFirstCompoundDataStringWrite is invoked before
# any data goes out. CompoundNum is incremented and the current CompoundString,
# followed by a new line, is written to $This->{FileHandle}. A failed write is
# reported using carp instead of being silently ignored.
#
# Returns the FingerprintsSDFileIO object.
#
sub _WriteCompoundDataString {
  my($This) = @_;
  my($FileHandle);

  if ($This->{FirstCompoundDataIO}) {
    $This->_ProcessFirstCompoundDataStringWrite();
  }

  $This->{CompoundNum} += 1;
  $FileHandle = $This->{FileHandle};

  # print returns false on failure; let the caller know instead of losing data quietly...
  print {$FileHandle} "$This->{CompoundString}\n"
    or carp "Warning: " . __PACKAGE__ . "->_WriteCompoundDataString: Couldn't write compound data to file: $!...";

  return $This;
}
| 950 | |
# Process first compound data string write...
#
# Nothing is written here: the FirstCompoundDataIO flag is simply cleared so
# that this step runs only once.
#
# Returns the FingerprintsSDFileIO object.
#
sub _ProcessFirstCompoundDataStringWrite {
  my($This) = @_;

  $This->{FirstCompoundDataIO} = 0;

  return $This;
}
| 961 | |
# Prepare for writing fingerprints SD file...
#
# Croaks when the file named by $This->{Name} already exists and Overwrite is
# not set. Otherwise, FingerprintsBitVectorStringMode, FingerprintsVectorStringMode,
# and ValidFingerprintsStringMode flags are derived from FingerprintsStringMode,
# and default formatting values are set up for the selected mode.
#
# Returns the FingerprintsSDFileIO object.
#
sub _PrepareForWritingFingerprintsSDFileData {
  my($This) = @_;

  # Don't clobber an existing file unless asked to...
  my $SDFile = $This->{Name};
  if (!$This->{Overwrite} && -e $SDFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, already exist. Use overwrite option...";
  }

  # Figure out which fingerprints string mode is in effect; vector mode is
  # considered only when bit-vector mode hasn't been selected...
  my $BitVectorStringMode = ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) ? 1 : 0;
  my $VectorStringMode = (!$BitVectorStringMode && $This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) ? 1 : 0;

  $This->{FingerprintsBitVectorStringMode} = $BitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $VectorStringMode;
  $This->{ValidFingerprintsStringMode} = ($BitVectorStringMode || $VectorStringMode) ? 1 : 0;

  # Fill in default formatting values for the selected mode...
  if ($BitVectorStringMode) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
  }
  elsif ($VectorStringMode) {
    $This->_SetDefaultVectorStringFormat();
  }

  return $This;
}
| 1000 | |
# Fall back to the default bit string format provided by FingerprintsStringUtil
# when BitStringFormat has no true value...
#
# Returns the FingerprintsSDFileIO object.
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}
| 1012 | |
# Fall back to the default bits order provided by FingerprintsStringUtil when
# BitsOrder has no true value...
#
# Returns the FingerprintsSDFileIO object.
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  $This->{BitsOrder} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();

  return $This;
}
| 1024 | |
# Fall back to the default vector string format for the current fingerprints
# object when VectorStringFormat has no true value...
#
# Nothing is changed unless a fingerprints object is available, as the default
# is retrieved from FingerprintsStringUtil using that object.
#
# Returns the FingerprintsSDFileIO object.
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  # Explicit format or no fingerprints object: nothing to do...
  return $This if ($This->{VectorStringFormat} || !$This->{FingerprintsObject});

  $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});

  return $This;
}
| 1036 | |
# Append fingerprints data field label and fingerprints string to the current
# compound string...
#
# Any data already present under the fingerprints field label is removed first,
# with a warning. The trailing $$$$ is taken off the compound string, the data
# field is added, and $$$$ is put back at the end.
#
# Returns the FingerprintsSDFileIO object.
#
sub _AddFingerprintsDataToCompoundString {
  my($This) = @_;

  # Replace any fingerprints data already in there...
  if ($This->_IsFingerprintsDataPresentInCompoundString()) {
    carp "Warning: ${ClassName}->_AddFingerprintsDataToCompoundString: The compound string already contains fingerprints data corresponding to fingerprints field label $This->{FingerprintsFieldLabel}; It has been replaced with new fingerprints data...";
    $This->{CompoundString} = SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
  }

  # Work on a copy with the trailing $$$$ taken off...
  (my $CompoundData = $This->{CompoundString}) =~ s/\$\$\$\$$//;

  $This->{CompoundString} = "${CompoundData}> <$This->{FingerprintsFieldLabel}>\n$This->{FingerprintsString}\n\n\$\$\$\$";

  return $This;
}
| 1057 | |
# Is fingerprints data already present in compound string?
#
# Returns 1 when the current compound string contains "<FingerprintsFieldLabel>";
# otherwise, returns 0. An empty compound string or field label, as determined by
# TextUtil::IsEmpty, always leads to 0.
#
sub _IsFingerprintsDataPresentInCompoundString {
  my($This) = @_;

  if (TextUtil::IsEmpty($This->{CompoundString}) || TextUtil::IsEmpty($This->{FingerprintsFieldLabel})) {
    return 0;
  }

  # Look for the label as a literal substring: field labels may contain characters
  # such as ".", "(", or "+" which must not be treated as regex metacharacters...
  return (index($This->{CompoundString}, "<$This->{FingerprintsFieldLabel}>") >= 0) ? 1 : 0;
}
| 1072 | |
# Build fingerprints object from the current fingerprints string...
#
# FingerprintsObject is always reset to undef first. With no fingerprints string,
# the object stays undef and the FingerprintsSDFileIO object is returned. The string
# is parsed as a bit-vector or a vector string depending on which string mode
# flag is set; undef is returned when neither one is set.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  # Nothing to parse...
  return $This unless $This->{FingerprintsString};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});
    return $This;
  }

  # No valid fingerprints string mode...
  return undef;
}
| 1096 | |
# Build fingerprints string from the current fingerprints object...
#
# FingerprintsString is always reset to an empty string first and stays that
# way when there is no fingerprints object or no string mode flag is set. For
# bit-vector mode, BitStringFormat and BitsOrder are passed along; for vector
# mode, VectorStringFormat is passed along.
#
# Returns the FingerprintsSDFileIO object.
#
sub _GenerateFingerprintsString {
  my($This) = @_;

  $This->{FingerprintsString} = '';

  # Nothing to convert...
  return $This unless $This->{FingerprintsObject};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{BitStringFormat}, $This->{BitsOrder});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{VectorStringFormat});
  }

  return $This;
}
| 1117 | |
# Is it a fingerprints SD file?
#
# Can be invoked as a method on a FingerprintsSDFileIO object or as a function
# taking just a file name. Only the file extension is checked, against "sdf sd",
# and the status returned by FileUtil::CheckFileType is passed back as it is.
#
sub IsFingerprintsSDFile ($;$) {
  # Method call: file name is the second parameter; otherwise, it's the first one...
  my $FileName = ((@_ == 2) && (_IsFingerprintsSDFileIO($_[0]))) ? $_[1] : $_[0];

  return FileUtil::CheckFileType($FileName, "sdf sd");
}
| 1135 | |
# Is it a FingerprintsSDFileIO object?
#
# Returns 1 for a blessed reference whose class is, or is derived from, this
# class; otherwise, returns 0.
#
sub _IsFingerprintsSDFileIO {
  my($Object) = @_;

  # Unblessed values can't be asked about their class...
  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
| 1142 | |
| 1143 1; | |
| 1144 | |
| 1145 __END__ | |
| 1146 | |
| 1147 =head1 NAME | |
| 1148 | |
| 1149 FingerprintsSDFileIO | |
| 1150 | |
| 1151 =head1 SYNOPSIS | |
| 1152 | |
| 1153 use FileIO::FingerprintsSDFileIO; | |
| 1154 | |
| 1155 use FileIO::FingerprintsSDFileIO qw(:all); | |
| 1156 | |
| 1157 =head1 DESCRIPTION | |
| 1158 | |
| 1159 B<FingerprintsSDFileIO> class provides the following methods: | |
| 1160 | |
| 1161 new, GetCompoundString, GetFingerprints, GetFingerprintsString, | |
| 1162 IsFingerprintsDataValid, IsFingerprintsFileDataValid, IsFingerprintsSDFile, Next, | |
| 1163 Read, SetBitStringFormat, SetBitsOrder, SetCompoundIDMode, SetCompoundString, | |
| 1164 SetDetailLevel, SetFingerprints, SetFingerprintsString, SetFingerprintsStringMode, | |
| 1165 SetVectorStringFormat, WriteFingerprints, WriteFingerprintsString | |
| 1166 | |
| 1167 The following methods can also be used as functions: | |
| 1168 | |
| 1169 IsFingerprintsSDFile | |
| 1170 | |
| 1171 B<FingerprintsSDFileIO> class is derived from I<FileIO> class and uses its methods to support | |
| 1172 generic file related functionality. | |
| 1173 | |
| 1174 The fingerprints SD file format with B<.sdf> or B<.sd> file extensions supports two types of | |
| 1175 fingerprints string data: fingerprints bit-vectors and fingerprints vector strings. The fingerprints | |
| 1176 string data is treated as value of a fingerprints data field label in a SD file. | |
| 1177 | |
| 1178 Example of SD file format containing fingerprints string data: | |
| 1179 | |
| 1180 ... ... | |
| 1181 ... ... | |
| 1182 $$$$ | |
| 1183 ... ... | |
| 1184 ... ... | |
| 1185 ... ... | |
| 1186 41 44 0 0 0 0 0 0 0 0999 V2000 | |
| 1187 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 1188 ... ... | |
| 1189 2 3 1 0 0 0 0 | |
| 1190 ... ... | |
| 1191 M END | |
| 1192 > <CmpdID> | |
| 1193 Test | |
| 1194 | |
| 1195 > <PathLengthFingerprints> | |
| 1196 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLengt | |
| 1197 h1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a49913991a66 | |
| 1198 03130b0a19e8051c89184414953800cc2151082844a201042800130860308e8204d4028 | |
| 1199 00831048940e44281c00060449a5000ac80c894114e006321264401600846c050164462 | |
| 1200 08190410805000304a10205b0100e04c0038ba0fad0209c0ca8b1200012268b61c0026a | |
| 1201 aa0660a11014a011d46 | |
| 1202 | |
| 1203 $$$$ | |
| 1204 ... ... | |
| 1205 ... ... | |
| 1206 | |
| 1207 The current release of MayaChemTools supports the following types of fingerprint | |
| 1208 bit-vector and vector strings: | |
| 1209 | |
| 1210 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi | |
| 1211 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT | |
| 1212 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X | |
| 1213 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A | |
| 1214 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2 | |
| 1215 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B... | |
| 1216 | |
| 1217 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS | |
| 1218 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2 | |
| 1219 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1 | |
| 1220 O.X1.BO2;2 4 14 3 10 1 1 1 3 2 | |
| 1221 | |
| 1222 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume | |
| 1223 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F | |
| 1224 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1 | |
| 1225 | |
| 1226 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN | |
| 1227 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C | |
| 1228 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N | |
| 1229 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8 | |
| 1230 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1 | |
| 1231 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0... | |
| 1232 | |
| 1233 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs | |
| 1234 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN | |
| 1235 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3 | |
| 1236 .024 -2.270 | |
| 1237 | |
| 1238 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues; | |
| 1239 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435 | |
| 1240 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1 | |
| 1241 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 1242 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 1243 | |
| 1244 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi | |
| 1245 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 | |
| 1246 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 | |
| 1247 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 | |
| 1248 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 | |
| 1249 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... | |
| 1250 | |
| 1251 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes | |
| 1252 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 | |
| 1253 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
| 1254 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; | |
| 1255 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 | |
| 1256 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 | |
| 1257 | |
| 1258 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
| 1259 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 | |
| 1260 0000000001010000000110000011000000000000100000000000000000000000100001 | |
| 1261 1000000110000000000000000000000000010011000000000000000000000000010000 | |
| 1262 0000000000000000000000000010000000000000000001000000000000000000000000 | |
| 1263 0000000000010000100001000000000000101000000000000000100000000000000... | |
| 1264 | |
| 1265 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu | |
| 1266 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8 | |
| 1267 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567 | |
| 1268 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012 | |
| 1269 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455 | |
| 1270 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404... | |
| 1271 | |
| 1272 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp | |
| 1273 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184 | |
| 1274 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450 | |
| 1275 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430 | |
| 1276 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134 | |
| 1277 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566... | |
| 1278 | |
| 1279 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000 | |
| 1280 0000000000000000000000000000000001001000010010000000010010000000011100 | |
| 1281 0100101010111100011011000100110110000011011110100110111111111111011111 | |
| 1282 11111111111110111000 | |
| 1283 | |
| 1284 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011 | |
| 1285 1110011111100101111111000111101100110000000000000011100010000000000000 | |
| 1286 0000000000000000000000000000000000000000000000101000000000000000000000 | |
| 1287 0000000000000000000000000000000000000000000000000000000000000000000000 | |
| 1288 0000000000000000000000000000000000000011000000000000000000000000000000 | |
| 1289 0000000000000000000000000000000000000000 | |
| 1290 | |
| 1291 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri | |
| 1292 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 1293 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 | |
| 1294 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0 | |
| 1295 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1 | |
| 1296 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1 | |
| 1297 | |
| 1298 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri | |
| 1299 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0 | |
| 1300 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0 | |
| 1301 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 1302 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0 | |
| 1303 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... | |
| 1304 | |
| 1305 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng | |
| 1306 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110 | |
| 1307 0100010101011000101001011100110001000010001001101000001001001001001000 | |
| 1308 0010110100000111001001000001001010100100100000000011000000101001011100 | |
| 1309 0010000001000101010100000100111100110111011011011000000010110111001101 | |
| 1310 0101100011000000010001000011000010100011101100001000001000100000000... | |
| 1311 | |
| 1312 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength | |
| 1313 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2 | |
| 1314 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X | |
| 1315 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1 | |
| 1316 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO | |
| 1317 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C.... | |
| 1318 | |
| 1319 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt | |
| 1320 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1 | |
| 1321 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N | |
| 1322 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1 | |
| 1323 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR | |
| 1324 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ... | |
| 1325 | |
| 1326 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD | |
| 1327 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1 | |
| 1328 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3. | |
| 1329 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...; | |
| 1330 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1 | |
| 1331 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1... | |
| 1332 | |
| 1333 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi | |
| 1334 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar | |
| 1335 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H | |
| 1336 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...; | |
| 1337 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4 | |
| 1338 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ... | |
| 1339 | |
| 1340 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3 | |
| 1341 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4- | |
| 1342 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO | |
| 1343 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...; | |
| 1344 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1 | |
| 1345 | |
| 1346 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica | |
| 1347 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC | |
| 1348 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC- | |
| 1349 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...; | |
| 1350 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2 | |
| 1351 | |
| 1352 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M | |
| 1353 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1 | |
| 1354 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1 | |
| 1355 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1 | |
| 1356 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....; | |
| 1357 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2 | |
| 1358 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8... | |
| 1359 | |
| 1360 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1 | |
| 1361 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C | |
| 1362 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3- | |
| 1363 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2 | |
| 1364 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C. | |
| 1365 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7... | |
| 1366 | |
| 1367 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min | |
| 1368 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H | |
| 1369 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2- | |
| 1370 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H | |
| 1371 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...; | |
| 1372 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10 | |
| 1373 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1 | |
| 1374 | |
| 1375 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist | |
| 1376 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0 | |
| 1377 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1 | |
| 1378 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0 | |
| 1379 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0 | |
| 1380 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18... | |
| 1381 | |
| 1382 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize: | |
| 1383 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1- | |
| 1384 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1 | |
| 1385 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1- | |
| 1386 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...; | |
| 1387 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23 | |
| 1388 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1 | |
| 1389 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ... | |
| 1390 | |
| 1391 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD | |
| 1392 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106 | |
| 1393 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0 | |
| 1394 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26 | |
| 1395 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0 | |
| 1396 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ... | |
| 1397 | |
| 1398 =head2 METHODS | |
| 1399 | |
| 1400 =over 4 | |
| 1401 | |
| 1402 =item B<new> | |
| 1403 | |
| 1404 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO(%IOParameters); | |
| 1405 | |
| 1406 Using specified I<IOParameters> names and values hash, B<new> method creates a new | |
| 1407 object and returns a reference to a newly created B<FingerprintsSDFileIO> object. By default, | |
| 1408 the following properties are initialized during I<Read> mode: | |
| 1409 | |
| 1410 Name = ''; | |
| 1411 Mode = 'Read'; | |
| 1412 Status = 0; | |
| 1413 FingerprintsStringMode = 'AutoDetect'; | |
| 1414 FingerprintsFieldLabel = 'AutoDetect'; | |
| 1415 CompoundIDMode = 'LabelPrefix'; | |
| 1416 CompoundIDFieldLabel = undef; | |
| 1417 CompoundIDPrefix = 'Cmpd'; | |
| 1418 ValidateData = 1; | |
| 1419 DetailLevel = 1; | |
| 1420 | |
| 1421 During I<Write> mode, the following properties get initialized by default: | |
| 1422 | |
| 1423 FingerprintsStringMode = undef; | |
| 1424 | |
| 1425 BitStringFormat = HexadecimalString; | |
| 1426 BitsOrder = Ascending; | |
| 1427 | |
| 1428 VectorStringFormat = NumericalValuesString or ValuesString; | |
| 1429 | |
| 1430 Examples: | |
| 1431 | |
| 1432 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO( | |
| 1433 'Name' => 'Sample.sdf', | |
| 1434 'Mode' => 'Read'); | |
| 1435 | |
| 1436 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO( | |
| 1437 'Name' => 'Sample.sdf', | |
| 1438 'Mode' => 'Read', | |
| 1439 'FingerprintsStringMode' => | |
| 1440 'AutoDetect', | |
| 1441 'FingerprintsFieldLabel' => | |
| 1442 'Fingerprints', | |
| 1443 'CompoundIDMode' => | |
| 1444 'DataField', | |
| 1445 'CompoundIDFieldLabel' => | |
| 1446 'CompoundID'); | |
| 1447 | |
| 1448 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO( | |
| 1449 'Name' => 'Sample.sdf', | |
| 1450 'Mode' => 'Write', | |
| 1451 'FingerprintsStringMode' => | |
| 1452 'FingerprintsBitVectorString', | |
| 1453 'Overwrite' => 1, | |
| 1454 'BitStringFormat' => 'HexadecimalString', | |
| 1455 'BitsOrder' => 'Ascending'); | |
| 1456 | |
| 1457 $NewFingerprintsSDFileIO = new FileIO::FingerprintsSDFileIO( | |
| 1458 'Name' => 'Sample.sd', | |
| 1459 'Mode' => 'Write', | |
| 1460 'FingerprintsStringMode' => | |
| 1461 'FingerprintsVectorString', | |
| 1462 'Overwrite' => 1, | |
| 1463 'VectorStringFormat' => 'IDsAndValuesString', | |
| 1464 'FingerprintsFieldLabel' => 'Fingerprints'); | |
| 1465 | |
| 1466 =item B<GetCompoundString> | |
| 1467 | |
| 1468 $CompoundString = $FingerprintsSDFileIO->GetCompoundString(); | |
| 1469 | |
| 1470 Returns B<CompoundString> for current compound. | |
| 1471 | |
| 1472 =item B<GetFingerprints> | |
| 1473 | |
| 1474 $FingerprintsObject = $FingerprintsSDFileIO->GetFingerprints(); | |
| 1475 | |
| 1476 Returns B<FingerprintsObject> generated for current compound using fingerprints bit-vector | |
| 1477 or vector string data. The fingerprints object corresponds to any of the supported fingerprints | |
| 1478 such as PathLengthFingerprints, ExtendedConnectivity, and so on. | |
| 1479 | |
| 1480 =item B<GetFingerprintsString> | |
| 1481 | |
| 1482 $FingerprintsString = $FingerprintsSDFileIO->GetFingerprintsString(); | |
| 1483 | |
| 1484 Returns B<FingerprintsString> for current compound. | |
| 1485 | |
| 1486 =item B<IsFingerprintsDataValid> | |
| 1487 | |
| 1488 $Status = $FingerprintsSDFileIO->IsFingerprintsDataValid(); | |
| 1489 | |
| 1490 Returns 1 or 0 based on whether B<FingerprintsObject> is valid. | |
| 1491 | |
| 1492 =item B<IsFingerprintsFileDataValid> | |
| 1493 | |
| 1494 $Status = $FingerprintsSDFileIO->IsFingerprintsFileDataValid(); | |
| 1495 | |
| 1496 Returns 1 or 0 based on whether fingerprints file contains valid fingerprints data. | |
| 1497 | |
| 1498 =item B<IsFingerprintsSDFile> | |
| 1499 | |
| 1500 $Status = $FingerprintsSDFileIO->IsFingerprintsSDFile($FileName); | |
| 1501 $Status = FileIO::FingerprintsSDFileIO::IsFingerprintsSDFile($FileName); | |
| 1502 | |
| 1503 Returns 1 or 0 based on whether I<FileName> is a SD file. | |
| 1504 | |
| 1505 =item B<Next or Read> | |
| 1506 | |
| 1507 $FingerprintsSDFileIO = $FingerprintsSDFileIO->Next(); | |
| 1508 $FingerprintsSDFileIO = $FingerprintsSDFileIO->Read(); | |
| 1509 | |
| 1510 Reads next available compound fingerprints in SD file, processes the data, generates appropriate | |
| 1511 fingerprints object, and returns B<FingerprintsSDFileIO>. The generated fingerprints object is available | |
| 1512 using method B<GetFingerprints>. | |
| 1513 | |
| 1514 =item B<SetBitStringFormat> | |
| 1515 | |
| 1516 $FingerprintsSDFileIO->SetBitStringFormat($Format); | |
| 1517 | |
| 1518 Sets bit string I<Format> for fingerprints bit-vector string data in a SD file and returns B<FingerprintsSDFileIO>. | |
| 1519 Possible values for B<BitStringFormat>: I<BinaryString or HexadecimalString>. | |
| 1520 | |
| 1521 =item B<SetBitsOrder> | |
| 1522 | |
| 1523 $FingerprintsSDFileIO->SetBitsOrder($BitsOrder); | |
| 1524 | |
| 1525 Sets I<BitsOrder> for fingerprints bit-vector string data in SD file and returns B<FingerprintsSDFileIO>. | |
| 1526 Possible values for B<BitsOrder>: I<Ascending or Descending>. | |
| 1527 | |
| 1528 =item B<SetCompoundIDMode> | |
| 1529 | |
| 1530 $FingerprintsSDFileIO->SetCompoundIDMode($Mode); | |
| 1531 | |
| 1532 Sets compound ID I<Mode> for fingerprints data in a SD file and returns B<FingerprintsSDFileIO>. | |
| 1533 Possible values for B<CompoundIDMode>: I<DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix>. | |
| 1534 | |
| 1535 =item B<SetCompoundString> | |
| 1536 | |
| 1537 $FingerprintsSDFileIO->SetCompoundString($CompoundString); | |
| 1538 | |
| 1539 Sets I<CompoundString> and returns B<FingerprintsSDFileIO>. | |
| 1540 | |
| 1541 =item B<SetDetailLevel> | |
| 1542 | |
| 1543 $FingerprintsSDFileIO->SetDetailLevel($Level); | |
| 1544 | |
| 1545 Sets detail I<Level> for generating diagnostics messages during SD file processing and returns | |
| 1546 B<FingerprintsSDFileIO>. Possible values: I<Positive integers>. | |
| 1547 | |
| 1548 =item B<SetFingerprints> | |
| 1549 | |
| 1550 $FingerprintsSDFileIO->SetFingerprints($FingerprintsObject); | |
| 1551 | |
| 1552 Sets I<FingerprintsObject> for current compound and returns B<FingerprintsSDFileIO>. | |
| 1553 | |
| 1554 =item B<SetFingerprintsString> | |
| 1555 | |
| 1556 $FingerprintsSDFileIO->SetFingerprintsString($FingerprintsString); | |
| 1557 | |
| 1558 Sets I<FingerprintsString> for current compound and returns B<FingerprintsSDFileIO>. | |
| 1559 | |
| 1560 =item B<SetFingerprintsStringMode> | |
| 1561 | |
| 1562 $FingerprintsSDFileIO->SetFingerprintsStringMode($Mode); | |
| 1563 | |
| 1564 Sets I<FingerprintsStringMode> for SD file and returns B<FingerprintsSDFileIO>. | |
| 1565 Possible values: I<AutoDetect, FingerprintsBitVectorString, or FingerprintsVectorString>. | |
| 1566 | |
| 1567 =item B<SetVectorStringFormat> | |
| 1568 | |
| 1569 $FingerprintsSDFileIO->SetVectorStringFormat($Format); | |
| 1570 | |
| 1571 Sets I<VectorStringFormat> for SD file and returns B<FingerprintsSDFileIO>. Possible values: | |
| 1572 I<IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString>. | |
| 1573 | |
| 1574 =item B<WriteFingerprints> | |
| 1575 | |
| 1576 $FingerprintsSDFileIO->WriteFingerprints($FingerprintsObject, | |
| 1577 $CompoundID); | |
| 1578 | |
| 1579 Writes fingerprints string generated from I<FingerprintsObject> object and other data including | |
| 1580 I<CompoundID> to SD file and returns B<FingerprintsSDFileIO>. | |
| 1581 | |
| 1582 =item B<WriteFingerprintsString> | |
| 1583 | |
| 1584 $FingerprintsSDFileIO->WriteFingerprintsString($FingerprintsString, | |
| 1585 $CompoundID); | |
| 1586 | |
| 1587 Writes I<FingerprintsString> and other data including I<CompoundID> to SD file and returns | |
| 1588 B<FingerprintsSDFileIO>. | |
| 1589 | |
| 1590 Caveats: | |
| 1591 | |
| 1592 o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat | |
| 1593 values are ignored during writing of fingerprints; the fingerprints string is written to the file | |
| 1594 as it is. | |
| 1595 o CompoundString is not checked to remove any existing fingerprints data | |
| 1596 | |
| 1597 | |
| 1598 =back | |
| 1599 | |
| 1600 =head1 AUTHOR | |
| 1601 | |
| 1602 Manish Sud <msud@san.rr.com> | |
| 1603 | |
| 1604 =head1 SEE ALSO | |
| 1605 | |
| 1606 FingerprintsTextFileIO.pm, FingerprintsFPFileIO.pm, SDFileIO.pm | |
| 1607 | |
| 1608 =head1 COPYRIGHT | |
| 1609 | |
| 1610 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 1611 | |
| 1612 This file is part of MayaChemTools. | |
| 1613 | |
| 1614 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 1615 the terms of the GNU Lesser General Public License as published by the Free | |
| 1616 Software Foundation; either version 3 of the License, or (at your option) | |
| 1617 any later version. | |
| 1618 | |
| 1619 =cut |
