package Fingerprints::FingerprintsStringUtil;
#
# $RCSfile: FingerprintsStringUtil.pm,v $
# $Date: 2015/02/28 20:48:54 $
# $Revision: 1.24 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Exporter;
use Carp;
use TextUtil ();
use Fingerprints::FingerprintsBitVector;
use Fingerprints::FingerprintsVector;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Nothing is exported by default; every public function is available on
# request, individually or all at once through the ":all" tag.
@ISA = qw(Exporter);
@EXPORT = ();
@EXPORT_OK = qw(
  AreFingerprintsStringValuesValid
  GenerateFingerprintsString
  GenerateFingerprintsBitVectorString
  GenerateFingerprintsVectorString
  GetFingerprintsStringTypeAndDescription
  GetDefaultBitsOrder
  GetDefaultBitStringFormat
  GetDefaultVectorStringFormat
  GetFingeprintsStringDelimiter
  GetFingerprintsStringValues
  ParseFingerprintsString
  ParseFingerprintsBitVectorString
  ParseFingerprintsVectorString
);

%EXPORT_TAGS = (all => [ @EXPORT, @EXPORT_OK ]);

# Fingerprint string values delimiter...
my $FPStringDelim = ';';

# Generate fingerprints string...
#
# Asks the fingerprints object for its vector type and forwards all of the
# caller's arguments to the matching generator. Croaks when the vector type is
# neither FingerprintsBitVector nor FingerprintsVector.
#
sub GenerateFingerprintsString {
  my($FingerprintsObject) = @_;

  my $VectorType = $FingerprintsObject->GetVectorType();

  if ($VectorType =~ /^FingerprintsBitVector$/i) {
    return GenerateFingerprintsBitVectorString(@_);
  }
  elsif ($VectorType =~ /^FingerprintsVector$/i) {
    return GenerateFingerprintsVectorString(@_);
  }

  croak "Error: FingerprintsStringUtil::GenerateFingerprintsString: Fingerprints object vector type, $VectorType, is not supported. Valid values: FingerprintsBitVector or FingerprintsVector...";
}

# Generate fingerprints bit vector string...
#
# The result is the delimiter-joined sequence: vector type, description,
# specified size, bit string format, bits order and the bit string itself.
# Bit string format and bits order fall back to the module defaults when the
# caller leaves them out.
#
sub GenerateFingerprintsBitVectorString {
  my($FingerprintsObject, $BitStringFormat, $BitsOrder) = @_;

  $BitStringFormat ||= GetDefaultBitStringFormat();
  $BitsOrder ||= GetDefaultBitsOrder();

  # The caller may hand in either a bit vector or an object holding one...
  my $BitVector = Fingerprints::FingerprintsBitVector::IsFingerprintsBitVector($FingerprintsObject)
    ? $FingerprintsObject
    : $FingerprintsObject->GetFingerprintsBitVector();

  # Use specified size instead of size: it corresponds to actual size of the fingerprints bit vector;
  # size reflects actual internal size including any padding.
  #
  my @HeaderValues = (
    $FingerprintsObject->GetVectorType(),
    _GetFingerprintsDescription($FingerprintsObject),
    $BitVector->GetSpecifiedSize(),
    $BitStringFormat,
    $BitsOrder,
  );

  my $BitString = _GetFingerprintBitVectorString($BitVector, $BitStringFormat, $BitsOrder);

  return join("${FPStringDelim}", @HeaderValues) . "${FPStringDelim}" . $BitString;
}

# Get fingerprint bit vector string...
#
# Returns the bits of the specified bit vector as a binary or hexadecimal
# string in the requested bits order. Missing format or order fall back to the
# module defaults; a false bit vector yields an empty string. Croaks for an
# unsupported bit string format.
#
sub _GetFingerprintBitVectorString {
  my($FingerprintsBitVector, $BitStringFormat, $BitsOrder) = @_;

  if (!$BitStringFormat) { $BitStringFormat = GetDefaultBitStringFormat(); }
  if (!$BitsOrder) { $BitsOrder = GetDefaultBitsOrder(); }

  if (!$FingerprintsBitVector) { return ''; }

  if ($BitStringFormat =~ /^(BinaryString|Binary|Bin)$/i) {
    return $FingerprintsBitVector->GetBitsAsBinaryString($BitsOrder);
  }
  if ($BitStringFormat =~ /^(HexadecimalString|Hexadecimal|Hex)$/i) {
    return $FingerprintsBitVector->GetBitsAsHexadecimalString($BitsOrder);
  }

  # The message names this function and spells the accepted values correctly...
  croak "Error: FingerprintsStringUtil::_GetFingerprintBitVectorString: Specified bit vector string format, $BitStringFormat, is not supported. Valid values: Binary, Bin, BinaryString, Hexadecimal, Hex, HexadecimalString...";
}

# Generate fingerprints vector string...
#
# The result is the delimiter-joined sequence: vector type, description,
# number of values, vector values type, vector string format and the vector
# string itself. The vector string format defaults to the one derived from
# the type of the fingerprints vector.
#
sub GenerateFingerprintsVectorString {
  my($FingerprintsObject, $VectorStringFormat) = @_;
  my($FingerprintsVector, @FingerprintsStringValues);

  # The caller may hand in either a vector or an object holding one...
  $FingerprintsVector = Fingerprints::FingerprintsVector::IsFingerprintsVector($FingerprintsObject) ? $FingerprintsObject : $FingerprintsObject->GetFingerprintsVector();

  if (!$VectorStringFormat) { $VectorStringFormat = _GetDefaultVectorStringFormat($FingerprintsVector); }

  @FingerprintsStringValues = (
    $FingerprintsObject->GetVectorType(),
    _GetFingerprintsDescription($FingerprintsObject),
    $FingerprintsVector->GetNumOfValues(),
    $FingerprintsVector->GetType(),
    $VectorStringFormat,
  );

  return join("${FPStringDelim}", @FingerprintsStringValues) . "${FPStringDelim}" . _GetFingerprintVectorString($FingerprintsVector, $VectorStringFormat);
}

# Get fingerprint vector string...
#
# Returns the values of the specified vector in the requested vector string
# format. A false vector yields an empty string. Croaks for an unsupported
# vector string format.
#
sub _GetFingerprintVectorString {
  my($FingerprintsVector, $VectorStringFormat) = @_;

  # Check for a missing vector first: working out the default format calls a
  # method on the vector and would die on an undefined value...
  if (!$FingerprintsVector) { return ''; }

  if (!$VectorStringFormat) { $VectorStringFormat = _GetDefaultVectorStringFormat($FingerprintsVector); }

  if ($VectorStringFormat =~ /^(IDsAndValuesString|IDsAndValues)$/i) {
    return $FingerprintsVector->GetIDsAndValuesString();
  }
  if ($VectorStringFormat =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) {
    return $FingerprintsVector->GetIDsAndValuesPairsString();
  }
  if ($VectorStringFormat =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) {
    return $FingerprintsVector->GetValuesAndIDsString();
  }
  if ($VectorStringFormat =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) {
    return $FingerprintsVector->GetValuesAndIDsPairsString();
  }
  if ($VectorStringFormat =~ /^(ValuesString|Values)$/i) {
    return $FingerprintsVector->GetValuesString();
  }

  croak "Error: FingerprintsStringUtil::_GetFingerprintVectorString: Specified vector string format, $VectorStringFormat, is not supported. Valid values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValuesString, Values...";
}

# Get fingerprints string type and description...
#
# Returns the first two delimiter-separated values of the fingerprints
# string: the vector type and the description.
#
sub GetFingerprintsStringTypeAndDescription {
  my($FingerprintsString) = @_;
  my($Type, $Description);

  ($Type, $Description) = _ParseFingerprintsStringValues($FingerprintsString);

  return ($Type, $Description);
}

# Get all fingerprints string values...
#
# Returns the list of delimiter-separated values found in the fingerprints
# string.
#
sub GetFingerprintsStringValues {
  my($FingerprintsString) = @_;

  return _ParseFingerprintsStringValues($FingerprintsString);
}

# Parse fingerprints string and return FingerprintsBitVector or FingerprintsVector object...
#
# The parser is chosen from the prefix of the string; all of the caller's
# arguments are forwarded to it. Croaks when the string starts with neither
# FingerprintsBitVector nor FingerprintsVector.
#
sub ParseFingerprintsString {
  my($FingerprintsString) = @_;

  if ($FingerprintsString =~ /^FingerprintsBitVector/i) {
    return ParseFingerprintsBitVectorString(@_);
  }
  if ($FingerprintsString =~ /^FingerprintsVector/i) {
    return ParseFingerprintsVectorString(@_);
  }

  croak "Error: FingerprintsStringUtil::ParseFingerprintsString: Fingerprints string vector type is not supported. Valid values: FingerprintsBitVector or FingerprintsVector...";
}

# Parse fingerprints bit vector string and return bit vector...
#
# When $ValidateValues is true, the vector type, size, bit string format, bits
# order and bit string are checked for empty values before the bit vector is
# generated; the description is not checked.
#
sub ParseFingerprintsBitVectorString {
  my($FingerprintsString, $ValidateValues) = @_;
  my($ErrorMsgPrefix, $VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);

  # Validation failures are reported under this function's own name...
  $ErrorMsgPrefix = "Error: ParseFingerprintsBitVectorString";
  ($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString) = _ParseFingerprintsStringValues($FingerprintsString);
  if ($ValidateValues) {
    _ValidateFingerprintsStringValues($ErrorMsgPrefix, $VectorType, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
  }

  return _GenerateFingerprintBitVector($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
}

# Generate fingerprints bit vector...
#
# Creates a fingerprints bit vector from already parsed fingerprints string
# values: the bit string is decoded as binary or hexadecimal according to
# $BitStringFormat, then vector type, description and specified size are set
# on the new object. Croaks for an unsupported bit string format.
#
sub _GenerateFingerprintBitVector {
  my($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString) = @_;
  my($FingerprintsBitVector);

  $FingerprintsBitVector = undef;

  if ($BitStringFormat =~ /^(BinaryString|Binary|Bin)$/i) {
    $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::NewFromBinaryString($BitVectorString, $BitsOrder);
  }
  elsif ($BitStringFormat =~ /^(HexadecimalString|Hexadecimal|Hex)$/i) {
    $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::NewFromHexadecimalString($BitVectorString, $BitsOrder);
  }
  else {
    croak "Error: FingerprintsStringUtil::_GenerateFingerprintBitVector: Specified bit vector string format, $BitStringFormat, is not supported. Valid values: Binary, Bin, BinaryString, Hexadecimal, Hex, HexadecimalString...";
  }

  if (defined $FingerprintsBitVector) {
    # Set fingerprints vector type and description...
    $FingerprintsBitVector->SetVectorType($VectorType);
    $FingerprintsBitVector->SetDescription($Description);

    # Set specified size which might be different from the bit string size due to padding
    # used by Perl vec function to handle bit vectors in BitVector class...
    #
    $FingerprintsBitVector->SetSpecifiedSize($Size);
  }

  return $FingerprintsBitVector;
}

# Parse fingerprints vector string and return vector...
#
# When $ValidateValues is true, the vector type, number of values, vector
# values type and vector string format are checked for empty values before
# the vector is generated; the description is not checked.
#
sub ParseFingerprintsVectorString {
  my($FingerprintsString, $ValidateValues) = @_;
  my($ErrorMsgPrefix, $VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2);

  $ErrorMsgPrefix = "Error: ParseFingerprintsVectorString";
  ($VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2) = _ParseFingerprintsStringValues($FingerprintsString);

  # No need to check $VectorString1 and $VectorString2 values as they would be
  # checked later during the creation of FingerprintsVector...
  #
  if ($ValidateValues) {
    _ValidateFingerprintsStringValues($ErrorMsgPrefix, $VectorType, $NumOfValues, $VectorValuesType, $VectorStringFormat);
  }

  return _GenerateFingerprintVector($VectorType, $Description, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2);
}

# Generate fingerprints vector...
#
# Creates a fingerprints vector from already parsed fingerprints string
# values using the constructor matching $VectorStringFormat, then sets vector
# type and description on the new object. Croaks for an unsupported vector
# string format.
#
sub _GenerateFingerprintVector {
  my($VectorType, $Description, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2) = @_;
  my($FingerprintsVector, $VectorString);

  # A second vector string piece is joined back onto the first with ';'
  # before the combined string is handed to the vector constructor...
  $VectorString = TextUtil::IsEmpty($VectorString2) ? $VectorString1 : "${VectorString1};${VectorString2}";
  $FingerprintsVector = undef;

  if ($VectorStringFormat =~ /^(ValuesString|Values)$/i) {
    $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesString($VectorValuesType, $VectorString);
  }
  elsif ($VectorStringFormat =~ /^(IDsAndValuesString|IDsAndValues)$/i) {
    $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesString($VectorValuesType, $VectorString);
  }
  elsif ($VectorStringFormat =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) {
    $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesPairsString($VectorValuesType, $VectorString);
  }
  elsif ($VectorStringFormat =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) {
    $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsString($VectorValuesType, $VectorString);
  }
  elsif ($VectorStringFormat =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) {
    $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsPairsString($VectorValuesType, $VectorString);
  }
  else {
    croak "Error: FingerprintsStringUtil::_GenerateFingerprintVector: Specified vector string format, $VectorStringFormat, is not supported. Valid values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValuesString, Values...";
  }

  if (defined $FingerprintsVector) {
    # Set fingerprints vector type and description...
    $FingerprintsVector->SetVectorType($VectorType);
    $FingerprintsVector->SetDescription($Description);
  }

  return $FingerprintsVector;
}

# Validate fingerprint string values...
#
# Returns 1 when the fingerprints string splits into at least one value and
# none of the values is empty; 0 otherwise.
#
sub AreFingerprintsStringValuesValid {
  my($FingerprintsString) = @_;
  my($Value, @Values);

  @Values = _ParseFingerprintsStringValues($FingerprintsString);

  # An undefined, empty or delimiters-only string splits into no values at
  # all; without this check the loop below never runs and such a string would
  # be reported as valid...
  if (!@Values) {
    return 0;
  }

  for $Value (@Values) {
    if (TextUtil::IsEmpty($Value)) {
      return 0;
    }
  }
  return 1;
}

# Get fingerprints description...
#
# Returns the description reported by the fingerprints object, or a fixed
# placeholder text when that description is empty.
#
sub _GetFingerprintsDescription {
  my($FingerprintsObject) = @_;
  my($Description);

  $Description = $FingerprintsObject->GetDescription();

  return TextUtil::IsEmpty($Description) ? 'No description available for fingerprints' : $Description;
}

# Parse fingerprints string values...
#
# Splits the fingerprints string on the fingerprints string delimiter and
# returns the resulting list. As with any Perl split without a limit,
# trailing empty values are not returned.
#
sub _ParseFingerprintsStringValues {
  my($FingerprintsString) = @_;

  # Quote the delimiter so that it is always matched literally...
  return split /\Q${FPStringDelim}\E/, $FingerprintsString;
}

# Check to make sure already parsed fingerprints string values are valid....
#
# Croaks, using $ErrorMsgPrefix at the start of the message, as soon as an
# empty value is found.
#
sub _ValidateFingerprintsStringValues {
  my($ErrorMsgPrefix, @Values) = @_;
  my($Value);

  for $Value (@Values) {
    if (TextUtil::IsEmpty($Value)) {
      croak("${ErrorMsgPrefix}: _ValidateFingerprintsStringValues: Fingerprints string format is not valid: An empty value found...");
    }
  }
}

# Default bit string format...
#
sub GetDefaultBitStringFormat {
  return 'HexadecimalString';
}

# Default bit order...
#
sub GetDefaultBitsOrder {
  return 'Ascending';
}

# Default vector string format using fingerprints or fingerprints vector object...
#
# Accepts either a fingerprints vector or an object holding one; the default
# format is derived from the type of the vector.
#
sub GetDefaultVectorStringFormat {
  my($FingerprintsObject) = @_;
  my($FingerprintsVector);

  $FingerprintsVector = Fingerprints::FingerprintsVector::IsFingerprintsVector($FingerprintsObject) ? $FingerprintsObject : $FingerprintsObject->GetFingerprintsVector();

  return _GetDefaultVectorStringFormat($FingerprintsVector);
}

# Default vector string format using fingerprints vector object...
#
# A vector reporting the type NumericalValues (any letter case) defaults to
# IDsAndValuesString; every other type defaults to ValuesString.
#
sub _GetDefaultVectorStringFormat {
  my($FingerprintsVector) = @_;

  if ($FingerprintsVector->GetType() =~ /^NumericalValues$/i) {
    return 'IDsAndValuesString';
  }

  return 'ValuesString';
}

# Fingerprints string delimiter...
#
# Returns the delimiter placed between the values of a fingerprints string.
# The spelling of the function name is part of the exported interface.
#
sub GetFingeprintsStringDelimiter {
  return $FPStringDelim;
}