MayaChemTools

   1 package Fingerprints::FingerprintsStringUtil;
   2 #
   3 # $RCSfile: FingerprintsStringUtil.pm,v $
   4 # $Date: 2015/02/28 20:48:54 $
   5 # $Revision: 1.24 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Exporter;
  31 use Carp;
  32 use TextUtil ();
  33 use Fingerprints::FingerprintsBitVector;
  34 use Fingerprints::FingerprintsVector;
  35 
  36 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  37 
  38 @ISA = qw(Exporter);
  39 @EXPORT = qw();
  40 @EXPORT_OK = qw(AreFingerprintsStringValuesValid GenerateFingerprintsString GenerateFingerprintsBitVectorString GenerateFingerprintsVectorString GetFingerprintsStringTypeAndDescription GetDefaultBitsOrder GetDefaultBitStringFormat GetDefaultVectorStringFormat GetFingeprintsStringDelimiter GetFingerprintsStringValues ParseFingerprintsString ParseFingerprintsBitVectorString ParseFingerprintsVectorString);
  41 
  42 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  43 
  44 # Fingerprint string values delimiter...
  45 my($FPStringDelim) = ';';
  46 
  47 # Generate fingerprints string...
  48 #
  49 sub GenerateFingerprintsString {
  50   my($FingerprintsObject) = @_;
  51   my($VectorType);
  52 
  53   $VectorType = $FingerprintsObject->GetVectorType();
  54 
  55   VECTORTYPE : {
  56     if ($VectorType =~ /^FingerprintsBitVector$/i) { return GenerateFingerprintsBitVectorString(@_); last VECTORTYPE; }
  57     if ($VectorType =~ /^FingerprintsVector$/i) { return GenerateFingerprintsVectorString(@_); last VECTORTYPE; }
  58     croak "Error: FingerprintsStringUtil::GenerateFingerprintsString: Fingerprints object vector type, $VectorType, is not supported. Valid values: FingerprintsBitVector or FingerprintsVector...";
  59   }
  60   return '';
  61 }
  62 
  63 # Generate fingerprints bit vector string...
  64 #
  65 sub GenerateFingerprintsBitVectorString {
  66   my($FingerprintsObject, $BitStringFormat, $BitsOrder) = @_;
  67   my($FingerprintsString, $FingerprintsBitVector, @FingerprintsStringValues);
  68 
  69   if (!$BitStringFormat) { $BitStringFormat = GetDefaultBitStringFormat(); }
  70   if (!$BitsOrder) {$BitsOrder = GetDefaultBitsOrder(); }
  71 
  72   $FingerprintsString = '';
  73   $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::IsFingerprintsBitVector($FingerprintsObject) ? $FingerprintsObject : $FingerprintsObject->GetFingerprintsBitVector();
  74 
  75   # Use specified size instead of size: it corresponds to actual size of the fingerprints bit vector;
  76   # size reflects actual internal size including any padding.
  77   #
  78 
  79   @FingerprintsStringValues = ();
  80   push @FingerprintsStringValues, ($FingerprintsObject->GetVectorType(), _GetFingerprintsDescription($FingerprintsObject), $FingerprintsBitVector->GetSpecifiedSize(), $BitStringFormat, $BitsOrder);
  81 
  82   $FingerprintsString = join("${FPStringDelim}",  @FingerprintsStringValues) . "${FPStringDelim}" . _GetFingerprintBitVectorString($FingerprintsBitVector, $BitStringFormat, $BitsOrder);
  83 
  84   return $FingerprintsString;
  85 }
  86 
  87 # Get fingerprint bit vector string...
  88 #
  89 sub _GetFingerprintBitVectorString {
  90   my($FingerprintsBitVector, $BitStringFormat, $BitsOrder) = @_;
  91   my($FingerprintBitString);
  92 
  93   if (!$BitStringFormat) { $BitStringFormat = GetDefaultBitStringFormat(); }
  94   if (!$BitsOrder) {$BitsOrder = GetDefaultBitsOrder(); }
  95 
  96   $FingerprintBitString = '';
  97   if (!$FingerprintsBitVector) {return $FingerprintBitString;}
  98 
  99   BITSTRINGFORMAT : {
 100     if ($BitStringFormat =~ /^(BinaryString|Binary|Bin)$/i) { return $FingerprintsBitVector->GetBitsAsBinaryString($BitsOrder); last BITSTRINGFORMAT; }
 101     if ($BitStringFormat =~ /^(HexadecimalString|Hexadecimal|Hex)$/i) { return $FingerprintsBitVector->GetBitsAsHexadecimalString($BitsOrder); last BITSTRINGFORMAT; }
 102     croak "Error: FingerprintsStringUtil::_GetFingerprintBitsAsString: Specified bit vector string format, $BitStringFormat, is not supported. Value values: Binary, Bin, BinaryString, Hexdecimal, Hex, HexadecimalString...";
 103   }
 104   return $FingerprintBitString;
 105 }
 106 
 107 # Generate fingerprints vector string...
 108 #
 109 sub GenerateFingerprintsVectorString {
 110   my($FingerprintsObject, $VectorStringFormat) = @_;
 111   my($FingerprintsString, $FingerprintsVector, @FingerprintsStringValues);
 112 
 113   $FingerprintsString = '';
 114   $FingerprintsVector = Fingerprints::FingerprintsVector::IsFingerprintsVector($FingerprintsObject) ? $FingerprintsObject : $FingerprintsObject->GetFingerprintsVector();
 115 
 116   if (!$VectorStringFormat) { $VectorStringFormat = _GetDefaultVectorStringFormat($FingerprintsVector); }
 117 
 118   @FingerprintsStringValues = ();
 119   push @FingerprintsStringValues, ($FingerprintsObject->GetVectorType(), _GetFingerprintsDescription($FingerprintsObject), $FingerprintsVector->GetNumOfValues(), $FingerprintsVector->GetType(), $VectorStringFormat);
 120 
 121   $FingerprintsString = join("${FPStringDelim}",  @FingerprintsStringValues) . "${FPStringDelim}" . _GetFingerprintVectorString($FingerprintsVector, $VectorStringFormat);
 122 
 123   return $FingerprintsString;
 124 }
 125 
 126 # Get fingerprint vector string...
 127 #
 128 sub _GetFingerprintVectorString {
 129   my($FingerprintsVector, $VectorStringFormat) = @_;
 130   my($FingerprintString);
 131 
 132   if (!$VectorStringFormat) { $VectorStringFormat = _GetDefaultVectorStringFormat($FingerprintsVector);}
 133 
 134   $FingerprintString = '';
 135   if (!$FingerprintsVector) {return $FingerprintString;}
 136 
 137   VECTORSTRINGFORMAT : {
 138     if ($VectorStringFormat =~ /^(IDsAndValuesString|IDsAndValues)$/i) { return $FingerprintsVector->GetIDsAndValuesString(); last VECTORSTRINGFORMAT; }
 139     if ($VectorStringFormat =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) { return $FingerprintsVector->GetIDsAndValuesPairsString(); last VECTORSTRINGFORMAT; }
 140     if ($VectorStringFormat =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) { return $FingerprintsVector->GetValuesAndIDsString(); last VECTORSTRINGFORMAT; }
 141     if ($VectorStringFormat =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) { return $FingerprintsVector->GetValuesAndIDsPairsString(); last VECTORSTRINGFORMAT; }
 142     if ($VectorStringFormat =~ /^(ValuesString|Values)$/i) { return $FingerprintsVector->GetValuesString(); last VECTORSTRINGFORMAT; }
 143     croak "Error: FingerprintsStringUtil::_GetFingerprintVectorString: Specified vector string format, $VectorStringFormat, is not supported. Value values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValuesString, Values...";
 144   }
 145   return $FingerprintString;
 146 }
 147 
 148 # Get fingerprints string type and description...
 149 sub GetFingerprintsStringTypeAndDescription {
 150   my($FingerprintsString) = @_;
 151   my($Type, $Description);
 152 
 153   ($Type, $Description) = _ParseFingerprintsStringValues($FingerprintsString);
 154 
 155   return ($Type, $Description);
 156 }
 157 
 158 # Get all fingerprints string values...
 159 sub GetFingerprintsStringValues {
 160   my($FingerprintsString) = @_;
 161 
 162   return _ParseFingerprintsStringValues($FingerprintsString);
 163 }
 164 
 165 # Parse fingerprints string and return FingerprintsBitVector or FingerprintsVector object...
 166 #
 167 sub ParseFingerprintsString {
 168   my($FingerprintsString) = @_;
 169 
 170   VECTORTYPE : {
 171     if ($FingerprintsString =~ /^FingerprintsBitVector/i) { return ParseFingerprintsBitVectorString(@_); last VECTORTYPE; }
 172     if ($FingerprintsString =~ /^FingerprintsVector/i) { return ParseFingerprintsVectorString(@_); last VECTORTYPE; }
 173     croak "Error: FingerprintsStringUtil::ParseFingerprintsString: Fingerprints string vector type is not supported. Valid values: FingerprintsBitVector or FingerprintsVector...";
 174   }
 175   return undef;
 176 }
 177 
 178 # Parse fingerprints bit vector string and retrun bit vector...
 179 #
 180 sub ParseFingerprintsBitVectorString {
 181   my($FingerprintsString, $ValidateValues) = @_;
 182   my($ErrorMsgPrefix, $VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
 183 
 184   $ErrorMsgPrefix = "Error: ParsePathLengthFingerprintsBitVectorString";
 185   ($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString) = _ParseFingerprintsStringValues($FingerprintsString);
 186   if ($ValidateValues) {
 187     _ValidateFingerprintsStringValues($ErrorMsgPrefix, $VectorType, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
 188   }
 189 
 190   return _GenerateFingerprintBitVector($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
 191 }
 192 
 193 # Generate fingerints bit vector...
 194 #
 195 sub _GenerateFingerprintBitVector {
 196   my($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString) = @_;
 197   my($FingerprintsBitVector);
 198 
 199   $FingerprintsBitVector = undef;
 200 
 201   BITSTRINGFORMAT : {
 202     if ($BitStringFormat =~ /^(BinaryString|Binary|Bin)$/i) {
 203       $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::NewFromBinaryString($BitVectorString, $BitsOrder);
 204       last BITSTRINGFORMAT;
 205     }
 206     if ($BitStringFormat =~ /^(HexadecimalString|Hexadecimal|Hex)$/i) {
 207       $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::NewFromHexadecimalString($BitVectorString, $BitsOrder);
 208       last BITSTRINGFORMAT;
 209     }
 210     croak "Error: FingerprintsStringUtil::_GenerateFingerprintBitVector: Specified bit vector string format, $BitStringFormat, is not supported. Value values: Binary, Bin, BinaryString, Hexdecimal, Hex, HexadecimalString...";
 211   }
 212 
 213   if (defined $FingerprintsBitVector) {
 214     # Set fingerints vector type and description...
 215     $FingerprintsBitVector->SetVectorType($VectorType);
 216     $FingerprintsBitVector->SetDescription($Description);
 217 
 218     # Set specified size which might be different from the bit string size due to padding
 219     # used by Perl vec function to handle bit vectors in BitVectot class...
 220     #
 221     $FingerprintsBitVector->SetSpecifiedSize($Size);
 222   }
 223 
 224   return $FingerprintsBitVector;
 225 }
 226 
 227 # Parse fingerprints vector string and retrun vector...
 228 #
 229 sub ParseFingerprintsVectorString {
 230   my($FingerprintsString, $ValidateValues) = @_;
 231   my($ErrorMsgPrefix, $VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2);
 232 
 233   $ErrorMsgPrefix = "Error: ParseFingerprintsVectorString";
 234   ($VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2) = _ParseFingerprintsStringValues($FingerprintsString);
 235 
 236   # No need to check $VectorString1 and $VectorString2 values as they would be
 237   # checked later during the creation of FingerprintsVector...
 238   #
 239   if ($ValidateValues) {
 240     _ValidateFingerprintsStringValues($ErrorMsgPrefix, $VectorType, $NumOfValues, $VectorValuesType, $VectorStringFormat);
 241   }
 242 
 243   return _GenerateFingerprintVector($VectorType, $Description, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2);
 244 }
 245 
 246 # Generate fingerints vector...
 247 #
 248 sub _GenerateFingerprintVector {
 249   my($VectorType, $Description, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2) = @_;
 250   my($FingerprintsVector, $VectorString);
 251 
 252   $VectorString = TextUtil::IsEmpty($VectorString2) ? $VectorString1 : "${VectorString1};${VectorString2}";
 253   $FingerprintsVector = undef;
 254 
 255   VECTORSTRINGFORMAT : {
 256     if ($VectorStringFormat =~ /^(ValuesString|Values)$/i) {
 257       $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesString($VectorValuesType, $VectorString);
 258       last VECTORSTRINGFORMAT;
 259     }
 260     if ($VectorStringFormat =~ /^(IDsAndValuesString|IDsAndValues)$/i) {
 261       $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesString($VectorValuesType, $VectorString);
 262       last VECTORSTRINGFORMAT;
 263     }
 264     if ($VectorStringFormat =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) {
 265       $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesPairsString($VectorValuesType, $VectorString);
 266       last VECTORSTRINGFORMAT;
 267     }
 268     if ($VectorStringFormat =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) {
 269       $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsString($VectorValuesType, $VectorString);
 270       last VECTORSTRINGFORMAT;
 271     }
 272     if ($VectorStringFormat =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) {
 273       $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsPairsString($VectorValuesType, $VectorString);
 274       last VECTORSTRINGFORMAT;
 275     }
 276     croak "Error: FingerprintsStringUtil::_GenerateFingerprintVector: Specified vector string format, $VectorStringFormat, is not supported. Value values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValuesString, Values...";
 277   }
 278 
 279   if (defined $FingerprintsVector) {
 280     # Set fingerints vector type and description...
 281     $FingerprintsVector->SetVectorType($VectorType);
 282     $FingerprintsVector->SetDescription($Description);
 283   }
 284 
 285   return $FingerprintsVector;
 286 }
 287 
 288 # Validate fingerint string values...
 289 #
 290 sub AreFingerprintsStringValuesValid {
 291   my($FingerprintsString) = @_;
 292   my($Value);
 293 
 294   for $Value (_ParseFingerprintsStringValues($FingerprintsString)) {
 295     if (TextUtil::IsEmpty($Value)) {
 296       return 0;
 297     }
 298   }
 299   return 1;
 300 }
 301 
 302 # Get fingerprints description...
 303 #
 304 sub _GetFingerprintsDescription {
 305   my($FingerprintsObject) = @_;
 306   my($Description);
 307 
 308   $Description = $FingerprintsObject->GetDescription();
 309 
 310   return TextUtil::IsEmpty($Description) ? 'No description available for fingerprints' : $Description;
 311 }
 312 
 313 # Parse fingerprints string values...
 314 #
 315 sub _ParseFingerprintsStringValues {
 316   my($FingerprintsString) = @_;
 317 
 318   return split "${FPStringDelim}", $FingerprintsString;
 319 }
 320 
 321 # Check to make sure already parsed fingerprints string values are valid....
 322 #
 323 sub _ValidateFingerprintsStringValues {
 324   my($ErrorMsgPrefix, @Values) = @_;
 325   my($Value);
 326 
 327   for $Value (@Values) {
 328     if (TextUtil::IsEmpty($Value)) {
 329       croak("${ErrorMsgPrefix}: _ValidateFingerprintsStringValues: Fingerprints string format is not valid: An empty value found...");
 330     }
 331   }
 332 }
 333 
 334 # Default bit string format...
 335 #
 336 sub GetDefaultBitStringFormat {
 337   return 'HexadecimalString';
 338 }
 339 
 340 # Default bit order...
 341 #
 342 sub GetDefaultBitsOrder {
 343   return 'Ascending';
 344 }
 345 
 346 # Default vector string format using fingerprints or fingerprints vector object...
 347 #
 348 sub GetDefaultVectorStringFormat {
 349   my($FingerprintsObject) = @_;
 350   my($FingerprintsVector);
 351 
 352   $FingerprintsVector = Fingerprints::FingerprintsVector::IsFingerprintsVector($FingerprintsObject) ? $FingerprintsObject : $FingerprintsObject->GetFingerprintsVector();
 353 
 354   return _GetDefaultVectorStringFormat($FingerprintsVector);
 355 }
 356 
 357 # Default vector string format using fingerprits vector object...
 358 #
 359 sub _GetDefaultVectorStringFormat {
 360   my($FingerprintsVector) = @_;
 361   my($Type);
 362 
 363   $Type = $FingerprintsVector->GetType();
 364 
 365   return ($Type =~ /^NumericalValues$/i) ? 'IDsAndValuesString' : 'ValuesString';
 366 }
 367 
 368 # Fingerprints string delimiter...
 369 #
 370 sub GetFingeprintsStringDelimiter {
 371   return $FPStringDelim;
 372 }
 373