MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: SimilarityMatricesFingerprints.pl,v $
   4 # $Date: 2015/02/28 20:46:20 $
   5 # $Revision: 1.21 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use FindBin; use lib "$FindBin::Bin/../lib";
  31 use Getopt::Long;
  32 use File::Basename;
  33 use File::Copy;
  34 use Text::ParseWords;
  35 use Benchmark;
  36 use FileUtil;
  37 use TextUtil;
  38 use Fingerprints::FingerprintsFileUtil;
  39 use Fingerprints::FingerprintsBitVector;
  40 use Fingerprints::FingerprintsVector;
  41 
  42 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  43 
  44 # Autoflush STDOUT
  45 $| = 1;
  46 
  47 # Starting message...
  48 $ScriptName = basename($0);
  49 print "\n$ScriptName: Starting...\n\n";
  50 $StartTime = new Benchmark;
  51 
  52 # Get the options and setup script...
  53 SetupScriptUsage();
  54 if ($Options{help} || @ARGV < 1) {
  55   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  56 }
  57 
  58 my(@FingerprintsFilesList);
  59 @FingerprintsFilesList = ExpandFileNames(\@ARGV, "sdf sd fpf fp csv tsv");
  60 
  61 # Process options...
  62 print "Processing options...\n";
  63 my(%OptionsInfo);
  64 ProcessOptions();
  65 
  66 # Setup information about input files...
  67 print "Checking input fingerprints file(s)...\n";
  68 my(%FingerprintsFilesInfo);
  69 RetrieveFingerprintsFilesInfo();
  70 
  71 # Process input files..
  72 my($FileIndex);
  73 if (@FingerprintsFilesList > 1) {
  74   print "\nProcessing fingerprints files...\n";
  75 }
  76 for $FileIndex (0 .. $#FingerprintsFilesList) {
  77   if ($FingerprintsFilesInfo{FileOkay}[$FileIndex]) {
  78     print "\nProcessing file $FingerprintsFilesList[$FileIndex]...\n";
  79     GenerateSimilarityMatrices($FileIndex);
  80   }
  81 }
  82 print "\n$ScriptName:Done...\n\n";
  83 
  84 $EndTime = new Benchmark;
  85 $TotalTime = timediff ($EndTime, $StartTime);
  86 print "Total time: ", timestr($TotalTime), "\n";
  87 
  88 ###############################################################################
  89 
  90 # Generate similarity matrices using fingerprints data in text file...
  91 #
  92 sub GenerateSimilarityMatrices {
  93   my($FileIndex) = @_;
  94 
  95   ProcessFingerprintsData($FileIndex);
  96 
  97   if ($FingerprintsFilesInfo{FingerprintsBitVectorStringMode}[$FileIndex]) {
  98     GenerateSimilarityMatricesForFingerprintsBitVectors($FileIndex);
  99   }
 100   elsif ($FingerprintsFilesInfo{FingerprintsVectorStringMode}[$FileIndex]) {
 101     GenerateSimilarityMatricesForFingerprintsVectors($FileIndex);
 102   }
 103 
 104   CleanupFingerprintsData($FileIndex);
 105 }
 106 
 107 # Generate bit vector similarity matrices...
 108 #
 109 sub GenerateSimilarityMatricesForFingerprintsBitVectors {
 110   my($FileIndex) = @_;
 111   my($SpecifiedComparisonMeasure, $ComparisonMeasure, $NewTextFile, $SimilarityMatrixRef, $MethodName, @MethodParameters);
 112 
 113   for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedBitVectorComparisonsRef}}) {
 114     $ComparisonMeasure = $OptionsInfo{SpecifiedBitVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)};
 115     $NewTextFile = $FingerprintsFilesInfo{OutFileRoot}[$FileIndex] . "${ComparisonMeasure}." . $FingerprintsFilesInfo{OutFileExt}[$FileIndex];
 116 
 117     $MethodName = $OptionsInfo{SpecifiedBitVectorComparisonsMethodRef}->{lc($ComparisonMeasure)};
 118 
 119     @MethodParameters = ();
 120     @MethodParameters = @{$OptionsInfo{SpecifiedBitVectorComparisonsParameterRef}->{lc($ComparisonMeasure)}};
 121 
 122     GenerateSimilarityMatrix($FileIndex, $NewTextFile, $MethodName, \@MethodParameters);
 123   }
 124 }
 125 
 126 # Generate vector similarity and/or distance matrices...
 127 #
 128 sub GenerateSimilarityMatricesForFingerprintsVectors {
 129   my($FileIndex) = @_;
 130   my($SpecifiedComparisonMeasure, $ComparisonMode, $ComparisonMeasure, $NewTextFile, $MethodName, @MethodParameters);
 131 
 132   for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedVectorComparisonsRef}}) {
 133     $ComparisonMeasure = $OptionsInfo{SpecifiedVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)};
 134 
 135     for $ComparisonMode (@{$OptionsInfo{SpecifiedVectorComparisonModesRef}}) {
 136       $NewTextFile = $FingerprintsFilesInfo{OutFileRoot}[$FileIndex] . "${ComparisonMeasure}${ComparisonMode}." . $FingerprintsFilesInfo{OutFileExt}[$FileIndex];
 137 
 138       $MethodName = $OptionsInfo{SpecifiedVectorComparisonsMethodRef}->{lc($ComparisonMeasure)};
 139 
 140       @MethodParameters = ();
 141       push @MethodParameters, $ComparisonMode;
 142       push @MethodParameters, @{$OptionsInfo{SpecifiedVectorComparisonsParameterRef}->{lc($ComparisonMeasure)}};
 143 
 144       GenerateSimilarityMatrix($FileIndex, $NewTextFile, $MethodName, \@MethodParameters);
 145     }
 146   }
 147 }
 148 
 149 # Calculate similarity matrix and write it out...
 150 #
 151 sub GenerateSimilarityMatrix {
 152   my($FileIndex, $NewTextFile, $MethodName, $MethodParametersRef) = @_;
 153 
 154   print "\nGenerating $NewTextFile...\n";
 155 
 156   # Open new file and write out column labels...
 157   open NEWTEXTFILE, ">$NewTextFile" or die "Error: Can't open $NewTextFile: $! \n";
 158   WriteColumnLabels($FileIndex, \*NEWTEXTFILE);
 159 
 160   # Calculate and write out similarity matrix values...
 161   if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) {
 162     GenerateSimilarityMatrixUsingMemoryData($FileIndex, \*NEWTEXTFILE, $MethodName, $MethodParametersRef);
 163   }
 164   elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) {
 165     GenerateSimilarityMatrixUsingFileData($FileIndex, \*NEWTEXTFILE, $MethodName, $MethodParametersRef);
 166   }
 167   else {
 168     warn "Warning: Input data mode, $OptionsInfo{InputDataMode}, is not supported.\n";
 169   }
 170 
 171   # Close new text file...
 172   close NEWTEXTFILE;
 173 
 174 }
 175 
 176 # Calculate and write out similarity values using fingerprints data already loaded in
 177 # memory...
 178 #
 179 sub GenerateSimilarityMatrixUsingMemoryData {
 180   my($FileIndex, $NewTextFileRef, $MethodName, $MethodParametersRef) = @_;
 181   my($RowIndex, $ColIndex, $CmpdID1, $CmpdID2, $FingerprintsObject1, $FingerprintsObject2, $Value, $Line, @LineWords);
 182 
 183   for $RowIndex (0 .. $#{$FingerprintsFilesInfo{FingerprintsObjectsRef}}) {
 184     $FingerprintsObject1 = $FingerprintsFilesInfo{FingerprintsObjectsRef}->[$RowIndex];
 185     $CmpdID1 = $FingerprintsFilesInfo{CompundIDsRef}->[$RowIndex];
 186 
 187     if ($OptionsInfo{WriteRowsAndColumns}) {
 188       print $NewTextFileRef "$OptionsInfo{OutQuoteValue}${CmpdID1}$OptionsInfo{OutQuoteValue}";
 189     }
 190 
 191     COLINDEX: for $ColIndex (0 .. $#{$FingerprintsFilesInfo{FingerprintsObjectsRef}}) {
 192       if (SkipMatrixData($RowIndex, $ColIndex)) {
 193         next COLINDEX;
 194       }
 195 
 196       $FingerprintsObject2 = $FingerprintsFilesInfo{FingerprintsObjectsRef}->[$ColIndex];
 197 
 198       $Value = $FingerprintsObject1->$MethodName($FingerprintsObject2, @{$MethodParametersRef});
 199       $Value = (defined($Value) && length($Value)) ? (sprintf("%.$OptionsInfo{Precision}f", $Value) + 0) : '';
 200 
 201       if ($OptionsInfo{WriteRowsAndColumns}) {
 202         print $NewTextFileRef "$OptionsInfo{OutDelim}$OptionsInfo{OutQuoteValue}${Value}$OptionsInfo{OutQuoteValue}";
 203       }
 204       elsif ($OptionsInfo{WriteIDPairsAndValue}) {
 205         $CmpdID2 = $FingerprintsFilesInfo{CompundIDsRef}->[$ColIndex];
 206 
 207         @LineWords = ();
 208         push @LineWords,  ($CmpdID1, $CmpdID2, $Value);
 209         $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 210         print $NewTextFileRef "$Line\n";
 211       }
 212     }
 213     if ($OptionsInfo{WriteRowsAndColumns}) {
 214       print $NewTextFileRef "\n";
 215     }
 216   }
 217 }
 218 
 219 # Calculate and write out similarity values by retrieving and prcessing data
 220 # from fingerprint file...
 221 #
 222 sub GenerateSimilarityMatrixUsingFileData {
 223   my($FileIndex, $NewTextFileRef, $MethodName, $MethodParametersRef) = @_;
 224   my($RowIndex, $ColIndex, $FingerprintsFileIO, $TmpFingerprintsFileIO, $FingerprintsObject1, $FingerprintsObject2, $CmpdID1, $CmpdID2, $FingerprintsCount, $IgnoredFingerprintsCount, $Value, $Line, @LineWords);
 225 
 226   print "\nReading and processing fingerprints data...\n";
 227 
 228   $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]});
 229   $FingerprintsFileIO->Open();
 230 
 231   $RowIndex = 0; $ColIndex = 0;
 232   $FingerprintsCount = 0; $IgnoredFingerprintsCount = 0;
 233 
 234   FINGERPRINTSFILEIO: while ($FingerprintsFileIO->Read()) {
 235     $FingerprintsCount++;
 236 
 237     if (!$FingerprintsFileIO->IsFingerprintsDataValid()) {
 238       $IgnoredFingerprintsCount++;
 239       next FINGERPRINTSFILEIO;
 240     }
 241     $RowIndex++;
 242     $FingerprintsObject1 = $FingerprintsFileIO->GetFingerprints();
 243     $CmpdID1 = $FingerprintsFileIO->GetCompoundID();
 244 
 245     if ($OptionsInfo{WriteRowsAndColumns}) {
 246       print $NewTextFileRef "$OptionsInfo{OutQuoteValue}${CmpdID1}$OptionsInfo{OutQuoteValue}";
 247     }
 248 
 249     # Force detail level of 1 to avoid duplicate printing of diagnostic messages for invalid
 250     # fingerprints data...
 251     $TmpFingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}[$FileIndex]}, "DetailLevel" => 1);
 252     $TmpFingerprintsFileIO->Open();
 253 
 254     $ColIndex = 0;
 255     TMPFINGERPRINTSFILEIO: while ($TmpFingerprintsFileIO->Read()) {
 256       if (!$TmpFingerprintsFileIO->IsFingerprintsDataValid()) {
 257         next TMPFINGERPRINTSFILEIO;
 258       }
 259       $ColIndex++;
 260 
 261       if (SkipMatrixData($RowIndex, $ColIndex)) {
 262         next TMPFINGERPRINTSFILEIO;
 263       }
 264 
 265       $FingerprintsObject2 = $TmpFingerprintsFileIO->GetFingerprints();
 266 
 267       $Value = $FingerprintsObject1->$MethodName($FingerprintsObject2, @{$MethodParametersRef});
 268       $Value = (defined($Value) && length($Value)) ? (sprintf("%.$OptionsInfo{Precision}f", $Value) + 0) : '';
 269 
 270       if ($OptionsInfo{WriteRowsAndColumns}) {
 271         print $NewTextFileRef "$OptionsInfo{OutDelim}$OptionsInfo{OutQuoteValue}${Value}$OptionsInfo{OutQuoteValue}";
 272       }
 273       elsif ($OptionsInfo{WriteIDPairsAndValue}) {
 274         $CmpdID2 = $TmpFingerprintsFileIO->GetCompoundID();
 275 
 276         @LineWords = ();
 277         push @LineWords,  ($CmpdID1, $CmpdID2, $Value);
 278         $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 279         print $NewTextFileRef "$Line\n";
 280       }
 281     }
 282     $TmpFingerprintsFileIO->Close();
 283 
 284     if ($OptionsInfo{WriteRowsAndColumns}) {
 285       print $NewTextFileRef "\n";
 286     }
 287   }
 288 
 289   $FingerprintsFileIO->Close();
 290 
 291   print "Number of fingerprints data entries in database fingerprints file: $FingerprintsCount\n";
 292   print "Number of fingerprints date entries processed successfully: ", ($FingerprintsCount - $IgnoredFingerprintsCount)  , "\n";
 293   print "Number of fingerprints data entries ignored due to missing/invalid data: $IgnoredFingerprintsCount\n\n";
 294 }
 295 
 296 # Check whether matrix data need to be skipped...
 297 #
 298 sub SkipMatrixData {
 299   my($RowIndex, $ColIndex) = @_;
 300 
 301   if ($OptionsInfo{WriteFullMatrix}) {
 302     return 0;
 303   }
 304   elsif ($OptionsInfo{WriteUpperTriangularMatrix}) {
 305     return ($RowIndex > $ColIndex) ? 1 : 0;
 306   }
 307   elsif ($OptionsInfo{WriteLowerTriangularMatrix}) {
 308     return ($RowIndex < $ColIndex) ? 1 : 0;
 309   }
 310 
 311   return 0;
 312 }
 313 
 314 # Write out column labels...
 315 #
 316 sub WriteColumnLabels {
 317   my($FileIndex, $NewTextFileRef) = @_;
 318   my($Line, @LineWords);
 319 
 320   if ($OptionsInfo{OutMatrixFormat} =~ /^IDPairsAndValue$/i) {
 321     @LineWords = ();
 322     push @LineWords, ('CmpdID1', 'CmpdID2', 'Coefficient Value');
 323     $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 324     print $NewTextFileRef "$Line\n";
 325   }
 326   elsif ($OptionsInfo{OutMatrixFormat} =~ /^RowsAndColumns$/i) {
 327     if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) {
 328       @LineWords = ();
 329       push @LineWords, '';
 330       push @LineWords, @{$FingerprintsFilesInfo{CompundIDsRef}};
 331       $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 332       print $NewTextFileRef "$Line\n";
 333     }
 334     elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) {
 335       my( $FingerprintsFileIO, $CmpdID);
 336 
 337       # Scan file to retrieve compound IDs...
 338       #
 339       print "\nProcessing fingerprints file to generate compound IDs...\n";
 340 
 341       # Force detail level of 1 to avoid diagnostics messages for invalid fingeprints data during
 342       # retrieval of compound IDs as these get printed out during calculation of matrix...
 343       #
 344       $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]}, "DetailLevel" => 1);
 345       $FingerprintsFileIO->Open();
 346 
 347       print $NewTextFileRef "$OptionsInfo{OutQuoteValue}$OptionsInfo{OutQuoteValue}";
 348 
 349       FINGERPRINTSFILEIO: while ($FingerprintsFileIO->Read()) {
 350         if (!$FingerprintsFileIO->IsFingerprintsDataValid()) {
 351           next FINGERPRINTSFILEIO;
 352         }
 353         $CmpdID = $FingerprintsFileIO->GetCompoundID();
 354         print $NewTextFileRef "$OptionsInfo{OutDelim}$OptionsInfo{OutQuoteValue}${CmpdID}$OptionsInfo{OutQuoteValue}";
 355       }
 356       $FingerprintsFileIO->Close();
 357 
 358       print $NewTextFileRef "\n";
 359 
 360       print "Processing fingerprints file to generate matrix...\n";
 361     }
 362   }
 363   else {
 364     warn "Warning: Output matrix format, $OptionsInfo{OutMatrixFormat}, is not supported.\n";
 365   }
 366 }
 367 
 368 # Process fingerprints data...
 369 #
 370 sub ProcessFingerprintsData {
 371   my($FileIndex) = @_;
 372   my($FingerprintsFileIO);
 373 
 374   $FingerprintsFilesInfo{CompundIDsRef}  = undef;
 375   $FingerprintsFilesInfo{FingerprintsObjectsRef} = undef;
 376 
 377   if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) {
 378     my($FingerprintsFileIO);
 379 
 380     $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]});
 381     ($FingerprintsFilesInfo{CompundIDsRef}, $FingerprintsFilesInfo{FingerprintsObjectsRef}) = Fingerprints::FingerprintsFileUtil::ReadAndProcessFingerpritsData($FingerprintsFileIO);
 382   }
 383   elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) {
 384     my($FingerprintsFile, $TmpFingerprintsFile);
 385 
 386     $FingerprintsFile = $FingerprintsFilesList[$FileIndex];
 387     $TmpFingerprintsFile = $FingerprintsFilesInfo{TmpFingerprintsFile}[$FileIndex];
 388 
 389     # Copy fingerprints file to a tmp file for calculating similarity matrix...
 390     print "\nCopying fingerprints file, $FingerprintsFile, to temporary fingperints file, $TmpFingerprintsFile...\n";
 391     copy $FingerprintsFile, $TmpFingerprintsFile or die "Error: Couldn't copy $FingerprintsFile to $TmpFingerprintsFile: $! \n";
 392   }
 393 }
 394 
 395 # Clean up fingerprints data...
 396 #
 397 sub CleanupFingerprintsData {
 398   my($FileIndex) = @_;
 399 
 400   if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) {
 401     $FingerprintsFilesInfo{CompundIDsRef}  = undef;
 402     $FingerprintsFilesInfo{FingerprintsObjectsRef} = undef;
 403   }
 404   elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) {
 405     my($TmpFingerprintsFile);
 406 
 407     # Delete temporary fingerprints file...
 408     $TmpFingerprintsFile = $FingerprintsFilesInfo{TmpFingerprintsFile}[$FileIndex];
 409 
 410     print "\nDeleting temporary fingerprints file $TmpFingerprintsFile...\n";
 411     unlink $TmpFingerprintsFile or die "Error: Couldn't unlink $TmpFingerprintsFile: $! \n";
 412   }
 413 }
 414 
 415 # Retrieve information about fingerprints files...
 416 #
 417 sub RetrieveFingerprintsFilesInfo {
 418   my($FingerprintsFile, $TmpFingerprintsFile, $FingerprintsFileIO, $FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FileType, $Index, $FileDir, $FileExt, $FileName, $InDelim, $OutFileRoot, $OutFileExt, %FingerprintsFileIOParameters);
 419 
 420   %FingerprintsFilesInfo = ();
 421   @{$FingerprintsFilesInfo{FileOkay}} = ();
 422   @{$FingerprintsFilesInfo{FileType}} = ();
 423   @{$FingerprintsFilesInfo{InDelim}} = ();
 424   @{$FingerprintsFilesInfo{OutFileRoot}} = ();
 425   @{$FingerprintsFilesInfo{OutFileExt}} = ();
 426 
 427   @{$FingerprintsFilesInfo{TmpFingerprintsFile}} = ();
 428 
 429   @{$FingerprintsFilesInfo{FingerprintsFileIOParameters}} = ();
 430   @{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}} = ();
 431 
 432   @{$FingerprintsFilesInfo{FingerprintsBitVectorStringMode}} = ();
 433   @{$FingerprintsFilesInfo{FingerprintsVectorStringMode}} = ();
 434 
 435   FILELIST: for $Index (0 .. $#FingerprintsFilesList) {
 436     $FingerprintsFilesInfo{FileOkay}[$Index] = 0;
 437     $FingerprintsFilesInfo{FileType}[$Index] = '';
 438     $FingerprintsFilesInfo{InDelim}[$Index] = "";
 439     $FingerprintsFilesInfo{OutFileRoot}[$Index] = '';
 440     $FingerprintsFilesInfo{OutFileExt}[$Index] = '';
 441 
 442     %{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index]} = ();
 443 
 444     $FingerprintsFilesInfo{TmpFingerprintsFile}[$Index] = "";
 445     %{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}[$Index]} = ();
 446 
 447     $FingerprintsFilesInfo{FingerprintsBitVectorStringMode}[$Index] = 0;
 448     $FingerprintsFilesInfo{FingerprintsVectorStringMode}[$Index] = 0;
 449 
 450     $FingerprintsFile = $FingerprintsFilesList[$Index];
 451     if (!(-e $FingerprintsFile)) {
 452       warn "Warning: Ignoring file $FingerprintsFile: It doesn't exist\n";
 453       next FILELIST;
 454     }
 455 
 456     $FileType = Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType($FingerprintsFile);
 457     if (IsEmpty($FileType)) {
 458       warn "Warning: Ignoring file $FingerprintsFile: It's not a fingerprints file\n";
 459       next FILELIST;
 460     }
 461 
 462     $FileDir = ""; $FileName = ""; $FileExt = "";
 463     ($FileDir, $FileName, $FileExt) = ParseFileName($FingerprintsFile);
 464 
 465     # Setup temporary fingerprints file name for scan file mode...
 466     $TmpFingerprintsFile = "${FileName}Tmp.${FileExt}";
 467 
 468     $InDelim = ($FileExt =~ /^tsv$/i) ? 'Tab' : $OptionsInfo{InDelim};
 469 
 470     # Setup output file names...
 471     $OutFileExt = "csv";
 472     if ($Options{outdelim} =~ /^tab$/i) {
 473       $OutFileExt = "tsv";
 474     }
 475 
 476     $OutFileRoot = $FileName;
 477     if ($OptionsInfo{OutFileRoot} && (@FingerprintsFilesList == 1)) {
 478       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
 479       if ($RootFileName && $RootFileExt) {
 480         $FileName = $RootFileName;
 481       }
 482       else {
 483         $FileName = $OptionsInfo{OutFileRoot};
 484       }
 485       $OutFileRoot = $FileName;
 486     }
 487 
 488     if (!$Options{overwrite}) {
 489       # Similarity matrices output file names for bit-vector strings...
 490       my($SpecifiedComparisonMeasure, $ComparisonMeasure);
 491       for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedBitVectorComparisonsRef}}) {
 492         $ComparisonMeasure = $OptionsInfo{SpecifiedBitVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)};
 493         if (-e "${OutFileRoot}${ComparisonMeasure}.${OutFileExt}") {
 494           warn "Warning: Ignoring file $FingerprintsFile: The file ${OutFileRoot}${ComparisonMeasure}.${OutFileExt} already exists.\n";
 495           next FILELIST;
 496         }
 497       }
 498       # Similarity matrices output file names for vector strings...
 499       my($ComparisonMode);
 500       for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedVectorComparisonsRef}}) {
 501         $ComparisonMeasure = $OptionsInfo{SpecifiedVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)};
 502         for $ComparisonMode (@{$OptionsInfo{SpecifiedVectorComparisonModesRef}}) {
 503           if (-e "${OutFileRoot}${ComparisonMeasure}${ComparisonMode}.${OutFileExt}") {
 504             warn "Warning: Ignoring file $FingerprintsFile: The file ${OutFileRoot}${ComparisonMeasure}${ComparisonMode}.${OutFileExt} already exists.\n";
 505             next FILELIST;
 506           }
 507         }
 508       }
 509     }
 510 
 511     # Setup FingerprintsFileIO parameters...
 512     %FingerprintsFileIOParameters = ();
 513     FILEIOPARAMETERS: {
 514       if ($FileType =~ /^SD$/i) {
 515         %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'FingerprintsStringMode' => $OptionsInfo{Mode}, 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' =>  $OptionsInfo{Detail}, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsField}, 'CompoundIDMode' => $OptionsInfo{CompoundIDMode}, 'CompoundIDFieldLabel' => $OptionsInfo{CompoundIDField}, 'CompoundIDPrefix' => $OptionsInfo{CompoundIDPrefix});
 516         last FILEIOPARAMETERS;
 517       }
 518       if ($FileType =~ /^FP$/i) {
 519         %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'FingerprintsStringMode' => $OptionsInfo{Mode}, 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' =>  $OptionsInfo{Detail});
 520         last FILEIOPARAMETERS;
 521       }
 522       if ($FileType =~ /^Text$/i) {
 523         %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'FingerprintsStringMode' => $OptionsInfo{Mode}, 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' =>  $OptionsInfo{Detail}, 'FingerprintsCol' => $OptionsInfo{FingerprintsCol}, 'ColMode' => $OptionsInfo{ColMode}, 'CompoundIDCol' => $OptionsInfo{CompoundIDCol}, 'CompoundIDPrefix' => $OptionsInfo{CompoundIDPrefix}, 'InDelim' => $OptionsInfo{InDelim});
 524         last FILEIOPARAMETERS;
 525       }
 526       warn "Warning: File type for fingerprints file, $FingerprintsFile, is not valid. Supported file types: SD, FP or Text\n";
 527       next FILELIST;
 528     }
 529 
 530     # Retrieve fingerints file string mode information...
 531     $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%FingerprintsFileIOParameters);
 532 
 533     if (!$FingerprintsFileIO) {
 534       warn "Warning: Ignoring fingerprints file $FingerprintsFile: It contains invalid fingerprints data\n";
 535       next FILELIST;
 536     }
 537     if (!$FingerprintsFileIO->IsFingerprintsFileDataValid()) {
 538       warn "Warning: Ignoring fingerprints file $FingerprintsFile: It contains invalid fingerprints data\n";
 539       next FILELIST;
 540     }
 541     $FingerprintsBitVectorStringMode = $FingerprintsFileIO->GetFingerprintsBitVectorStringMode();
 542     $FingerprintsVectorStringMode = $FingerprintsFileIO->GetFingerprintsVectorStringMode();
 543 
 544 
 545     $FingerprintsFilesInfo{FileOkay}[$Index] = 1;
 546     $FingerprintsFilesInfo{FileType}[$Index] = $FileType;
 547 
 548     $FingerprintsFilesInfo{InDelim}[$Index] = $InDelim;
 549 
 550     $FingerprintsFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
 551     $FingerprintsFilesInfo{OutFileExt}[$Index] = $OutFileExt;
 552 
 553     %{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index]} = %FingerprintsFileIOParameters;
 554 
 555     $FingerprintsFilesInfo{TmpFingerprintsFile}[$Index] = $TmpFingerprintsFile;
 556 
 557     $FingerprintsFileIOParameters{Name} = $TmpFingerprintsFile;
 558     %{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}[$Index]} = %FingerprintsFileIOParameters;
 559 
 560     $FingerprintsFilesInfo{FingerprintsBitVectorStringMode}[$Index] = $FingerprintsBitVectorStringMode;
 561     $FingerprintsFilesInfo{FingerprintsVectorStringMode}[$Index] = $FingerprintsVectorStringMode;
 562   }
 563 }
 564 
 565 # Process option values...
 566 sub ProcessOptions {
 567   %OptionsInfo = ();
 568 
 569   $OptionsInfo{Mode} = $Options{mode};
 570 
 571   $OptionsInfo{InputDataMode} = $Options{inputdatamode};
 572 
 573   ProcessBitVectorComparisonOptions();
 574   ProcessVectorComparisonOptions();
 575 
 576   $OptionsInfo{CompoundIDPrefix} = $Options{compoundidprefix} ? $Options{compoundidprefix} : 'Cmpd';
 577 
 578   # Compound ID and fingerprints column options for text files...
 579   $OptionsInfo{ColMode} = $Options{colmode};
 580 
 581   if (IsNotEmpty($Options{compoundidcol})) {
 582     if ($Options{colmode} =~ /^ColNum$/i) {
 583       if (!IsPositiveInteger($Options{compoundidcol})) {
 584         die "Error: Column value, $Options{compoundidcol}, specified using \"--CompoundIDCol\" is not valid: Allowed integer values: > 0\n";
 585       }
 586     }
 587     $OptionsInfo{CompoundIDCol} = $Options{compoundidcol};
 588   }
 589   else {
 590     $OptionsInfo{CompoundIDCol} = 'AutoDetect';
 591   }
 592 
 593   if (IsNotEmpty($Options{fingerprintscol})) {
 594     if ($Options{colmode} =~ /^ColNum$/i) {
 595       if (!IsPositiveInteger($Options{fingerprintscol})) {
 596         die "Error: Column value, $Options{fingerprintscol}, specified using \"--FingerprintsCol\" is not valid: Allowed integer values: > 0\n";
 597       }
 598     }
 599     $OptionsInfo{FingerprintsCol} = $Options{fingerprintscol};
 600   }
 601   else {
 602     $OptionsInfo{FingerprintsCol} = 'AutoDetect';
 603   }
 604 
 605   if (IsNotEmpty($Options{compoundidcol}) && IsNotEmpty($Options{fingerprintscol})) {
 606     if (IsPositiveInteger($Options{compoundidcol}) && IsPositiveInteger($Options{fingerprintscol})) {
 607       if (($Options{compoundidcol} == $Options{fingerprintscol})) {
 608         die "Error: Values specified using \"--CompoundIDCol\" and \"--FingerprintsCol\", $Options{compoundidcol}, must be different.\n";
 609       }
 610     }
 611     else {
 612       if (($Options{compoundidcol} eq $Options{fingerprintscol})) {
 613         die "Error: Values specified using \"--CompoundIDCol\" and \"--FingerprintsCol\", $Options{compoundidcol}, must be different.\n";
 614       }
 615     }
 616   }
 617 
 618   # Compound ID and fingerprints field options for SD files...
 619   $OptionsInfo{CompoundIDMode} = $Options{compoundidmode};
 620   $OptionsInfo{CompoundIDField} = '';
 621 
 622   if ($Options{compoundidmode} =~ /^DataField$/i) {
 623     if (!$Options{compoundidfield}) {
 624       die "Error: You must specify a value for \"--CompoundIDField\" option in \"DataField\" \"--CompoundIDMode\". \n";
 625     }
 626     $OptionsInfo{CompoundIDField} = $Options{compoundidfield};
 627   }
 628 
 629 
 630   if (IsNotEmpty($Options{fingerprintsfield})) {
 631     $OptionsInfo{FingerprintsField} = $Options{fingerprintsfield};
 632   }
 633   else {
 634     $OptionsInfo{FingerprintsField} = 'AutoDetect';
 635   }
 636 
 637   if ($Options{compoundidfield} && IsNotEmpty($Options{fingerprintsfield})) {
 638     if (($Options{compoundidfield} eq $Options{fingerprintsfield})) {
 639       die "Error: Values specified using \"--CompoundIDField\" and \"--Fingerprintsfield\", $Options{compoundidfield}, must be different.\n";
 640     }
 641   }
 642 
 643   $OptionsInfo{Detail} = $Options{detail};
 644 
 645   $OptionsInfo{InDelim} = $Options{indelim};
 646   $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /tab/i ) ? "\t" : (($Options{outdelim} =~ /semicolon/i) ? "\;" : "\,");
 647   $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;
 648   $OptionsInfo{OutQuoteValue} = ($Options{quote} =~ /^Yes$/i) ? '"' : '';
 649 
 650   $OptionsInfo{OutMatrixFormat} = $Options{outmatrixformat};
 651 
 652   $OptionsInfo{WriteRowsAndColumns} = 0; $OptionsInfo{WriteIDPairsAndValue} = 0;
 653   OUTMATRIXFORMAT: {
 654     if ($OptionsInfo{OutMatrixFormat} =~ /^RowsAndColumns$/i) {
 655       $OptionsInfo{WriteRowsAndColumns} = 1; last OUTMATRIXFORMAT;
 656     }
 657     if ($OptionsInfo{OutMatrixFormat} =~ /^IDPairsAndValue$/i) {
 658       $OptionsInfo{WriteIDPairsAndValue} = 1; last OUTMATRIXFORMAT;
 659     }
 660     die "Error: The value specified, $Options{outmatrixformat}, for option \"--OutMatrixFormat\" is not valid. Allowed values: RowsAndColumns or IDPairsAndValue\n";
 661   }
 662 
 663   $OptionsInfo{OutMatrixType} = $Options{outmatrixtype};
 664 
 665   $OptionsInfo{WriteFullMatrix} = 0;
 666   $OptionsInfo{WriteUpperTriangularMatrix} = 0; $OptionsInfo{WriteLowerTriangularMatrix} = 0;
 667   OUTMATRIXTYPE: {
 668     if ($OptionsInfo{OutMatrixType} =~ /^FullMatrix$/i) {
 669       $OptionsInfo{WriteFullMatrix} = 1; last OUTMATRIXTYPE;
 670     }
 671     if ($OptionsInfo{OutMatrixType} =~ /^UpperTriangularMatrix$/i) {
 672       $OptionsInfo{WriteUpperTriangularMatrix} = 1; last OUTMATRIXTYPE;
 673     }
 674     if ($OptionsInfo{OutMatrixType} =~ /^LowerTriangularMatrix$/i) {
 675       $OptionsInfo{WriteLowerTriangularMatrix} = 1; last OUTMATRIXTYPE;
 676     }
 677     die "Error: The value specified, $Options{outmatrixtype}, for option \"--OutMatrixType\" is not valid. Allowed values: FullMatrix, UpperTriangularMatrix or LowerTriangularMatrix\n";
 678   }
 679 
 680   $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
 681   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
 682 
 683   $OptionsInfo{Fast} = $Options{fast} ? 1 : 0;
 684   $OptionsInfo{ValidateData} = $Options{fast} ? 0 : 1;
 685 
 686   $OptionsInfo{Precision} = $Options{precision};
 687 
 688 }
 689 
  690 # Process options related to comparison of bit vector strings...
 691 #
 692 sub ProcessBitVectorComparisonOptions {
 693   # Setup supported bit vector similarity coefficients for bit vector strings...
 694   my($ComparisonMeasure, $SupportedComparisonMeasure, @SupportedComparisonMeasures, %SupportedComparisonMeasuresNameMap, %SupportedComparisonMeasuresMethodMap);
 695 
 696   @SupportedComparisonMeasures = ();
 697   %SupportedComparisonMeasuresNameMap = ();
 698   %SupportedComparisonMeasuresMethodMap = ();
 699 
 700   for $SupportedComparisonMeasure (Fingerprints::FingerprintsBitVector::GetSupportedSimilarityCoefficients()) {
 701     # Similarity coefficient function/method names contain "Coefficient" in their names.
 702     # So take 'em out and setup a map to original function/method name...
 703     $ComparisonMeasure = $SupportedComparisonMeasure;
 704     $ComparisonMeasure =~ s/Coefficient$//;
 705 
 706     push @SupportedComparisonMeasures, $ComparisonMeasure;
 707     $SupportedComparisonMeasuresNameMap{lc($ComparisonMeasure)} = $ComparisonMeasure;
 708     $SupportedComparisonMeasuresMethodMap{lc($ComparisonMeasure)} = $SupportedComparisonMeasure;
 709   }
 710 
 711   # Setup a list of similarity coefficients to use for calculating similarity matrices for bit vector strings...
 712   my($SpecifiedMeasure, @SpecifiedComparisonMeasures, %SpecifiedComparisonMeasuresNameMap, %SpecifiedComparisonMeasuresMethodMap, %SpecifiedComparisonMeasuresParameterMap);
 713 
 714   @SpecifiedComparisonMeasures = ();
 715   %SpecifiedComparisonMeasuresNameMap = ();
 716   %SpecifiedComparisonMeasuresMethodMap = ();
 717   %SpecifiedComparisonMeasuresParameterMap = ();
 718 
 719   if ($Options{bitvectorcomparisonmode} =~ /^All$/i) {
 720     push @SpecifiedComparisonMeasures, @SupportedComparisonMeasures;
 721   }
 722   else {
 723     # Comma delimited list of similarity coefficients...
 724     my($BitVectorComparisonMode, @SpecifiedMeasures, @UnsupportedSpecifiedMeasures);
 725 
 726     $BitVectorComparisonMode = $Options{bitvectorcomparisonmode};
 727     $BitVectorComparisonMode =~ s/ //g;
 728     @SpecifiedMeasures = split ",", $BitVectorComparisonMode;
 729     @UnsupportedSpecifiedMeasures = ();
 730 
 731     for $SpecifiedMeasure (@SpecifiedMeasures) {
 732       if (exists($SupportedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)})) {
 733         push @SpecifiedComparisonMeasures, $SpecifiedMeasure;
 734       }
 735       else {
 736         push @UnsupportedSpecifiedMeasures, $SpecifiedMeasure;
 737       }
 738     }
 739     if (@UnsupportedSpecifiedMeasures) {
 740       if (@UnsupportedSpecifiedMeasures > 1) {
 741         warn "Error: The values specified - ", JoinWords(\@UnsupportedSpecifiedMeasures, ", ", 0)," - for option \"-b --BitVectorComparisonMode\" are not valid.\n";
 742       }
 743       else {
 744         warn "Error: The value specified, @UnsupportedSpecifiedMeasures, for option \"-b --BitVectorComparisonMode\" is not valid.\n";
 745       }
 746       die "Allowed values:", JoinWords(\@SupportedComparisonMeasures, ", ", 0), "\n";
 747     }
 748   }
 749   for $SpecifiedMeasure (@SpecifiedComparisonMeasures) {
 750     $SpecifiedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)} = $SupportedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)};
 751     $SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure)} = $SupportedComparisonMeasuresNameMap{lc($SpecifiedMeasure)};
 752   }
 753 
 754   $OptionsInfo{BitVectorComparisonMode} = $Options{bitvectorcomparisonmode};
 755   $OptionsInfo{SpecifiedBitVectorComparisonsRef} = \@SpecifiedComparisonMeasures;
 756   $OptionsInfo{SpecifiedBitVectorComparisonsNameRef} = \%SpecifiedComparisonMeasuresNameMap;
 757   $OptionsInfo{SpecifiedBitVectorComparisonsMethodRef} = \%SpecifiedComparisonMeasuresMethodMap;
 758 
 759   # Make sure valid alpha parameter is specified for Tversky calculation...
 760   my($SpecifiedMeasure1, $SpecifiedMeasure2);
 761   $OptionsInfo{Alpha} = '';
 762   $SpecifiedMeasure1 = 'TverskySimilarity';
 763   $SpecifiedMeasure2 = 'WeightedTverskySimilarity';
 764   if ($SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure1)} || $SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure2)}) {
 765     if (IsEmpty($Options{alpha})) {
 766       die "Error: You must specify a value for \"-a, --alpha\" option in \"$SpecifiedMeasure1, $SpecifiedMeasure2, or All\" \"-m --mode\". \n";
 767     }
 768     my($Alpha);
 769     $Alpha = $Options{alpha};
 770     if (!(IsFloat($Alpha) && $Alpha >=0 && $Alpha <= 1)) {
 771       die "Error: The value specified, $Options{alpha}, for option \"-a, --alpha\" is not valid. Allowed values: >= 0 and <= 1\n";
 772     }
 773     $OptionsInfo{Alpha} = $Alpha;
 774   }
 775 
 776   # Make sure valid beta parameter is specified for WeightedTanimoto and WeightedTversky
 777   # calculations...
 778   $OptionsInfo{Beta} = '';
 779   $SpecifiedMeasure1 = 'WeightedTverskySimilarity';
 780   $SpecifiedMeasure2 = 'WeightedTanimotoSimilarity';
 781   if ($SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure1)} || $SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure2)}) {
 782     if (IsEmpty($Options{beta})) {
 783       die "Error: You must specify a value for \"-b, --beta\" option in \"$SpecifiedMeasure1, $SpecifiedMeasure2, or All\" \"-m --mode\". \n";
 784     }
 785     my($Beta);
 786     $Beta = $Options{beta};
 787     if (!(IsFloat($Beta) && $Beta >=0 && $Beta <= 1)) {
 788       die "Error: The value specified, $Options{beta}, for option \"-b, --beta\" is not valid. Allowed values: >= 0 and <= 1\n";
 789     }
 790     $OptionsInfo{Beta} = $Beta;
 791   }
 792 
 793   # Setup any parameters required for specified comparison menthod...
 794   for $SpecifiedMeasure (@SpecifiedComparisonMeasures) {
 795     @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}} = ();
 796     if ($SpecifiedMeasure =~ /^TverskySimilarity$/i) {
 797       push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Alpha};
 798     }
 799     elsif ($SpecifiedMeasure =~ /^WeightedTverskySimilarity$/i) {
 800       push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Alpha};
 801       push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Beta};
 802     }
 803     elsif ($SpecifiedMeasure =~ /^WeightedTanimotoSimilarity$/i) {
 804       push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Beta};
 805     }
 806   }
 807   $OptionsInfo{SpecifiedBitVectorComparisonsParameterRef} = \%SpecifiedComparisonMeasuresParameterMap;
 808 }
 809 
 810 # Process options related to comparion of vector strings...
 811 #
 812 sub ProcessVectorComparisonOptions {
 813   # Setup specified similarity coefficients for vector strings..
 814   my($ComparisonMeasure, $SupportedComparisonMeasure, @SupportedComparisonMeasures, %SupportedComparisonMeasuresNameMap, %SupportedComparisonMeasuresMethodMap);
 815 
 816   @SupportedComparisonMeasures = ();
 817   %SupportedComparisonMeasuresNameMap = ();
 818   %SupportedComparisonMeasuresMethodMap = ();
 819   for $SupportedComparisonMeasure (Fingerprints::FingerprintsVector::GetSupportedDistanceAndSimilarityCoefficients()) {
 820     # Similarity and distance coefficient function/method names contain "Coefficient" in their names.
 821     # So take 'em out and setup a map to original function/method name...
 822     $ComparisonMeasure = $SupportedComparisonMeasure;
 823     if ($ComparisonMeasure =~ /Coefficient$/i) {
 824       $ComparisonMeasure =~ s/Coefficient$//i;
 825     }
 826     push @SupportedComparisonMeasures, $ComparisonMeasure;
 827     $SupportedComparisonMeasuresNameMap{lc($ComparisonMeasure)} = $ComparisonMeasure;
 828     $SupportedComparisonMeasuresMethodMap{lc($ComparisonMeasure)} = $SupportedComparisonMeasure;
 829   }
 830 
 831   # Setup a list of similarity coefficients to use for calculating similarity matrices for bit vector strings...
 832   my($SpecifiedMeasure, @SpecifiedComparisonMeasures, %SpecifiedComparisonMeasuresNameMap, %SpecifiedComparisonMeasuresMethodMap, %SpecifiedComparisonMeasuresParameterMap);
 833 
 834   @SpecifiedComparisonMeasures = ();
 835   %SpecifiedComparisonMeasuresNameMap = ();
 836   %SpecifiedComparisonMeasuresMethodMap = ();
 837 
 838   if ($Options{vectorcomparisonmode} =~ /^All$/i) {
 839     push @SpecifiedComparisonMeasures, @SupportedComparisonMeasures;
 840   }
 841   else {
 842     # Comma delimited list of similarity coefficients...
 843     my($VectorComparisonMode, @SpecifiedMeasures, @UnsupportedSpecifiedMeasures);
 844 
 845     $VectorComparisonMode = $Options{vectorcomparisonmode};
 846     $VectorComparisonMode =~ s/ //g;
 847     @SpecifiedMeasures = split ",", $VectorComparisonMode;
 848     @UnsupportedSpecifiedMeasures = ();
 849 
 850     for $SpecifiedMeasure (@SpecifiedMeasures) {
 851       if (exists($SupportedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)})) {
 852         push @SpecifiedComparisonMeasures, $SpecifiedMeasure;
 853       }
 854       else {
 855         push @UnsupportedSpecifiedMeasures, $SpecifiedMeasure;
 856       }
 857     }
 858     if (@UnsupportedSpecifiedMeasures) {
 859       if (@UnsupportedSpecifiedMeasures > 1) {
 860         warn "Error: The values specified - ", JoinWords(\@UnsupportedSpecifiedMeasures, ", ", 0)," - for option \"-v --VectorComparisonMode\" are not valid.\n";
 861       }
 862       else {
 863         warn "Error: The value specified, @UnsupportedSpecifiedMeasures, for option \"-v --VectorComparisonMode\" is not valid.\n";
 864       }
 865       die "Allowed values:", JoinWords(\@SupportedComparisonMeasures, ", ", 0), "\n";
 866     }
 867   }
 868   for $SpecifiedMeasure (@SpecifiedComparisonMeasures) {
 869     $SpecifiedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)} = $SupportedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)};
 870     $SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure)} = $SupportedComparisonMeasuresNameMap{lc($SpecifiedMeasure)};
 871   }
 872 
 873   $OptionsInfo{VectorComparisonMode} = $Options{vectorcomparisonmode};
 874   $OptionsInfo{SpecifiedVectorComparisonsRef} = \@SpecifiedComparisonMeasures;
 875   $OptionsInfo{SpecifiedVectorComparisonsNameRef} = \%SpecifiedComparisonMeasuresNameMap;
 876   $OptionsInfo{SpecifiedVectorComparisonsMethodRef} = \%SpecifiedComparisonMeasuresMethodMap;
 877 
 878   # Setup specified vector comparison calculation modes...
 879   my(@SpecifiedVectorComparisonModes);
 880   @SpecifiedVectorComparisonModes = ();
 881   if ($Options{vectorcomparisonformulism} =~ /^All$/i) {
 882     push @SpecifiedVectorComparisonModes, ("AlgebraicForm", "BinaryForm", "SetTheoreticForm");
 883   }
 884   else {
 885     my($SpecifiedFormulism, @SpecifiedFormulismWords);
 886 
 887     @SpecifiedFormulismWords = split /\,/, $Options{vectorcomparisonformulism};
 888     for $SpecifiedFormulism (@SpecifiedFormulismWords) {
 889       if ($SpecifiedFormulism !~ /^(AlgebraicForm|BinaryForm|SetTheoreticForm)$/i) {
 890         die "Error: The value specified, $SpecifiedFormulism, for option \"--VectorComparisonFormulism\" is not valid. Allowed values: AlgebraicForm, BinaryForm or SetTheoreticForm\n";
 891       }
 892       push @SpecifiedVectorComparisonModes, $SpecifiedFormulism;
 893     }
 894   }
 895   $OptionsInfo{VectorComparisonFormulism} = $Options{vectorcomparisonformulism};
 896   $OptionsInfo{SpecifiedVectorComparisonModesRef} = \@SpecifiedVectorComparisonModes;
 897 
 898   # Setup any parameters required for specified comparison menthod...
 899   for $SpecifiedMeasure (@SpecifiedComparisonMeasures) {
 900     @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}} = ();
 901     push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, ($Options{fast} ? 1 : 0);
 902   }
 903   $OptionsInfo{SpecifiedVectorComparisonsParameterRef} = \%SpecifiedComparisonMeasuresParameterMap;
 904 }
 905 
 906 # Setup script usage  and retrieve command line arguments specified using various options...
 907 sub SetupScriptUsage {
 908 
 909   # Retrieve all the options...
 910   %Options = ();
 911 
 912   $Options{alpha} = 0.5;
 913   $Options{beta} = 1;
 914 
 915   $Options{bitvectorcomparisonmode} = "TanimotoSimilarity";
 916 
 917   $Options{colmode} = 'colnum';
 918 
 919   $Options{compoundidprefix} = 'Cmpd';
 920   $Options{compoundidmode} = 'LabelPrefix';
 921 
 922   $Options{detail} = 1;
 923 
 924   $Options{indelim} = 'comma';
 925   $Options{outdelim} = 'comma';
 926 
 927   $Options{inputdatamode} = 'LoadInMemory';
 928 
 929   $Options{mode} = 'AutoDetect';
 930 
 931   $Options{outmatrixformat} = 'RowsAndColumns';
 932 
 933   $Options{outmatrixtype} = 'FullMatrix';
 934 
 935   $Options{quote} = 'yes';
 936   $Options{precision} = 2;
 937 
 938   $Options{vectorcomparisonmode} = "TanimotoSimilarity";
 939   $Options{vectorcomparisonformulism} = "AlgebraicForm";
 940 
 941   if (!GetOptions(\%Options, "alpha=f", "beta=f", "bitvectorcomparisonmode|b=s", "colmode|c=s", "compoundidcol=s", "compoundidprefix=s", "compoundidfield=s", "compoundidmode=s", "detail|d=i", "fast|f", "fingerprintscol=s", "fingerprintsfield=s", "help|h", "indelim=s", "inputdatamode=s", "mode|m=s", "outdelim=s", "overwrite|o", "outmatrixformat=s", "outmatrixtype=s", "precision|p=s", "quote|q=s", "root|r=s", "vectorcomparisonmode|v=s", "vectorcomparisonformulism=s", "workingdir|w=s")) {
 942     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
 943   }
 944   if ($Options{workingdir}) {
 945     if (! -d $Options{workingdir}) {
 946       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
 947     }
 948     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
 949   }
 950   if ($Options{colmode} !~ /^(ColNum|ColLabel)$/i) {
 951     die "Error: The value specified, $Options{colmode}, for option \"-c, --ColMode\" is not valid. Allowed values: ColNum, or ColLabel\n";
 952   }
 953   if ($Options{compoundidmode} !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
 954     die "Error: The value specified, $Options{compoundidmode}, for option \"--CompoundIDMode\" is not valid. Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n";
 955   }
 956   if (!IsPositiveInteger($Options{detail})) {
 957     die "Error: The value specified, $Options{detail}, for option \"-d, --detail\" is not valid. Allowed values: > 0 \n";
 958   }
 959   if ($Options{inputdatamode} !~ /^(LoadInMemory|ScanFile)$/i) {
 960     die "Error: The value specified, $Options{inputdatamode}, for option \"--InputDataMode\" is not valid. Allowed values: LoadInMemory or ScanFile\n";
 961   }
 962   if ($Options{mode} !~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
 963     die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString \n";
 964   }
 965   if ($Options{indelim} !~ /^(comma|semicolon)$/i) {
 966     die "Error: The value specified, $Options{indelim}, for option \"--InDelim\" is not valid. Allowed values: comma, or semicolon\n";
 967   }
 968   if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
 969     die "Error: The value specified, $Options{outdelim}, for option \"--OutDelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
 970   }
 971   if ($Options{outmatrixformat} !~ /^(RowsAndColumns|IDPairsAndValue)$/i) {
 972     die "Error: The value specified, $Options{outmatrixformat}, for option \"--OutMatrixFormat\" is not valid. Allowed values: RowsAndColumns or IDPairsAndValue\n";
 973   }
 974   if ($Options{outmatrixtype} !~ /^(FullMatrix|UpperTriangularMatrix|LowerTriangularMatrix)$/i) {
 975     die "Error: The value specified, $Options{outmatrixtype}, for option \"--OutMatrixType\" is not valid. Allowed values: FullMatrix, UpperTriangularMatrix or LowerTriangularMatrix\n";
 976   }
 977   if ($Options{quote} !~ /^(Yes|No)$/i) {
 978     die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: Yes or No\n";
 979   }
 980   if (!IsPositiveInteger($Options{precision})) {
 981     die "Error: The value specified, $Options{precision}, for option \"--precision\" is not valid. Allowed values: > 0 \n";
 982   }
 983 }
 984