MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: TopologicalPharmacophoreAtomTripletsFingerprints.pl,v $
   4 # $Date: 2015/02/28 20:46:23 $
   5 # $Revision: 1.34 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use FindBin; use lib "$FindBin::Bin/../lib";
  31 use Getopt::Long;
  32 use File::Basename;
  33 use Text::ParseWords;
  34 use Benchmark;
  35 use FileUtil;
  36 use TextUtil;
  37 use SDFileUtil;
  38 use MoleculeFileIO;
  39 use FileIO::FingerprintsSDFileIO;
  40 use FileIO::FingerprintsTextFileIO;
  41 use FileIO::FingerprintsFPFileIO;
  42 use AtomTypes::FunctionalClassAtomTypes;
  43 use Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints;
  44 
  # Script-wide state: $ScriptName and %Options are read by the subroutines
  # defined later in this file, so they stay file-scoped lexicals...
  my $ScriptName = basename($0);
  my %Options;

  # Autoflush STDOUT...
  $| = 1;

  # Starting message and timer...
  print "\n$ScriptName: Starting...\n\n";
  my $StartTime = Benchmark->new;

  # Retrieve command line options; show usage when help is requested or no
  # command line arguments remain after option processing...
  SetupScriptUsage();
  if ($Options{help} || @ARGV < 1) {
    die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  }

  # Expand input file names; @SDFilesList is shared with the subroutines below...
  my @SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

  # Process options into %OptionsInfo...
  print "Processing options...\n";
  my %OptionsInfo;
  ProcessOptions();

  # Collect per file information into %SDFilesInfo...
  print "Checking input SD file(s)...\n";
  my %SDFilesInfo;
  RetrieveSDFilesInfo();

  # Generate fingerprints for each input file that passed the checks...
  print "\nProcessing SD files...\n" if (@SDFilesList > 1);
  FILE: for my $FileIndex (0 .. $#SDFilesList) {
    next FILE unless $SDFilesInfo{FileOkay}[$FileIndex];

    print "\nProcessing file $SDFilesList[$FileIndex]...\n";
    GenerateTopologicalPharmacophoreAtomTripletsFingerprints($FileIndex);
  }
  print "\n$ScriptName:Done...\n\n";

  # Report elapsed time...
  my $TotalTime = timediff(Benchmark->new, $StartTime);
  print "Total time: ", timestr($TotalTime), "\n";
  90 
  91 ###############################################################################
  92 
  # Generate fingerprints for all compounds in one SD file...
  #
  # Takes the index of the file in @SDFilesList. Compounds are read one at a time;
  # filtered compounds and compounds for which fingerprints generation fails are
  # counted as ignored. Output files are opened only after the first compound
  # with successfully generated fingerprints is seen.
  #
  sub GenerateTopologicalPharmacophoreAtomTripletsFingerprints {
    my($FileIndex) = @_;

    my $SDFile = $SDFilesList[$FileIndex];
    my($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
    my $OutputFilesPending = 1;
    my $CmpdCount = 0;
    my $IgnoredCmpdCount = 0;

    my $MoleculeFileIO = MoleculeFileIO->new('Name' => $SDFile);
    $MoleculeFileIO->Open();

    COMPOUND: while (my $Molecule = $MoleculeFileIO->ReadMolecule()) {
      $CmpdCount++;

      # Filter compound data before calculating fingerprints...
      if ($OptionsInfo{Filter} && CheckAndFilterCompound($CmpdCount, $Molecule)) {
        $IgnoredCmpdCount++;
        next COMPOUND;
      }

      my $Fingerprints = GenerateMoleculeFingerprints($Molecule);
      unless ($Fingerprints) {
        $IgnoredCmpdCount++;
        ProcessIgnoredCompound('FingerprintsGenerationFailed', $CmpdCount, $Molecule);
        next COMPOUND;
      }

      # First usable compound: finalize the label and open the output files...
      if ($OutputFilesPending) {
        $OutputFilesPending = 0;
        SetupFingerprintsLabelValueIDs($Fingerprints);
        ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = SetupAndOpenOutputFiles($FileIndex);
      }

      WriteDataToOutputFiles($FileIndex, $CmpdCount, $Molecule, $Fingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
    }
    $MoleculeFileIO->Close();

    # Close whichever output files were opened...
    for my $OutputFileIO ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) {
      $OutputFileIO->Close() if $OutputFileIO;
    }

    WriteFingerprintsGenerationSummaryStatistics($CmpdCount, $IgnoredCmpdCount);
  }
 150 
 # Warn about a compound being ignored due to problems in fingerprints generation...
 #
 # $Mode selects the warning text: ContainsNonElementalData, ContainsNoElementalData
 # or FingerprintsGenerationFailed; any other value gets the generation failure text.
 #
 sub ProcessIgnoredCompound {
   my($Mode, $CmpdCount, $Molecule) = @_;

   my $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
   my $CmpdID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);

   if ($Mode =~ /^ContainsNonElementalData$/i) {
     warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains atom data corresponding to non-elemental atom symbol(s)...\n\n";
   }
   elsif ($Mode =~ /^ContainsNoElementalData$/i) {
     warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains no atom data...\n\n";
   }
   else {
     # Covers FingerprintsGenerationFailed as well as any unrecognized mode...
     warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Fingerprints generation didn't succeed...\n\n";
   }
 }
 178 
 # Check whether a compound must be filtered out...
 #
 # Returns 1, after reporting the compound as ignored, when the molecule has
 # any non-element atoms or no element atoms at all; otherwise returns 0.
 #
 sub CheckAndFilterCompound {
   my($CmpdCount, $Molecule) = @_;

   my($ElementCount, $NonElementCount) = $Molecule->GetNumOfElementsAndNonElements();

   # Non-elemental data takes precedence over missing elemental data...
   my $IgnoreMode = $NonElementCount ? 'ContainsNonElementalData'
                  : !$ElementCount   ? 'ContainsNoElementalData'
                  :                    '';
   return 0 unless $IgnoreMode;

   ProcessIgnoredCompound($IgnoreMode, $CmpdCount, $Molecule);
   return 1;
 }
 199 
 # Print summary statistics for fingerprints generation...
 #
 # Takes the total number of compounds read and the number ignored; the number
 # processed successfully is their difference.
 #
 sub WriteFingerprintsGenerationSummaryStatistics {
   my($CmpdCount, $IgnoredCmpdCount) = @_;

   my $ProcessedCmpdCount = $CmpdCount - $IgnoredCmpdCount;

   print "\nNumber of compounds: $CmpdCount\n",
     "Number of compounds processed successfully during fingerprints generation: $ProcessedCmpdCount\n",
     "Number of compounds ignored during fingerprints generation: $IgnoredCmpdCount\n";
 }
 212 
 # Append atom triplet value IDs to fingerprints label...
 #
 # Does nothing for ArbitrarySize atom triplets or when the label mode is not
 # FingerprintsLabelWithIDs. Otherwise sets $OptionsInfo{FingerprintsLabel} to the
 # original label followed by "; Value IDs: " and the value IDs string of the
 # fingerprints vector.
 #
 # This is called once per input file. The label without IDs is remembered in
 # $OptionsInfo{FingerprintsLabelWithoutValueIDs} on the first call and reused on
 # later calls, so the suffix is not appended again for each additional file.
 #
 sub SetupFingerprintsLabelValueIDs {
   my($TopologicalPharmacophoreAtomTripletsFingerprints) = @_;

   if ($OptionsInfo{AtomTripletsSetSizeToUse} =~ /^ArbitrarySize$/i ||
       $OptionsInfo{FingerprintsLabelMode} !~ /^FingerprintsLabelWithIDs$/i) {
     return;
   }

   if (!exists $OptionsInfo{FingerprintsLabelWithoutValueIDs}) {
     $OptionsInfo{FingerprintsLabelWithoutValueIDs} = $OptionsInfo{FingerprintsLabel};
   }
   $OptionsInfo{FingerprintsLabel} = $OptionsInfo{FingerprintsLabelWithoutValueIDs} . "; Value IDs: " . $TopologicalPharmacophoreAtomTripletsFingerprints->GetFingerprintsVector->GetValueIDsString();
 }
 224 
 # Create and open the output files selected for an input file...
 #
 # Takes the index of the input file and returns the list
 # ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO); an entry is undef when the
 # corresponding output type is not enabled in %OptionsInfo.
 #
 sub SetupAndOpenOutputFiles {
   my($FileIndex) = @_;
   my($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);

   # Parameters common to all fingerprints file IO objects...
   my %CommonFileIOParams = ('Mode' => 'Write', 'Overwrite' => $OptionsInfo{OverwriteFiles}, 'FingerprintsStringMode' => 'FingerprintsVectorString', 'VectorStringFormat' => $OptionsInfo{VectorStringFormat});

   if ($OptionsInfo{SDOutput}) {
     my $NewFPSDFile = $SDFilesInfo{SDOutFileNames}[$FileIndex];

     print "Generating SD file $NewFPSDFile...\n";
     $NewFPSDFileIO = FileIO::FingerprintsSDFileIO->new('Name' => $NewFPSDFile, %CommonFileIOParams, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsLabel});
     $NewFPSDFileIO->Open();
   }

   if ($OptionsInfo{FPOutput}) {
     my $NewFPFile = $SDFilesInfo{FPOutFileNames}[$FileIndex];

     print "Generating FP file $NewFPFile...\n";
     $NewFPFileIO = FileIO::FingerprintsFPFileIO->new('Name' => $NewFPFile, %CommonFileIOParams);
     $NewFPFileIO->Open();
   }

   if ($OptionsInfo{TextOutput}) {
     my $NewFPTextFile = $SDFilesInfo{TextOutFileNames}[$FileIndex];
     my $ColLabelsRef = SetupFPTextFileCoulmnLabels($FileIndex);

     print "Generating text file $NewFPTextFile...\n";
     $NewFPTextFileIO = FileIO::FingerprintsTextFileIO->new('Name' => $NewFPTextFile, %CommonFileIOParams, 'DataColLabels' => $ColLabelsRef, 'OutDelim' => $OptionsInfo{OutDelim}, 'OutQuote' => $OptionsInfo{OutQuote});
     $NewFPTextFileIO->Open();
   }

   return ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
 }
 264 
 # Write fingerprints and other data to the open output files...
 #
 # Each of $NewFPSDFileIO, $NewFPTextFileIO and $NewFPFileIO is written to only
 # when it is set. SD output gets the input molecule string, text output gets the
 # column values and FP output gets the compound ID.
 #
 sub WriteDataToOutputFiles {
   my($FileIndex, $CmpdCount, $Molecule, $TopologicalPharmacophoreAtomTripletsFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = @_;

   # Data fields are needed only for text columns and compound IDs...
   my $DataFieldLabelAndValuesRef = ($NewFPTextFileIO || $NewFPFileIO) ? $Molecule->GetDataFieldLabelAndValues() : undef;

   if ($NewFPSDFileIO) {
     my $CmpdString = $Molecule->GetInputMoleculeString();
     $NewFPSDFileIO->WriteFingerprints($TopologicalPharmacophoreAtomTripletsFingerprints, $CmpdString);
   }

   if ($NewFPTextFileIO) {
     my $ColValuesRef = SetupFPTextFileCoulmnValues($FileIndex, $CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
     $NewFPTextFileIO->WriteFingerprints($TopologicalPharmacophoreAtomTripletsFingerprints, $ColValuesRef);
   }

   if ($NewFPFileIO) {
     my $CompoundID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
     $NewFPFileIO->WriteFingerprints($TopologicalPharmacophoreAtomTripletsFingerprints, $CompoundID);
   }
 }
 297 
 # Generate appropriate column labels for FPText output file...
 #
 # Returns a reference to a list holding the data field labels chosen by
 # $OptionsInfo{DataFieldsMode}, followed by the fingerprints label.
 #
 sub SetupFPTextFileCoulmnLabels {
   my($FileIndex) = @_;

   my $DataFieldsMode = $OptionsInfo{DataFieldsMode};

   my @ColLabels = $DataFieldsMode =~ /^All$/i        ? @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]}
                 : $DataFieldsMode =~ /^Common$/i     ? @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]}
                 : $DataFieldsMode =~ /^Specify$/i    ? @{$OptionsInfo{SpecifiedDataFields}}
                 : $DataFieldsMode =~ /^CompoundID$/i ? ($OptionsInfo{CompoundIDLabel})
                 :                                      ();

   # Fingerprints label always goes last...
   push @ColLabels, $OptionsInfo{FingerprintsLabel};

   return \@ColLabels;
 }
 322 
 # Generate column values for FPText output file...
 #
 # Returns a reference to a list of values matching the labels chosen by
 # $OptionsInfo{DataFieldsMode}: the compound ID in CompoundID mode, otherwise
 # the value of each selected data field, or an empty string when the compound
 # doesn't have that field.
 #
 sub SetupFPTextFileCoulmnValues {
   my($FileIndex, $CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;

   my $DataFieldsMode = $OptionsInfo{DataFieldsMode};

   # Look up a data field value for current compound...
   my $GetDataFieldValue = sub {
     my($Label) = @_;
     return exists $DataFieldLabelAndValuesRef->{$Label} ? $DataFieldLabelAndValuesRef->{$Label} : '';
   };

   my @ColValues = ();
   if ($DataFieldsMode =~ /^CompoundID$/i) {
     push @ColValues, SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
   }
   elsif ($DataFieldsMode =~ /^All$/i) {
     @ColValues = map { $GetDataFieldValue->($_) } @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]};
   }
   elsif ($DataFieldsMode =~ /^Common$/i) {
     @ColValues = map { $GetDataFieldValue->($_) } @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]};
   }
   elsif ($DataFieldsMode =~ /^Specify$/i) {
     @ColValues = map { $GetDataFieldValue->($_) } @{$OptionsInfo{SpecifiedDataFields}};
   }

   return \@ColValues;
 }
 345 
 # Generate compound ID for FP and FPText output files...
 #
 # The ID depends on $OptionsInfo{CompoundIDMode}:
 #   MolNameOrLabelPrefix - molecule name, or prefix plus compound count when name is false
 #   LabelPrefix          - prefix plus compound count
 #   DataField            - value of the data field named by $OptionsInfo{CompoundID}, or ''
 #   MolName              - molecule name
 # Any other mode yields an empty string.
 #
 sub SetupCmpdIDForOutputFiles {
   my($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;

   my $CompoundIDMode = $OptionsInfo{CompoundIDMode};

   if ($CompoundIDMode =~ /^MolNameOrLabelPrefix$/i) {
     my $MolName = $Molecule->GetName();
     return $MolName ? $MolName : "$OptionsInfo{CompoundID}${CmpdCount}";
   }

   if ($CompoundIDMode =~ /^LabelPrefix$/i) {
     return "$OptionsInfo{CompoundID}${CmpdCount}";
   }

   if ($CompoundIDMode =~ /^DataField$/i) {
     my $SpecifiedDataField = $OptionsInfo{CompoundID};
     return exists $DataFieldLabelAndValuesRef->{$SpecifiedDataField} ? $DataFieldLabelAndValuesRef->{$SpecifiedDataField} : '';
   }

   if ($CompoundIDMode =~ /^MolName$/i) {
     # Name is retrieved into a scalar before being returned...
     my $MolName = $Molecule->GetName();
     return $MolName;
   }

   return '';
 }
 371 
 # Generate fingerprints for a molecule...
 #
 # Optionally keeps only the largest component, then detects rings and
 # aromaticity before generating fingerprints. Returns the fingerprints object,
 # or undef when ring detection or fingerprints generation doesn't succeed.
 #
 sub GenerateMoleculeFingerprints {
   my($Molecule) = @_;

   $Molecule->KeepLargestComponent() if $OptionsInfo{KeepLargestComponent};

   return undef unless $Molecule->DetectRings();

   $Molecule->SetAromaticityModel($OptionsInfo{AromaticityModel});
   $Molecule->DetectAromaticity();

   my $Fingerprints = Fingerprints::TopologicalPharmacophoreAtomTripletsFingerprints->new(
     'Molecule' => $Molecule,
     'AtomTripletsSetSizeToUse' => $OptionsInfo{AtomTripletsSetSizeToUse},
     'MinDistance' => $OptionsInfo{MinDistance},
     'MaxDistance' => $OptionsInfo{MaxDistance},
     'DistanceBinSize' => $OptionsInfo{DistanceBinSize},
     'UseTriangleInequality' => $OptionsInfo{UseTriangleInequality},
     'AtomTypesToUse' => \@{$OptionsInfo{AtomTypesToUse}}
   );

   $Fingerprints->GenerateFingerprints();

   # Hand back the object only when generation reports success...
   return $Fingerprints->IsFingerprintsGenerationSuccessful() ? $Fingerprints : undef;
 }
 399 
 # Retrieve information about SD files...
 #
 # Checks each file in @SDFilesList and fills %SDFilesInfo, where each key holds
 # a list indexed by position in @SDFilesList:
 #
 #   FileOkay            - 1 when the file passed all checks, otherwise 0
 #   OutFileRoot         - root name used for output files
 #   SDOutFileNames      - name of SD output file
 #   FPOutFileNames      - name of FP output file
 #   TextOutFileNames    - name of text output file
 #   AllDataFieldsRef    - all data field labels; collected for text output in All or Common mode
 #   CommonDataFieldsRef - common data field labels; collected under the same condition
 #
 # A file is skipped with a warning when it doesn't exist, isn't a SD file, lacks
 # the data field needed for compound IDs, would be overwritten by its own SD
 # output, or when an output file already exists and overwrite is not requested.
 # Output names and data field entries are stored only for files that pass.
 #
 sub RetrieveSDFilesInfo {
   my($SDFile, $Index, $FileDir, $FileExt, $FileName, $OutFileRoot, $TextOutFileExt, $SDOutFileExt, $FPOutFileExt, $NewSDFileName, $NewFPFileName, $NewTextFileName, $CheckDataField, $CollectDataFields, $AllDataFieldsRef, $CommonDataFieldsRef);

   %SDFilesInfo = ();
   @{$SDFilesInfo{FileOkay}} = ();
   @{$SDFilesInfo{OutFileRoot}} = ();
   @{$SDFilesInfo{SDOutFileNames}} = ();
   @{$SDFilesInfo{FPOutFileNames}} = ();
   @{$SDFilesInfo{TextOutFileNames}} = ();
   @{$SDFilesInfo{AllDataFieldsRef}} = ();
   @{$SDFilesInfo{CommonDataFieldsRef}} = ();

   $CheckDataField = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) && ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i)) ? 1 : 0;
   $CollectDataFields = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^(All|Common)$/i)) ? 1 : 0;

   FILELIST: for $Index (0 .. $#SDFilesList) {
     $SDFile = $SDFilesList[$Index];

     # Start out with the file marked as not okay; it's flipped only after all checks pass...
     $SDFilesInfo{FileOkay}[$Index] = 0;
     $SDFilesInfo{OutFileRoot}[$Index] = '';
     $SDFilesInfo{SDOutFileNames}[$Index] = '';
     $SDFilesInfo{FPOutFileNames}[$Index] = '';
     $SDFilesInfo{TextOutFileNames}[$Index] = '';

     if (!(-e $SDFile)) {
       warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
       next FILELIST;
     }
     if (!CheckFileType($SDFile, "sd sdf")) {
       warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
       next FILELIST;
     }

     if ($CheckDataField) {
       # Make sure data field exists in first compound of SD file..
       my($CmpdString, $SpecifiedDataField, @CmpdLines, %DataFieldValues);

       # Three argument open with explicit read mode: file name is never interpreted as a mode or a command...
       open my $SDFileHandle, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
       $CmpdString = ReadCmpdString($SDFileHandle);
       close $SDFileHandle;

       @CmpdLines = split "\n", $CmpdString;
       %DataFieldValues = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
       $SpecifiedDataField = $OptionsInfo{CompoundID};
       if (!exists $DataFieldValues{$SpecifiedDataField}) {
         warn "Warning: Ignoring file $SDFile: Data field value, $SpecifiedDataField, using  \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\" doesn't exist\n";
         next FILELIST;
       }
     }

     $AllDataFieldsRef = '';
     $CommonDataFieldsRef = '';
     if ($CollectDataFields) {
       my($CmpdCount);

       open my $SDFileHandle, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
       ($CmpdCount, $AllDataFieldsRef, $CommonDataFieldsRef) = GetAllAndCommonCmpdDataHeaderLabels($SDFileHandle);
       close $SDFileHandle;
     }

     # Setup output file names...
     $FileDir = ""; $FileName = ""; $FileExt = "";
     ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);

     $TextOutFileExt = "csv";
     if ($Options{outdelim} =~ /^tab$/i) {
       $TextOutFileExt = "tsv";
     }
     $SDOutFileExt = $FileExt;
     $FPOutFileExt = "fpf";

     # Root name from "-r, --root" option is used only for a single input file...
     if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
       if ($RootFileName && $RootFileExt) {
         $FileName = $RootFileName;
       }
       else {
         $FileName = $OptionsInfo{OutFileRoot};
       }
       $OutFileRoot = $FileName;
     }
     else {
       $OutFileRoot = "${FileName}TopologicalPharmacophoreAtomTripletsFP";
     }

     $NewSDFileName = "${OutFileRoot}.${SDOutFileExt}";
     $NewFPFileName = "${OutFileRoot}.${FPOutFileExt}";
     $NewTextFileName = "${OutFileRoot}.${TextOutFileExt}";

     if ($OptionsInfo{SDOutput}) {
       # Quote output file name: it's literal text, not a pattern...
       if ($SDFile =~ /\Q$NewSDFileName\E/i) {
         warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
         print "Specify a different name using \"-r --root\" option or use default name.\n";
         next FILELIST;
       }
     }

     if (!$OptionsInfo{OverwriteFiles}) {
       # Check SD, FP and text output files...
       if ($OptionsInfo{SDOutput}) {
         if (-e $NewSDFileName) {
           warn "Warning: Ignoring file $SDFile: The file $NewSDFileName already exists\n";
           next FILELIST;
         }
       }
       if ($OptionsInfo{FPOutput}) {
         if (-e $NewFPFileName) {
           warn "Warning: Ignoring file $SDFile: The file $NewFPFileName already exists\n";
           next FILELIST;
         }
       }
       if ($OptionsInfo{TextOutput}) {
         if (-e $NewTextFileName) {
           warn "Warning: Ignoring file $SDFile: The file $NewTextFileName already exists\n";
           next FILELIST;
         }
       }
     }

     $SDFilesInfo{FileOkay}[$Index] = 1;

     $SDFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
     $SDFilesInfo{SDOutFileNames}[$Index] = $NewSDFileName;
     $SDFilesInfo{FPOutFileNames}[$Index] = $NewFPFileName;
     $SDFilesInfo{TextOutFileNames}[$Index] = $NewTextFileName;

     $SDFilesInfo{AllDataFieldsRef}[$Index] = $AllDataFieldsRef;
     $SDFilesInfo{CommonDataFieldsRef}[$Index] = $CommonDataFieldsRef;
   }
 }
 532 
 # Process option values...
 #
 # Converts raw command line values in %Options into %OptionsInfo, which is
 # what the rest of the script reads: Yes/No values become 1/0 flags, output
 # type becomes SDOutput/FPOutput/TextOutput flags, and defaults are filled in
 # for compound ID prefix, fingerprints label and vector string format.
 #
 # Dies when a required companion option is missing or when ValuesString
 # vector format is requested for ArbitrarySize atom triplets.
 #
 sub ProcessOptions {
   %OptionsInfo = ();

   ProcessAtomTypesToUseOption();

   $OptionsInfo{AromaticityModel} = $Options{aromaticitymodel};

   $OptionsInfo{AtomTripletsSetSizeToUse} = $Options{atomtripletssetsizetouse};

   $OptionsInfo{CompoundIDMode} = $Options{compoundidmode};
   $OptionsInfo{CompoundIDLabel} = $Options{compoundidlabel};
   $OptionsInfo{DataFieldsMode} = $Options{datafieldsmode};

   my(@SpecifiedDataFields);
   @SpecifiedDataFields = ();

   @{$OptionsInfo{SpecifiedDataFields}} = ();
   $OptionsInfo{CompoundID} = '';

   if ($Options{datafieldsmode} =~ /^CompoundID$/i) {
     if ($Options{compoundidmode} =~ /^DataField$/i) {
       if (!$Options{compoundid}) {
         die "Error: You must specify a value for \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\". \n";
       }
       $OptionsInfo{CompoundID} = $Options{compoundid};
     }
     elsif ($Options{compoundidmode} =~ /^(LabelPrefix|MolNameOrLabelPrefix)$/i) {
       $OptionsInfo{CompoundID} = $Options{compoundid} ? $Options{compoundid} : 'Cmpd';
     }
   }
   elsif ($Options{datafieldsmode} =~ /^Specify$/i) {
     if (!$Options{datafields}) {
       die "Error: You must specify a value for \"--DataFields\" option in \"Specify\" \"-d, --DataFieldsMode\". \n";
     }
     @SpecifiedDataFields = split /\,/, $Options{datafields};
     push @{$OptionsInfo{SpecifiedDataFields}}, @SpecifiedDataFields;
   }

   $OptionsInfo{Filter} = ($Options{filter} =~ /^Yes$/i) ? 1 : 0;

   $OptionsInfo{FingerprintsLabelMode} = $Options{fingerprintslabelmode};
   $OptionsInfo{FingerprintsLabel} = $Options{fingerprintslabel} ? $Options{fingerprintslabel} : 'TopologicalPharmacophoreAtomTripletsFingerprints';

   $OptionsInfo{KeepLargestComponent} = ($Options{keeplargestcomponent} =~ /^Yes$/i) ? 1 : 0;

   $OptionsInfo{DistanceBinSize} = $Options{distancebinsize};

   $OptionsInfo{MinDistance} = $Options{mindistance};
   $OptionsInfo{MaxDistance} = $Options{maxdistance};

   $OptionsInfo{Output} = $Options{output};
   $OptionsInfo{SDOutput} = ($Options{output} =~ /^(SD|All)$/i) ? 1 : 0;
   $OptionsInfo{FPOutput} = ($Options{output} =~ /^(FP|All)$/i) ? 1 : 0;
   $OptionsInfo{TextOutput} = ($Options{output} =~ /^(Text|All)$/i) ? 1 : 0;

   $OptionsInfo{OutDelim} = $Options{outdelim};
   $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;

   $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;

   $OptionsInfo{UseTriangleInequality} = ($Options{usetriangleinequality} =~ /^Yes$/i) ? 1 : 0;

   # Setup default vector string format...
   my($VectorStringFormat);
   $VectorStringFormat = '';

   if ($Options{vectorstringformat}) {
     $VectorStringFormat = $Options{vectorstringformat};

     if ($Options{atomtripletssetsizetouse} =~ /^ArbitrarySize$/i && $VectorStringFormat =~ /^ValuesString$/i) {
       die "Error: The value specified, $Options{vectorstringformat}, for option \"-v, --VectorStringFormat\" is not valid for $Options{atomtripletssetsizetouse} value of \"--AtomTripletsSetSizeToUse\" option. Allowed values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString\n";
     }
   }
   else {
     # Option values are matched without regard to case everywhere else in this
     # script, so FixedSize must be matched the same way here...
     $VectorStringFormat = ($Options{atomtripletssetsizetouse} =~ /^FixedSize$/i) ? "ValuesString" : "IDsAndValuesString";
   }
   $OptionsInfo{VectorStringFormat} = $VectorStringFormat;
 }
 613 
 # Process atom types to use option...
 #
 # Removes spaces from the comma delimited value of "-a, --AtomTypesToUse" option,
 # checks each atom type using AtomTypes::FunctionalClassAtomTypes and stores the
 # list in @{$OptionsInfo{AtomTypesToUse}}. Dies when the value is empty or an
 # atom type is not an available functional class.
 #
 sub ProcessAtomTypesToUseOption {
   my($AtomType, $SpecifiedAtomTypesToUse, @AtomTypesWords);

   @{$OptionsInfo{AtomTypesToUse}} = ();
   if (IsEmpty($Options{atomtypestouse})) {
     die "Error: Atom types value specified using \"-a, --AtomTypesToUse\" option is empty\n";
   }

   $SpecifiedAtomTypesToUse = $Options{atomtypestouse};
   $SpecifiedAtomTypesToUse =~ s/ //g;
   @AtomTypesWords = split /\,/, $SpecifiedAtomTypesToUse;

   for $AtomType (@AtomTypesWords) {
     if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($AtomType)) {
       # Message must end with the newline, otherwise die appends script name and line number...
       die "Error: Atom type specified, $AtomType, using \"-a, --AtomTypesToUse\" option is not valid...\n";
     }
     push @{$OptionsInfo{AtomTypesToUse}}, $AtomType;
   }
 }
 635 
 # Setup script usage and retrieve command line arguments specified using various options...
 #
 # Fills %Options with default values, lets GetOptions replace them with values
 # from the command line, changes to the working directory when "-w, --workingdir"
 # is given and checks each option value. Dies with an error message on the first
 # value that is not valid.
 #
 sub SetupScriptUsage {

   # Retrieve all the options...
   %Options = ();

   $Options{aromaticitymodel} = 'MayaChemToolsAromaticityModel';

   $Options{atomtripletssetsizetouse} = 'ArbitrarySize';

   $Options{atomtypestouse} = 'HBD,HBA,PI,NI,H,Ar';

   $Options{compoundidmode} = 'LabelPrefix';
   $Options{compoundidlabel} = 'CompoundID';
   $Options{datafieldsmode} = 'CompoundID';

   $Options{filter} = 'Yes';

   $Options{fingerprintslabelmode} = 'FingerprintsLabelOnly';

   $Options{keeplargestcomponent} = 'Yes';

   $Options{mindistance} = 1;
   $Options{maxdistance} = 10;

   $Options{distancebinsize} = 2;

   $Options{usetriangleinequality} = 'Yes';

   $Options{output} = 'text';
   $Options{outdelim} = 'comma';
   $Options{quote} = 'yes';

   $Options{vectorstringformat} = '';

   if (!GetOptions(\%Options, "aromaticitymodel=s", "atomtripletssetsizetouse=s", "atomtypestouse|a=s", "compoundid=s", "compoundidlabel=s", "compoundidmode=s", "datafields=s", "datafieldsmode|d=s", "distancebinsize=s", "filter|f=s", "fingerprintslabelmode=s", "fingerprintslabel=s", "help|h", "keeplargestcomponent|k=s",  "mindistance=s", "maxdistance=s", "outdelim=s", "output=s", "overwrite|o", "quote|q=s", "root|r=s", "usetriangleinequality|u=s", "vectorstringformat|v=s", "workingdir|w=s")) {
     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
   }
   if ($Options{workingdir}) {
     if (! -d $Options{workingdir}) {
       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
     }
     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
   }
   if (!Molecule::IsSupportedAromaticityModel($Options{aromaticitymodel})) {
     my(@SupportedModels) = Molecule::GetSupportedAromaticityModels();
     die "Error: The value specified, $Options{aromaticitymodel}, for option \"--AromaticityModel\" is not valid. Supported aromaticity models in current release of MayaChemTools: @SupportedModels\n";
   }
   if ($Options{atomtripletssetsizetouse} !~ /^(ArbitrarySize|FixedSize)$/i) {
     die "Error: The value specified, $Options{atomtripletssetsizetouse}, for option \"--AtomTripletsSetSizeToUse\" is not valid. Allowed values: ArbitrarySize or FixedSize\n";
   }
   if ($Options{compoundidmode} !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
     die "Error: The value specified, $Options{compoundidmode}, for option \"--CompoundIDMode\" is not valid. Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n";
   }
   if ($Options{datafieldsmode} !~ /^(All|Common|Specify|CompoundID)$/i) {
     die "Error: The value specified, $Options{datafieldsmode}, for option \"-d, --DataFieldsMode\" is not valid. Allowed values: All, Common, Specify or CompoundID\n";
   }
   if (!IsPositiveInteger($Options{distancebinsize})) {
     die "Error: The value specified, $Options{distancebinsize}, for option \"--DistanceBinSize\" is not valid. Allowed values: > 0 \n";
   }
   if ($Options{filter} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{filter}, for option \"-f, --Filter\" is not valid. Allowed values: Yes or No\n";
   }
   if ($Options{fingerprintslabelmode} !~ /^(FingerprintsLabelOnly|FingerprintsLabelWithIDs)$/i) {
     die "Error: The value specified, $Options{fingerprintslabelmode}, for option \"--FingerprintsLabelMode\" is not valid. Allowed values: FingerprintsLabelOnly or FingerprintsLabelWithIDs\n";
   }
   if ($Options{keeplargestcomponent} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{keeplargestcomponent}, for option \"-k, --KeepLargestComponent\" is not valid. Allowed values: Yes or No\n";
   }
   if (!IsPositiveInteger($Options{mindistance})) {
     die "Error: The value specified, $Options{mindistance}, for option \"--MinDistance\" is not valid. Allowed values: > 0 \n";
   }
   if (!IsPositiveInteger($Options{maxdistance})) {
     die "Error: The value specified, $Options{maxdistance}, for option \"--MaxDistance\" is not valid. Allowed values: > 0 \n";
   }
   # Equal minimum and maximum distances pass this check; only minimum above maximum is rejected...
   if ($Options{mindistance} > $Options{maxdistance}) {
     die "Error: The value specified, $Options{mindistance}, for option \"--MinDistance\" must be less than or equal to the value specified, $Options{maxdistance}, for option \"--MaxDistance\" \n";
   }
   if ($Options{output} !~ /^(SD|FP|text|all)$/i) {
     die "Error: The value specified, $Options{output}, for option \"--output\" is not valid. Allowed values: SD, FP, text, or all\n";
   }
   if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
     die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
   }
   if ($Options{quote} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: Yes or No\n";
   }
   if ($Options{outdelim} =~ /semicolon/i && $Options{quote} =~ /^No$/i) {
     die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not allowed with, semicolon value of \"--outdelim\" option: Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
   }
   if ($Options{usetriangleinequality} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{usetriangleinequality}, for option \"-u, --UseTriangleInequality\" is not valid. Allowed values: Yes or No\n";
   }
   if ($Options{vectorstringformat} && $Options{vectorstringformat} !~ /^(ValuesString|IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString)$/i) {
     die "Error: The value specified, $Options{vectorstringformat}, for option \"-v, --VectorStringFormat\" is not valid. Allowed values: ValuesString, IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString\n";
   }
 }
 733