MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: InfoSDFiles.pl,v $
   4 # $Date: 2015/02/28 20:46:20 $
   5 # $Revision: 1.35 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
   19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use FindBin; use lib "$FindBin::Bin/../lib";
  31 use Getopt::Long;
  32 use File::Basename;
  33 use Benchmark;
  34 use SDFileUtil;
  35 use TextUtil;
  36 use FileUtil;
  37 
  38 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  39 
  40 # Autoflush STDOUT
  41 $| = 1;
  42 
  43 # Starting message...
  44 $ScriptName = basename $0;
  45 print "\n$ScriptName:Starting...\n\n";
  46 $StartTime = new Benchmark;
  47 
  48 # Get the options and setup script...
  49 SetupScriptUsage();
  50 if ($Options{help} || @ARGV < 1) {
  51   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  52 }
  53 
  54 my(@SDFilesList);
  55 @SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");
  56 
  57 # Process options...
  58 print "Processing options...\n";
  59 my(%OptionsInfo);
  60 ProcessOptions();
  61 
  62 # Setup information about input files...
  63 print "Checking input SD file(s)...\n";
  64 my(%SDFilesInfo, %SDCmpdsInfo);
  65 RetrieveSDFilesInfo();
  66 InitializeSDCmpdsInfo();
  67 
  68 # Process input files..
  69 my($FileIndex);
  70 if (@SDFilesList > 1) {
  71   print "\nProcessing SD files...\n";
  72 }
  73 for $FileIndex (0 .. $#SDFilesList) {
  74   if ($SDFilesInfo{FileOkay}[$FileIndex]) {
  75     print "\nProcessing file $SDFilesList[$FileIndex]...\n";
  76     ListSDFileInfo($FileIndex);
  77   }
  78 }
  79 ListTotalSizeOfFiles();
  80 
  81 print "\n$ScriptName:Done...\n\n";
  82 
  83 $EndTime = new Benchmark;
  84 $TotalTime = timediff ($EndTime, $StartTime);
  85 print "Total time: ", timestr($TotalTime), "\n";
  86 
  87 ###############################################################################
  88 
  89 # List appropriate information...
  90 sub ListSDFileInfo {
  91   my($Index) = @_;
  92   my($SDFile);
  93 
  94   $SDFile = $SDFilesList[$Index];
  95 
  96   if ($OptionsInfo{ProcessCmpdInfo}) {
  97     ListCompoundDetailsInfo($Index);
  98   }
  99   else {
 100     ListCompoundCountInfo($Index);
 101   }
 102 
 103   # File size and modification information...
 104   print "\nFile size: ", FormatFileSize($SDFilesInfo{FileSize}[$Index]), " \n";
 105   print "Last modified: ", $SDFilesInfo{FileLastModified}[$Index], " \n";
 106 }
 107 
 108 # List number of compounds in SD file...
 109 sub ListCompoundCountInfo {
 110   my($Index) = @_;
 111   my($SDFile, $CmpdCount);
 112 
 113   $SDFile = $SDFilesList[$Index];
 114 
 115   $CmpdCount = 0;
 116 
 117   open SDFILE, "$SDFile" or die "Couldn't open $SDFile: $! \n";
 118   while (<SDFILE>) {
 119     if (/^\$\$\$\$/) {
 120       $CmpdCount++;
 121     }
 122   }
 123   close SDFILE;
 124 
 125   $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;
 126 
 127   print "\nNumber of compounds: $CmpdCount\n";
 128 }
 129 
 130 # List detailed compound information...
 131 sub ListCompoundDetailsInfo {
 132   my($Index) = @_;
 133   my($SDFile, $CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount, $CtabLinesCount, $PrintCmpdCounterHeader, $ProblematicCmpdData, $CmpdString, @CmpdLines);
 134 
 135   $SDFile = $SDFilesList[$Index];
 136 
 137   ($CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount) = (0) x 7;
 138 
 139   InitializeSDCmpdsInfo();
 140 
 141   $PrintCmpdCounterHeader = 1;
 142 
 143   open SDFILE, "$SDFile" or die "Couldn't open $SDFile: $! \n";
 144   while ($CmpdString = ReadCmpdString(\*SDFILE)) {
 145     $CmpdCount++;
 146     $ProblematicCmpdData = 0;
 147     if ($OptionsInfo{Detail} <= 1) {
 148       if (($CmpdCount % 5000) == 0) {
 149         if ($PrintCmpdCounterHeader) {
 150           $PrintCmpdCounterHeader = 0;
 151           print "Processing compounds:";
 152         }
 153         print "$CmpdCount...";
 154       }
 155     }
 156     @CmpdLines = split "\n", $CmpdString;
 157     $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);
 158     if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
 159       if ($CtabLinesCount <= 0) {
 160         $EmptyCtabBlocksCount++;
 161         $ProblematicCmpdData = 1;
 162       }
 163     }
 164     if ($CtabLinesCount > 0) {
 165       my ($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine($CmpdLines[3]);
 166       if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
 167         if ($CtabLinesCount != ($AtomCount + $BondCount)) {
 168           $MismatchCtabBlockCount++;
 169           $ProblematicCmpdData = 1;
 170           if ($OptionsInfo{Detail} >= 2) {
 171             print "\nMismatch found: Ctab lines count: $CtabLinesCount;  Atoms count: $AtomCount; Bond count: $BondCount\n";
 172           }
 173         }
 174       }
 175       if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
 176         if ($ChiralFlag == 1) {
 177           $ChiralCtabBlockCount++;
 178         }
 179       }
 180       if ($CtabLinesCount == ($AtomCount + $BondCount)) {
 181         if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
 182           my($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) = GetUnknownAtoms(\@CmpdLines);
 183           if ($UnknownAtomCount) {
 184             $UnknownAtomsCtabBlockCount++;
 185             $ProblematicCmpdData = 1;
 186             if ($OptionsInfo{Detail} >= 2) {
 187               print "\nUnknown atom(s) found: $UnknownAtomCount\nUnknown atom(s) symbols:$UnknownAtoms\nUnknown atom(s) data lines:\n$UnknownAtomLines\n";
 188             }
 189           }
 190         }
 191         if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
 192           my($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) = GetInvalidAtomNumbers(\@CmpdLines);
 193           if ($InvalidAtomNumbersCount) {
 194             $InvalidAtomNumbersCtabBlockCount++;
 195             $ProblematicCmpdData = 1;
 196             if ($OptionsInfo{Detail} >= 2) {
 197               print "\nInvalid atom number(s) found: $InvalidAtomNumbersCount\nInvalid atom number(s):$InvalidAtomNumbers\nInvalid atom number(s) data lines:\n$InvalidAtomNumberLines\n";
 198             }
 199           }
 200         }
 201         if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
 202           my($FragmentsCount, $Fragments) = GetCmpdFragments(\@CmpdLines);
 203           if ($FragmentsCount > 1) {
 204             $SaltsCtabBlockCount++;
 205             $ProblematicCmpdData = 1;
 206             if ($OptionsInfo{Detail} >= 2) {
 207               print "\nSalts found: $FragmentsCount\nSalts atom numbers:\n$Fragments\n";
 208             }
 209           }
 210         }
 211       }
 212     }
 213     if ($OptionsInfo{ProcessCmpdData}) {
 214       ProcessCmpdInfo(\@CmpdLines, $CmpdCount);
 215     }
 216     if ($OptionsInfo{Detail} >= 3) {
 217       if ($ProblematicCmpdData) {
 218         print "\nCompound data:\n$CmpdString\n\n";
 219       }
 220     }
 221   }
 222   if ($OptionsInfo{Detail} <= 1) {
 223     if (!$PrintCmpdCounterHeader) {
 224       print "\n";
 225     }
 226   }
 227   close SDFILE;
 228 
 229   $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;
 230 
 231   print "\nNumber of compounds: $CmpdCount\n";
 232 
 233   if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
 234     print "Number of empty atom/bond blocks: $EmptyCtabBlocksCount\n";
 235   }
 236   if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
 237     print "Number of mismatched atom/bond blocks: $MismatchCtabBlockCount\n";
 238   }
 239   if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
 240     print "Number of atom blocks with unknown atom labels: $UnknownAtomsCtabBlockCount\n";
 241   }
 242   if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
 243     print "Number of bond blocks and atom property blocks with invalid atom numbers: $InvalidAtomNumbersCtabBlockCount\n";
 244   }
 245   if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
 246     print "Number of atom blocks containing salts: $SaltsCtabBlockCount\n";
 247   }
 248   if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
 249     print "Number of chiral atom/bond blocks: $ChiralCtabBlockCount\n";
 250   }
 251   if ($OptionsInfo{ProcessCmpdData}) {
 252     PrintCmpdInfoSummary();
 253   }
 254 
 255 }
 256 
 257 # Initialize compound data information for a SD file...
 258 sub InitializeSDCmpdsInfo {
 259 
 260   if (!exists $SDCmpdsInfo{TotalCmpdCount}) {
 261     $SDCmpdsInfo{TotalCmpdCount} = 0;
 262   }
 263 
 264   @{$SDCmpdsInfo{FieldLabels}} = ();
 265   %{$SDCmpdsInfo{FieldLabelsMap}} = ();
 266   %{$SDCmpdsInfo{NonEmptyFieldValuesCountMap}} = ();
 267   %{$SDCmpdsInfo{EmptyFieldValuesCountMap}} = ();
 268   %{$SDCmpdsInfo{NonNumericalFieldValuesCountMap}} = ();
 269   %{$SDCmpdsInfo{NumericalFieldValuesCountMap}} = ();
 270 }
 271 
 272 # Process compound data header labels and figure out which ones are present for
 273 # all the compounds...
 274 sub ProcessCmpdInfo {
 275   my($CmpdLinesRef, $CmpdCount) = @_;
 276   my($Label);
 277 
 278   if (@{$SDCmpdsInfo{FieldLabels}}) {
 279     my (@CmpdFieldLabels) = GetCmpdDataHeaderLabels($CmpdLinesRef);
 280     my(%CmpdFieldLabelsMap) = ();
 281     # Setup a map for the current labels...
 282     for $Label (@CmpdFieldLabels) {
 283       $CmpdFieldLabelsMap{$Label} = "PresentInSome";
 284     }
 285     # Check the presence old labels for this compound; otherwise, mark 'em new...
 286     for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
 287       if (!$CmpdFieldLabelsMap{$Label}) {
 288         $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
 289       }
 290     }
 291     # Check the presence this compound in the old labels; otherwise, add 'em...
 292     for $Label (@CmpdFieldLabels ) {
 293       if (!$SDCmpdsInfo{FieldLabelsMap}{$Label}) {
 294         # It's a new label...
 295         push @{$SDCmpdsInfo{FieldLabels}}, $Label;
 296         $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
 297       }
 298     }
 299   }
 300   else {
 301     # Get the initial label set and set up a map...
 302     @{$SDCmpdsInfo{FieldLabels}} = GetCmpdDataHeaderLabels($CmpdLinesRef);
 303     for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
 304       $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInAll";
 305     }
 306   }
 307   if ($OptionsInfo{CountEmptyData} || $OptionsInfo{CheckData}) {
 308     # Count empty data field values...
 309     my(%DataFieldAndValues, $Label, $Value);
 310 
 311     %DataFieldAndValues = GetCmpdDataHeaderLabelsAndValues($CmpdLinesRef);
 312     for $Label (keys %DataFieldAndValues) {
 313       $Value = $DataFieldAndValues{$Label};
 314       if ($OptionsInfo{CountEmptyData}) {
 315         if (IsNotEmpty($Value)) {
 316           if (exists($SDCmpdsInfo{NonEmptyFieldValuesCountMap}{$Label})) {
 317             $SDCmpdsInfo{NonEmptyFieldValuesCountMap}{$Label} += 1;
 318           }
 319           else {
 320             $SDCmpdsInfo{NonEmptyFieldValuesCountMap}{$Label} = 1;
 321           }
 322         }
 323         else {
 324           if ($Options{detail} >= 2) {
 325             print "Compound record $CmpdCount: Empty data field <$Label>\n";
 326           }
 327           if (exists($SDCmpdsInfo{EmptyFieldValuesCountMap}{$Label})) {
 328             $SDCmpdsInfo{EmptyFieldValuesCountMap}{$Label} += 1;
 329           }
 330           else {
 331             $SDCmpdsInfo{EmptyFieldValuesCountMap}{$Label} = 1;
 332           }
 333         }
 334       }
 335       if ($OptionsInfo{CheckData}) {
 336         if (IsNumerical($Value)) {
 337           if (exists($SDCmpdsInfo{NumericalFieldValuesCountMap}{$Label})) {
 338             $SDCmpdsInfo{NumericalFieldValuesCountMap}{$Label} += 1;
 339           }
 340           else {
 341             $SDCmpdsInfo{NumericalFieldValuesCountMap}{$Label} = 1;
 342           }
 343         }
 344         else {
 345           if (exists($SDCmpdsInfo{NonNumericalFieldValuesCountMap}{$Label})) {
 346             $SDCmpdsInfo{NonNumericalFieldValuesCountMap}{$Label} += 1;
 347           }
 348           else {
 349             $SDCmpdsInfo{NonNumericalFieldValuesCountMap}{$Label} = 1;
 350           }
 351         }
 352       }
 353     }
 354   }
 355 }
 356 
 357 # Print compound summary...
 358 sub PrintCmpdInfoSummary {
 359   if (@{$SDCmpdsInfo{FieldLabels}}) {
 360     my($PresentInAllCount, $Label, @FieldLabelsPresentInSome, @FieldLabelsPresentInAll);
 361 
 362     @FieldLabelsPresentInSome = ();
 363     @FieldLabelsPresentInAll = ();
 364 
 365     $PresentInAllCount = 0;
 366     print "\nNumber of data fields: ", scalar(@{$SDCmpdsInfo{FieldLabels}}), "\n";
 367     print "All data field labels: ";
 368     for $Label (sort keys %{$SDCmpdsInfo{FieldLabelsMap}}) {
 369       print "<$Label> ";
 370     }
 371     print "\n";
 372     for $Label (sort keys %{$SDCmpdsInfo{FieldLabelsMap}}) {
 373       if ($SDCmpdsInfo{FieldLabelsMap}{$Label} eq "PresentInAll") {
 374         $PresentInAllCount++;
 375         push @FieldLabelsPresentInAll, $Label;
 376       }
 377     }
 378     if ($PresentInAllCount != @{$SDCmpdsInfo{FieldLabels}}) {
 379       print "Data field labels present in all compounds: ";
 380       for $Label (sort keys %{$SDCmpdsInfo{FieldLabelsMap}}) {
 381         if ($SDCmpdsInfo{FieldLabelsMap}{$Label} eq "PresentInAll") {
 382           print "<$Label> ";
 383         }
 384       }
 385       print "\n";
 386       print "Data field labels present in some compounds: ";
 387       for $Label (sort keys %{$SDCmpdsInfo{FieldLabelsMap}}) {
 388         if ($SDCmpdsInfo{FieldLabelsMap}{$Label} eq "PresentInSome") {
 389           print "<$Label> ";
 390           push @FieldLabelsPresentInSome, $Label;
 391         }
 392       }
 393       print "\n";
 394     }
 395     # List empty data field values count...
 396     if ($OptionsInfo{CountEmptyData}) {
 397       print "\n";
 398       if ($PresentInAllCount == @{$SDCmpdsInfo{FieldLabels}}) {
 399         PrintDataInformation("Number of non-empty values for data field(s)", \@{$SDCmpdsInfo{FieldLabels}}, \%{$SDCmpdsInfo{NonEmptyFieldValuesCountMap}});
 400         PrintDataInformation("Number of empty values for data field(s)", \@{$SDCmpdsInfo{FieldLabels}}, \%{$SDCmpdsInfo{EmptyFieldValuesCountMap}});
 401       }
 402       else {
 403         PrintDataInformation("Number of non-empty values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, \%{$SDCmpdsInfo{NonEmptyFieldValuesCountMap}});
 404         PrintDataInformation("Number of empty values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, \%{$SDCmpdsInfo{EmptyFieldValuesCountMap}});
 405         PrintDataInformation("Number of non-empty values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, \%{$SDCmpdsInfo{NonEmptyFieldValuesCountMap}});
 406         PrintDataInformation("Number of empty values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, \%{$SDCmpdsInfo{EmptyFieldValuesCountMap}});
 407       }
 408       print "\n";
 409     }
 410     # List numerical data values count...
 411     if ($OptionsInfo{CheckData}) {
 412       print "\n";
 413       if ($PresentInAllCount == @{$SDCmpdsInfo{FieldLabels}}) {
 414         PrintDataInformation("Number of non-numerical values for data field(s)", \@{$SDCmpdsInfo{FieldLabels}}, \%{$SDCmpdsInfo{NonNumericalFieldValuesCountMap}});
 415         PrintDataInformation("Number of numerical values for data field(s)", \@{$SDCmpdsInfo{FieldLabels}}, \%{$SDCmpdsInfo{NumericalFieldValuesCountMap}});
 416       }
 417       else {
 418         PrintDataInformation("Number of non-numerical values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, \%{$SDCmpdsInfo{NonNumericalFieldValuesCountMap}});
 419         PrintDataInformation("Number of numerical values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, \%{$SDCmpdsInfo{NumericalFieldValuesCountMap}});
 420         PrintDataInformation("Number of non-numerical values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, \%{$SDCmpdsInfo{NonNumericalFieldValuesCountMap}});
 421         PrintDataInformation("Number of numerical values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, \%{$SDCmpdsInfo{NumericalFieldValuesCountMap}});
 422       }
 423       print "\n";
 424     }
 425   }
 426   else {
 427     print "\nNumber of data fields: 0\n";
 428   }
 429 }
 430 # List data information...
 431 sub PrintDataInformation {
 432   my($InfoLabel, $DataLabelRef, $DataLabelToValueMapRef) = @_;
 433   my($Line, $Label);
 434 
 435   $Line = "";
 436   for $Label (@{$DataLabelRef}) {
 437     $Line .= " <$Label> - " . (exists($DataLabelToValueMapRef->{$Label}) ? $DataLabelToValueMapRef->{$Label} : 0) . ",";
 438   }
 439   $Line =~ s/\,$//g;
 440   print "$InfoLabel: $Line\n";
 441 }
 442 
 443 # Total size of all the files...
 444 sub ListTotalSizeOfFiles {
 445   my($FileOkayCount, $TotalSize, $Index);
 446 
 447   $FileOkayCount = 0;
 448   $TotalSize = 0;
 449 
 450   for $Index (0 .. $#SDFilesList) {
 451     if ($SDFilesInfo{FileOkay}[$Index]) {
 452       $FileOkayCount++;
 453       $TotalSize += $SDFilesInfo{FileSize}[$Index];
 454     }
 455   }
 456   if ($FileOkayCount > 1) {
 457     print "\nTotal number of compounds in  $FileOkayCount SD files: $SDCmpdsInfo{TotalCmpdCount}\n";
 458     print "\nTotal size of $FileOkayCount SD files: ", FormatFileSize($TotalSize), "\n";
 459   }
 460 
 461 }
 462 
 463 # Retrieve information about SD files...
 464 sub RetrieveSDFilesInfo {
 465   my($Index, $SDFile, $ModifiedTimeString, $ModifiedDateString);
 466 
 467   %SDCmpdsInfo = ();
 468 
 469   %SDFilesInfo = ();
 470   @{$SDFilesInfo{FileOkay}} = ();
 471   @{$SDFilesInfo{FileSize}} = ();
 472   @{$SDFilesInfo{FileLastModified}} = ();
 473 
 474   FILELIST: for $Index (0 .. $#SDFilesList) {
 475     $SDFilesInfo{FileOkay}[$Index] = 0;
 476     $SDFilesInfo{FileSize}[$Index] = 0;
 477     $SDFilesInfo{FileLastModified}[$Index] = '';
 478 
 479     $SDFile = $SDFilesList[$Index];
 480     if (!(-e $SDFile)) {
 481       warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
 482       next FILELIST;
 483     }
 484     if (!CheckFileType($SDFile, "sdf sd")) {
 485       warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
 486       next FILELIST;
 487     }
 488     if (! open SDFILE, "$SDFile") {
 489       warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
 490       next FILELIST;
 491     }
 492     close SDFILE;
 493 
 494     $SDFilesInfo{FileOkay}[$Index] = 1;
 495     $SDFilesInfo{FileSize}[$Index] = FileSize($SDFile);
 496     ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($SDFile);
 497     $SDFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString";
 498   }
 499 }
 500 
 501 # Process option values...
 502 sub ProcessOptions {
 503   %OptionsInfo = ();
 504 
 505   $OptionsInfo{All} = $Options{all} ? $Options{all} : 0;
 506   $OptionsInfo{Chiral} = $Options{chiral} ? $Options{chiral} : 0;
 507   $OptionsInfo{Count} = $Options{count} ? $Options{count} : 0;
 508   $OptionsInfo{DataCheck} = $Options{datacheck} ? $Options{datacheck} : 0;
 509   $OptionsInfo{Empty} = $Options{empty} ? $Options{empty} : 0;
 510   $OptionsInfo{Fields} = $Options{fields} ? $Options{fields} : 0;
 511   $OptionsInfo{InvalidAtomNumbers} = $Options{invalidatomnumbers} ? $Options{invalidatomnumbers} : 0;
 512   $OptionsInfo{Mismatch} = $Options{mismatch} ? $Options{mismatch} : 0;
 513   $OptionsInfo{Salts} = $Options{salts} ? $Options{salts} : 0;
 514   $OptionsInfo{UnknownAtoms} = $Options{unknownatoms} ? $Options{unknownatoms} : 0;
 515 
 516   $OptionsInfo{Detail} = $Options{detail};
 517 
 518   $OptionsInfo{ProcessCmpdInfo} = ($Options{all} ||  $Options{chiral} || $Options{empty} || $Options{fields} || $Options{invalidatomnumbers}  || $Options{mismatch} || $Options{salts} || $Options{unknownatoms} || $Options{datacheck}) ? 1 : 0;
 519 
 520   $OptionsInfo{ProcessCmpdData} = ($Options{all} || $Options{fields} || $Options{empty} || $Options{datacheck}) ? 1 : 0;
 521 
 522   $OptionsInfo{CountEmptyData} = ($Options{all} || $Options{empty}) ? 1 : 0;
 523   $OptionsInfo{CheckData} = ($Options{all} || $Options{datacheck}) ? 1 : 0;
 524 }
 525 
 526 # Setup script usage  and retrieve command line arguments specified using various options...
 527 sub SetupScriptUsage {
 528 
 529   # Setup default and retrieve all the options...
 530   %Options = ();
 531   $Options{detail} = 1;
 532   if (!GetOptions(\%Options, "all|a", "count|c", "chiral", "datacheck", "detail|d:i", "empty|e", "fields|f", "help|h", "invalidatomnumbers|i", "mismatch|m", "salts|s", "unknownatoms|u", "workingdir|w=s")) {
 533     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
 534   }
 535   if ($Options{workingdir}) {
 536     if (! -d $Options{workingdir}) {
 537       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
 538     }
 539     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
 540   }
 541   if ($Options{detail} <= 0 || $Options{detail} > 3) {
 542     die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Possible values: 1 to 3\n";
 543   }
 544 }
 545