view mayachemtools/bin/InfoSDFiles.pl @ 9:ab29fa5c8c1f draft default tip

Uploaded
author deepakjadmin
date Thu, 15 Dec 2016 14:18:03 -0500
parents 73ae111cf86f
children
line wrap: on
line source

#!/usr/bin/perl -w
#
# $RCSfile: InfoSDFiles.pl,v $
# $Date: 2015/02/28 20:46:20 $
# $Revision: 1.35 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability or fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use Benchmark;
use SDFileUtil;
use TextUtil;
use FileUtil;

# File-level state shared with the subroutines below: the script name, the raw
# command line options and the benchmark timestamps...
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename $0;
print "\n$ScriptName:Starting...\n\n";
# Call the constructor with the arrow form; the indirect object form
# ("new Benchmark") is ambiguous to the parser and discouraged by perlobj...
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

# Names of SD files to process; indexed in parallel with the arrays in %SDFilesInfo...
my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo, %SDCmpdsInfo);
RetrieveSDFilesInfo();
InitializeSDCmpdsInfo();

# Process input files..
my($FileIndex);
if (@SDFilesList > 1) {
  print "\nProcessing SD files...\n";
}
for $FileIndex (0 .. $#SDFilesList) {
  # Files which failed the checks in RetrieveSDFilesInfo are skipped...
  if ($SDFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $SDFilesList[$FileIndex]...\n";
    ListSDFileInfo($FileIndex);
  }
}
ListTotalSizeOfFiles();

print "\n$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# List information for the SD file at position $Index in @SDFilesList: compound
# level information first, followed by the file size and last modification
# time stored in %SDFilesInfo...
sub ListSDFileInfo {
  my($Index) = @_;

  # A plain compound count is enough unless one of the compound level
  # checks has been requested...
  if (!$OptionsInfo{ProcessCmpdInfo}) {
    ListCompoundCountInfo($Index);
  }
  else {
    ListCompoundDetailsInfo($Index);
  }

  # File size and modification information...
  my $LastModified = $SDFilesInfo{FileLastModified}[$Index];
  print "\nFile size: ", FormatFileSize($SDFilesInfo{FileSize}[$Index]), " \n";
  print "Last modified: ", $LastModified, " \n";
}

# List number of compounds in the SD file at position $Index in @SDFilesList...
#
# A compound is counted for each line starting with the record delimiter $$$$.
# The count is printed and also added to the running total in
# $SDCmpdsInfo{TotalCmpdCount}. Dies when the file can't be opened.
#
sub ListCompoundCountInfo {
  my($Index) = @_;
  my($SDFile, $SDFileHandle, $Line, $CmpdCount);

  $SDFile = $SDFilesList[$Index];

  $CmpdCount = 0;

  # Three-argument open with an explicit read mode: characters such as >, | or
  # surrounding whitespace in the file name are never interpreted as a mode...
  open $SDFileHandle, "<", $SDFile or die "Couldn't open $SDFile: $! \n";
  while (defined($Line = <$SDFileHandle>)) {
    if ($Line =~ /^\$\$\$\$/) {
      $CmpdCount++;
    }
  }
  close $SDFileHandle;

  $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;

  print "\nNumber of compounds: $CmpdCount\n";
}

# List detailed compound information for the SD file at position $Index in
# @SDFilesList...
#
# The file is read one compound record at a time using ReadCmpdString. Depending
# on the flags in %OptionsInfo, records are counted as having empty atom/bond
# blocks, atom/bond blocks not matching the counts line, chiral flag set, unknown
# atom labels, invalid atom numbers, or multiple fragments (salts). Data field
# information is collected by ProcessCmpdInfo when $OptionsInfo{ProcessCmpdData}
# is set. $OptionsInfo{Detail} controls the output: a progress counter every 5000
# compounds at level 1; per compound findings at level 2 and above; and the
# complete record of problematic compounds at level 3.
#
# The compound count is added to $SDCmpdsInfo{TotalCmpdCount}. Dies when the file
# can't be opened.
#
sub ListCompoundDetailsInfo {
  my($Index) = @_;
  my($SDFile, $SDFileHandle, $CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount, $CtabLinesCount, $PrintCmpdCounterHeader, $ProblematicCmpdData, $CmpdString, @CmpdLines);

  $SDFile = $SDFilesList[$Index];

  ($CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount) = (0) x 7;

  # Data field information is collected per file; clear what the previous file left behind...
  InitializeSDCmpdsInfo();

  $PrintCmpdCounterHeader = 1;

  # Three-argument open with an explicit read mode: characters such as >, | or
  # surrounding whitespace in the file name are never interpreted as a mode. The
  # lexical handle is a glob reference, just like the \*SDFILE passed previously...
  open $SDFileHandle, "<", $SDFile or die "Couldn't open $SDFile: $! \n";
  while ($CmpdString = ReadCmpdString($SDFileHandle)) {
    $CmpdCount++;
    $ProblematicCmpdData = 0;

    # Progress counter; suppressed at higher detail levels where per compound
    # messages are printed instead...
    if ($OptionsInfo{Detail} <= 1) {
      if (($CmpdCount % 5000) == 0) {
        if ($PrintCmpdCounterHeader) {
          $PrintCmpdCounterHeader = 0;
          print "Processing compounds:";
        }
        print "$CmpdCount...";
      }
    }

    @CmpdLines = split "\n", $CmpdString;
    $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);

    if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
      if ($CtabLinesCount <= 0) {
        $EmptyCtabBlocksCount++;
        $ProblematicCmpdData = 1;
      }
    }

    if ($CtabLinesCount > 0) {
      # Fourth line of the record is parsed as the counts line...
      my ($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine($CmpdLines[3]);

      if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
        if ($CtabLinesCount != ($AtomCount + $BondCount)) {
          $MismatchCtabBlockCount++;
          $ProblematicCmpdData = 1;
          if ($OptionsInfo{Detail} >= 2) {
            print "\nMismatch found: Ctab lines count: $CtabLinesCount;  Atoms count: $AtomCount; Bond count: $BondCount\n";
          }
        }
      }

      # Chiral compounds are counted but not treated as problematic...
      if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
        if ($ChiralFlag == 1) {
          $ChiralCtabBlockCount++;
        }
      }

      # Atom and bond level checks are performed only for atom/bond blocks
      # consistent with the counts line...
      if ($CtabLinesCount == ($AtomCount + $BondCount)) {
        if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
          my($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) = GetUnknownAtoms(\@CmpdLines);
          if ($UnknownAtomCount) {
            $UnknownAtomsCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nUnknown atom(s) found: $UnknownAtomCount\nUnknown atom(s) symbols:$UnknownAtoms\nUnknown atom(s) data lines:\n$UnknownAtomLines\n";
            }
          }
        }
        if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
          my($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) = GetInvalidAtomNumbers(\@CmpdLines);
          if ($InvalidAtomNumbersCount) {
            $InvalidAtomNumbersCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nInvalid atom number(s) found: $InvalidAtomNumbersCount\nInvalid atom number(s):$InvalidAtomNumbers\nInvalid atom number(s) data lines:\n$InvalidAtomNumberLines\n";
            }
          }
        }
        if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
          # More than one fragment is reported as a salt...
          my($FragmentsCount, $Fragments) = GetCmpdFragments(\@CmpdLines);
          if ($FragmentsCount > 1) {
            $SaltsCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nSalts found: $FragmentsCount\nSalts atom numbers:\n$Fragments\n";
            }
          }
        }
      }
    }

    if ($OptionsInfo{ProcessCmpdData}) {
      ProcessCmpdInfo(\@CmpdLines, $CmpdCount);
    }

    if ($OptionsInfo{Detail} >= 3) {
      if ($ProblematicCmpdData) {
        print "\nCompound data:\n$CmpdString\n\n";
      }
    }
  }

  # Terminate the progress counter line, if one was started...
  if ($OptionsInfo{Detail} <= 1) {
    if (!$PrintCmpdCounterHeader) {
      print "\n";
    }
  }
  close $SDFileHandle;

  $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;

  print "\nNumber of compounds: $CmpdCount\n";

  if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
    print "Number of empty atom/bond blocks: $EmptyCtabBlocksCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
    print "Number of mismatched atom/bond blocks: $MismatchCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
    print "Number of atom blocks with unknown atom labels: $UnknownAtomsCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
    print "Number of bond blocks and atom property blocks with invalid atom numbers: $InvalidAtomNumbersCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
    print "Number of atom blocks containing salts: $SaltsCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
    print "Number of chiral atom/bond blocks: $ChiralCtabBlockCount\n";
  }
  if ($OptionsInfo{ProcessCmpdData}) {
    PrintCmpdInfoSummary();
  }

}

# Reset the data field information kept in %SDCmpdsInfo: the list of field
# labels along with the label status and value count maps are emptied. The
# total compound count is created on first use and left alone afterwards...
sub InitializeSDCmpdsInfo {
  my($MapName);

  $SDCmpdsInfo{TotalCmpdCount} = 0 unless exists $SDCmpdsInfo{TotalCmpdCount};

  @{$SDCmpdsInfo{FieldLabels}} = ();

  for $MapName (qw(FieldLabelsMap NonEmptyFieldValuesCountMap EmptyFieldValuesCountMap NonNumericalFieldValuesCountMap NumericalFieldValuesCountMap)) {
    %{$SDCmpdsInfo{$MapName}} = ();
  }
}

# Process compound data header labels and figure out which ones are present for
# all the compounds...
#
# Parameters:
#   $CmpdLinesRef - Reference to an array containing lines of a compound record.
#   $CmpdCount - Position of the compound in the file; the first compound is 1.
#
# Updates in %SDCmpdsInfo:
#   FieldLabels - Data field labels in order of first appearance.
#   FieldLabelsMap - Label to "PresentInAll" or "PresentInSome".
#   NonEmptyFieldValuesCountMap, EmptyFieldValuesCountMap - Value counts per
#     label; maintained when $OptionsInfo{CountEmptyData} is set.
#   NumericalFieldValuesCountMap, NonNumericalFieldValuesCountMap - Value counts
#     per label; maintained when $OptionsInfo{CheckData} is set.
#
sub ProcessCmpdInfo {
  my($CmpdLinesRef, $CmpdCount) = @_;
  my($Label, $FirstCmpd, $NewLabelStatus, @CmpdFieldLabels, %CmpdFieldLabelsMap);

  @CmpdFieldLabels = GetCmpdDataHeaderLabels($CmpdLinesRef);
  %CmpdFieldLabelsMap = map { $_ => 1 } @CmpdFieldLabels;

  # Labels may start out as "PresentInAll" only for the first compound. An empty
  # label list alone isn't enough to identify it: earlier compounds might not have
  # any data fields, in which case labels showing up later are missing from those
  # compounds. The compound count, when available, is used to tell these apart...
  $FirstCmpd = (!@{$SDCmpdsInfo{FieldLabels}} && (!defined($CmpdCount) || $CmpdCount <= 1)) ? 1 : 0;
  $NewLabelStatus = $FirstCmpd ? "PresentInAll" : "PresentInSome";

  # Any known label missing from this compound is present only in some compounds...
  for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
    if (!exists $CmpdFieldLabelsMap{$Label}) {
      $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
    }
  }

  # Add labels not seen before; the map check also keeps a label repeated within
  # a compound from being listed twice...
  for $Label (@CmpdFieldLabels) {
    if (!exists $SDCmpdsInfo{FieldLabelsMap}{$Label}) {
      push @{$SDCmpdsInfo{FieldLabels}}, $Label;
      $SDCmpdsInfo{FieldLabelsMap}{$Label} = $NewLabelStatus;
    }
  }

  if ($OptionsInfo{CountEmptyData} || $OptionsInfo{CheckData}) {
    my(%DataFieldAndValues, $Value);

    %DataFieldAndValues = GetCmpdDataHeaderLabelsAndValues($CmpdLinesRef);

    # Go over labels in sorted order to make the order of messages reproducible...
    for $Label (sort keys %DataFieldAndValues) {
      $Value = $DataFieldAndValues{$Label};

      # Count empty data field values...
      if ($OptionsInfo{CountEmptyData}) {
        if (IsNotEmpty($Value)) {
          $SDCmpdsInfo{NonEmptyFieldValuesCountMap}{$Label}++;
        }
        else {
          if ($OptionsInfo{Detail} >= 2) {
            print "Compound record $CmpdCount: Empty data field <$Label>\n";
          }
          $SDCmpdsInfo{EmptyFieldValuesCountMap}{$Label}++;
        }
      }

      # Count numerical data field values...
      if ($OptionsInfo{CheckData}) {
        if (IsNumerical($Value)) {
          $SDCmpdsInfo{NumericalFieldValuesCountMap}{$Label}++;
        }
        else {
          $SDCmpdsInfo{NonNumericalFieldValuesCountMap}{$Label}++;
        }
      }
    }
  }
}

# Print a summary of the data field information collected in %SDCmpdsInfo:
# number of data fields, their labels split by presence in all or only some of
# the compounds and, as requested through %OptionsInfo, counts of empty/non-empty
# and numerical/non-numerical values for each data field...
sub PrintCmpdInfoSummary {
  if (!@{$SDCmpdsInfo{FieldLabels}}) {
    print "\nNumber of data fields: 0\n";
    return;
  }

  my($FieldLabelsCount, $AllLabelsPresentInAll, $LabelsMapRef, @SortedLabels, @FieldLabelsPresentInAll, @FieldLabelsPresentInSome);

  $LabelsMapRef = $SDCmpdsInfo{FieldLabelsMap};
  @SortedLabels = sort keys %{$LabelsMapRef};
  $FieldLabelsCount = scalar(@{$SDCmpdsInfo{FieldLabels}});

  print "\nNumber of data fields: ", $FieldLabelsCount, "\n";
  print "All data field labels: ", (map { "<$_> " } @SortedLabels), "\n";

  @FieldLabelsPresentInAll = grep { $LabelsMapRef->{$_} eq "PresentInAll" } @SortedLabels;
  @FieldLabelsPresentInSome = ();
  $AllLabelsPresentInAll = (@FieldLabelsPresentInAll == $FieldLabelsCount) ? 1 : 0;

  # The split by presence is listed only when it isn't trivial...
  if (!$AllLabelsPresentInAll) {
    @FieldLabelsPresentInSome = grep { $LabelsMapRef->{$_} eq "PresentInSome" } @SortedLabels;

    print "Data field labels present in all compounds: ", (map { "<$_> " } @FieldLabelsPresentInAll), "\n";
    print "Data field labels present in some compounds: ", (map { "<$_> " } @FieldLabelsPresentInSome), "\n";
  }

  # List empty data field values count...
  if ($OptionsInfo{CountEmptyData}) {
    my($NonEmptyCountsRef, $EmptyCountsRef) = ($SDCmpdsInfo{NonEmptyFieldValuesCountMap}, $SDCmpdsInfo{EmptyFieldValuesCountMap});

    print "\n";
    if ($AllLabelsPresentInAll) {
      PrintDataInformation("Number of non-empty values for data field(s)", $SDCmpdsInfo{FieldLabels}, $NonEmptyCountsRef);
      PrintDataInformation("Number of empty values for data field(s)", $SDCmpdsInfo{FieldLabels}, $EmptyCountsRef);
    }
    else {
      PrintDataInformation("Number of non-empty values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $NonEmptyCountsRef);
      PrintDataInformation("Number of empty values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $EmptyCountsRef);
      PrintDataInformation("Number of non-empty values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $NonEmptyCountsRef);
      PrintDataInformation("Number of empty values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $EmptyCountsRef);
    }
    print "\n";
  }

  # List numerical data values count...
  if ($OptionsInfo{CheckData}) {
    my($NonNumericalCountsRef, $NumericalCountsRef) = ($SDCmpdsInfo{NonNumericalFieldValuesCountMap}, $SDCmpdsInfo{NumericalFieldValuesCountMap});

    print "\n";
    if ($AllLabelsPresentInAll) {
      PrintDataInformation("Number of non-numerical values for data field(s)", $SDCmpdsInfo{FieldLabels}, $NonNumericalCountsRef);
      PrintDataInformation("Number of numerical values for data field(s)", $SDCmpdsInfo{FieldLabels}, $NumericalCountsRef);
    }
    else {
      PrintDataInformation("Number of non-numerical values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $NonNumericalCountsRef);
      PrintDataInformation("Number of numerical values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $NumericalCountsRef);
      PrintDataInformation("Number of non-numerical values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $NonNumericalCountsRef);
      PrintDataInformation("Number of numerical values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $NumericalCountsRef);
    }
    print "\n";
  }
}
# Print one line of the form "InfoLabel:  <Label1> - Value1, <Label2> - Value2"...
#
# Parameters:
#   $InfoLabel - Text printed in front of the values.
#   $DataLabelRef - Reference to an array of labels to list, in order.
#   $DataLabelToValueMapRef - Reference to a hash mapping labels to values; a
#     label missing from the hash is listed with a value of 0.
#
sub PrintDataInformation {
  my($InfoLabel, $DataLabelRef, $DataLabelToValueMapRef) = @_;
  my(@Entries);

  @Entries = map {
    my $Value = exists($DataLabelToValueMapRef->{$_}) ? $DataLabelToValueMapRef->{$_} : 0;
    " <$_> - " . $Value;
  } @{$DataLabelRef};

  print "$InfoLabel: " . join(",", @Entries) . "\n";
}

# Print the total number of compounds and the combined size of the SD files
# marked okay in %SDFilesInfo. Nothing is printed unless more than one file is
# okay...
sub ListTotalSizeOfFiles {
  my($FileOkayCount, $TotalSize, $Index, @OkayFileIndices);

  @OkayFileIndices = grep { $SDFilesInfo{FileOkay}[$_] } (0 .. $#SDFilesList);
  $FileOkayCount = scalar(@OkayFileIndices);

  $TotalSize = 0;
  for $Index (@OkayFileIndices) {
    $TotalSize += $SDFilesInfo{FileSize}[$Index];
  }

  # Totals for a single file would only repeat what's already been listed...
  return if ($FileOkayCount <= 1);

  print "\nTotal number of compounds in  $FileOkayCount SD files: $SDCmpdsInfo{TotalCmpdCount}\n";
  print "\nTotal size of $FileOkayCount SD files: ", FormatFileSize($TotalSize), "\n";
}

# Retrieve information about SD files...
#
# Clears %SDCmpdsInfo and rebuilds %SDFilesInfo with three arrays indexed in
# parallel with @SDFilesList:
#   FileOkay - 1 for a file which exists, passes CheckFileType and can be
#     opened for reading; otherwise, 0.
#   FileSize - Value returned by FileSize for okay files; otherwise, 0.
#   FileLastModified - "time; date" string for okay files; otherwise, ''.
#
# A warning is issued for each file which is ignored.
#
sub RetrieveSDFilesInfo {
  my($Index, $SDFile, $SDFileHandle, $ModifiedTimeString, $ModifiedDateString);

  %SDCmpdsInfo = ();

  %SDFilesInfo = ();
  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{FileSize}} = ();
  @{$SDFilesInfo{FileLastModified}} = ();

  FILELIST: for $Index (0 .. $#SDFilesList) {
    # Start out by assuming the file is not usable...
    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{FileSize}[$Index] = 0;
    $SDFilesInfo{FileLastModified}[$Index] = '';

    $SDFile = $SDFilesList[$Index];
    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sdf sd")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }
    # Three-argument open with an explicit read mode: characters such as >, | or
    # surrounding whitespace in the file name are never interpreted as a mode...
    if (!open($SDFileHandle, "<", $SDFile)) {
      warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $SDFileHandle;

    $SDFilesInfo{FileOkay}[$Index] = 1;
    $SDFilesInfo{FileSize}[$Index] = FileSize($SDFile);
    ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($SDFile);
    $SDFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString";
  }
}

# Copy command line option values from %Options into %OptionsInfo and derive
# the flags which control processing. Options not specified are stored as 0...
sub ProcessOptions {
  my($InfoName, %InfoNameToOptionName);

  %OptionsInfo = ();

  # Simple on/off options: key in %OptionsInfo and name of the command line option...
  %InfoNameToOptionName = (
    All => 'all',
    Chiral => 'chiral',
    Count => 'count',
    DataCheck => 'datacheck',
    Empty => 'empty',
    Fields => 'fields',
    InvalidAtomNumbers => 'invalidatomnumbers',
    Mismatch => 'mismatch',
    Salts => 'salts',
    UnknownAtoms => 'unknownatoms',
  );
  for $InfoName (keys %InfoNameToOptionName) {
    $OptionsInfo{$InfoName} = $Options{$InfoNameToOptionName{$InfoName}} || 0;
  }

  $OptionsInfo{Detail} = $Options{detail};

  # Any of these options requires going through individual compounds...
  $OptionsInfo{ProcessCmpdInfo} = (grep { $Options{$_} } qw(all chiral empty fields invalidatomnumbers mismatch salts unknownatoms datacheck)) ? 1 : 0;

  # Any of these options requires collecting data field information...
  $OptionsInfo{ProcessCmpdData} = (grep { $Options{$_} } qw(all fields empty datacheck)) ? 1 : 0;

  $OptionsInfo{CountEmptyData} = (grep { $Options{$_} } qw(all empty)) ? 1 : 0;
  $OptionsInfo{CheckData} = (grep { $Options{$_} } qw(all datacheck)) ? 1 : 0;
}

# Set up default option values, retrieve command line options into %Options and
# validate them. Dies on an unrecognized option, a working directory which is
# not a directory or can't be changed to, or a detail level outside 1 to 3...
sub SetupScriptUsage {
  my(@OptionSpecs);

  # Setup default and retrieve all the options...
  %Options = ();
  $Options{detail} = 1;

  @OptionSpecs = ("all|a", "count|c", "chiral", "datacheck", "detail|d:i", "empty|e", "fields|f", "help|h", "invalidatomnumbers|i", "mismatch|m", "salts|s", "unknownatoms|u", "workingdir|w=s");
  GetOptions(\%Options, @OptionSpecs)
    or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

  # Switch to the working directory before anything else is done with file names...
  if ($Options{workingdir}) {
    unless (-d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  unless ($Options{detail} > 0 && $Options{detail} <= 3) {
    die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Possible values: 1 to 3\n";
  }
}

__END__

=head1 NAME

InfoSDFiles.pl - List information about SDFile(s)

=head1 SYNOPSIS

InfoSDFiles.pl SDFile(s)...

InfoSDFiles.pl [B<-a, --all>] [B<-c, --count>] [B<--chiral>] [B<--datacheck>]
[B<-d, --detail> infolevel] [B<-e, --empty>] [B<-f, --fields>] [B<-h, --help>]
[B<-i, --invalidatomnumbers>] [B<-m, --mismatch>] [B<-s, --salts>] [B<-u, --unknownatoms>]
[B<-w, --workingdir> dirname] SDFile(s)...

=head1 DESCRIPTION

List information about I<SDFile(s)> contents: number of compounds, empty records
and so on. Multiple SDFile names are separated by spaces. The valid file extensions
are I<.sdf> and I<.sd>. All other file names are ignored. All the SD files in the current
directory can be specified either by I<*.sdf> or the current directory name.

=head1 OPTIONS

=over 4

=item B<-a, --all>

List all the available information.

=item B<-c, --count>

List number of compounds. This is B<default behavior>.

=item B<--chiral>

List number of atom/bond blocks for compounds with chiral flag set in
counts line.

=item B<-d, --detail> I<infolevel>

Level of information to print. Default: 1. Possible values: I<1, 2, or 3>.

=item B<--datacheck>

List number of numerical and non-numerical values for each data field.

=item B<-e, --empty>

List number of empty atom/bond blocks and data fields for compounds.

=item B<-f, --fields>

List data field labels present for compounds.

=item B<-h, --help>

Print this help message.

=item B<-i, --invalidatomnumbers>

List number of bond blocks for compounds which contain invalid atom numbers.

=item B<-m, --mismatch>

List number of atom/bond blocks for compounds which don't match with counts
line information in header block.

=item B<-s, --salts>

List number of atom blocks for compounds which contain salts identified as
disconnected structural units.

=item B<-u, --unknownatoms>

List number of atom blocks for compounds which contain special atom symbols
such as L, Q, *, LP, X, R#, or any other non periodic table symbols.

=item B<-w, --workingdir> I<dirname>

Location of working directory. Default: current directory.

=back

=head1 EXAMPLES

To count compounds in SD file(s), type:

    % InfoSDFiles.pl Sample1.sdf
    % InfoSDFiles.pl Sample1.sdf Sample2.sdf
    % InfoSDFiles.pl *.sdf

To list all available information for SD file(s), type:

    % InfoSDFiles.pl -a *.sdf

To list all data fields present in Sample.sdf, type:

    % InfoSDFiles.pl -f Sample.sdf

To count number of compounds which contain salts and list associated structural
data, type:

    % InfoSDFiles.pl -s -d 3 Sample.sdf

=head1 AUTHOR

Manish Sud <msud@san.rr.com>

=head1 SEE ALSO

ExtractFromSDFiles.pl, FilterSDFiles.pl, MergeTextFilesWithSD.pl

=head1 COPYRIGHT

Copyright (C) 2015 Manish Sud. All rights reserved.

This file is part of MayaChemTools.

MayaChemTools is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

=cut