#!/usr/bin/perl -w
#
# $RCSfile: InfoSDFiles.pl,v $
# $Date: 2015/02/28 20:46:20 $
# $Revision: 1.35 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use Benchmark;
use SDFileUtil;
use TextUtil;
use FileUtil;

my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename $0;
print "\n$ScriptName:Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo, %SDCmpdsInfo);
RetrieveSDFilesInfo();
InitializeSDCmpdsInfo();

# Process input files..
my($FileIndex);
if (@SDFilesList > 1) {
  print "\nProcessing SD files...\n";
}
for $FileIndex (0 .. $#SDFilesList) {
  # Files which failed the checks in RetrieveSDFilesInfo are silently skipped here;
  # a warning has already been issued for each one of them...
  if ($SDFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $SDFilesList[$FileIndex]...\n";
    ListSDFileInfo($FileIndex);
  }
}
ListTotalSizeOfFiles();

print "\n$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# List appropriate information...
#
# Parameters:
#   $Index - position of the SD file in @SDFilesList.
#
# Prints either the detailed compound report or just the compound count, depending
# on $OptionsInfo{ProcessCmpdInfo}, followed by the file size and modification time
# collected in %SDFilesInfo.
#
sub ListSDFileInfo {
  my($Index) = @_;

  if ($OptionsInfo{ProcessCmpdInfo}) {
    ListCompoundDetailsInfo($Index);
  }
  else {
    ListCompoundCountInfo($Index);
  }

  # File size and modification information...
  print "\nFile size: ", FormatFileSize($SDFilesInfo{FileSize}[$Index]), " \n";
  print "Last modified: ", $SDFilesInfo{FileLastModified}[$Index], " \n";
}

# List number of compounds in SD file...
#
# Parameters:
#   $Index - position of the SD file in @SDFilesList.
#
# Counts the lines starting with the "$$$$" record delimiter, adds the count to
# $SDCmpdsInfo{TotalCmpdCount}, and prints it. Dies when the file can't be opened.
#
sub ListCompoundCountInfo {
  my($Index) = @_;
  my($SDFile, $SDFileHandle, $Line, $CmpdCount);

  $SDFile = $SDFilesList[$Index];

  $CmpdCount = 0;

  # Three-argument open with a lexical handle: the file name can't be interpreted
  # as an open mode and the handle doesn't leak into the package namespace...
  open $SDFileHandle, "<", $SDFile or die "Couldn't open $SDFile: $! \n";
  while ($Line = <$SDFileHandle>) {
    if ($Line =~ /^\$\$\$\$/) {
      $CmpdCount++;
    }
  }
  close $SDFileHandle;

  $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;

  print "\nNumber of compounds: $CmpdCount\n";
}

# List detailed compound information...
# Go over all the compound records in a SD file, perform the checks requested
# through the command line options, and print out a summary of the results...
#
# Parameters:
#   $Index - position of the SD file in @SDFilesList.
#
# The per file data field information in %SDCmpdsInfo is reset at the start and
# the number of compounds is added to $SDCmpdsInfo{TotalCmpdCount} at the end. Dies
# when the file can't be opened.
#
sub ListCompoundDetailsInfo {
  my($Index) = @_;
  my($SDFile, $SDFileHandle, $CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount, $CtabLinesCount, $PrintCmpdCounterHeader, $ProblematicCmpdData, $CmpdString, @CmpdLines);

  $SDFile = $SDFilesList[$Index];

  ($CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount) = (0) x 7;

  InitializeSDCmpdsInfo();

  $PrintCmpdCounterHeader = 1;

  # Three-argument open with a lexical handle. ReadCmpdString is still handed a
  # glob reference, which is what a lexical file handle is...
  open $SDFileHandle, "<", $SDFile or die "Couldn't open $SDFile: $! \n";
  while ($CmpdString = ReadCmpdString($SDFileHandle)) {
    $CmpdCount++;
    $ProblematicCmpdData = 0;

    # Progress indicator for every 5000th compound at the lowest detail level...
    if ($OptionsInfo{Detail} <= 1) {
      if (($CmpdCount % 5000) == 0) {
        if ($PrintCmpdCounterHeader) {
          $PrintCmpdCounterHeader = 0;
          print "Processing compounds:";
        }
        print "$CmpdCount...";
      }
    }

    @CmpdLines = split "\n", $CmpdString;
    $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);

    if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
      if ($CtabLinesCount <= 0) {
        $EmptyCtabBlocksCount++;
        $ProblematicCmpdData = 1;
      }
    }

    if ($CtabLinesCount > 0) {
      my ($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine($CmpdLines[3]);

      if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
        if ($CtabLinesCount != ($AtomCount + $BondCount)) {
          $MismatchCtabBlockCount++;
          $ProblematicCmpdData = 1;
          if ($OptionsInfo{Detail} >= 2) {
            print "\nMismatch found: Ctab lines count: $CtabLinesCount; Atoms count: $AtomCount; Bond count: $BondCount\n";
          }
        }
      }

      if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
        if ($ChiralFlag == 1) {
          $ChiralCtabBlockCount++;
        }
      }

      # The remaining checks are performed only for a consistent atom/bond block...
      if ($CtabLinesCount == ($AtomCount + $BondCount)) {
        if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
          my($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) = GetUnknownAtoms(\@CmpdLines);
          if ($UnknownAtomCount) {
            $UnknownAtomsCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nUnknown atom(s) found: $UnknownAtomCount\nUnknown atom(s) symbols:$UnknownAtoms\nUnknown atom(s) data lines:\n$UnknownAtomLines\n";
            }
          }
        }

        if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
          my($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) = GetInvalidAtomNumbers(\@CmpdLines);
          if ($InvalidAtomNumbersCount) {
            $InvalidAtomNumbersCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nInvalid atom number(s) found: $InvalidAtomNumbersCount\nInvalid atom number(s):$InvalidAtomNumbers\nInvalid atom number(s) data lines:\n$InvalidAtomNumberLines\n";
            }
          }
        }

        if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
          my($FragmentsCount, $Fragments) = GetCmpdFragments(\@CmpdLines);
          if ($FragmentsCount > 1) {
            $SaltsCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nSalts found: $FragmentsCount\nSalts atom numbers:\n$Fragments\n";
            }
          }
        }
      }
    }

    if ($OptionsInfo{ProcessCmpdData}) {
      ProcessCmpdInfo(\@CmpdLines, $CmpdCount);
    }

    if ($OptionsInfo{Detail} >= 3) {
      if ($ProblematicCmpdData) {
        print "\nCompound data:\n$CmpdString\n\n";
      }
    }
  }

  # Terminate the progress indicator line, provided one was started...
  if ($OptionsInfo{Detail} <= 1) {
    if (!$PrintCmpdCounterHeader) {
      print "\n";
    }
  }
  close $SDFileHandle;

  $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;

  print "\nNumber of compounds: $CmpdCount\n";

  if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
    print "Number of empty atom/bond blocks: $EmptyCtabBlocksCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
    print "Number of mismatched atom/bond blocks: $MismatchCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
    print "Number of atom blocks with unknown atom labels: $UnknownAtomsCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
    print "Number of bond blocks and atom property blocks with invalid atom numbers: $InvalidAtomNumbersCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
    print "Number of atom blocks containing salts: $SaltsCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
    print "Number of chiral atom/bond blocks: $ChiralCtabBlockCount\n";
  }
  if ($OptionsInfo{ProcessCmpdData}) {
    PrintCmpdInfoSummary();
  }

}

# Initialize compound data information for a SD file...
#
# The running total in $SDCmpdsInfo{TotalCmpdCount} is created on the first call
# and left alone afterwards; all the per file data field containers are emptied.
#
sub InitializeSDCmpdsInfo {

  if (!exists $SDCmpdsInfo{TotalCmpdCount}) {
    $SDCmpdsInfo{TotalCmpdCount} = 0;
  }

  @{$SDCmpdsInfo{FieldLabels}} = ();
  %{$SDCmpdsInfo{FieldLabelsMap}} = ();
  %{$SDCmpdsInfo{NonEmptyFieldValuesCountMap}} = ();
  %{$SDCmpdsInfo{EmptyFieldValuesCountMap}} = ();
  %{$SDCmpdsInfo{NonNumericalFieldValuesCountMap}} = ();
  %{$SDCmpdsInfo{NumericalFieldValuesCountMap}} = ();
}

# Process compound data header labels and figure out which ones are present for
# all the compounds...
#
# Parameters:
#   $CmpdLinesRef - reference to an array containing the lines of a compound record.
#   $CmpdCount    - number of the compound record; the visible caller restarts it
#                   at 1 for each SD file.
#
# Updates FieldLabels and FieldLabelsMap in %SDCmpdsInfo and, for the empty data
# and data check options, the corresponding value count maps.
#
sub ProcessCmpdInfo {
  my($CmpdLinesRef, $CmpdCount) = @_;
  my($Label);

  # Only the labels of the first compound record may start out as "PresentInAll".
  # Relying just on a non-empty label list would mislabel the data fields whenever
  # the leading compound record(s) don't contain any data fields at all...
  if ((defined($CmpdCount) && $CmpdCount > 1) || @{$SDCmpdsInfo{FieldLabels}}) {
    my (@CmpdFieldLabels) = GetCmpdDataHeaderLabels($CmpdLinesRef);
    my(%CmpdFieldLabelsMap) = ();
    # Setup a map for the current labels...
    for $Label (@CmpdFieldLabels) {
      $CmpdFieldLabelsMap{$Label} = "PresentInSome";
    }
    # Check the presence of old labels in this compound; otherwise, demote 'em...
    for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
      if (!$CmpdFieldLabelsMap{$Label}) {
        $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
      }
    }
    # Check the presence of labels for this compound in the old labels; otherwise, add 'em...
    for $Label (@CmpdFieldLabels ) {
      if (!$SDCmpdsInfo{FieldLabelsMap}{$Label}) {
        # It's a new label...
        push @{$SDCmpdsInfo{FieldLabels}}, $Label;
        $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
      }
    }
  }
  else {
    # Get the initial label set and set up a map...
    @{$SDCmpdsInfo{FieldLabels}} = GetCmpdDataHeaderLabels($CmpdLinesRef);
    for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
      $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInAll";
    }
  }

  if ($OptionsInfo{CountEmptyData} || $OptionsInfo{CheckData}) {
    # Count empty and numerical data field values...
    my(%DataFieldAndValues, $Label, $Value);

    %DataFieldAndValues = GetCmpdDataHeaderLabelsAndValues($CmpdLinesRef);
    for $Label (keys %DataFieldAndValues) {
      $Value = $DataFieldAndValues{$Label};
      if ($OptionsInfo{CountEmptyData}) {
        if (IsNotEmpty($Value)) {
          $SDCmpdsInfo{NonEmptyFieldValuesCountMap}{$Label}++;
        }
        else {
          if ($OptionsInfo{Detail} >= 2) {
            print "Compound record $CmpdCount: Empty data field <$Label>\n";
          }
          $SDCmpdsInfo{EmptyFieldValuesCountMap}{$Label}++;
        }
      }
      if ($OptionsInfo{CheckData}) {
        if (IsNumerical($Value)) {
          $SDCmpdsInfo{NumericalFieldValuesCountMap}{$Label}++;
        }
        else {
          $SDCmpdsInfo{NonNumericalFieldValuesCountMap}{$Label}++;
        }
      }
    }
  }
}
# Print compound summary...
#
# Lists the data field labels collected in %SDCmpdsInfo for the current SD file,
# split into those present in all and in some compounds whenever that distinction
# exists, followed by the value counts for the empty data and data check options.
#
sub PrintCmpdInfoSummary {
  my($FieldLabelsRef, $FieldLabelsMapRef, $AllFieldsInAllCmpds, $ReportRef, @SortedLabels, @FieldLabelsPresentInAll, @FieldLabelsPresentInSome, @Reports);

  $FieldLabelsRef = $SDCmpdsInfo{FieldLabels};
  $FieldLabelsMapRef = $SDCmpdsInfo{FieldLabelsMap};

  if (!@{$FieldLabelsRef}) {
    print "\nNumber of data fields: 0\n";
    return;
  }

  @SortedLabels = sort keys %{$FieldLabelsMapRef};
  @FieldLabelsPresentInAll = grep { $FieldLabelsMapRef->{$_} eq "PresentInAll" } @SortedLabels;
  @FieldLabelsPresentInSome = grep { $FieldLabelsMapRef->{$_} eq "PresentInSome" } @SortedLabels;

  # As before, the comparison is against the size of the label list and not the
  # number of keys in the map...
  $AllFieldsInAllCmpds = (@FieldLabelsPresentInAll == @{$FieldLabelsRef}) ? 1 : 0;

  print "\nNumber of data fields: ", scalar(@{$FieldLabelsRef}), "\n";
  print "All data field labels: ", join("", map { "<$_> " } @SortedLabels), "\n";

  if (!$AllFieldsInAllCmpds) {
    print "Data field labels present in all compounds: ", join("", map { "<$_> " } @FieldLabelsPresentInAll), "\n";
    print "Data field labels present in some compounds: ", join("", map { "<$_> " } @FieldLabelsPresentInSome), "\n";
  }

  # Each report corresponds to a pair of value kinds along with the maps holding
  # their counts; the empty values report comes before the numerical values report...
  @Reports = ();
  if ($OptionsInfo{CountEmptyData}) {
    push @Reports, ["non-empty", "NonEmptyFieldValuesCountMap", "empty", "EmptyFieldValuesCountMap"];
  }
  if ($OptionsInfo{CheckData}) {
    push @Reports, ["non-numerical", "NonNumericalFieldValuesCountMap", "numerical", "NumericalFieldValuesCountMap"];
  }

  for $ReportRef (@Reports) {
    my($FirstKind, $FirstMapName, $SecondKind, $SecondMapName) = @{$ReportRef};

    print "\n";
    if ($AllFieldsInAllCmpds) {
      PrintDataInformation("Number of $FirstKind values for data field(s)", $FieldLabelsRef, $SDCmpdsInfo{$FirstMapName});
      PrintDataInformation("Number of $SecondKind values for data field(s)", $FieldLabelsRef, $SDCmpdsInfo{$SecondMapName});
    }
    else {
      PrintDataInformation("Number of $FirstKind values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $SDCmpdsInfo{$FirstMapName});
      PrintDataInformation("Number of $SecondKind values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $SDCmpdsInfo{$SecondMapName});
      PrintDataInformation("Number of $FirstKind values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $SDCmpdsInfo{$FirstMapName});
      PrintDataInformation("Number of $SecondKind values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $SDCmpdsInfo{$SecondMapName});
    }
    print "\n";
  }
}

# List data information...
#
# Parameters:
#   $InfoLabel              - text printed in front of the counts.
#   $DataLabelRef           - reference to an array of data field labels.
#   $DataLabelToValueMapRef - reference to a hash mapping labels to counts; a
#                             label missing from the hash is reported as 0.
#
# Prints a line in the format "InfoLabel:  <Label1> - Count1, <Label2> - Count2".
#
sub PrintDataInformation {
  my($InfoLabel, $DataLabelRef, $DataLabelToValueMapRef) = @_;
  my($Line);

  $Line = join ",", map { " <$_> - " . (exists($DataLabelToValueMapRef->{$_}) ? $DataLabelToValueMapRef->{$_} : 0) } @{$DataLabelRef};
  print "$InfoLabel: $Line\n";
}

# Total size of all the files...
#
# Prints the total number of compounds and the total size only when more than
# one SD file has passed the checks.
#
sub ListTotalSizeOfFiles {
  my($FileOkayCount, $TotalSize, $Index);

  $FileOkayCount = 0;
  $TotalSize = 0;

  for $Index (0 .. $#SDFilesList) {
    if ($SDFilesInfo{FileOkay}[$Index]) {
      $FileOkayCount++;
      $TotalSize += $SDFilesInfo{FileSize}[$Index];
    }
  }
  if ($FileOkayCount > 1) {
    print "\nTotal number of compounds in $FileOkayCount SD files: $SDCmpdsInfo{TotalCmpdCount}\n";
    print "\nTotal size of $FileOkayCount SD files: ", FormatFileSize($TotalSize), "\n";
  }

}

# Retrieve information about SD files...
#
# Resets %SDCmpdsInfo and %SDFilesInfo and, for each file in @SDFilesList, records
# whether it's usable along with its size and modification time. A file which
# doesn't exist, isn't a SD file, or can't be opened is skipped with a warning and
# stays marked as not okay.
#
sub RetrieveSDFilesInfo {
  my($Index, $SDFile, $ModifiedTimeString, $ModifiedDateString);

  %SDCmpdsInfo = ();

  %SDFilesInfo = ();
  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{FileSize}} = ();
  @{$SDFilesInfo{FileLastModified}} = ();

  FILELIST: for $Index (0 .. $#SDFilesList) {
    my($SDFileHandle);

    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{FileSize}[$Index] = 0;
    $SDFilesInfo{FileLastModified}[$Index] = '';

    $SDFile = $SDFilesList[$Index];
    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sdf sd")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }
    # Make sure the file is readable; three-argument open with a lexical handle
    # keeps the file name from being interpreted as an open mode...
    if (!open($SDFileHandle, "<", $SDFile)) {
      warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $SDFileHandle;

    $SDFilesInfo{FileOkay}[$Index] = 1;
    $SDFilesInfo{FileSize}[$Index] = FileSize($SDFile);
    ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($SDFile);
    $SDFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString";
  }
}

# Process option values...
#
# Copies the command line options from %Options into %OptionsInfo, using 0 for
# the flags which were not specified, and derives the flags controlling the type
# of processing performed for each compound record.
#
sub ProcessOptions {
  %OptionsInfo = ();

  $OptionsInfo{All} = $Options{all} || 0;
  $OptionsInfo{Chiral} = $Options{chiral} || 0;
  $OptionsInfo{Count} = $Options{count} || 0;
  $OptionsInfo{DataCheck} = $Options{datacheck} || 0;
  $OptionsInfo{Empty} = $Options{empty} || 0;
  $OptionsInfo{Fields} = $Options{fields} || 0;
  $OptionsInfo{InvalidAtomNumbers} = $Options{invalidatomnumbers} || 0;
  $OptionsInfo{Mismatch} = $Options{mismatch} || 0;
  $OptionsInfo{Salts} = $Options{salts} || 0;
  $OptionsInfo{UnknownAtoms} = $Options{unknownatoms} || 0;

  $OptionsInfo{Detail} = $Options{detail};

  # Any option other than count requires going through the compound records...
  $OptionsInfo{ProcessCmpdInfo} = ($Options{all} || $Options{chiral} || $Options{empty} || $Options{fields} || $Options{invalidatomnumbers} || $Options{mismatch} || $Options{salts} || $Options{unknownatoms} || $Options{datacheck}) ? 1 : 0;

  $OptionsInfo{ProcessCmpdData} = ($Options{all} || $Options{fields} || $Options{empty} || $Options{datacheck}) ? 1 : 0;

  $OptionsInfo{CountEmptyData} = ($Options{all} || $Options{empty}) ? 1 : 0;
  $OptionsInfo{CheckData} = ($Options{all} || $Options{datacheck}) ? 1 : 0;
}

# Setup script usage and retrieve command line arguments specified using various options...
#
# Fills %Options, with detail level defaulting to 1. Dies for an invalid option,
# a working directory which is not a directory or can't be changed into, and a
# detail level outside of 1 to 3.
#
sub SetupScriptUsage {

  # Setup default and retrieve all the options...
  %Options = ();
  $Options{detail} = 1;
  if (!GetOptions(\%Options, "all|a", "count|c", "chiral", "datacheck", "detail|d:i", "empty|e", "fields|f", "help|h", "invalidatomnumbers|i", "mismatch|m", "salts|s", "unknownatoms|u", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }
  if ($Options{detail} <= 0 || $Options{detail} > 3) {
    die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Possible values: 1 to 3\n";
  }
}
