MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: ModifyPDBFiles.pl,v $
   4 # $Date: 2015/02/28 20:46:20 $
   5 # $Revision: 1.25 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use FindBin; use lib "$FindBin::Bin/../lib";
  31 use Getopt::Long;
  32 use File::Basename;
  33 use Text::ParseWords;
  34 use Benchmark;
  35 use FileUtil;
  36 use TextUtil;
  37 use PDBFileUtil;
  38 
  39 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  40 
  41 # Autoflush STDOUT
  42 $| = 1;
  43 
  44 # Starting message...
  45 $ScriptName = basename($0);
  46 print "\n$ScriptName: Starting...\n\n";
  47 $StartTime = new Benchmark;
  48 
  49 # Get the options and setup script...
  50 SetupScriptUsage();
  51 if ($Options{help} || @ARGV < 1) {
  52   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  53 }
  54 
  55 my(@PDBFilesList);
  56 @PDBFilesList = ExpandFileNames(\@ARGV, "pdb");
  57 
  58 # Process options...
  59 print "Processing options...\n";
  60 my(%OptionsInfo);
  61 ProcessOptions();
  62 
  63 # Setup information about input files...
  64 print "Checking input PDB file(s)...\n";
  65 my(%PDBFilesInfo);
  66 RetrievePDBFilesInfo();
  67 
  68 # Process input files..
  69 my($FileIndex);
  70 if (@PDBFilesList > 1) {
  71   print "\nProcessing PDB files...\n";
  72 }
  73 for $FileIndex (0 .. $#PDBFilesList) {
  74   if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
  75     print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
  76     ModifyPDBFiles($FileIndex);
  77   }
  78 }
  79 print "\n$ScriptName:Done...\n\n";
  80 
  81 $EndTime = new Benchmark;
  82 $TotalTime = timediff ($EndTime, $StartTime);
  83 print "Total time: ", timestr($TotalTime), "\n";
  84 
  85 ###############################################################################
  86 
  87 # Modify appropriate information...
  88 sub ModifyPDBFiles {
  89   my($FileIndex) = @_;
  90   my($PDBFile, $PDBRecordLinesRef);
  91 
  92   # Get PDB data...
  93   $PDBFile = $PDBFilesList[$FileIndex];
  94   $PDBRecordLinesRef = ReadPDBFile($PDBFile);
  95 
  96   if ($OptionsInfo{Mode} =~ /^RenumberAtoms$/i) {
  97     RenumberAtoms($FileIndex, $PDBRecordLinesRef);
  98   }
  99   elsif ($OptionsInfo{Mode} =~ /^RenumberResidues$/i) {
 100     RenumberResidues($FileIndex, $PDBRecordLinesRef);
 101   }
 102   elsif ($OptionsInfo{Mode} =~ /^RenumberWaters$/i) {
 103     RenumberWaters($FileIndex, $PDBRecordLinesRef);
 104   }
 105   elsif ($OptionsInfo{Mode} =~ /^RenameChainIDs$/i) {
 106     RenameChainsIDs($FileIndex, $PDBRecordLinesRef);
 107   }
 108 }
 109 
 110 # Renumber atom and hetro atom numbers...
 111 sub RenumberAtoms {
 112   my($FileIndex, $PDBRecordLinesRef) = @_;
 113   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewAtomNumber, $RecordType, %OldToNewAtomNumbersMap);
 114 
 115   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 116   print "Generating PDBFileName file $PDBFileName...\n";
 117   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 118 
 119   # Write out header and other older recors...
 120   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 121 
 122   # Write out all ATOM records along with TER and model records to indicate
 123   # chains and multiple models..
 124   %OldToNewAtomNumbersMap = ();
 125   $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
 126   for $RecordLine (@{$PDBRecordLinesRef}) {
 127     if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 128       $RecordType = GetPDBRecordType($RecordLine);
 129 
 130       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 131 
 132       print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $NewAtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 133 
 134       $OldToNewAtomNumbersMap{$AtomNumber} = $NewAtomNumber;
 135       $NewAtomNumber++;
 136     }
 137     elsif (IsTerRecordType($RecordLine)) {
 138       $NewAtomNumber++;
 139       print OUTFILE GenerateTerRecordLine($NewAtomNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode), "\n";
 140     }
 141     elsif (IsModelRecordType($RecordLine)) {
 142       print OUTFILE "$RecordLine\n";
 143     }
 144     elsif (IsEndmdlRecordType($RecordLine)) {
 145       print OUTFILE "$RecordLine\n";
 146       # Restart numbering...
 147       $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
 148     }
 149   }
 150 
 151   # Write out modified CONECT records...
 152   my($ModifiedConectAtomNum, $ConectAtomNum, @ConectAtomNums, @ModifiedConectAtomNums);
 153   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 154     if (!IsConectRecordType($RecordLine)) {
 155       next LINE;
 156     }
 157     @ConectAtomNums = ();
 158     @ModifiedConectAtomNums = ();
 159     push @ConectAtomNums, ParseConectRecordLine($RecordLine);
 160     ATOMNUMBER: for $ConectAtomNum (@ConectAtomNums) {
 161       $ModifiedConectAtomNum = $ConectAtomNum;
 162       if (defined($ConectAtomNum)) {
 163         $AtomNumber = $ConectAtomNum;
 164         if ($AtomNumber) {
 165           if (exists $OldToNewAtomNumbersMap{$AtomNumber}) {
 166             $ModifiedConectAtomNum = $OldToNewAtomNumbersMap{$AtomNumber};
 167           }
 168         }
 169       }
 170       push @ModifiedConectAtomNums, $ModifiedConectAtomNum;
 171     }
 172     # Write out the record...
 173     print OUTFILE GenerateConectRecordLine(@ModifiedConectAtomNums), "\n";
 174   }
 175 
 176   # Write out END record...
 177   print OUTFILE GenerateEndRecordLine(), "\n";
 178 
 179   close OUTFILE;
 180 }
 181 
 182 # Renumber residues...
 183 sub RenumberResidues {
 184   my($FileIndex, $PDBRecordLinesRef) = @_;
 185   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $NewHetatmResidueNumber, $TERCount, $TotalTERCount, $PreviousResidueNumber, $PreviousHetatmResidueNumber, $RecordType);
 186 
 187   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 188   print "Generating PDBFileName file $PDBFileName...\n";
 189   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 190 
 191   # Write out header and other older recors...
 192   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 193 
 194   # Do a quick count of all TER records...
 195   $TotalTERCount = 0;
 196   for $RecordLine (@{$PDBRecordLinesRef}) {
 197     if (IsTerRecordType($RecordLine)) {
 198       $TotalTERCount++;
 199     }
 200   }
 201 
 202   # Write out all ATOM records along with TER and model records to indicate
 203   # chains and multiple models..
 204   $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
 205   $NewHetatmResidueNumber = $OptionsInfo{StartingHetatmResidueNumber};
 206 
 207   $TERCount = 0;
 208   $PreviousResidueNumber = 0;
 209   $PreviousHetatmResidueNumber = 0;
 210 
 211   for $RecordLine (@{$PDBRecordLinesRef}) {
 212     if (IsAtomRecordType($RecordLine) || (IsHetatmRecordType($RecordLine) && ($TERCount < $TotalTERCount || $OptionsInfo{HetatmResidueNumberMode} =~ /^Automatic$/i))) {
 213       $RecordType = GetPDBRecordType($RecordLine);
 214       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 215 
 216       if ($PreviousResidueNumber && $PreviousResidueNumber != $ResidueNumber) {
 217         $PreviousResidueNumber = $ResidueNumber;
 218         $NewResidueNumber++;
 219       }
 220       else {
 221         # First residue in a chain...
 222         $PreviousResidueNumber = $ResidueNumber;
 223       }
 224       print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 225 
 226     }
 227     elsif (IsHetatmRecordType($RecordLine)) {
 228       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseHetatmRecordLine($RecordLine);
 229 
 230       # User HETATM residue numbers...
 231       if ($PreviousHetatmResidueNumber && $PreviousHetatmResidueNumber != $ResidueNumber) {
 232         $PreviousHetatmResidueNumber = $ResidueNumber;
 233         $NewHetatmResidueNumber++;
 234       }
 235       else {
 236         # First HETATM residue outside a chain...
 237         $PreviousHetatmResidueNumber = $ResidueNumber;
 238       }
 239 
 240       print OUTFILE GenerateHetatmRecordLine($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewHetatmResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 241     }
 242     elsif (IsTerRecordType($RecordLine)) {
 243       $TERCount++;
 244       $AtomNumber++;
 245       print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode), "\n";
 246       # For per chain numbering, start over again...
 247       if ($OptionsInfo{ResidueNumberMode} =~ /^PerChain$/i) {
 248         if ($TERCount < $TotalTERCount ) {
 249           $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
 250         }
 251         $PreviousResidueNumber = 0;
 252       }
 253     }
 254     elsif (IsModelRecordType($RecordLine)) {
 255       print OUTFILE "$RecordLine\n";
 256     }
 257     elsif (IsEndmdlRecordType($RecordLine)) {
 258       print OUTFILE "$RecordLine\n";
 259     }
 260   }
 261 
 262   # Write out CONECT records...
 263   for $RecordLine (@{$PDBRecordLinesRef}) {
 264     if (IsConectRecordType($RecordLine)) {
 265       print OUTFILE "$RecordLine\n";
 266     }
 267   }
 268 
 269   # Write out END record...
 270   print OUTFILE GenerateEndRecordLine(), "\n";
 271 
 272   close OUTFILE;
 273 }
 274 
 275 # Renumber water residues...
 276 sub RenumberWaters {
 277   my($FileIndex, $PDBRecordLinesRef) = @_;
 278   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $RecordType);
 279 
 280   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 281   print "Generating PDBFileName file $PDBFileName...\n";
 282   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 283 
 284   # Write out header and other older recors...
 285   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 286 
 287   # Write out all ATOM records along with TER and model records to indicate
 288   # chains and multiple models..
 289   $NewResidueNumber = $OptionsInfo{StartingWaterResidueNumber};
 290   for $RecordLine (@{$PDBRecordLinesRef}) {
 291     if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 292       $RecordType = GetPDBRecordType($RecordLine);
 293 
 294       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 295 
 296       if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
 297         $ResidueNumber = $NewResidueNumber;
 298         print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 299         $NewResidueNumber++;
 300       }
 301       else {
 302         print OUTFILE "$RecordLine\n";
 303       }
 304     }
 305     elsif (IsTerRecordType($RecordLine)) {
 306       print OUTFILE "$RecordLine\n";
 307     }
 308     elsif (IsModelRecordType($RecordLine)) {
 309       print OUTFILE "$RecordLine\n";
 310     }
 311     elsif (IsEndmdlRecordType($RecordLine)) {
 312       print OUTFILE "$RecordLine\n";
 313     }
 314   }
 315 
 316   # Write out CONECT records...
 317   for $RecordLine (@{$PDBRecordLinesRef}) {
 318     if (IsConectRecordType($RecordLine)) {
 319       print OUTFILE "$RecordLine\n";
 320     }
 321   }
 322 
 323   # Write out END record...
 324   print OUTFILE GenerateEndRecordLine(), "\n";
 325 
 326   close OUTFILE;
 327 }
 328 
 329 # Rename chain IDs...
 330 sub RenameChainsIDs {
 331   my($FileIndex, $PDBRecordLinesRef) = @_;
 332   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $RecordType, $PreviousChainID, $FirstChainID, $NewChainID, $NewChainIDCounter, %OldToNewChainIDsMap);
 333 
 334   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 335   print "Generating PDBFileName file $PDBFileName...\n";
 336   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 337 
 338   # Write out header and other older recors...
 339   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 340 
 341   # Write out all ATOM records along with TER and model records to indicate
 342   # chains and multiple models..
 343   %OldToNewChainIDsMap = ();
 344   $NewChainIDCounter = $OptionsInfo{StartingChainID};
 345   $FirstChainID = 1;
 346   $PreviousChainID = '';
 347   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 348     if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 349       $RecordType = GetPDBRecordType($RecordLine);
 350 
 351       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 352 
 353       if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
 354         # Chain IDs are not assigned to water residues...
 355         print OUTFILE "$RecordLine\n";
 356         next LINE;
 357       }
 358 
 359       if ($FirstChainID) {
 360         $FirstChainID = 0;
 361         $PreviousChainID = $ChainID;
 362         if ($ChainID || (!$ChainID && $OptionsInfo{RenameEmptyChainIDs})) {
 363           $NewChainID = $NewChainIDCounter;
 364           $OldToNewChainIDsMap{$ChainID} = $NewChainID;
 365         }
 366         else {
 367           $NewChainID = '';
 368         }
 369       }
 370       elsif ($PreviousChainID ne $ChainID) {
 371         if ($ChainID || (!$ChainID && $OptionsInfo{RenameEmptyChainIDs})) {
 372           $PreviousChainID = $ChainID;
 373           if (exists $OldToNewChainIDsMap{$ChainID}) {
 374             $NewChainID = $OldToNewChainIDsMap{$ChainID};
 375           }
 376           else {
 377             $NewChainIDCounter++;
 378             $NewChainID = $NewChainIDCounter;
 379             $OldToNewChainIDsMap{$ChainID} = $NewChainID;
 380           }
 381         }
 382         else {
 383           $NewChainID = '';
 384         }
 385       }
 386 
 387       print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 388     }
 389     elsif (IsTerRecordType($RecordLine)) {
 390       $AtomNumber++;
 391       print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode), "\n";
 392     }
 393     elsif (IsModelRecordType($RecordLine)) {
 394       print OUTFILE "$RecordLine\n";
 395     }
 396     elsif (IsEndmdlRecordType($RecordLine)) {
 397       print OUTFILE "$RecordLine\n";
 398     }
 399   }
 400 
 401   # Write out CONECT records...
 402   for $RecordLine (@{$PDBRecordLinesRef}) {
 403     if (IsConectRecordType($RecordLine)) {
 404       print OUTFILE "$RecordLine\n";
 405     }
 406   }
 407 
 408   # Write out END record...
 409   print OUTFILE GenerateEndRecordLine(), "\n";
 410 
 411   close OUTFILE;
 412 }
 413 
 414 
 415 # Write out modifed header and other older records...
 416 sub WriteHeaderAndOlderRecords {
 417   my($OutFileRef, $PDBRecordLinesRef) = @_;
 418 
 419   if ($OptionsInfo{ModifyHeaderRecord}) {
 420     # Write out modified HEADER record...
 421     my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
 422     $Classification = 'Data modified using MayaChemTools';
 423     print $OutFileRef GenerateHeaderRecordLine($IDCode, $Classification), "\n";
 424   }
 425   else {
 426     print $OutFileRef $PDBRecordLinesRef->[0], "\n";
 427   }
 428 
 429   # Write out any old records...
 430   if ($OptionsInfo{KeepOldRecords}) {
 431     my($RecordLineIndex, $RecordLine);
 432     # Skip HEADER record and write out older records all the way upto first MODEL/ATOM/HETATM records from input file...
 433     RECORDLINE: for $RecordLineIndex (1 .. $#{$PDBRecordLinesRef}) {
 434       $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
 435       if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 436         last RECORDLINE;
 437       }
 438       print $OutFileRef "$RecordLine\n";
 439     }
 440   }
 441 }
 442 
 443 # Get header record information assuming it's the first record...
 444 sub GetHeaderRecordInformation {
 445   my($PDBRecordLinesRef) = @_;
 446   my($Classification, $DepositionDate, $IDCode, $HeaderRecordLine);
 447 
 448   ($Classification, $DepositionDate, $IDCode) = ('') x 3;
 449   $HeaderRecordLine = $PDBRecordLinesRef->[0];
 450   if (IsHeaderRecordType($HeaderRecordLine)) {
 451     ($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine);
 452   }
 453   return ($Classification, $DepositionDate, $IDCode);
 454 }
 455 
 456 
 457 # Process option values...
 458 sub ProcessOptions {
 459   %OptionsInfo = ();
 460   $OptionsInfo{Mode} = $Options{mode};
 461 
 462   $OptionsInfo{StartingAtomNumber} = $Options{atomnumberstart};
 463   $OptionsInfo{StartingChainID} = $Options{chainidstart};
 464   $OptionsInfo{RenameEmptyChainIDs} = ($Options{chainidrenameempty} =~ /^Yes$/i) ? 1 : 0;
 465 
 466   $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0;
 467   $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0;
 468 
 469   $OptionsInfo{ResidueNumberMode} = $Options{residuenumbermode};
 470   $OptionsInfo{StartingResidueNumber} = $Options{residuenumberstart};
 471 
 472   $OptionsInfo{HetatmResidueNumberMode} = $Options{residuenumberhetatmmode};
 473   $OptionsInfo{StartingHetatmResidueNumber} = $Options{residuenumberstarthetatm};
 474 
 475   $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
 476   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
 477 
 478   $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
 479   $OptionsInfo{StartingWaterResidueNumber} = $Options{waterresiduestart};
 480   @{$OptionsInfo{SpecifiedWaterResiduesList}} = ();
 481   %{$OptionsInfo{SpecifiedWaterResiduesMap}} = ();
 482 
 483   my(@SpecifiedWaterResiduesList);
 484   @SpecifiedWaterResiduesList = ();
 485   my($WaterResidueName);
 486   if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) {
 487     push @SpecifiedWaterResiduesList, ('HOH', 'WAT', 'H2O');
 488   }
 489   else {
 490     @SpecifiedWaterResiduesList = split /\,/, $Options{waterresiduenames};
 491   }
 492   for $WaterResidueName (@SpecifiedWaterResiduesList) {
 493     $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName;
 494   }
 495   push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList;
 496 }
 497 
 498 # Retrieve information about PDB files...
 499 sub RetrievePDBFilesInfo {
 500   my($Index, $PDBFile, $PDBRecordLinesRef, $ChainsAndResiduesInfoRef, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot,  $Mode, $OutFileMode, @OutFileNames);
 501 
 502   %PDBFilesInfo = ();
 503   @{$PDBFilesInfo{FileOkay}} = ();
 504   @{$PDBFilesInfo{OutFileRoot}} = ();
 505   @{$PDBFilesInfo{OutFileNames}} = ();
 506 
 507   FILELIST: for $Index (0 .. $#PDBFilesList) {
 508     $PDBFilesInfo{FileOkay}[$Index] = 0;
 509 
 510     $PDBFilesInfo{OutFileRoot}[$Index] = '';
 511     @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
 512     @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
 513 
 514     $PDBFile = $PDBFilesList[$Index];
 515     if (!(-e $PDBFile)) {
 516       warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
 517       next FILELIST;
 518     }
 519     if (!CheckFileType($PDBFile, "pdb")) {
 520       warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
 521       next FILELIST;
 522     }
 523     if (! open PDBFILE, "$PDBFile") {
 524       warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
 525       next FILELIST;
 526     }
 527     close PDBFILE;
 528 
 529     # Get PDB data...
 530     $PDBRecordLinesRef = ReadPDBFile($PDBFile);
 531     $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
 532     if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
 533       warn "Warning: Ignoring file $PDBFile: No chains found \n";
 534       next FILELIST;
 535     }
 536 
 537     # Setup output file names...
 538     @OutFileNames = ();
 539     $FileDir = ""; $FileName = ""; $FileExt = "";
 540     ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
 541     if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
 542       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
 543       if ($RootFileName && $RootFileExt) {
 544         $FileName = $RootFileName;
 545       }
 546       else {
 547         $FileName = $OptionsInfo{OutFileRoot};
 548       }
 549       $OutFileRoot = $FileName;
 550     }
 551     else {
 552       $OutFileRoot = $FileName;
 553     }
 554     $Mode = $OptionsInfo{Mode};
 555     MODE: {
 556         if ($Mode =~ /^RenumberAtoms$/i) { $OutFileMode = 'RenumberAtoms'; last MODE;}
 557         if ($Mode =~ /^RenumberResidues$/i) { $OutFileMode = 'RenumberResidues'; last MODE;}
 558         if ($Mode =~ /^RenumberWaters$/i) { $OutFileMode = 'RenumberWaters'; last MODE;}
 559         if ($Mode =~ /^RenameChainIDs$/i) { $OutFileMode = 'RenameChainIDs'; last MODE;}
 560         $OutFileMode = '';
 561     }
 562     $OutFileName = "${OutFileRoot}${OutFileMode}.pdb";
 563     push @OutFileNames, $OutFileName;
 564 
 565     $PDBFilesInfo{FileOkay}[$Index] = 1;
 566     $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
 567 
 568     push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames;
 569   }
 570 }
 571 
 572 # Setup script usage  and retrieve command line arguments specified using various options...
 573 sub SetupScriptUsage {
 574 
 575   # Retrieve all the options...
 576   %Options = ();
 577   $Options{atomnumberstart} = 1;
 578   $Options{chainidstart} = 'A';
 579   $Options{chainidrenameempty} = 'No';
 580   $Options{keepoldrecords} = 'no';
 581   $Options{mode} = 'RenumberResidues';
 582   $Options{modifyheader} = 'yes';
 583   $Options{residuenumbermode} = 'PerChain';
 584   $Options{residuenumberstart} = 1;
 585   $Options{residuenumberhetatmmode} = 'Automatic';
 586   $Options{residuenumberstarthetatm} = 6000;
 587   $Options{waterresiduenames} = 'Automatic';
 588   $Options{waterresiduestart} = 8000;
 589 
 590   if (!GetOptions(\%Options, "help|h", "atomnumberstart|a=i", "chainidstart|c=s", "chainidrenameempty=s", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "overwrite|o", "residuenumbermode=s", "residuenumberstart=i", "residuenumberhetatmmode=s", "residuenumberstarthetatm=i", "root|r=s", "sequencelength=i", "waterresiduenames=s", "waterresiduestart=i", "workingdir|w=s")) {
 591     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
 592   }
 593   if ($Options{workingdir}) {
 594     if (! -d $Options{workingdir}) {
 595       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
 596     }
 597     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
 598   }
 599   if (!IsPositiveInteger($Options{atomnumberstart})) {
 600     die "Error: The value specified, $Options{atomnumberstart}, for option \"-a, --AtomNumberStart\" is not valid. Allowed values: >0\n";
 601   }
 602   if ((length($Options{chainidstart}) > 1) || ($Options{chainidstart} !~ /[A-Z]/i)) {
 603     die "Error: The value specified, $Options{chainidstart}, for option \"-c, --ChainIDStart\" is not valid. Allowed values: a single character from A to Z\n";
 604   }
 605   if ($Options{chainidrenameempty} !~ /^(yes|no)$/i) {
 606     die "Error: The value specified, $Options{chainidrenameempty}, for option \"--chainidrenameempty\" is not valid. Allowed values: yes or no\n";
 607   }
 608   if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
 609     die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
 610   }
 611   if ($Options{mode} !~ /^(RenumberAtoms|RenumberResidues|RenumberWaters|RenameChainIDs)$/i) {
 612     die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: RenumberAtoms, RenumberResidues, RenumberWaters or RenameChainIDs\n";
 613   }
 614   if ($Options{modifyheader} !~ /^(yes|no)$/i) {
 615     die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
 616   }
 617   if ($Options{residuenumbermode} !~ /^(Sequential|PerChain)$/i) {
 618     die "Error: The value specified, $Options{residuenumbermode}, for option \"--ResidueNumberMode\" is not valid. Allowed values: Sequential or PerChain\n";
 619   }
 620   if (!IsPositiveInteger($Options{residuenumberstart})) {
 621     die "Error: The value specified, $Options{residuenumberstart}, for option \"--ResidueNumberStart\" is not valid. Allowed values: >0\n";
 622   }
 623   if ($Options{residuenumberhetatmmode} !~ /^(automatic|specify)$/i) {
 624     die "Error: The value specified, $Options{residuenumberhetatmmode}, for option \"--residuenumbermode\" is not valid. Allowed values: automatic or specify\n";
 625   }
 626   if (!IsPositiveInteger($Options{residuenumberstarthetatm})) {
 627     die "Error: The value specified, $Options{residuenumberstarthetatm}, for option \"--residuenumberstartHetatm\" is not valid. Allowed values: >0\n";
 628   }
 629   if (!IsPositiveInteger $Options{waterresiduestart}) {
 630     die "Error: The value specified, $Options{waterresiduestart}, for option \"--waterresiduestart\" is not valid. Allowed values: >0\n";
 631   }
 632 }
 633