Mercurial > repos > deepakjadmin > mayatool3_test2

diff bin/ModifyPDBFiles.pl @ 0:4816e4a8ae95 draft default tip
Uploaded
author: deepakjadmin
date: Wed, 20 Jan 2016 09:23:18 -0500
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/ModifyPDBFiles.pl	Wed Jan 20 09:23:18 2016 -0500
@@ -0,0 +1,865 @@
+#!/usr/bin/perl -w
+#
+# $RCSfile: ModifyPDBFiles.pl,v $
+# $Date: 2015/02/28 20:46:20 $
+# $Revision: 1.25 $
+#
+# Author: Manish Sud <msud@san.rr.com>
+#
+# Copyright (C) 2015 Manish Sud. All rights reserved.
+#
+# This file is part of MayaChemTools.
+#
+# MayaChemTools is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# MayaChemTools is distributed in the hope that it will be useful, but without
+# any warranty; without even the implied warranty of merchantability of fitness
+# for a particular purpose.  See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
+# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
+# Boston, MA, 02111-1307, USA.
+#
+
+use strict;
+use FindBin; use lib "$FindBin::Bin/../lib";
+use Getopt::Long;
+use File::Basename;
+use Text::ParseWords;
+use Benchmark;
+use FileUtil;
+use TextUtil;
+use PDBFileUtil;
+
+# File-scoped state shared with the subroutines defined below: the script name,
+# the raw command line options and the benchmark timestamps.
+my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
+
+# Autoflush STDOUT so that progress messages show up immediately...
+$| = 1;
+
+# Starting message...
+$ScriptName = basename($0);
+print "\n$ScriptName: Starting...\n\n";
+# Direct method invocation; indirect object syntax ("new Benchmark") is
+# ambiguous to parse and discouraged...
+$StartTime = Benchmark->new;
+
+# Get the options and setup script...
+SetupScriptUsage();
+if ($Options{help} || @ARGV < 1) {
+  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
+}
+
+# Names of the PDB files specified on the command line...
+my(@PDBFilesList);
+@PDBFilesList = ExpandFileNames(\@ARGV, "pdb");
+
+# Process options...
+print "Processing options...\n";
+my(%OptionsInfo);
+ProcessOptions();
+
+# Setup information about input files...
+print "Checking input PDB file(s)...\n";
+my(%PDBFilesInfo);
+RetrievePDBFilesInfo();
+
+# Process input files; files marked as not okay by RetrievePDBFilesInfo are skipped...
+my($FileIndex);
+if (@PDBFilesList > 1) {
+  print "\nProcessing PDB files...\n";
+}
+for $FileIndex (0 .. $#PDBFilesList) {
+  if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
+    print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
+    ModifyPDBFiles($FileIndex);
+  }
+}
+print "\n$ScriptName:Done...\n\n";
+
+# Report the elapsed time...
+$EndTime = Benchmark->new;
+$TotalTime = timediff ($EndTime, $StartTime);
+print "Total time: ", timestr($TotalTime), "\n";
+
+###############################################################################
+
+# Read the PDB file at the specified index in @PDBFilesList and hand its record
+# lines over to the subroutine implementing the mode selected by the user...
+#
+# Nothing is done for a mode value that matches none of the known modes.
+sub ModifyPDBFiles {
+  my($FileIndex) = @_;
+
+  # Get PDB data...
+  my $PDBRecordLinesRef = ReadPDBFile($PDBFilesList[$FileIndex]);
+
+  # Mode name patterns paired with the subroutines implementing them; the first
+  # matching pattern wins...
+  my @ModeHandlers = (
+    [ qr/^RenumberAtoms$/i, \&RenumberAtoms ],
+    [ qr/^RenumberResidues$/i, \&RenumberResidues ],
+    [ qr/^RenumberWaters$/i, \&RenumberWaters ],
+    [ qr/^RenameChainIDs$/i, \&RenameChainsIDs ],
+  );
+
+  HANDLER: for my $ModeHandlerRef (@ModeHandlers) {
+    my($ModePattern, $HandlerRef) = @{$ModeHandlerRef};
+    if ($OptionsInfo{Mode} =~ $ModePattern) {
+      $HandlerRef->($FileIndex, $PDBRecordLinesRef);
+      last HANDLER;
+    }
+  }
+}
+
+# Renumber atom and hetero atom serial numbers...
+#
+# Arguments: index of the input file in @PDBFilesList and a reference to the
+# array of its PDB record lines.
+#
+# The output file, named by the first entry in $PDBFilesInfo{OutFileNames} for the
+# file, receives the header/older records, the renumbered ATOM/HETATM/TER records
+# together with any MODEL/ENDMDL records, the CONECT records with atom numbers
+# translated to the new numbering and a final END record. The script dies on
+# failure to open or close the output file.
+sub RenumberAtoms {
+  my($FileIndex, $PDBRecordLinesRef) = @_;
+  my($PDBFileName, $OutFH, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewAtomNumber, $RecordType, %OldToNewAtomNumbersMap);
+
+  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
+  print "Generating PDBFileName file $PDBFileName...\n";
+  # Three argument open: the file name is never interpreted as a mode...
+  open $OutFH, ">", $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";
+
+  # Write out header and other older records...
+  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
+
+  # Write out all ATOM records along with TER and model records to indicate
+  # chains and multiple models..
+  %OldToNewAtomNumbersMap = ();
+  # $NewAtomNumber always holds the next unused serial number...
+  $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
+      $RecordType = GetPDBRecordType($RecordLine);
+
+      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
+
+      print $OutFH GenerateAtomOrHetatmRecordLine($RecordType, $NewAtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
+
+      # Remember the mapping for translating CONECT records later on...
+      $OldToNewAtomNumbersMap{$AtomNumber} = $NewAtomNumber;
+      $NewAtomNumber++;
+    }
+    elsif (IsTerRecordType($RecordLine)) {
+      # TER record takes the next unused serial number, which is then consumed.
+      # Incrementing before writing would skip a serial number and hand out the
+      # serial number of the TER record again to the atom following it...
+      print $OutFH GenerateTerRecordLine($NewAtomNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode), "\n";
+      $NewAtomNumber++;
+    }
+    elsif (IsModelRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+    elsif (IsEndmdlRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+      # Restart numbering for the next model...
+      $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
+    }
+  }
+
+  # Write out modified CONECT records...
+  my($ModifiedConectAtomNum, $ConectAtomNum, @ModifiedConectAtomNums);
+  LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (!IsConectRecordType($RecordLine)) {
+      next LINE;
+    }
+    @ModifiedConectAtomNums = ();
+    for $ConectAtomNum (ParseConectRecordLine($RecordLine)) {
+      # Undefined, empty or unknown atom numbers are passed through unchanged...
+      $ModifiedConectAtomNum = ($ConectAtomNum && exists $OldToNewAtomNumbersMap{$ConectAtomNum}) ? $OldToNewAtomNumbersMap{$ConectAtomNum} : $ConectAtomNum;
+      push @ModifiedConectAtomNums, $ModifiedConectAtomNum;
+    }
+    # Write out the record...
+    print $OutFH GenerateConectRecordLine(@ModifiedConectAtomNums), "\n";
+  }
+
+  # Write out END record...
+  print $OutFH GenerateEndRecordLine(), "\n";
+
+  # Buffered write errors surface at close...
+  close $OutFH or die "Error: Couldn't close $PDBFileName: $! \n";
+}
+
+# Renumber residues...
+#
+# Arguments: index of the input file in @PDBFilesList and a reference to the
+# array of its PDB record lines.
+#
+# A new residue number is assigned each time the residue number of the input
+# record differs from the one in the previous record. ATOM records, and HETATM
+# records handled along with them, share one counter; HETATM records following the
+# last TER record use a separate counter starting from StartingHetatmResidueNumber
+# unless HetatmResidueNumberMode is Automatic. CONECT records are copied as is.
+# The script dies on failure to open or close the output file.
+sub RenumberResidues {
+  my($FileIndex, $PDBRecordLinesRef) = @_;
+  my($PDBFileName, $OutFH, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $NewHetatmResidueNumber, $TERCount, $TotalTERCount, $PreviousResidueNumber, $PreviousHetatmResidueNumber, $RecordType, $AutomaticHetatmMode, $PerChainMode);
+
+  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
+  print "Generating PDBFileName file $PDBFileName...\n";
+  # Three argument open: the file name is never interpreted as a mode...
+  open $OutFH, ">", $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";
+
+  # Write out header and other older records...
+  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
+
+  # Do a quick count of all TER records; it is used to recognize the last one...
+  $TotalTERCount = 0;
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (IsTerRecordType($RecordLine)) {
+      $TotalTERCount++;
+    }
+  }
+
+  # Loop invariant option checks...
+  $AutomaticHetatmMode = ($OptionsInfo{HetatmResidueNumberMode} =~ /^Automatic$/i) ? 1 : 0;
+  $PerChainMode = ($OptionsInfo{ResidueNumberMode} =~ /^PerChain$/i) ? 1 : 0;
+
+  # Write out all ATOM records along with TER and model records to indicate
+  # chains and multiple models..
+  $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
+  $NewHetatmResidueNumber = $OptionsInfo{StartingHetatmResidueNumber};
+
+  # A value of zero for previous residue numbers indicates that no residue has
+  # been seen yet in the current run of records...
+  $TERCount = 0;
+  $PreviousResidueNumber = 0;
+  $PreviousHetatmResidueNumber = 0;
+
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (IsAtomRecordType($RecordLine) || (IsHetatmRecordType($RecordLine) && ($TERCount < $TotalTERCount || $AutomaticHetatmMode))) {
+      # ATOM records along with HETATM records which are either located before
+      # the last TER record or numbered automatically...
+      $RecordType = GetPDBRecordType($RecordLine);
+      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
+
+      if ($PreviousResidueNumber && $PreviousResidueNumber != $ResidueNumber) {
+        $PreviousResidueNumber = $ResidueNumber;
+        $NewResidueNumber++;
+      }
+      else {
+        # First residue in a chain or another atom in the current residue...
+        $PreviousResidueNumber = $ResidueNumber;
+      }
+      print $OutFH GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
+
+    }
+    elsif (IsHetatmRecordType($RecordLine)) {
+      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseHetatmRecordLine($RecordLine);
+
+      # Use HETATM residue numbers...
+      if ($PreviousHetatmResidueNumber && $PreviousHetatmResidueNumber != $ResidueNumber) {
+        $PreviousHetatmResidueNumber = $ResidueNumber;
+        $NewHetatmResidueNumber++;
+      }
+      else {
+        # First HETATM residue outside a chain or another atom in the current residue...
+        $PreviousHetatmResidueNumber = $ResidueNumber;
+      }
+
+      print $OutFH GenerateHetatmRecordLine($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewHetatmResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
+    }
+    elsif (IsTerRecordType($RecordLine)) {
+      $TERCount++;
+      # TER record is written using the serial number following the one from the
+      # most recently parsed ATOM/HETATM record along with its residue data...
+      $AtomNumber++;
+      print $OutFH GenerateTerRecordLine($AtomNumber, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode), "\n";
+      # For per chain numbering, start over again...
+      if ($PerChainMode) {
+        if ($TERCount < $TotalTERCount ) {
+          $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
+        }
+        else {
+          # Records following the last TER record continue with the next residue
+          # number. Without this step, the first residue after the last TER record
+          # would reuse the number of the last residue in the chain: the previous
+          # residue number is reset below and no increment would take place...
+          $NewResidueNumber++;
+        }
+        $PreviousResidueNumber = 0;
+      }
+    }
+    elsif (IsModelRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+    elsif (IsEndmdlRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+  }
+
+  # Write out CONECT records...
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (IsConectRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+  }
+
+  # Write out END record...
+  print $OutFH GenerateEndRecordLine(), "\n";
+
+  # Buffered write errors surface at close...
+  close $OutFH or die "Error: Couldn't close $PDBFileName: $! \n";
+}
+
+# Renumber water residues...
+#
+# Arguments: index of the input file in @PDBFilesList and a reference to the
+# array of its PDB record lines.
+#
+# ATOM/HETATM records whose residue name is a key in
+# $OptionsInfo{SpecifiedWaterResiduesMap} are written with new residue numbers
+# starting from StartingWaterResidueNumber; all other ATOM, HETATM, TER, MODEL,
+# ENDMDL and CONECT records are copied as is. Consecutive water records belonging
+# to the same input residue share one new residue number. The script dies on
+# failure to open or close the output file.
+sub RenumberWaters {
+  my($FileIndex, $PDBRecordLinesRef) = @_;
+  my($PDBFileName, $OutFH, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $RecordType, $WaterResidueKey, $PreviousWaterResidueKey, $WaterAtomKey, %SeenWaterAtomKeys);
+
+  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
+  print "Generating PDBFileName file $PDBFileName...\n";
+  # Three argument open: the file name is never interpreted as a mode...
+  open $OutFH, ">", $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";
+
+  # Write out header and other older records...
+  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
+
+  # Write out all ATOM records along with TER and model records to indicate
+  # chains and multiple models..
+
+  # The counter is advanced at the start of each new water residue; consequently,
+  # it starts one below the first number to assign...
+  $NewResidueNumber = $OptionsInfo{StartingWaterResidueNumber} - 1;
+  $PreviousWaterResidueKey = undef;
+  %SeenWaterAtomKeys = ();
+
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
+      $RecordType = GetPDBRecordType($RecordLine);
+
+      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
+
+      if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
+        # Identify the input residue along with the atom within it. The alternate
+        # location is part of the atom key to keep alternate positions of an atom
+        # in one residue...
+        $WaterResidueKey = join ':', map { defined($_) ? $_ : '' } ($ChainID, $ResidueNumber, $InsertionCode, $ResidueName);
+        $WaterAtomKey = join ':', map { defined($_) ? $_ : '' } ($AtomName, $AlternateLocation);
+
+        # A new water residue starts whenever input residue changes or an atom
+        # shows up again; the later covers input files using the same residue
+        # number for consecutive waters. Advancing the counter for each record
+        # would split waters containing hydrogens into one residue per atom...
+        if (!defined($PreviousWaterResidueKey) || $WaterResidueKey ne $PreviousWaterResidueKey || exists $SeenWaterAtomKeys{$WaterAtomKey}) {
+          $NewResidueNumber++;
+          %SeenWaterAtomKeys = ();
+        }
+        $PreviousWaterResidueKey = $WaterResidueKey;
+        $SeenWaterAtomKeys{$WaterAtomKey} = 1;
+
+        print $OutFH GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
+      }
+      else {
+        # Any other record ends the current water residue...
+        $PreviousWaterResidueKey = undef;
+        print $OutFH "$RecordLine\n";
+      }
+    }
+    elsif (IsTerRecordType($RecordLine) || IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
+      $PreviousWaterResidueKey = undef;
+      print $OutFH "$RecordLine\n";
+    }
+  }
+
+  # Write out CONECT records...
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (IsConectRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+  }
+
+  # Write out END record...
+  print $OutFH GenerateEndRecordLine(), "\n";
+
+  # Buffered write errors surface at close...
+  close $OutFH or die "Error: Couldn't close $PDBFileName: $! \n";
+}
+
+# Rename chain IDs...
+#
+# Arguments: index of the input file in @PDBFilesList and a reference to the
+# array of its PDB record lines.
+#
+# Each distinct input chain ID is mapped, in order of its first appearance, to a
+# new chain ID starting from $OptionsInfo{StartingChainID}. Records with empty
+# chain IDs are renamed only when $OptionsInfo{RenameEmptyChainIDs} is set. Water
+# residues, identified using $OptionsInfo{SpecifiedWaterResiduesMap}, along with
+# MODEL, ENDMDL and CONECT records are copied as is. The script dies on failure to
+# open or close the output file.
+#
+# NOTE(review): new chain IDs are generated by incrementing a string; Perl turns
+# 'Z' into 'AA', which is no longer a single character - confirm whether more
+# chains than available letters need to be handled.
+sub RenameChainsIDs {
+  my($FileIndex, $PDBRecordLinesRef) = @_;
+  my($PDBFileName, $OutFH, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $RecordType, $NewChainID, $NextNewChainID, %OldToNewChainIDsMap);
+
+  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
+  print "Generating PDBFileName file $PDBFileName...\n";
+  # Three argument open: the file name is never interpreted as a mode...
+  open $OutFH, ">", $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";
+
+  # Write out header and other older records...
+  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
+
+  # Write out all ATOM records along with TER and model records to indicate
+  # chains and multiple models..
+  %OldToNewChainIDsMap = ();
+  # $NextNewChainID always holds the next unused chain ID...
+  $NextNewChainID = $OptionsInfo{StartingChainID};
+  $NewChainID = '';
+  LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
+      $RecordType = GetPDBRecordType($RecordLine);
+
+      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
+
+      if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
+        # Chain IDs are not assigned to water residues...
+        print $OutFH "$RecordLine\n";
+        next LINE;
+      }
+
+      # Figure out new chain ID for each record from its own chain ID. Carrying
+      # the new chain ID over from the previous record would leave it empty after
+      # going through records with no chain ID and coming back to an earlier
+      # chain. Length is checked, instead of truth, to treat 0 as a chain ID...
+      if ((defined($ChainID) && length($ChainID)) || $OptionsInfo{RenameEmptyChainIDs}) {
+        if (!exists $OldToNewChainIDsMap{$ChainID}) {
+          # First appearance of the chain: it takes the next unused chain ID...
+          $OldToNewChainIDsMap{$ChainID} = $NextNewChainID;
+          $NextNewChainID++;
+        }
+        $NewChainID = $OldToNewChainIDsMap{$ChainID};
+      }
+      else {
+        # Leave empty chain IDs alone without using up a new chain ID...
+        $NewChainID = '';
+      }
+
+      print $OutFH GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
+    }
+    elsif (IsTerRecordType($RecordLine)) {
+      # TER record is written using the serial number following the one from the
+      # most recently parsed ATOM/HETATM record along with the new chain ID from
+      # the most recent non water record...
+      $AtomNumber++;
+      print $OutFH GenerateTerRecordLine($AtomNumber, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode), "\n";
+    }
+    elsif (IsModelRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+    elsif (IsEndmdlRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+  }
+
+  # Write out CONECT records...
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (IsConectRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+  }
+
+  # Write out END record...
+  print $OutFH GenerateEndRecordLine(), "\n";
+
+  # Buffered write errors surface at close...
+  close $OutFH or die "Error: Couldn't close $PDBFileName: $! \n";
+}
+
+
+# Write out modified header and other older records...
+#
+# Arguments: an output file handle, or a reference to one, and a reference to the
+# array of PDB record lines.
+#
+# With $OptionsInfo{ModifyHeaderRecord} set, a new HEADER record is generated using
+# the ID code from the first record and a fixed classification; otherwise, the
+# first record is copied as is. With $OptionsInfo{KeepOldRecords} set, the records
+# after the first one are copied all the way up to the first MODEL/ATOM/HETATM
+# record.
+sub WriteHeaderAndOlderRecords {
+  my($OutFileRef, $PDBRecordLinesRef) = @_;
+
+  if ($OptionsInfo{ModifyHeaderRecord}) {
+    # Write out modified HEADER record...
+    my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
+    $Classification = 'Data modified using MayaChemTools';
+    print {$OutFileRef} GenerateHeaderRecordLine($IDCode, $Classification), "\n";
+  }
+  else {
+    # Copy the first record unless it's a MODEL/ATOM/HETATM record. Those records
+    # are written out by the caller, which goes over all the record lines again;
+    # copying one of them here would duplicate it in its unmodified form...
+    my($FirstRecordLine) = $PDBRecordLinesRef->[0];
+    if (!(IsModelRecordType($FirstRecordLine) || IsAtomRecordType($FirstRecordLine) || IsHetatmRecordType($FirstRecordLine))) {
+      print {$OutFileRef} $FirstRecordLine, "\n";
+    }
+  }
+
+  # Write out any old records...
+  if ($OptionsInfo{KeepOldRecords}) {
+    my($RecordLineIndex, $RecordLine);
+    # Skip first record, handled above, and write out older records all the way
+    # upto first MODEL/ATOM/HETATM records from input file...
+    RECORDLINE: for $RecordLineIndex (1 .. $#{$PDBRecordLinesRef}) {
+      $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
+      if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
+        last RECORDLINE;
+      }
+      print {$OutFileRef} "$RecordLine\n";
+    }
+  }
+}
+
+# Retrieve classification, deposition date and ID code from the first record
+# line, which is assumed to be the HEADER record; three empty strings are
+# returned for any other type of first record...
+sub GetHeaderRecordInformation {
+  my($PDBRecordLinesRef) = @_;
+
+  my $FirstRecordLine = $PDBRecordLinesRef->[0];
+
+  # Only the first three parsed values are of interest...
+  my @HeaderFields = IsHeaderRecordType($FirstRecordLine) ? (ParseHeaderRecordLine($FirstRecordLine))[0 .. 2] : ('', '', '');
+
+  return ($HeaderFields[0], $HeaderFields[1], $HeaderFields[2]);
+}
+
+
+# Process option values...
+#
+# Copy the command line option values from %Options into %OptionsInfo, turning
+# yes/no values into 1/0 flags, and setup the list and the map of residue names
+# to treat as water residues.
+sub ProcessOptions {
+  %OptionsInfo = ();
+  $OptionsInfo{Mode} = $Options{mode};
+
+  $OptionsInfo{StartingAtomNumber} = $Options{atomnumberstart};
+  $OptionsInfo{StartingChainID} = $Options{chainidstart};
+  $OptionsInfo{RenameEmptyChainIDs} = ($Options{chainidrenameempty} =~ /^Yes$/i) ? 1 : 0;
+
+  $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0;
+  $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0;
+
+  $OptionsInfo{ResidueNumberMode} = $Options{residuenumbermode};
+  $OptionsInfo{StartingResidueNumber} = $Options{residuenumberstart};
+
+  $OptionsInfo{HetatmResidueNumberMode} = $Options{residuenumberhetatmmode};
+  $OptionsInfo{StartingHetatmResidueNumber} = $Options{residuenumberstarthetatm};
+
+  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
+  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
+
+  $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
+  $OptionsInfo{StartingWaterResidueNumber} = $Options{waterresiduestart};
+  @{$OptionsInfo{SpecifiedWaterResiduesList}} = ();
+  %{$OptionsInfo{SpecifiedWaterResiduesMap}} = ();
+
+  my(@SpecifiedWaterResiduesList);
+  @SpecifiedWaterResiduesList = ();
+  my($WaterResidueName);
+  if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) {
+    push @SpecifiedWaterResiduesList, ('HOH', 'WAT', 'H2O');
+  }
+  else {
+    # Get rid of any white space around the names in the comma delimited list:
+    # a value such as "HOH, WAT" would otherwise lead to " WAT", which matches no
+    # residue name. Empty names are dropped...
+    my($SpecifiedName);
+    for $SpecifiedName (split /\,/, $Options{waterresiduenames}) {
+      $WaterResidueName = $SpecifiedName;
+      $WaterResidueName =~ s/^\s+//;
+      $WaterResidueName =~ s/\s+$//;
+      if (length $WaterResidueName) {
+        push @SpecifiedWaterResiduesList, $WaterResidueName;
+      }
+    }
+  }
+  for $WaterResidueName (@SpecifiedWaterResiduesList) {
+    $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName;
+  }
+  push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList;
+}
+
+# Retrieve information about PDB files...
+#
+# For each file in @PDBFilesList, %PDBFilesInfo gets a FileOkay flag, the root
+# used for the output file name and the list of output file names. A file is
+# marked as okay only when it exists, passes the PDB file type check, can be
+# opened for reading and contains at least one chain; a warning is issued for any
+# file failing these checks.
+#
+# NOTE(review): $OptionsInfo{OverwriteFiles} is not consulted in here or anywhere
+# else in this script; existing output files are overwritten with or without the
+# "-o, --overwrite" option - confirm whether that's intended.
+sub RetrievePDBFilesInfo {
+  my($Index, $PDBFile, $PDBRecordLinesRef, $ChainsAndResiduesInfoRef, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot,  $Mode, $OutFileMode, @OutFileNames);
+
+  %PDBFilesInfo = ();
+  @{$PDBFilesInfo{FileOkay}} = ();
+  @{$PDBFilesInfo{OutFileRoot}} = ();
+  @{$PDBFilesInfo{OutFileNames}} = ();
+
+  FILELIST: for $Index (0 .. $#PDBFilesList) {
+    # Assume the worst until all the checks have been passed...
+    $PDBFilesInfo{FileOkay}[$Index] = 0;
+
+    $PDBFilesInfo{OutFileRoot}[$Index] = '';
+    @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
+
+    $PDBFile = $PDBFilesList[$Index];
+    if (!(-e $PDBFile)) {
+      warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
+      next FILELIST;
+    }
+    if (!CheckFileType($PDBFile, "pdb")) {
+      warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
+      next FILELIST;
+    }
+    # Make sure the file is readable. Three argument open is used to keep any
+    # special characters in the name of a user specified file from being
+    # interpreted as an open mode or a command...
+    my($PDBFileHandle);
+    if (! open $PDBFileHandle, "<", $PDBFile) {
+      warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
+      next FILELIST;
+    }
+    close $PDBFileHandle;
+
+    # Get PDB data...
+    $PDBRecordLinesRef = ReadPDBFile($PDBFile);
+    $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
+    if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
+      warn "Warning: Ignoring file $PDBFile: No chains found \n";
+      next FILELIST;
+    }
+
+    # Setup output file names...
+    @OutFileNames = ();
+    $FileDir = ""; $FileName = ""; $FileExt = "";
+    ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
+    if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
+      # User specified root is only used for a single input file...
+      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
+      if ($RootFileName && $RootFileExt) {
+        $FileName = $RootFileName;
+      }
+      else {
+        $FileName = $OptionsInfo{OutFileRoot};
+      }
+      $OutFileRoot = $FileName;
+    }
+    else {
+      $OutFileRoot = $FileName;
+    }
+    # Mode name, in its canonical form, is appended to the root...
+    $Mode = $OptionsInfo{Mode};
+    MODE: {
+        if ($Mode =~ /^RenumberAtoms$/i) { $OutFileMode = 'RenumberAtoms'; last MODE;}
+        if ($Mode =~ /^RenumberResidues$/i) { $OutFileMode = 'RenumberResidues'; last MODE;}
+        if ($Mode =~ /^RenumberWaters$/i) { $OutFileMode = 'RenumberWaters'; last MODE;}
+        if ($Mode =~ /^RenameChainIDs$/i) { $OutFileMode = 'RenameChainIDs'; last MODE;}
+        $OutFileMode = '';
+    }
+    $OutFileName = "${OutFileRoot}${OutFileMode}.pdb";
+    push @OutFileNames, $OutFileName;
+
+    $PDBFilesInfo{FileOkay}[$Index] = 1;
+    $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
+
+    push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames;
+  }
+}
+
+# Setup script usage and retrieve command line arguments specified using various options...
+#
+# %Options is initialized with the default values before parsing the command line.
+# The script dies, with a message naming the offending option, on any invalid
+# value. The current directory is changed to the working directory, if one has
+# been specified.
+sub SetupScriptUsage {
+
+  # Retrieve all the options...
+  %Options = ();
+  $Options{atomnumberstart} = 1;
+  $Options{chainidstart} = 'A';
+  $Options{chainidrenameempty} = 'No';
+  $Options{keepoldrecords} = 'no';
+  $Options{mode} = 'RenumberResidues';
+  $Options{modifyheader} = 'yes';
+  $Options{residuenumbermode} = 'PerChain';
+  $Options{residuenumberstart} = 1;
+  $Options{residuenumberhetatmmode} = 'Automatic';
+  $Options{residuenumberstarthetatm} = 6000;
+  $Options{waterresiduenames} = 'Automatic';
+  $Options{waterresiduestart} = 8000;
+
+  if (!GetOptions(\%Options, "help|h", "atomnumberstart|a=i", "chainidstart|c=s", "chainidrenameempty=s", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "overwrite|o", "residuenumbermode=s", "residuenumberstart=i", "residuenumberhetatmmode=s", "residuenumberstarthetatm=i", "root|r=s", "sequencelength=i", "waterresiduenames=s", "waterresiduestart=i", "workingdir|w=s")) {
+    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
+  }
+  if ($Options{workingdir}) {
+    if (! -d $Options{workingdir}) {
+      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
+    }
+    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
+  }
+  if (!IsPositiveInteger($Options{atomnumberstart})) {
+    die "Error: The value specified, $Options{atomnumberstart}, for option \"-a, --AtomNumberStart\" is not valid. Allowed values: >0\n";
+  }
+  if ((length($Options{chainidstart}) > 1) || ($Options{chainidstart} !~ /[A-Z]/i)) {
+    die "Error: The value specified, $Options{chainidstart}, for option \"-c, --ChainIDStart\" is not valid. Allowed values: a single character from A to Z\n";
+  }
+  if ($Options{chainidrenameempty} !~ /^(yes|no)$/i) {
+    die "Error: The value specified, $Options{chainidrenameempty}, for option \"--chainidrenameempty\" is not valid. Allowed values: yes or no\n";
+  }
+  if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
+    die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
+  }
+  if ($Options{mode} !~ /^(RenumberAtoms|RenumberResidues|RenumberWaters|RenameChainIDs)$/i) {
+    die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: RenumberAtoms, RenumberResidues, RenumberWaters or RenameChainIDs\n";
+  }
+  if ($Options{modifyheader} !~ /^(yes|no)$/i) {
+    die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
+  }
+  if ($Options{residuenumbermode} !~ /^(Sequential|PerChain)$/i) {
+    die "Error: The value specified, $Options{residuenumbermode}, for option \"--ResidueNumberMode\" is not valid. Allowed values: Sequential or PerChain\n";
+  }
+  if (!IsPositiveInteger($Options{residuenumberstart})) {
+    die "Error: The value specified, $Options{residuenumberstart}, for option \"--ResidueNumberStart\" is not valid. Allowed values: >0\n";
+  }
+  if ($Options{residuenumberhetatmmode} !~ /^(automatic|specify)$/i) {
+    # Name the option being validated, not "--residuenumbermode"...
+    die "Error: The value specified, $Options{residuenumberhetatmmode}, for option \"--ResidueNumberHetatmMode\" is not valid. Allowed values: automatic or specify\n";
+  }
+  if (!IsPositiveInteger($Options{residuenumberstarthetatm})) {
+    die "Error: The value specified, $Options{residuenumberstarthetatm}, for option \"--residuenumberstartHetatm\" is not valid. Allowed values: >0\n";
+  }
+  if (!IsPositiveInteger($Options{waterresiduestart})) {
+    die "Error: The value specified, $Options{waterresiduestart}, for option \"--waterresiduestart\" is not valid. Allowed values: >0\n";
+  }
+}
+
+__END__
+
+=head1 NAME
+
+ModifyPDBFiles.pl - Modify data in PDBFile(s)
+
+=head1 SYNOPSIS
+
+ModifyPDBFiles.pl PDBFile(s)...
+
+ModifyPDBFiles.pl [B<-a, --AtomNumberStart> number] [B<-c, --ChainIDStart> character]
+[B<--ChainIDRenameEmpty> yes | no] [B<-h, --help>] [B<-k, --KeepOldRecords> yes | no]
+[B<-m, --mode > RenumberAtoms | RenumberResidues | RenumberWaters | RenameChainIDs]
+[B<--ModifyHeader> yes | no] [B<-o, --overwrite>] [B<--ResidueNumberMode> Sequential | PerChain]
+[B<--ResidueNumberStart> number] [B<--ResidueNumberHetatmMode> automatic | specify]
+[B<--ResidueNumberStartHetatm> number] [B<-r, --root> rootname]
+[B<--WaterResidueNames> Automatic | "ResidueName, [ResidueName,...]"] [B<--WaterResidueStart> number]
+[B<-w, --WorkingDir> dirname] PDBFile(s)...
+
+=head1 DESCRIPTION
+
+Modify data in I<PDBFile(s)>: renumber atoms, residues, and water residues or assign new
+chain IDs. Multiple PDBFile names are separated by spaces. The valid file extension is I<.pdb>.
+All other file name extensions are ignored during the wild card expansion. All the PDB files
+in a current directory can be specified either by I<*.pdb> or the current directory name.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-a, --AtomNumberStart> I<number>
+
+Starting atom number to use during I<RenumberAtoms> value of B<-m, --mode> option. Default: I<1>.
+Valid values: positive integers.
+
+=item B<-c, --ChainIDStart> I<character>
+
+A single character to use for starting IDs for chains during I<RenameChainIDs> value of B<-m, --mode> option.
+Default: I<A>. Valid values: I<A to Z>.
+
+=item B<--ChainIDRenameEmpty> I<Yes | No>
+
+Specify whether to rename empty chain IDs during I<RenameChainIDs> B<-m, --mode> value. By
+default, ATOM and HETATM records with no chain IDs are left unchanged. Possible values:
+I<yes | no>. Default: I<No>.
+
+=item B<-h, --help>
+
+Print this help message.
+
+=item B<-k, --KeepOldRecords> I<yes | no>
+
+Specify whether to transfer old non ATOM and HETATM records from input PDBFile(s) to new
+PDBFile(s). By default, except for the HEADER record, all records other than ATOM/HETATM
+are dropped during the generation of new PDB files. Possible values: I<yes | no>.
+Default: I<no>.
+
+=item B<-m, --mode > I<RenumberAtoms | RenumberResidues | RenumberWaters | RenameChainIDs>
+
+Specify how to modify I<PDBFile(s)>. Possible values: I<RenumberAtoms | RenumberResidues
+| RenumberWaters | RenameChainIDs>. Default: I<RenumberResidues>.
+
+For I<RenumberAtoms> mode, atom serial numbers in ATOM and HETATM records are reassigned
+sequentially starting from the value of B<-a, --AtomNumberStart> option.
+
+For I<RenumberResidues> mode, residue numbers in ATOM and HETATM records are reassigned
+either sequentially or starting from specified values for ATOM and HETATM records in each
+chain.
+
+For I<RenumberWaters> mode, residue number for waters are reassigned starting from a specific
+value.
+
+For I<RenameChainIDs> mode, all the chain IDs are reassigned starting from a specific chain ID.
+
+During the generation of new PDB files, unnecessary CONECT records are dropped.
+
+=item B<--ModifyHeader> I<yes | no>
+
+Specify whether to modify HEADER record during the generation of new PDB files.
+Possible values: I<yes | no>.  Default: I<yes>. By default, Classification data is replaced
+by I<Data modified using MayaChemTools> before writing out HEADER record.
+
+=item B<-o, --overwrite>
+
+Overwrite existing files
+
+=item B<--ResidueNumberMode> I<Sequential | PerChain>
+
+Specify how to renumber residues: renumber residues sequentially across all the chains
+or start from the beginning for each chain. Possible values: I<Sequential | PerChain>. Default:
+I<PerChain>.
+
+=item B<--ResidueNumberStart> I<number>
+
+Starting residue number to use for ATOM records in chains. Default: I<1>. Valid values
+positive integers.
+
+For I<Sequential> value of B<--ResidueNumberMode> option, residue numbers are
+assigned sequentially across all the chains starting from the specified value.
+
+For I<PerChain> value of B<--ResidueNumberMode> option, residue numbers are
+starting again from the specified value for each chain.
+
+HETATM residues with in the chains are numbered using this value as well
+
+=item B<--ResidueNumberHetatmMode> I<automatic | specify>
+
+Specify how to start residue number for HETATM records: use the next sequential
+residue number after the last residue number from ATOM records or start from a
+specific residue number. Possible values: I<automatic | specify>. Default:
+I<automatic>
+
+For I<automatic> , residue number after highest residue number of ATOM
+records is used as the starting residue number for HETATM records.
+
+For I<specify>, value of option B<--ResidueNumberStartHetatm> is used as the
+starting residue number for HETATM records.
+
+This option along with B<--ResidueNumberStartHetatm> only applies to HETATM records
+outside the chains.
+
+=item B<--ResidueNumberStartHetatm> I<number>
+
+Starting residue number to use for HETATM records. Default: I<6000>. Valid values
+positive integers.
+
+=item B<-r, --root> I<rootname>
+
+New PDB and sequence file name is generated using the root: <Root><Mode>.<Ext>.
+Default new file name: <PDBFileName><Mode>.pdb. This option is ignored for multiple
+input files.
+
+=item B<--WaterResidueNames> I<Automatic | "ResidueName,[ResidueName,...]">
+
+Identification of water residues during I<RenumberWaters> value of B<-m, --mode> option. Possible
+values: I<Automatic | "ResidueName,[ResidueName,...]">. Default: I<Automatic> which corresponds
+to "HOH,WAT,H2O". You can also specify a different comma delimited list of residue names
+to use for water.
+
+=item B<--WaterResidueStart> I<number>
+
+Starting water residue number to use during I<RenumberWaters> B<-m, --mode> value.
+Default: I<8000>. Valid values: positive integers.
+
+=item B<-w, --WorkingDir> I<dirname>
+
+Location of working directory. Default: current directory.
+
+=back
+
+=head1 EXAMPLES
+
+To renumber ATOM and HETATM residues starting from 1 for each chain with continuation to
+HETATM residues outside TER records in Sample1.pdb and generate
+Sample1RenumberResidues.pdb file, type:
+
+    % ModifyPDBFiles.pl Sample1.pdb
+
+To renumber ATOM and HETATM residues sequentially across all chains starting from 1 with
+continuation to HETATM residues outside TER records in Sample1.pdb and generate
+Sample1RenumberResidues.pdb file, type:
+
+    % ModifyPDBFiles.pl --ResidueNumberMode Sequential -o Sample1.pdb
+
+To renumber ATOM and HETATM residues sequentially across all chains starting from 1 and
+HETATM residues outside TER records starting from 6000 in Sample1.pdb and generate
+Sample1RenumberResidues.pdb file, type:
+
+    % ModifyPDBFiles.pl --ResidueNumberMode Sequential
+      --ResidueNumberHetatmMode Specify  -o Sample1.pdb
+
+
+To renumber ATOM and HETATM residues sequentially across all chains starting from 100 for
+ATOM/HETATM  residues with in TER records and starting from 999 for HETATM residues
+outside TER records in Sample2.pdb and generate Sample2RenumberResidues.pdb file, type:
+
+    % ModifyPDBFiles.pl --ResidueNumberMode Sequential
+      --ResidueNumberHetatmMode Specify --ResidueNumberStart 100
+      --ResidueNumberStartHetatm 999 -o Sample2.pdb
+
+To renumber ATOM and HETATM residues from 100 for each chain and starting from 999 for
+HETATM  residues outside TER records in Sample2.pdb and generate Sample2RenumberResidues.pdb
+file, type:
+
+    % ModifyPDBFiles.pl --ResidueNumberMode PerChain
+      --ResidueNumberHetatmMode Specify --ResidueNumberStart 100
+      --ResidueNumberStartHetatm 999 -o Sample2.pdb
+
+To renumber ATOM serial numbers sequentially starting from 100 in Sample1.pdb file and generate
+Sample1RenumberAtoms.pdb file, type:
+
+    % ModifyPDBFiles.pl -m RenumberAtoms --AtomNumberStart 100
+      -o Sample1.pdb
+
+To renumber water residues identified by "HOH,WAT" starting from residue number 950
+in Sample2.pdb file and generate Sample2RenumberWaters.pdb file, type:
+
+    % ModifyPDBFiles.pl -m RenumberWaters --WaterResidueNames "HOH,WAT"
+      -o --WaterResidueStart 950 Sample2.pdb
+
+To rename all chain IDs starting from A in Sample1.pdb file and generate
+Sample1RenameChainIDs.pdb file, type:
+
+    % ModifyPDBFiles.pl -m RenameChainIDs -o Sample1.pdb
+
+To rename all chain IDs starting from B without assigning any chain IDs to ATOM/HETATM
+records with no chain IDs in Sample2.pdb file and generate Sample2RenameChainIDs.pdb file, type:
+
+    % ModifyPDBFiles.pl -m RenameChainIDs -c B --ChainIDRenameEmpty No
+      -o Sample2.pdb
+
+
+=head1 AUTHOR
+
+Manish Sud <msud@san.rr.com>
+
+=head1 SEE ALSO
+
+ExtractFromPDBFiles.pl, InfoPDBFiles.pl
+
+=head1 COPYRIGHT
+
+Copyright (C) 2015 Manish Sud. All rights reserved.
+
+This file is part of MayaChemTools.
+
+MayaChemTools is free software; you can redistribute it and/or modify it under
+the terms of the GNU Lesser General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+=cut
author	deepakjadmin
date	Wed, 20 Jan 2016 09:23:18 -0500
parents
children