Mercurial > repos > deepakjadmin > mayatool3_test2
view bin/ModifyPDBFiles.pl @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl -w
#
# $RCSfile: ModifyPDBFiles.pl,v $
# $Date: 2015/02/28 20:46:20 $
# $Revision: 1.25 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use Text::ParseWords;
use Benchmark;
use FileUtil;
use TextUtil;
use PDBFileUtil;

my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@PDBFilesList);
@PDBFilesList = ExpandFileNames(\@ARGV, "pdb");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input PDB file(s)...\n";
my(%PDBFilesInfo);
RetrievePDBFilesInfo();

# Process input files; only the ones flagged okay by RetrievePDBFilesInfo are modified...
if (@PDBFilesList > 1) {
  print "\nProcessing PDB files...\n";
}
for my $FileIndex (0 .. $#PDBFilesList) {
  if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
    ModifyPDBFiles($FileIndex);
  }
}
print "\n$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# Read the PDB file at FileIndex in @PDBFilesList and hand its record lines to
# the routine matching the value of the mode option...
#
sub ModifyPDBFiles {
  my($FileIndex) = @_;
  my($PDBFile, $PDBRecordLinesRef);

  # Get PDB data...
  $PDBFile = $PDBFilesList[$FileIndex];
  $PDBRecordLinesRef = ReadPDBFile($PDBFile);

  if ($OptionsInfo{Mode} =~ /^RenumberAtoms$/i) {
    RenumberAtoms($FileIndex, $PDBRecordLinesRef);
  }
  elsif ($OptionsInfo{Mode} =~ /^RenumberResidues$/i) {
    RenumberResidues($FileIndex, $PDBRecordLinesRef);
  }
  elsif ($OptionsInfo{Mode} =~ /^RenumberWaters$/i) {
    RenumberWaters($FileIndex, $PDBRecordLinesRef);
  }
  elsif ($OptionsInfo{Mode} =~ /^RenameChainIDs$/i) {
    RenameChainsIDs($FileIndex, $PDBRecordLinesRef);
  }
}

# Open the output file set up for FileIndex, write out header and any older
# records, and return the file handle along with the output file name. Dies
# when the file can't be opened for writing...
#
sub _OpenOutputPDBFile {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $OutFileRef);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";

  # Three argument open: the file name is never interpreted as a mode...
  open $OutFileRef, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFileRef, $PDBRecordLinesRef);

  return ($OutFileRef, $PDBFileName);
}

# Copy all CONECT records, unchanged, to the output file...
#
sub _WriteConectRecords {
  my($OutFileRef, $PDBRecordLinesRef) = @_;
  my($RecordLine);

  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsConectRecordType($RecordLine)) {
      print {$OutFileRef} "$RecordLine\n";
    }
  }
}

# Write out END record and close the output file. Buffered write errors only
# show up at close, so a failed close is fatal instead of being ignored...
#
sub _WriteEndRecordAndClose {
  my($OutFileRef, $PDBFileName) = @_;

  print {$OutFileRef} GenerateEndRecordLine(), "\n";
  close $OutFileRef or die "Error: Couldn't close $PDBFileName: $! \n";
}

# Renumber atom and hetro atom numbers...
#
# ATOM/HETATM records get consecutive serial numbers starting from the value
# of StartingAtomNumber; numbering restarts after each ENDMDL record. CONECT
# records are rewritten using the old to new atom number map.
#
sub RenumberAtoms {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($OutFileRef, $PDBFileName, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewAtomNumber, $RecordType, %OldToNewAtomNumbersMap);

  ($OutFileRef, $PDBFileName) = _OpenOutputPDBFile($FileIndex, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  %OldToNewAtomNumbersMap = ();
  $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);

      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      print {$OutFileRef} GenerateAtomOrHetatmRecordLine($RecordType, $NewAtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";

      $OldToNewAtomNumbersMap{$AtomNumber} = $NewAtomNumber;
      $NewAtomNumber++;
    }
    elsif (IsTerRecordType($RecordLine)) {
      # $NewAtomNumber already holds the next free serial number: use it for
      # TER and then advance. Advancing first would skip a number and hand
      # the same serial number to TER and to the atom following it.
      # Residue data comes from the last ATOM/HETATM record seen...
      print {$OutFileRef} GenerateTerRecordLine($NewAtomNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode), "\n";
      $NewAtomNumber++;
    }
    elsif (IsModelRecordType($RecordLine)) {
      print {$OutFileRef} "$RecordLine\n";
    }
    elsif (IsEndmdlRecordType($RecordLine)) {
      print {$OutFileRef} "$RecordLine\n";
      # Restart numbering...
      $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
    }
  }

  # Write out modified CONECT records...
  my($ConectAtomNum, @ModifiedConectAtomNums);
  LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
    if (!IsConectRecordType($RecordLine)) {
      next LINE;
    }
    @ModifiedConectAtomNums = ();
    for $ConectAtomNum (ParseConectRecordLine($RecordLine)) {
      # Undefined, empty or unknown atom numbers are passed through as is...
      if (defined($ConectAtomNum) && $ConectAtomNum && exists $OldToNewAtomNumbersMap{$ConectAtomNum}) {
        push @ModifiedConectAtomNums, $OldToNewAtomNumbersMap{$ConectAtomNum};
      }
      else {
        push @ModifiedConectAtomNums, $ConectAtomNum;
      }
    }
    # Write out the record...
    print {$OutFileRef} GenerateConectRecordLine(@ModifiedConectAtomNums), "\n";
  }

  _WriteEndRecordAndClose($OutFileRef, $PDBFileName);
}

# Renumber residues...
#
# A new residue number is handed out each time the residue number in the input
# changes. HETATM records after the last TER record are numbered separately,
# starting from StartingHetatmResidueNumber, unless HetatmResidueNumberMode
# is Automatic.
#
sub RenumberResidues {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($OutFileRef, $PDBFileName, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $NewHetatmResidueNumber, $TERCount, $TotalTERCount, $PreviousResidueNumber, $PreviousHetatmResidueNumber, $RecordType);

  ($OutFileRef, $PDBFileName) = _OpenOutputPDBFile($FileIndex, $PDBRecordLinesRef);

  # Do a quick count of all TER records; it's used to detect HETATM records
  # located after the last chain...
  $TotalTERCount = 0;
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsTerRecordType($RecordLine)) {
      $TotalTERCount++;
    }
  }

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
  $NewHetatmResidueNumber = $OptionsInfo{StartingHetatmResidueNumber};

  $TERCount = 0;
  $PreviousResidueNumber = 0;
  $PreviousHetatmResidueNumber = 0;

  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || (IsHetatmRecordType($RecordLine) && ($TERCount < $TotalTERCount || $OptionsInfo{HetatmResidueNumberMode} =~ /^Automatic$/i))) {
      $RecordType = GetPDBRecordType($RecordLine);
      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      if ($PreviousResidueNumber && $PreviousResidueNumber != $ResidueNumber) {
        $PreviousResidueNumber = $ResidueNumber;
        $NewResidueNumber++;
      }
      else {
        # First residue in a chain...
        $PreviousResidueNumber = $ResidueNumber;
      }
      print {$OutFileRef} GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsHetatmRecordType($RecordLine)) {
      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseHetatmRecordLine($RecordLine);

      # User HETATM residue numbers...
      if ($PreviousHetatmResidueNumber && $PreviousHetatmResidueNumber != $ResidueNumber) {
        $PreviousHetatmResidueNumber = $ResidueNumber;
        $NewHetatmResidueNumber++;
      }
      else {
        # First HETATM residue outside a chain...
        $PreviousHetatmResidueNumber = $ResidueNumber;
      }

      print {$OutFileRef} GenerateHetatmRecordLine($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewHetatmResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsTerRecordType($RecordLine)) {
      $TERCount++;

      # TER takes the serial number following the last atom written out...
      $AtomNumber++;
      print {$OutFileRef} GenerateTerRecordLine($AtomNumber, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode), "\n";

      # For per chain numbering, start over again...
      if ($OptionsInfo{ResidueNumberMode} =~ /^PerChain$/i) {
        if ($TERCount < $TotalTERCount ) {
          $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
        }
        # NOTE(review): after the last TER the number is kept and the previous
        # residue number is cleared, so the first HETATM residue following the
        # last chain appears to reuse the last residue number of that chain
        # in Automatic mode - confirm whether that is intended.
        $PreviousResidueNumber = 0;
      }
    }
    elsif (IsModelRecordType($RecordLine)) {
      print {$OutFileRef} "$RecordLine\n";
    }
    elsif (IsEndmdlRecordType($RecordLine)) {
      print {$OutFileRef} "$RecordLine\n";
    }
  }

  # Write out CONECT records...
  _WriteConectRecords($OutFileRef, $PDBRecordLinesRef);

  _WriteEndRecordAndClose($OutFileRef, $PDBFileName);
}

# Renumber water residues...
#
# ATOM/HETATM records with residue names present in SpecifiedWaterResiduesMap
# get new residue numbers starting from StartingWaterResidueNumber; all other
# records are written out unchanged.
#
sub RenumberWaters {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($OutFileRef, $PDBFileName, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $RecordType);

  ($OutFileRef, $PDBFileName) = _OpenOutputPDBFile($FileIndex, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  $NewResidueNumber = $OptionsInfo{StartingWaterResidueNumber};
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);
      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
        # NOTE(review): the number advances for every matching record, so a
        # water with explicit hydrogens gets a different residue number for
        # each of its atoms - confirm whether that is intended.
        $ResidueNumber = $NewResidueNumber;
        print {$OutFileRef} GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
        $NewResidueNumber++;
      }
      else {
        print {$OutFileRef} "$RecordLine\n";
      }
    }
    elsif (IsTerRecordType($RecordLine) || IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print {$OutFileRef} "$RecordLine\n";
    }
  }

  # Write out CONECT records...
  _WriteConectRecords($OutFileRef, $PDBRecordLinesRef);

  _WriteEndRecordAndClose($OutFileRef, $PDBFileName);
}

# Rename chain IDs...
#
# Chains get new IDs in order of first appearance, starting from the value of
# StartingChainID; a chain ID seen again later keeps the ID it was given the
# first time. Records for water residues are written out unchanged. Records
# with an empty chain ID are renamed only when RenameEmptyChainIDs is set.
#
sub RenameChainsIDs {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($OutFileRef, $PDBFileName, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $RecordType, $PreviousChainID, $FirstChainID, $NewChainID, $NextNewChainID, %OldToNewChainIDsMap);

  ($OutFileRef, $PDBFileName) = _OpenOutputPDBFile($FileIndex, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  %OldToNewChainIDsMap = ();

  # Next unused chain ID. It's assigned first and incremented afterwards, so
  # the starting ID is never skipped. Perl's string increment turns Z into AA;
  # NOTE(review): nothing here guards against running past Z.
  $NextNewChainID = $OptionsInfo{StartingChainID};

  $FirstChainID = 1;
  $PreviousChainID = '';
  $NewChainID = '';

  LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);

      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
        # Chain IDs are not assigned to water residues...
        print {$OutFileRef} "$RecordLine\n";
        next LINE;
      }

      if ($FirstChainID || $PreviousChainID ne $ChainID) {
        $FirstChainID = 0;

        # Always track the chain ID just seen, renamed or not. Otherwise
        # going back to an earlier chain after records with an empty chain
        # ID would go unnoticed and those records would lose their chain ID.
        $PreviousChainID = $ChainID;

        if ($ChainID || $OptionsInfo{RenameEmptyChainIDs}) {
          if (!exists $OldToNewChainIDsMap{$ChainID}) {
            $OldToNewChainIDsMap{$ChainID} = $NextNewChainID;
            $NextNewChainID++;
          }
          $NewChainID = $OldToNewChainIDsMap{$ChainID};
        }
        else {
          # Empty chain IDs are left alone...
          $NewChainID = '';
        }
      }

      print {$OutFileRef} GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsTerRecordType($RecordLine)) {
      # TER takes the serial number following the last atom written out...
      $AtomNumber++;
      print {$OutFileRef} GenerateTerRecordLine($AtomNumber, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode), "\n";
    }
    elsif (IsModelRecordType($RecordLine)) {
      print {$OutFileRef} "$RecordLine\n";
    }
    elsif (IsEndmdlRecordType($RecordLine)) {
      print {$OutFileRef} "$RecordLine\n";
    }
  }

  # Write out CONECT records...
  _WriteConectRecords($OutFileRef, $PDBRecordLinesRef);

  _WriteEndRecordAndClose($OutFileRef, $PDBFileName);
}

# Write out modifed header and other older records...
#
# OutFileRef is an open output file handle. With ModifyHeaderRecord set, a
# new HEADER record is generated; otherwise the first input record is copied.
# With KeepOldRecords set, the records following it are copied up to the
# first MODEL/ATOM/HETATM record.
#
sub WriteHeaderAndOlderRecords {
  my($OutFileRef, $PDBRecordLinesRef) = @_;
  my($FirstRecordLine);

  $FirstRecordLine = $PDBRecordLinesRef->[0];

  if ($OptionsInfo{ModifyHeaderRecord}) {
    # Write out modified HEADER record...
    my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
    $Classification = 'Data modified using MayaChemTools';
    print {$OutFileRef} GenerateHeaderRecordLine($IDCode, $Classification), "\n";
  }
  elsif (defined($FirstRecordLine) && !(IsModelRecordType($FirstRecordLine) || IsAtomRecordType($FirstRecordLine) || IsHetatmRecordType($FirstRecordLine))) {
    # A file without any header starts right away with coordinate data.
    # Copying such a first record here would write it out twice, as it's
    # also handled by the caller's loop over ATOM/HETATM records...
    print {$OutFileRef} $FirstRecordLine, "\n";
  }

  # Write out any old records...
  if ($OptionsInfo{KeepOldRecords}) {
    my($RecordLineIndex, $RecordLine);
    # Skip HEADER record and write out older records all the way upto first MODEL/ATOM/HETATM records from input file...
    RECORDLINE: for $RecordLineIndex (1 .. $#{$PDBRecordLinesRef}) {
      $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
      if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
        last RECORDLINE;
      }
      print {$OutFileRef} "$RecordLine\n";
    }
  }
}

# Get header record information assuming it's the first record...
#
# Returns (Classification, DepositionDate, IDCode); all three are empty
# strings when there are no records or the first one is not a HEADER record.
#
sub GetHeaderRecordInformation {
  my($PDBRecordLinesRef) = @_;
  my($Classification, $DepositionDate, $IDCode, $HeaderRecordLine);

  ($Classification, $DepositionDate, $IDCode) = ('') x 3;
  $HeaderRecordLine = $PDBRecordLinesRef->[0];

  if (defined($HeaderRecordLine) && IsHeaderRecordType($HeaderRecordLine)) {
    ($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine);
  }
  return ($Classification, $DepositionDate, $IDCode);
}

# Process option values...
# Copy validated command line option values from %Options into %OptionsInfo,
# turning yes/no values into 1/0 flags, and set up the list and the map of
# residue names treated as water...
#
sub ProcessOptions {
  my(@SpecifiedWaterResiduesList, $WaterResidueName);

  %OptionsInfo = ();

  $OptionsInfo{Mode} = $Options{mode};

  $OptionsInfo{StartingAtomNumber} = $Options{atomnumberstart};
  $OptionsInfo{StartingChainID} = $Options{chainidstart};
  $OptionsInfo{RenameEmptyChainIDs} = ($Options{chainidrenameempty} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0;
  $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{ResidueNumberMode} = $Options{residuenumbermode};
  $OptionsInfo{StartingResidueNumber} = $Options{residuenumberstart};

  $OptionsInfo{HetatmResidueNumberMode} = $Options{residuenumberhetatmmode};
  $OptionsInfo{StartingHetatmResidueNumber} = $Options{residuenumberstarthetatm};

  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;

  $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
  $OptionsInfo{StartingWaterResidueNumber} = $Options{waterresiduestart};

  @{$OptionsInfo{SpecifiedWaterResiduesList}} = ();
  %{$OptionsInfo{SpecifiedWaterResiduesMap}} = ();

  @SpecifiedWaterResiduesList = ();
  if ($OptionsInfo{WaterResidueNames} =~ /^\s*Automatic\s*$/i) {
    push @SpecifiedWaterResiduesList, ('HOH', 'WAT', 'H2O');
  }
  else {
    # Residue names are looked up by exact match later on, so drop any white
    # space typed around the commas, e.g. "HOH, WAT", and skip empty names...
    for $WaterResidueName (split /\,/, $Options{waterresiduenames}) {
      $WaterResidueName =~ s/^\s+|\s+$//g;
      if (length $WaterResidueName) {
        push @SpecifiedWaterResiduesList, $WaterResidueName;
      }
    }
  }
  for $WaterResidueName (@SpecifiedWaterResiduesList) {
    $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName;
  }
  push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList;
}

# Retrieve information about PDB files...
#
# For each file in @PDBFilesList, sets up FileOkay, OutFileRoot and
# OutFileNames entries in %PDBFilesInfo. A file is flagged okay only when it
# exists, is a readable PDB file containing at least one chain and its output
# file either doesn't exist or may be overwritten; otherwise a warning is
# issued and the file is skipped.
#
sub RetrievePDBFilesInfo {
  my($Index, $PDBFile, $PDBFileRef, $PDBRecordLinesRef, $ChainsAndResiduesInfoRef, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot, $Mode, $OutFileMode, @OutFileNames);

  %PDBFilesInfo = ();
  @{$PDBFilesInfo{FileOkay}} = ();
  @{$PDBFilesInfo{OutFileRoot}} = ();
  @{$PDBFilesInfo{OutFileNames}} = ();

  FILELIST: for $Index (0 .. $#PDBFilesList) {
    $PDBFilesInfo{FileOkay}[$Index] = 0;
    $PDBFilesInfo{OutFileRoot}[$Index] = '';
    @{$PDBFilesInfo{OutFileNames}[$Index]} = ();

    $PDBFile = $PDBFilesList[$Index];
    if (!(-e $PDBFile)) {
      warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($PDBFile, "pdb")) {
      warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
      next FILELIST;
    }
    # Make sure the file can be opened for reading...
    if (!open($PDBFileRef, '<', $PDBFile)) {
      warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $PDBFileRef;

    # Get PDB data...
    $PDBRecordLinesRef = ReadPDBFile($PDBFile);
    $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
    if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
      warn "Warning: Ignoring file $PDBFile: No chains found \n";
      next FILELIST;
    }

    # Setup output file names...
    @OutFileNames = ();
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);

    # The root option is used only for a single input file...
    if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      if ($RootFileName && $RootFileExt) {
        $FileName = $RootFileName;
      }
      else {
        $FileName = $OptionsInfo{OutFileRoot};
      }
    }
    $OutFileRoot = $FileName;

    $Mode = $OptionsInfo{Mode};
    if ($Mode =~ /^RenumberAtoms$/i) {
      $OutFileMode = 'RenumberAtoms';
    }
    elsif ($Mode =~ /^RenumberResidues$/i) {
      $OutFileMode = 'RenumberResidues';
    }
    elsif ($Mode =~ /^RenumberWaters$/i) {
      $OutFileMode = 'RenumberWaters';
    }
    elsif ($Mode =~ /^RenameChainIDs$/i) {
      $OutFileMode = 'RenameChainIDs';
    }
    else {
      $OutFileMode = '';
    }
    $OutFileName = "${OutFileRoot}${OutFileMode}.pdb";
    push @OutFileNames, $OutFileName;

    # Existing output files are replaced only on request: without this check
    # the overwrite option has no effect and files are silently clobbered...
    if (!$OptionsInfo{OverwriteFiles}) {
      for $OutFileName (@OutFileNames) {
        if (-e $OutFileName) {
          warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
          next FILELIST;
        }
      }
    }

    $PDBFilesInfo{FileOkay}[$Index] = 1;
    $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
    push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames;
  }
}

# Setup script usage and retrieve command line arguments specified
# using various options...
#
# Fills %Options with default values, overrides them with the values given on
# the command line, changes to the working directory when one is specified,
# and dies with an error message for any value which is not valid.
#
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();
  $Options{atomnumberstart} = 1;
  $Options{chainidstart} = 'A';
  $Options{chainidrenameempty} = 'No';
  $Options{keepoldrecords} = 'no';
  $Options{mode} = 'RenumberResidues';
  $Options{modifyheader} = 'yes';
  $Options{residuenumbermode} = 'PerChain';
  $Options{residuenumberstart} = 1;
  $Options{residuenumberhetatmmode} = 'Automatic';
  $Options{residuenumberstarthetatm} = 6000;
  $Options{waterresiduenames} = 'Automatic';
  $Options{waterresiduestart} = 8000;

  if (!GetOptions(\%Options, "help|h", "atomnumberstart|a=i", "chainidstart|c=s", "chainidrenameempty=s", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "overwrite|o", "residuenumbermode=s", "residuenumberstart=i", "residuenumberhetatmmode=s", "residuenumberstarthetatm=i", "root|r=s", "sequencelength=i", "waterresiduenames=s", "waterresiduestart=i", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }
  if (!IsPositiveInteger($Options{atomnumberstart})) {
    die "Error: The value specified, $Options{atomnumberstart}, for option \"-a, --AtomNumberStart\" is not valid. Allowed values: >0\n";
  }
  # Exactly one letter; anchored at both ends, so no separate length check is needed...
  if ($Options{chainidstart} !~ /\A[A-Z]\z/i) {
    die "Error: The value specified, $Options{chainidstart}, for option \"-c, --ChainIDStart\" is not valid. Allowed values: a single character from A to Z\n";
  }
  if ($Options{chainidrenameempty} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{chainidrenameempty}, for option \"--chainidrenameempty\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{mode} !~ /^(RenumberAtoms|RenumberResidues|RenumberWaters|RenameChainIDs)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: RenumberAtoms, RenumberResidues, RenumberWaters or RenameChainIDs\n";
  }
  if ($Options{modifyheader} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{residuenumbermode} !~ /^(Sequential|PerChain)$/i) {
    die "Error: The value specified, $Options{residuenumbermode}, for option \"--ResidueNumberMode\" is not valid. Allowed values: Sequential or PerChain\n";
  }
  if (!IsPositiveInteger($Options{residuenumberstart})) {
    die "Error: The value specified, $Options{residuenumberstart}, for option \"--ResidueNumberStart\" is not valid. Allowed values: >0\n";
  }
  if ($Options{residuenumberhetatmmode} !~ /^(automatic|specify)$/i) {
    # Report the option which was actually checked...
    die "Error: The value specified, $Options{residuenumberhetatmmode}, for option \"--ResidueNumberHetatmMode\" is not valid. Allowed values: automatic or specify\n";
  }
  if (!IsPositiveInteger($Options{residuenumberstarthetatm})) {
    die "Error: The value specified, $Options{residuenumberstarthetatm}, for option \"--residuenumberstartHetatm\" is not valid. Allowed values: >0\n";
  }
  if (!IsPositiveInteger($Options{waterresiduestart})) {
    die "Error: The value specified, $Options{waterresiduestart}, for option \"--waterresiduestart\" is not valid. Allowed values: >0\n";
  }
}

__END__

=head1 NAME

ModifyPDBFiles.pl - Modify data in PDBFile(s)

=head1 SYNOPSIS

ModifyPDBFiles.pl PDBFile(s)...

ModifyPDBFiles.pl [B<-a, --AtomNumberStart> number] [B<-c, --ChainIDStart> character]
[B<--ChainIDRenameEmpty> yes | no] [B<-h, --help>] [B<-k, --KeepOldRecords> yes | no]
[B<-m, --mode> RenumberAtoms | RenumberResidues | RenumberWaters | RenameChainIDs]
[B<--ModifyHeader> yes | no] [B<-o, --overwrite>] [B<--ResidueNumberMode> Sequential | PerChain]
[B<--ResidueNumberStart> number] [B<--ResidueNumberHetatmMode> automatic | specify]
[B<--ResidueNumberStartHetatm> number] [B<-r, --root> rootname]
[B<--WaterResidueNames> Automatic | "ResidueName, [ResidueName,...]"] [B<--WaterResidueStart> number]
[B<-w, --WorkingDir> dirname] PDBFile(s)...

=head1 DESCRIPTION

Modify data in I<PDBFile(s)>: renumber atoms, residues, and water residues or assign new
chain IDs. Multiple PDBFile names are separated by spaces. The valid file extension is I<.pdb>.
All other file name extensions are ignored during the wild card expansion. All the PDB files
in a current directory can be specified either by I<*.pdb> or the current directory name.

=head1 OPTIONS

=over 4

=item B<-a, --AtomNumberStart> I<number>

Starting atom number to use during I<RenumberAtoms> value of B<-m, --mode> option. Default: I<1>.
Valid values: positive integers.

=item B<-c, --ChainIDStart> I<character>

A single character to use for starting IDs for chains during I<RenameChainIDs> value of B<-m, --mode> option.
Default: I<A>. Valid values: I<A to Z>.

=item B<--ChainIDRenameEmpty> I<Yes | No>

Specify whether to rename empty chain IDs during I<RenameChainIDs> B<-m, --mode> value. By
default, ATOM and HETATM records with no chain IDs are left unchanged. Possible values:
I<yes | no>. Default: I<No>.

=item B<-h, --help>

Print this help message.

=item B<-k, --KeepOldRecords> I<yes | no>

Specify whether to transfer old non ATOM and HETATM records from input PDBFile(s) to new
PDBFile(s). By default, except for the HEADER record, all records other than ATOM/HETATM
are dropped during the generation of new PDB files.
Possible values: I<yes | no>. Default: I<no>.

=item B<-m, --mode> I<RenumberAtoms | RenumberResidues | RenumberWaters | RenameChainIDs>

Specify how to modify I<PDBFile(s)>. Possible values: I<RenumberAtoms | RenumberResidues
| RenumberWaters | RenameChainIDs>. Default: I<RenumberResidues>.

For I<RenumberAtoms> mode, serial numbers in ATOM and HETATM records are reassigned
sequentially starting from the value of B<-a, --AtomNumberStart> option.

For I<RenumberResidues> mode, residue numbers in ATOM and HETATM records are reassigned
either sequentially or starting from specified values for ATOM and HETATM records in each
chain.

For I<RenumberWaters> mode, residue numbers for waters are reassigned starting from a specific
value.

For I<RenameChainIDs> mode, all the chain IDs are reassigned starting from a specific chain ID.

During the generation of new PDB files, unnecessary CONECT records are dropped.

=item B<--ModifyHeader> I<yes | no>

Specify whether to modify HEADER record during the generation of new PDB files.
Possible values: I<yes | no>. Default: I<yes>. By default, Classification data is replaced
by I<Data modified using MayaChemTools> before writing out HEADER record.

=item B<-o, --overwrite>

Overwrite existing files.

=item B<--ResidueNumberMode> I<Sequential | PerChain>

Specify how to renumber residues: renumber residues sequentially across all the chains
or start from the beginning for each chain. Possible values: I<Sequential | PerChain>. Default:
I<PerChain>.

=item B<--ResidueNumberStart> I<number>

Starting residue number to use for ATOM records in chains. Default: I<1>. Valid values:
positive integers.

For I<Sequential> value of B<--ResidueNumberMode> option, residue numbers are
assigned sequentially across all the chains starting from the specified value.

For I<PerChain> value of B<--ResidueNumberMode> option, residue numbers are
starting again from the specified value for each chain.
HETATM residues within the chains are numbered using this value as well.

=item B<--ResidueNumberHetatmMode> I<automatic | specify>

Specify how to start residue number for HETATM records: use the next sequential
residue number after the last residue number from ATOM records or start from a
specific residue number. Possible values: I<automatic | specify>. Default:
I<automatic>.

For I<automatic>, residue number after highest residue number of ATOM
records is used as the starting residue number for HETATM records.

For I<specify>, value of option B<--ResidueNumberStartHetatm> is used as the
starting residue number for HETATM records.

This option along with B<--ResidueNumberStartHetatm> only applies to HETATM records
outside the chains.

=item B<--ResidueNumberStartHetatm> I<number>

Starting residue number to use for HETATM records. Default: I<6000>. Valid values:
positive integers.

=item B<-r, --root> I<rootname>

New PDB file name is generated using the root: <Root><Mode>.<Ext>. Default new file
name: <PDBFileName><Mode>.pdb. This option is ignored for multiple input files.

=item B<--WaterResidueNames> I<Automatic | "ResidueName,[ResidueName,...]">

Identification of water residues during I<RenumberWaters> value of B<-m, --mode> option. Possible
values: I<Automatic | "ResidueName,[ResidueName,...]">. Default: I<Automatic> which corresponds
to "HOH,WAT,H2O". You can also specify a different comma delimited list of residue names
to use for water.

=item B<--WaterResidueStart> I<number>

Starting water residue number to use during I<RenumberWaters> B<-m, --mode> value. Default: I<8000>.
Valid values: positive integers.

=item B<-w, --WorkingDir> I<dirname>

Location of working directory. Default: current directory.

=back

=head1 EXAMPLES

To renumber ATOM and HETATM residues starting from 1 for each chain with continuation to
HETATM residues outside TER records in Sample2.pdb and generate
Sample2RenumberResidues.pdb file, type:

    % ModifyPDBFiles.pl Sample2.pdb

To renumber ATOM and HETATM residues sequentially across all chains starting from 1 with
continuation to HETATM residues outside TER records in Sample2.pdb and generate
Sample2RenumberResidues.pdb file, type:

    % ModifyPDBFiles.pl --ResidueNumberMode Sequential -o Sample2.pdb

To renumber ATOM and HETATM residues sequentially across all chains starting from 1 and
HETATM residues outside TER records starting from 6000 in Sample2.pdb and generate
Sample2RenumberResidues.pdb file, type:

    % ModifyPDBFiles.pl --ResidueNumberMode Sequential
      --ResidueNumberHetatmMode Specify -o Sample2.pdb

To renumber ATOM and HETATM residues sequentially across all chains starting from 100 for
ATOM/HETATM residues within TER records and starting from 999 for HETATM residues
outside TER records in Sample2.pdb and generate Sample2RenumberResidues.pdb file, type:

    % ModifyPDBFiles.pl --ResidueNumberMode Sequential
      --ResidueNumberHetatmMode Specify --ResidueNumberStart 100
      --ResidueNumberStartHetatm 999 -o Sample2.pdb

To renumber ATOM and HETATM residues from 100 for each chain and starting from 999 for
HETATM residues outside TER records in Sample2.pdb and generate
Sample2RenumberResidues.pdb file, type:

    % ModifyPDBFiles.pl --ResidueNumberMode PerChain
      --ResidueNumberHetatmMode Specify --ResidueNumberStart 100
      --ResidueNumberStartHetatm 999 -o Sample2.pdb

To renumber ATOM serial numbers sequentially starting from 100 in Sample1.pdb file and generate
Sample1RenumberAtoms.pdb file, type:

    % ModifyPDBFiles.pl -m RenumberAtoms --AtomNumberStart 100
      -o Sample1.pdb

To renumber water residues identified by "HOH,WAT" starting from residue number 950 in
Sample2.pdb file and generate Sample2RenumberWaters.pdb file, type:

    % ModifyPDBFiles.pl -m RenumberWaters
      --WaterResidueNames "HOH,WAT" -o --WaterResidueStart 950 Sample2.pdb

To rename all chain IDs starting from A in Sample1.pdb file and generate
Sample1RenameChainIDs.pdb file, type:

    % ModifyPDBFiles.pl -m RenameChainIDs -o Sample1.pdb

To rename all chain IDs starting from B without assigning any chain IDs to ATOM/HETATM
with no chain IDs in Sample2.pdb file and generate Sample2RenameChainIDs.pdb file, type:

    % ModifyPDBFiles.pl -m RenameChainIDs -c B --ChainIDRenameEmpty No
      -o Sample2.pdb

=head1 AUTHOR

Manish Sud <msud@san.rr.com>

=head1 SEE ALSO

ExtractFromPDBFiles.pl, InfoPDBFiles.pl

=head1 COPYRIGHT

Copyright (C) 2015 Manish Sud. All rights reserved.

This file is part of MayaChemTools.

MayaChemTools is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

=cut