#!/usr/bin/perl -w
#
# $RCSfile: ModifyPDBFiles.pl,v $
# $Date: 2015/02/28 20:46:20 $
# $Revision: 1.25 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use Text::ParseWords;
use Benchmark;
use FileUtil;
use TextUtil;
use PDBFileUtil;

# File-level state shared by the subroutines of this script: the script name
# used in messages, the raw command line options and the timing objects.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
# Direct method call instead of the ambiguous indirect object form "new Benchmark"...
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

# Input PDB file names left on the command line after option parsing...
my(@PDBFilesList);
@PDBFilesList = ExpandFileNames(\@ARGV, "pdb");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input PDB file(s)...\n";
my(%PDBFilesInfo);
RetrievePDBFilesInfo();

# Process input files..
my($FileIndex);
if (@PDBFilesList > 1) {
  print "\nProcessing PDB files...\n";
}
for $FileIndex (0 .. $#PDBFilesList) {
  # Files flagged as not okay by RetrievePDBFilesInfo are skipped...
  if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
    ModifyPDBFiles($FileIndex);
  }
}
print "\n$ScriptName:Done...\n\n";

# Direct method call instead of the ambiguous indirect object form "new Benchmark"...
$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# Modify appropriate information...
#
# Reads the record lines of the PDB file at position $FileIndex in @PDBFilesList
# and passes them to the subroutine that implements the mode selected in
# $OptionsInfo{Mode}. Nothing is done for an unrecognized mode.
sub ModifyPDBFiles {
  my($FileIndex) = @_;
  my($PDBFile, $PDBRecordLinesRef);

  # Get PDB data...
  $PDBFile = $PDBFilesList[$FileIndex];
  $PDBRecordLinesRef = ReadPDBFile($PDBFile);

  if ($OptionsInfo{Mode} =~ /^RenumberAtoms$/i) {
    RenumberAtoms($FileIndex, $PDBRecordLinesRef);
  }
  elsif ($OptionsInfo{Mode} =~ /^RenumberResidues$/i) {
    RenumberResidues($FileIndex, $PDBRecordLinesRef);
  }
  elsif ($OptionsInfo{Mode} =~ /^RenumberWaters$/i) {
    RenumberWaters($FileIndex, $PDBRecordLinesRef);
  }
  elsif ($OptionsInfo{Mode} =~ /^RenameChainIDs$/i) {
    RenameChainsIDs($FileIndex, $PDBRecordLinesRef);
  }
}

# Renumber atom and hetro atom numbers...
#
# Writes the output file set up for $FileIndex with ATOM/HETATM records carrying
# consecutive atom numbers starting from $OptionsInfo{StartingAtomNumber}. A TER
# record takes the number following the last atom written and the next atom
# continues after it. Numbering restarts after each ENDMDL record. Atom numbers
# in CONECT records are translated using the old to new atom number map; numbers
# not present in the map are written unchanged.
#
# Parameters:
#   $FileIndex          - index into @PDBFilesList / %PDBFilesInfo
#   $PDBRecordLinesRef  - reference to the list of record lines of the input file
#
# Dies when the output file can't be opened or closed.
sub RenumberAtoms {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $OutFH, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation,
     $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy,
     $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewAtomNumber,
     $RecordType, %OldToNewAtomNumbersMap);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  # Three argument open with a lexical handle: the file name is never parsed for a mode...
  open($OutFH, ">", $PDBFileName) or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older recors...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  %OldToNewAtomNumbersMap = ();
  $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);

      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      print {$OutFH} GenerateAtomOrHetatmRecordLine($RecordType, $NewAtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";

      $OldToNewAtomNumbersMap{$AtomNumber} = $NewAtomNumber;
      $NewAtomNumber++;
    }
    elsif (IsTerRecordType($RecordLine)) {
      # $NewAtomNumber already holds the number following the last atom written: use
      # it for TER first and advance afterwards. Incrementing before writing skipped
      # one number and gave TER the same number as the atom following it...
      print {$OutFH} GenerateTerRecordLine($NewAtomNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode), "\n";
      $NewAtomNumber++;
    }
    elsif (IsModelRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
    elsif (IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
      # Restart numbering...
      $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
    }
  }

  # Write out modified CONECT records...
  my(@ModifiedConectAtomNums);
  LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
    if (!IsConectRecordType($RecordLine)) {
      next LINE;
    }
    # Undefined, zero/empty and unmapped atom numbers are passed through unchanged...
    @ModifiedConectAtomNums = map {
      (defined($_) && $_ && exists $OldToNewAtomNumbersMap{$_}) ? $OldToNewAtomNumbersMap{$_} : $_
    } ParseConectRecordLine($RecordLine);

    # Write out the record...
    print {$OutFH} GenerateConectRecordLine(@ModifiedConectAtomNums), "\n";
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close...
  close($OutFH) or die "Error: Couldn't close $PDBFileName: $! \n";
}

# Renumber residues...
#
# Writes the output file set up for $FileIndex with new residue numbers in
# ATOM/HETATM records. The new number advances each time the residue number read
# from the input differs from the one on the previous record. Numbering starts
# from $OptionsInfo{StartingResidueNumber}; for PerChain residue number mode it
# starts over after each TER record except the last one. HETATM records after
# the last TER record are numbered separately, starting from
# $OptionsInfo{StartingHetatmResidueNumber}, unless HETATM residue number mode
# is Automatic. CONECT records are copied unchanged.
#
# Parameters:
#   $FileIndex          - index into @PDBFilesList / %PDBFilesInfo
#   $PDBRecordLinesRef  - reference to the list of record lines of the input file
#
# Dies when the output file can't be opened or closed.
sub RenumberResidues {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $OutFH, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation,
     $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy,
     $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber,
     $NewHetatmResidueNumber, $TERCount, $TotalTERCount, $PreviousResidueNumber,
     $PreviousHetatmResidueNumber, $RecordType);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  # Three argument open with a lexical handle: the file name is never parsed for a mode...
  open($OutFH, ">", $PDBFileName) or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older recors...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Do a quick count of all TER records...
  $TotalTERCount = 0;
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsTerRecordType($RecordLine)) {
      $TotalTERCount++;
    }
  }

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
  $NewHetatmResidueNumber = $OptionsInfo{StartingHetatmResidueNumber};

  $TERCount = 0;

  # undef, rather than 0, marks "no previous residue": 0 is a usable residue number
  # and as a marker it caused the residue following residue 0 to share its new number...
  $PreviousResidueNumber = undef;
  $PreviousHetatmResidueNumber = undef;

  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || (IsHetatmRecordType($RecordLine) && ($TERCount < $TotalTERCount || $OptionsInfo{HetatmResidueNumberMode} =~ /^Automatic$/i))) {
      $RecordType = GetPDBRecordType($RecordLine);
      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      # A change in input residue number starts a new residue; the first residue
      # in a chain keeps the current new number...
      if (defined($PreviousResidueNumber) && $PreviousResidueNumber != $ResidueNumber) {
        $NewResidueNumber++;
      }
      $PreviousResidueNumber = $ResidueNumber;

      print {$OutFH} GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsHetatmRecordType($RecordLine)) {
      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseHetatmRecordLine($RecordLine);

      # User HETATM residue numbers: the first HETATM residue outside a chain keeps
      # the starting number...
      if (defined($PreviousHetatmResidueNumber) && $PreviousHetatmResidueNumber != $ResidueNumber) {
        $NewHetatmResidueNumber++;
      }
      $PreviousHetatmResidueNumber = $ResidueNumber;

      print {$OutFH} GenerateHetatmRecordLine($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewHetatmResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsTerRecordType($RecordLine)) {
      $TERCount++;
      # TER is written with the atom number following the last atom read and the
      # residue information of that atom...
      $AtomNumber++;
      print {$OutFH} GenerateTerRecordLine($AtomNumber, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode), "\n";
      # For per chain numbering, start over again...
      if ($OptionsInfo{ResidueNumberMode} =~ /^PerChain$/i) {
        if ($TERCount < $TotalTERCount ) {
          $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
        }
        $PreviousResidueNumber = undef;
      }
    }
    elsif (IsModelRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
    elsif (IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out CONECT records...
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsConectRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close...
  close($OutFH) or die "Error: Couldn't close $PDBFileName: $! \n";
}

# Renumber water residues...
#
# Writes the output file set up for $FileIndex. ATOM/HETATM records whose residue
# name is a key of $OptionsInfo{SpecifiedWaterResiduesMap} get a new residue
# number taken from a counter starting at $OptionsInfo{StartingWaterResidueNumber};
# all other ATOM/HETATM records as well as TER, MODEL, ENDMDL and CONECT records
# are copied unchanged.
#
# NOTE(review): the counter advances after every matching record, so a water
# residue made up of several atom records gets one number per atom - confirm
# this is intended for input containing water hydrogens.
#
# Parameters:
#   $FileIndex          - index into @PDBFilesList / %PDBFilesInfo
#   $PDBRecordLinesRef  - reference to the list of record lines of the input file
#
# Dies when the output file can't be opened or closed.
sub RenumberWaters {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $OutFH, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation,
     $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy,
     $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber,
     $RecordType);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  # Three argument open with a lexical handle: the file name is never parsed for a mode...
  open($OutFH, ">", $PDBFileName) or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older recors...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  $NewResidueNumber = $OptionsInfo{StartingWaterResidueNumber};
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);

      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
        $ResidueNumber = $NewResidueNumber;
        print {$OutFH} GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
        $NewResidueNumber++;
      }
      else {
        print {$OutFH} "$RecordLine\n";
      }
    }
    elsif (IsTerRecordType($RecordLine) || IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out CONECT records...
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsConectRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close...
  close($OutFH) or die "Error: Couldn't close $PDBFileName: $! \n";
}

# Rename chain IDs...
#
# Writes the output file set up for $FileIndex with new chain IDs in ATOM/HETATM
# records. The first chain ID to be renamed gets $OptionsInfo{StartingChainID};
# each further distinct chain ID gets the next value produced by string
# increment, and a chain ID seen before gets the ID already assigned to it.
# Records with an empty chain ID are written with an empty chain ID unless
# $OptionsInfo{RenameEmptyChainIDs} is set. Records of water residues and
# MODEL, ENDMDL and CONECT records are copied unchanged.
#
# NOTE(review): string increment turns 'Z' into 'AA'; input with more chains
# than letters left after the starting ID would get IDs longer than one
# character - confirm how GenerateAtomOrHetatmRecordLine handles that.
#
# Parameters:
#   $FileIndex          - index into @PDBFilesList / %PDBFilesInfo
#   $PDBRecordLinesRef  - reference to the list of record lines of the input file
#
# Dies when the output file can't be opened or closed.
sub RenameChainsIDs {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $OutFH, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation,
     $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy,
     $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $RecordType,
     $NewChainID, $NewChainIDCounter, $AssignedChainIDsCount, %OldToNewChainIDsMap);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  # Three argument open with a lexical handle: the file name is never parsed for a mode...
  open($OutFH, ">", $PDBFileName) or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older recors...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  %OldToNewChainIDsMap = ();
  $NewChainIDCounter = $OptionsInfo{StartingChainID};
  $AssignedChainIDsCount = 0;
  $NewChainID = '';
  LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);

      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
        # Chain IDs are not assigned to water residues...
        print {$OutFH} "$RecordLine\n";
        next LINE;
      }

      # Emptiness is decided by length and not by truth so that chain ID '0' is
      # treated like any other chain ID...
      $ChainID = '' if !defined($ChainID);

      # The new ID is determined for every record from the old to new map instead
      # of only at chain ID changes. This keeps the starting ID available when the
      # first chain is an empty one left alone and gives a chain its assigned ID
      # again when it continues after records with an empty chain ID...
      if (!length($ChainID) && !$OptionsInfo{RenameEmptyChainIDs}) {
        $NewChainID = '';
      }
      elsif (exists $OldToNewChainIDsMap{$ChainID}) {
        $NewChainID = $OldToNewChainIDsMap{$ChainID};
      }
      else {
        # The first assignment uses the starting ID as is; later ones advance it...
        if ($AssignedChainIDsCount) {
          $NewChainIDCounter++;
        }
        $AssignedChainIDsCount++;
        $NewChainID = $NewChainIDCounter;
        $OldToNewChainIDsMap{$ChainID} = $NewChainID;
      }

      print {$OutFH} GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsTerRecordType($RecordLine)) {
      # TER is written with the atom number following the last atom read...
      $AtomNumber++;
      print {$OutFH} GenerateTerRecordLine($AtomNumber, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode), "\n";
    }
    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out CONECT records...
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsConectRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close...
  close($OutFH) or die "Error: Couldn't close $PDBFileName: $! \n";
}


# Write out modifed header and other older records...
#
# Writes the leading part of the output file to the handle $OutFileRef:
#
#   . With $OptionsInfo{ModifyHeaderRecord} set, a generated HEADER record using
#     the ID code of the input HEADER record, when there is one, and a fixed
#     classification text.
#   . Otherwise the first input record as is, unless it is a MODEL, ATOM or
#     HETATM record: those are written by the caller and would end up twice in
#     the output.
#   . With $OptionsInfo{KeepOldRecords} set, the input records following the
#     HEADER record up to the first MODEL, ATOM or HETATM record. A first input
#     record which is not a HEADER record is kept as well.
#
# Parameters:
#   $OutFileRef         - output file handle or reference to one
#   $PDBRecordLinesRef  - reference to the list of record lines of the input file
sub WriteHeaderAndOlderRecords {
  my($OutFileRef, $PDBRecordLinesRef) = @_;
  my($FirstRecordLine, $FirstOldRecordIndex);

  $FirstRecordLine = $PDBRecordLinesRef->[0];

  # Index of the first input record considered for writing as an old record. The
  # first record is skipped whenever it has already been dealt with as the header...
  $FirstOldRecordIndex = 1;

  if ($OptionsInfo{ModifyHeaderRecord}) {
    # Write out modified HEADER record...
    my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
    $Classification = 'Data modified using MayaChemTools';
    print {$OutFileRef} GenerateHeaderRecordLine($IDCode, $Classification), "\n";

    if (!IsHeaderRecordType($FirstRecordLine)) {
      # No HEADER record in input: the first record isn't replaced by the generated one...
      $FirstOldRecordIndex = 0;
    }
  }
  elsif (IsModelRecordType($FirstRecordLine) || IsAtomRecordType($FirstRecordLine) || IsHetatmRecordType($FirstRecordLine)) {
    # Input starts with coordinate data: there is nothing to write in front of it...
    $FirstOldRecordIndex = 0;
  }
  else {
    print {$OutFileRef} $FirstRecordLine, "\n";
  }

  # Write out any old records...
  if ($OptionsInfo{KeepOldRecords}) {
    my($RecordLineIndex, $RecordLine);
    # Write out older records all the way upto first MODEL/ATOM/HETATM records from input file...
    RECORDLINE: for $RecordLineIndex ($FirstOldRecordIndex .. $#{$PDBRecordLinesRef}) {
      $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
      if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
        last RECORDLINE;
      }
      print {$OutFileRef} "$RecordLine\n";
    }
  }
}

# Get header record information assuming it's the first record...
#
# Returns the list (Classification, DepositionDate, IDCode) parsed from the first
# record line when it is a HEADER record; otherwise three empty strings.
sub GetHeaderRecordInformation {
  my($PDBRecordLinesRef) = @_;
  my($HeaderRecordLine);

  $HeaderRecordLine = $PDBRecordLinesRef->[0];
  if (!IsHeaderRecordType($HeaderRecordLine)) {
    return ('', '', '');
  }

  my($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine);
  return ($Classification, $DepositionDate, $IDCode);
}


# Process option values...
#
# Fills the file-level %OptionsInfo hash from the raw command line values in
# %Options: yes/no values are turned into 1/0 flags and the water residue names
# into both a list and a lookup map keyed by residue name.
sub ProcessOptions {
  %OptionsInfo = ();
  $OptionsInfo{Mode} = $Options{mode};

  $OptionsInfo{StartingAtomNumber} = $Options{atomnumberstart};
  $OptionsInfo{StartingChainID} = $Options{chainidstart};
  $OptionsInfo{RenameEmptyChainIDs} = ($Options{chainidrenameempty} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0;
  $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{ResidueNumberMode} = $Options{residuenumbermode};
  $OptionsInfo{StartingResidueNumber} = $Options{residuenumberstart};

  $OptionsInfo{HetatmResidueNumberMode} = $Options{residuenumberhetatmmode};
  $OptionsInfo{StartingHetatmResidueNumber} = $Options{residuenumberstarthetatm};

  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;

  $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
  $OptionsInfo{StartingWaterResidueNumber} = $Options{waterresiduestart};
  @{$OptionsInfo{SpecifiedWaterResiduesList}} = ();
  %{$OptionsInfo{SpecifiedWaterResiduesMap}} = ();

  my(@SpecifiedWaterResiduesList, $WaterResidueName);
  @SpecifiedWaterResiduesList = ();
  if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) {
    push @SpecifiedWaterResiduesList, ('HOH', 'WAT', 'H2O');
  }
  else {
    # Spaces around the commas are dropped and empty names ignored: a value like
    # "HOH, WAT" would otherwise produce the name " WAT"...
    for my $SpecifiedName (split /\,/, $Options{waterresiduenames}) {
      $WaterResidueName = $SpecifiedName;
      $WaterResidueName =~ s/^\s+|\s+$//g;
      if (length($WaterResidueName)) {
        push @SpecifiedWaterResiduesList, $WaterResidueName;
      }
    }
  }
  for $WaterResidueName (@SpecifiedWaterResiduesList) {
    $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName;
  }
  push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList;
}

# Retrieve information about PDB files...
#
# Fills the file-level %PDBFilesInfo hash with one entry per file in
# @PDBFilesList: FileOkay (1 when the file is to be processed), OutFileRoot and
# OutFileNames (list holding the output file name). A file is ignored, with a
# warning, when it doesn't exist, isn't a PDB file, can't be opened, contains no
# chains, or when its output file already exists and overwriting hasn't been
# asked for.
#
# The output file name is <Root><Mode>.pdb where Root is the input file name
# without directory and extension or, for a single input file, the value of the
# root option.
sub RetrievePDBFilesInfo {
  my($Index, $PDBFile, $PDBFH, $PDBRecordLinesRef, $ChainsAndResiduesInfoRef, $FileDir,
     $FileName, $FileExt, $OutFileName, $OutFileRoot, $Mode, $OutFileMode, @OutFileNames);

  %PDBFilesInfo = ();
  @{$PDBFilesInfo{FileOkay}} = ();
  @{$PDBFilesInfo{OutFileRoot}} = ();
  @{$PDBFilesInfo{OutFileNames}} = ();

  FILELIST: for $Index (0 .. $#PDBFilesList) {
    $PDBFilesInfo{FileOkay}[$Index] = 0;

    $PDBFilesInfo{OutFileRoot}[$Index] = '';
    @{$PDBFilesInfo{OutFileNames}[$Index]} = ();

    $PDBFile = $PDBFilesList[$Index];
    if (!(-e $PDBFile)) {
      warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($PDBFile, "pdb")) {
      warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
      next FILELIST;
    }
    # Readability check only. Three argument open keeps characters in the file name
    # from being taken as an open mode...
    if (!open($PDBFH, "<", $PDBFile)) {
      warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $PDBFH;

    # Get PDB data...
    $PDBRecordLinesRef = ReadPDBFile($PDBFile);
    $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
    if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
      warn "Warning: Ignoring file $PDBFile: No chains found \n";
      next FILELIST;
    }

    # Setup output file names...
    @OutFileNames = ();
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
    if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      if ($RootFileName && $RootFileExt) {
        $FileName = $RootFileName;
      }
      else {
        $FileName = $OptionsInfo{OutFileRoot};
      }
      $OutFileRoot = $FileName;
    }
    else {
      $OutFileRoot = $FileName;
    }
    $Mode = $OptionsInfo{Mode};
    MODE: {
      if ($Mode =~ /^RenumberAtoms$/i) { $OutFileMode = 'RenumberAtoms'; last MODE;}
      if ($Mode =~ /^RenumberResidues$/i) { $OutFileMode = 'RenumberResidues'; last MODE;}
      if ($Mode =~ /^RenumberWaters$/i) { $OutFileMode = 'RenumberWaters'; last MODE;}
      if ($Mode =~ /^RenameChainIDs$/i) { $OutFileMode = 'RenameChainIDs'; last MODE;}
      $OutFileMode = '';
    }
    $OutFileName = "${OutFileRoot}${OutFileMode}.pdb";

    # Honor the overwrite option, which was collected but never consulted: leave an
    # existing output file alone unless overwriting has been asked for...
    if (!$OptionsInfo{OverwriteFiles} && -e $OutFileName) {
      warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
      next FILELIST;
    }
    push @OutFileNames, $OutFileName;

    $PDBFilesInfo{FileOkay}[$Index] = 1;
    $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;

    push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames;
  }
}

# Setup script usage  and retrieve command line arguments specified using various options...
#
# Sets the default option values in the file-level %Options hash, overrides them
# with the values given on the command line, changes to the working directory
# when one is specified and validates the option values. Dies with an error
# message on an unknown option or an invalid value.
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();
  $Options{atomnumberstart} = 1;
  $Options{chainidstart} = 'A';
  $Options{chainidrenameempty} = 'No';
  $Options{keepoldrecords} = 'no';
  $Options{mode} = 'RenumberResidues';
  $Options{modifyheader} = 'yes';
  $Options{residuenumbermode} = 'PerChain';
  $Options{residuenumberstart} = 1;
  $Options{residuenumberhetatmmode} = 'Automatic';
  $Options{residuenumberstarthetatm} = 6000;
  $Options{waterresiduenames} = 'Automatic';
  $Options{waterresiduestart} = 8000;

  if (!GetOptions(\%Options, "help|h", "atomnumberstart|a=i", "chainidstart|c=s", "chainidrenameempty=s", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "overwrite|o", "residuenumbermode=s", "residuenumberstart=i", "residuenumberhetatmmode=s", "residuenumberstarthetatm=i", "root|r=s", "sequencelength=i", "waterresiduenames=s", "waterresiduestart=i", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }
  if (!IsPositiveInteger($Options{atomnumberstart})) {
    die "Error: The value specified, $Options{atomnumberstart}, for option \"-a, --AtomNumberStart\" is not valid. Allowed values: >0\n";
  }
  # Exactly one letter...
  if ($Options{chainidstart} !~ /\A[A-Z]\z/i) {
    die "Error: The value specified, $Options{chainidstart}, for option \"-c, --ChainIDStart\" is not valid. Allowed values: a single character from A to Z\n";
  }
  if ($Options{chainidrenameempty} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{chainidrenameempty}, for option \"--chainidrenameempty\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{mode} !~ /^(RenumberAtoms|RenumberResidues|RenumberWaters|RenameChainIDs)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: RenumberAtoms, RenumberResidues, RenumberWaters or RenameChainIDs\n";
  }
  if ($Options{modifyheader} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{residuenumbermode} !~ /^(Sequential|PerChain)$/i) {
    die "Error: The value specified, $Options{residuenumbermode}, for option \"--ResidueNumberMode\" is not valid. Allowed values: Sequential or PerChain\n";
  }
  if (!IsPositiveInteger($Options{residuenumberstart})) {
    die "Error: The value specified, $Options{residuenumberstart}, for option \"--ResidueNumberStart\" is not valid. Allowed values: >0\n";
  }
  if ($Options{residuenumberhetatmmode} !~ /^(automatic|specify)$/i) {
    # The message used to name the --residuenumbermode option...
    die "Error: The value specified, $Options{residuenumberhetatmmode}, for option \"--ResidueNumberHetatmMode\" is not valid. Allowed values: automatic or specify\n";
  }
  if (!IsPositiveInteger($Options{residuenumberstarthetatm})) {
    die "Error: The value specified, $Options{residuenumberstarthetatm}, for option \"--residuenumberstartHetatm\" is not valid. Allowed values: >0\n";
  }
  if (!IsPositiveInteger($Options{waterresiduestart})) {
    die "Error: The value specified, $Options{waterresiduestart}, for option \"--waterresiduestart\" is not valid. Allowed values: >0\n";
  }
}
