MayaChemTools

   1 package PDBFileUtil;
   2 #
   3 # $RCSfile: PDBFileUtil.pm,v $
   4 # $Date: 2015/02/28 20:47:18 $
   5 # $Revision: 1.36 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Exporter;
  31 use Text::ParseWords;
  32 use TextUtil;
  33 use FileUtil;
  34 use TimeUtil ();
  35 
  36 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  37 
  38 @ISA = qw(Exporter);
  39 @EXPORT = qw(GetPDBRecordType GetRecordTypesCount GetAllResidues GetConectRecordLines GetChainsAndResidues GetExperimentalTechnique GetExperimentalTechniqueResolution GetMinMaxCoords IsPDBFile IsAtomRecordType IsConectRecordType IsHeaderRecordType IsHetatmRecordType IsSeqresRecordType IsModelRecordType IsEndmdlRecordType IsTerRecordType IsMasterRecordType ReadPDBFile ParseHeaderRecordLine GenerateHeaderRecordLine GenerateHeaderRecordTimeStamp ParseAtomRecordLine GenerateAtomRecordLine ParseAtomOrHetatmRecordLine GenerateAtomOrHetatmRecordLine GenerateHetatmRecordLine ParseHetatmRecordLine ParseConectRecordLine GenerateConectRecordLine ParseExpdtaRecordLine ParseRemark2ResolutionRecordLine ParseSeqresRecordLine ParseTerRecordLine GenerateTerRecordLine ParseMasterRecordLine GenerateEndRecordLine);
  40 @EXPORT_OK = qw();
  41 
  42 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  43 
  44 # Get PDB record type...
  45 sub GetPDBRecordType {
  46   my($Line) = @_;
  47 
  48   return _GetRecordType($Line);
  49 }
  50 
  51 # Is it a PDB file?
  52 sub IsPDBFile {
  53   my($PDBFile) = @_;
  54   my($Line, $Status);
  55 
  56   $Status = 0;
  57   open PDBFILE, "$PDBFile" or die "Can't open $PDBFile: $!\n";
  58   $Line = GetTextLine(\*PDBFILE);
  59   $Status = ($Line =~ /^HEADER/i) ? 1 : 0;
  60   close PDBFILE;
  61 
  62   return $Status;
  63 }
  64 
  65 # Is it a atom record type?
  66 sub IsAtomRecordType {
  67   my($Line) = @_;
  68 
  69   return _IsRecordType($Line, 'ATOM');
  70 }
  71 
  72 # Is it a connect record type?
  73 sub IsConectRecordType {
  74   my($Line) = @_;
  75 
  76   return _IsRecordType($Line, 'CONECT');
  77 }
  78 
  79 # Is it a header atom record type?
  80 sub IsHeaderRecordType {
  81   my($Line) = @_;
  82 
  83   return _IsRecordType($Line, 'HEADER');
  84 }
  85 
  86 # Is it a hetro atom record type?
  87 sub IsHetatmRecordType {
  88   my($Line) = @_;
  89 
  90   return _IsRecordType($Line, 'HETATM');
  91 }
  92 
  93 # Is it a seqres record type?
  94 sub IsSeqresRecordType {
  95   my($Line) = @_;
  96 
  97   return _IsRecordType($Line, 'SEQRES');
  98 }
  99 
 100 # Is it a MODEL record type?
 101 sub IsModelRecordType {
 102   my($Line) = @_;
 103 
 104   return _IsRecordType($Line, 'MODEL');
 105 }
 106 
 107 # Is it a ENDMDL record type?
 108 sub IsEndmdlRecordType {
 109   my($Line) = @_;
 110 
 111   return _IsRecordType($Line, 'ENDMDL');
 112 }
 113 
 114 # Is it a TER record type?
 115 sub IsTerRecordType {
 116   my($Line) = @_;
 117 
 118   return _IsRecordType($Line, 'TER');
 119 }
 120 
 121 # Is it a MASTER record type?
 122 sub IsMasterRecordType {
 123   my($Line) = @_;
 124 
 125   return _IsRecordType($Line, 'MASTER');
 126 }
 127 
 128 # Count the number of each record type and a reference to data type with these key/value pairs:
 129 # {RecordTypes} - An array of unique record types in order of their presence in the file
 130 # {Count}{$RecordType} - Count of each record type
 131 # {Lines}{$RecordType} - Optional lines data for a specific record type.
 132 #
 133 sub GetRecordTypesCount {
 134   my($PDBRecordLinesRef, $SpecifiedRecordType, $GetRecordLinesFlag, $RecordType, $RecordLine, %RecordTypeDataMap);
 135 
 136   %RecordTypeDataMap = ();
 137   @{$RecordTypeDataMap{RecordTypes}} = ();
 138   %{$RecordTypeDataMap{Count}} = ();
 139   %{$RecordTypeDataMap{Lines}} = ();
 140 
 141   $SpecifiedRecordType = '';
 142   $GetRecordLinesFlag = 0;
 143   if (@_ == 3) {
 144     ($PDBRecordLinesRef, $SpecifiedRecordType, $GetRecordLinesFlag) = @_;
 145     $SpecifiedRecordType = uc $SpecifiedRecordType;
 146   }
 147   elsif (@_ == 2) {
 148     ($PDBRecordLinesRef, $SpecifiedRecordType) = @_;
 149     $SpecifiedRecordType = uc $SpecifiedRecordType;
 150   }
 151   else {
 152     ($PDBRecordLinesRef) = @_;
 153   }
 154   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 155     $RecordType = _GetRecordType($RecordLine);
 156     if ($SpecifiedRecordType && ($SpecifiedRecordType ne $RecordType)) {
 157       next LINE;
 158     }
 159     if (exists $RecordTypeDataMap{Count}{$RecordType}) {
 160       # Update count...
 161       $RecordTypeDataMap{Count}{$RecordType} += 1;
 162 
 163       if ($GetRecordLinesFlag) {
 164         push @{$RecordTypeDataMap{Lines}{$RecordType}}, $RecordLine;
 165       }
 166     }
 167     else {
 168       # New record type...
 169       push @{$RecordTypeDataMap{RecordTypes}}, $RecordType;
 170       $RecordTypeDataMap{Count}{$RecordType} = 1;
 171 
 172       if ($GetRecordLinesFlag) {
 173         @{$RecordTypeDataMap{Lines}{$RecordType}} = ();
 174         push @{$RecordTypeDataMap{Lines}{$RecordType}}, $RecordLine;
 175       }
 176     }
 177   }
 178   return (\%RecordTypeDataMap);
 179 }
 180 
 181 # Collect CONECT record lines for specific atom number, modified specified data to exclude any atom
 182 # number not present in the list of specified atom numbers and return a reference to list of
 183 # CONECT record lines.
 184 #
 185 sub GetConectRecordLines {
 186   my($PDBRecordLinesRef, $AtomNumbersMapRef) = @_;
 187   my($AtomNumber, $ConectAtomNumber, $RecordLine, @ConectRecordAtomNums, @ConectRecordLines);
 188 
 189   @ConectRecordLines = ();
 190   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 191     if (!IsConectRecordType($RecordLine)) {
 192       next LINE;
 193     }
 194     @ConectRecordAtomNums = ();
 195     push @ConectRecordAtomNums, ParseConectRecordLine($RecordLine);
 196     ATOMNUMBER: for $ConectAtomNumber (@ConectRecordAtomNums) {
 197       if (defined $ConectAtomNumber) {
 198         $AtomNumber = $ConectAtomNumber;
 199         if ($AtomNumber) {
 200           if (! exists $AtomNumbersMapRef->{$AtomNumber}) {
 201             next LINE;
 202           }
 203         }
 204       }
 205     }
 206     push @ConectRecordLines, $RecordLine;
 207   }
 208   return \@ConectRecordLines;
 209 }
 210 
 211 # Get chains and residue information using ATOM/HETATM or SEQRES records. And return a reference to a
 212 #  hash with these keys:
 213 #
 214 # @{$ChainsDataMap{ChainIDs}} - List of chain IDs with 'None' for no chain identification
 215 # @{$ChainsDataMap{Residues}{$ChainID}} - List of residues in order of their appearance in a chain
 216 # @{$ChainsDataMap{ResidueNumbers}{$ChainID}} - List of residue numbers in order of their appearance in a chain
 217 # %{$ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}} - Count of specific residues in a chain
 218 #
 219 # Notes:
 220 #  . Chains and residue data can be extacted using either ATOM/HETATM records or SEQRES records.
 221 #  . In addition to a different chain ID in ATOM/HETATM a TER record also indicates end of an existing chain
 222 #    and start of a new one: ChainID in ATOM/HETATM records might still be emtpy.
 223 #   . ATOM/HETATM records after the first ENDMDL records are simply ingnored.
 224 #
 225 sub GetChainsAndResidues {
 226   my($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag);
 227 
 228   $RecordsSource = 'AtomAndHetatm';
 229   $GetChainResiduesBeyondTERFlag = 0;
 230   $GetRecordLinesFlag = 0;
 231 
 232   if (@_ == 4) {
 233     ($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag) = @_;
 234   }
 235   elsif (@_ == 3) {
 236     ($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag) = @_;
 237   }
 238   elsif (@_ == 2) {
 239     ($PDBRecordLinesRef, $RecordsSource) = @_;
 240   }
 241   else {
 242     ($PDBRecordLinesRef) = @_;
 243   }
 244 
 245   if ($RecordsSource =~ /^AtomAndHetatm$/i) {
 246     return _GetChainsAndResiduesFromAtomHetatmRecords($PDBRecordLinesRef, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag);
 247   }
 248   elsif ($RecordsSource =~ /^Seqres$/i) {
 249     return _GetChainsAndResiduesFromSeqresRecords($PDBRecordLinesRef);
 250   }
 251   else {
 252     my(%ChainsDataMap);
 253     %ChainsDataMap = ();
 254     @{$ChainsDataMap{ChainIDs}} = ();
 255     %{$ChainsDataMap{Residues}} = ();
 256     %{$ChainsDataMap{ResidueNumbers}} = ();
 257     %{$ChainsDataMap{ResidueCount}} = ();
 258 
 259     return \%ChainsDataMap;
 260   }
 261 }
 262 
 263 
 264 # Get residue information using ATOM/HETATM records and return a reference to a hash with
 265 # these keys:
 266 #
 267 # @{$ResiduesDataMap{ResidueNames}} - List of all the residues
 268 # %{$ResiduesDataMap{ResidueCount}{$ResidueName}} - Count of residues
 269 # @{$ResiduesDataMap{AtomResidueNames}} - List of all the residues
 270 # %{$ResiduesDataMap{AtomResidueCount}{$ResidueName}} - Count of residues in ATOM records
 271 # @{$ResiduesDataMap{HetatomResidueNames}} - List of all the residues
 272 # %{$ResiduesDataMap{HetatmResidueCount}{$ResidueName}} - Count of residues HETATM records
 273 #
 274 # Notes:
 275 #  . ATOM/HETATM records after the first ENDMDL records are simply ingnored.
 276 #
 277 sub GetAllResidues {
 278   my($PDBRecordLinesRef) = @_;
 279 
 280   my($PreviousChainID, $PreviousResidueNumber, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, %ResiduesDataMap);
 281 
 282   %ResiduesDataMap = ();
 283   @{$ResiduesDataMap{ResidueNames}} = ();
 284   %{$ResiduesDataMap{ResidueCount}} = ();
 285   @{$ResiduesDataMap{AtomResidueNames}} = ();
 286   %{$ResiduesDataMap{AtomResidueCount}} = ();
 287   @{$ResiduesDataMap{HetatmResidueNames}} = ();
 288   %{$ResiduesDataMap{HetatmResidueCount}} = ();
 289 
 290   $PreviousChainID = '';
 291   $PreviousResidueNumber = 0;
 292 
 293   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 294     if (IsEndmdlRecordType($RecordLine)) {
 295       last LINE;
 296     }
 297     if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 298       next LINE;
 299     }
 300     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 301 
 302     if ($PreviousChainID eq $ChainID) {
 303       if ($ResidueNumber == $PreviousResidueNumber) {
 304         next LINE;
 305       }
 306       $PreviousResidueNumber = $ResidueNumber;
 307     }
 308     else {
 309       # New chain...
 310       $PreviousChainID = $ChainID;
 311       $PreviousResidueNumber = $ResidueNumber;
 312     }
 313 
 314     # Store the residue and update its count...
 315     push @{$ResiduesDataMap{ResidueNames}}, $ResidueName;
 316     if (exists $ResiduesDataMap{ResidueCount}{$ResidueName}) {
 317       $ResiduesDataMap{ResidueCount}{$ResidueName} += 1;
 318     }
 319     else {
 320       $ResiduesDataMap{ResidueCount}{$ResidueName} = 1;
 321     }
 322     # Update ATOM residue data...
 323     if (IsAtomRecordType($RecordLine)) {
 324       push @{$ResiduesDataMap{AtomResidueNames}}, $ResidueName;
 325       if (exists $ResiduesDataMap{AtomResidueCount}{$ResidueName}) {
 326         $ResiduesDataMap{AtomResidueCount}{$ResidueName} += 1;
 327       }
 328       else {
 329         $ResiduesDataMap{AtomResidueCount}{$ResidueName} = 1;
 330       }
 331     }
 332     # Update HETATM residue data...
 333     if (IsHetatmRecordType($RecordLine)) {
 334       push @{$ResiduesDataMap{HetatmResidueNames}}, $ResidueName;
 335       if (exists $ResiduesDataMap{HetatmResidueCount}{$ResidueName}) {
 336         $ResiduesDataMap{HetatmResidueCount}{$ResidueName} += 1;
 337       }
 338       else {
 339         $ResiduesDataMap{HetatmResidueCount}{$ResidueName} = 1;
 340       }
 341     }
 342   }
 343 
 344   return \%ResiduesDataMap;
 345 }
 346 
 347 # Return min/max XYZ coordinates for ATOM/HETATM records...
 348 sub GetMinMaxCoords {
 349   my($PDBRecordLinesRef) = @_;
 350 
 351   my($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 352 
 353   ($XMin, $YMin, $ZMin) = (99999) x 3;
 354   ($XMax, $YMax, $ZMax) = (-99999) x 3;
 355 
 356   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 357     if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 358       next LINE;
 359     }
 360     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 361 
 362     $XMin = ($X < $XMin) ? $X : $XMin;
 363     $YMin = ($Y < $YMin) ? $Y : $YMin;
 364     $ZMin = ($Z < $ZMin) ? $Z : $ZMin;
 365 
 366     $XMax = ($X > $XMax) ? $X : $XMax;
 367     $YMax = ($Y > $YMax) ? $Y : $YMax;
 368     $ZMax = ($Z > $ZMax) ? $Z : $ZMax;
 369   }
 370 
 371   if ($XMin == 99999) { $XMin = undef; }
 372   if ($YMin == 99999) { $YMin = undef; }
 373   if ($ZMin == 99999) { $ZMin = undef; }
 374   if ($XMax == -99999) { $XMax = undef; }
 375   if ($YMax == -99999) { $YMax = undef; }
 376   if ($ZMax == -99999) { $ZMax = undef; }
 377 
 378   return ($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax);
 379 }
 380 
 381 # Read PDB file and return reference to record lines..
 382 sub ReadPDBFile {
 383   my($PDBFile) = @_;
 384 
 385   my($Line, @PDBRecordLines);
 386 
 387   @PDBRecordLines = ();
 388   open PDBFILE, "$PDBFile" or die "Can't open $PDBFile: $!\n";
 389   while ($Line = GetTextLine(\*PDBFILE)) {
 390     push @PDBRecordLines, $Line;
 391   }
 392 
 393   close PDBFILE;
 394 
 395   return (\@PDBRecordLines);
 396 }
 397 
 398 #
 399 # Get experimental technique information...
 400 #
 401 sub GetExperimentalTechnique {
 402   my($PDBRecordLinesRef) = @_;
 403   my($RecordLine, $ContinuationNum, $ExperimentalTechnique);
 404 
 405   $ExperimentalTechnique = undef;
 406 
 407   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 408       if (_IsRecordType($RecordLine, 'EXPDTA')) {
 409         ($ContinuationNum, $ExperimentalTechnique) = ParseExpdtaRecordLine($RecordLine);
 410         last LINE;
 411       }
 412   }
 413 
 414   return $ExperimentalTechnique;
 415 }
 416 
 417 #
 418 # Get experimental technique resolution information...
 419 #
 420 sub GetExperimentalTechniqueResolution {
 421   my($PDBRecordLinesRef) = @_;
 422   my($RecordLine, $Resolution, $ResolutionUnits);
 423 
 424   ($Resolution, $ResolutionUnits) = ((undef) x 2);
 425 
 426   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 427     if ($RecordLine =~ /^REMARK   2 RESOLUTION./i) {
 428       ($Resolution, $ResolutionUnits) = ParseRemark2ResolutionRecordLine($RecordLine);
 429       last LINE;
 430     }
 431   }
 432 
 433   return ($Resolution, $ResolutionUnits);
 434 }
 435 
 436 #
 437 # Parse HEADER record line...
 438 sub ParseHeaderRecordLine {
 439   my($Line) = @_;
 440   my($Classification, $DepositionDate, $IDCode) = (undef, undef, undef);
 441 
 442   if ($Line !~ /^HEADER/i) {
 443     return ($Classification, $DepositionDate, $IDCode);
 444   }
 445   my($Length);
 446 
 447   ($Classification, $DepositionDate, $IDCode) = ('') x 3;
 448 
 449   $Length = length $Line;
 450 
 451   if ($Length <= 62) {
 452     ($Classification, $DepositionDate) = unpack("x10A40A9", $Line);
 453   }
 454   else {
 455     ($Classification, $DepositionDate, $IDCode) = unpack("x10A40A9x3A4", $Line);
 456   }
 457 
 458   $Classification = RemoveLeadingAndTrailingWhiteSpaces($Classification);
 459   $DepositionDate =~ s/ //g;
 460   $IDCode =~ s/ //g;
 461 
 462   return ($Classification, $DepositionDate, $IDCode);
 463 }
 464 
 465 #
 466 # Generate HEADER record line...
 467 sub GenerateHeaderRecordLine {
 468   my($Classification, $Date, $IDCode, $Line);
 469 
 470   $Classification = "Created using MayaChemTools";
 471   $Date = GenerateHeaderRecordTimeStamp();
 472   if (@_ == 3) {
 473     ($IDCode, $Classification, $Date) = @_;
 474   }
 475   elsif (@_ == 2) {
 476     ($IDCode, $Classification) = @_;
 477   }
 478   elsif (@_ == 1) {
 479     ($IDCode) = @_;
 480   }
 481 
 482   $Line = sprintf "HEADER    %-40.40s%9.9s   %4.4s", $Classification, $Date, $IDCode;
 483   return $Line;
 484 }
 485 
 486 # Generate PDB header time stamp...
 487 sub GenerateHeaderRecordTimeStamp {
 488   return TimeUtil::PDBFileTimeStamp();
 489 }
 490 
 491 #
 492 # Parse ATOM record line.
 493 #
 494 sub ParseAtomRecordLine {
 495   my($Line) = @_;
 496 
 497   return _ParseAtomOrHetatmRecordLine($Line);
 498 }
 499 
 500 # Generate ATOM record line...
 501 sub GenerateAtomRecordLine {
 502   my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 503 
 504   return _GenerateAtomOrHetatmRecordLine('ATOM', $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 505 }
 506 
 507 #
 508 # Parse ATOM/HETATm record line.
 509 #
 510 sub ParseAtomOrHetatmRecordLine {
 511   my($Line) = @_;
 512 
 513   return _ParseAtomOrHetatmRecordLine($Line);
 514 }
 515 
 516 # Generate ATOM/HETATM record line...
 517 sub GenerateAtomOrHetatmRecordLine {
 518   my($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 519 
 520   return _GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 521 }
 522 #
 523 # Parse HETATM record line...
 524 #
 525 sub ParseHetatmRecordLine {
 526   my($Line) = @_;
 527 
 528   return _ParseAtomOrHetatmRecordLine($Line);
 529 }
 530 
 531 # Generate HETATM record line...
 532 sub GenerateHetatmRecordLine {
 533   my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 534 
 535   return _GenerateAtomOrHetatmRecordLine('HETATM', $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 536 }
 537 
 538 # Parse EXPDTA record line...
 539 #
 540 # EXPDTA format:
 541 #
 542 #1 - 6 Record name "EXPDTA"
 543 # 9 - 10 Continuation continuation Allows concatenation of multiple records.
 544 # 11 - 70 SList technique The experimental technique(s) with optional comment describing the sample or experiment.
 545 #
 546 # The EXPDTA record identifies the experimental technique used. This may refer to the type of radiation and
 547 # sample, or include the spectroscopic or modeling technique. Permitted values include:
 548 #
 549 # ELECTRON DIFFRACTION
 550 # FIBER DIFFRACTION
 551 # FLUORESCENCE TRANSFER
 552 # NEUTRON DIFFRACTION
 553 # NMR
 554 # THEORETICAL MODEL
 555 # X-RAY DIFFRACTION
 556 #
 557 sub ParseExpdtaRecordLine {
 558   my($Line) = @_;
 559 
 560   if ($Line !~ /^EXPDTA/i) {
 561     return ((undef) x 2);
 562   }
 563 
 564   my($ContinuationNum, $ExperimentalTechnique) = unpack("x8A2A60" , $Line);
 565 
 566   $ContinuationNum =~ s/ //g;
 567   $ExperimentalTechnique = RemoveLeadingAndTrailingWhiteSpaces($ExperimentalTechnique);
 568 
 569   return ($ContinuationNum, $ExperimentalTechnique);
 570 }
 571 
 572 # Parse REMARK 2 record line...
 573 #
 574 # REMARK 2 format:
 575 #
 576 # The second REMARK 2 record has one of two formats. The first is used for diffraction studies, the second
 577 # for other types of experiments in which resolution is not relevant, e.g., NMR and theoretical modeling.
 578 #
 579 #For diffraction experiments:
 580 #
 581 # 1 - 6 Record name "REMARK"
 582 # 10 LString(1) "2"
 583 # 12 - 22 LString(11) "RESOLUTION."
 584 # 23 - 27 Real(5.2) resolution Resolution.
 585 # 29 - 38 LString(10) "ANGSTROMS."
 586 #
 587 # REMARK 2 when not a diffraction experiment:
 588 #
 589 # 1 - 6 Record name "REMARK"
 590 # 10 LString(1) "2"
 591 # 12 - 38 LString(28) "RESOLUTION. NOT APPLICABLE."
 592 # 41 - 70 String comment Comment.
 593 #
 594 sub ParseRemark2ResolutionRecordLine {
 595   my($Line) = @_;
 596 
 597   if ($Line !~ /^REMARK   2 RESOLUTION./i) {
 598     return ((undef) x 2);
 599   }
 600 
 601   my($Resolution, $ResolutionUnits);
 602 
 603   if ($Line =~ /NOT APPLICABLE/i) {
 604     ($Resolution, $ResolutionUnits) = ("NOT APPLICABLE", "");
 605   }
 606   else {
 607     ($Resolution, $ResolutionUnits) = unpack("x22A5x1A10" , $Line);
 608   }
 609 
 610   $Resolution = RemoveLeadingAndTrailingWhiteSpaces($Resolution);
 611 
 612   $ResolutionUnits = RemoveLeadingAndTrailingWhiteSpaces($ResolutionUnits);
 613   $ResolutionUnits =~ s/\.$//;
 614 
 615   return ($Resolution, $ResolutionUnits);
 616 }
 617 
 618 #
 619 # Parse SEQRES record line...
 620 #
 621 # SEQRES format:
 622 #
 623 # 1 - 6 Record name "SEQRES"
 624 # 9 - 10 Serial number of the SEQRES record for the current chain. Starts at 1 and increments by one each line. Reset to 1 for each chain.
 625 # 12 - Chain identifier
 626 # 14 - 17 Integer numRes Number of residues in the chain
 627 # 20 - 22 24 -26 ... ... 68 - 70 Residue name resName Residue name.
 628 #
 629 sub ParseSeqresRecordLine {
 630   my($Line) = @_;
 631 
 632   if ($Line !~ /^SEQRES/i) {
 633     return ((undef) x 5);
 634   }
 635   my($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNames) = unpack("x8A2x1A1x1A4x2A51" , $Line);
 636   $RecordSerialNumber =~ s/ //g;
 637   $ChainID =~ s/ //g;
 638   $NumOfResidues =~ s/ //g;
 639   $ResidueNames = RemoveLeadingAndTrailingWhiteSpaces($ResidueNames);
 640 
 641   return ($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNames);
 642 }
 643 
 644 #
 645 # Parse CONECT record line...
 646 #
 647 # CONECT format:
 648 #
 649 # 1 - 6 Record name "CONECT"
 650 # 7 - 11 Atom number
 651 # 12 - 16, 17 - 21, 22 - 26, 27 - 31 Atom number of bonded atom
 652 #
 653 # 32 - 36, 37 - 41 Atom number of hydrogen bonded atom
 654 # 42 - 46 Atom number of salt bridged atom
 655 # 47 - 51, 52 -56 Atom number of hydrogen bonded atom
 656 # 57 - 61 Atom number of salt bridged atom
 657 #
 658 sub ParseConectRecordLine {
 659   my($Line) = @_;
 660 
 661   if ($Line !~ /^CONECT/i) {
 662     return ((undef) x 11);
 663   }
 664   my($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2) = map {s/ //g; $_} unpack("x6A5A5A5A5A5A5A5A5A5A5A5", $Line);
 665 
 666   return ($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2);
 667 }
 668 
 669 # Generate CONECT record line...
 670 sub GenerateConectRecordLine {
 671   my($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2) = @_;
 672   my($Line);
 673 
 674   $Line = sprintf "CONECT%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s", $AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2;
 675 
 676   return $Line;
 677 }
 678 
 679 #
 680 # Parse TER record line...
 681 #
 682 # TER format:
 683 #
 684 #1 - 6 Record name "TER "
 685 # 7 - 11 Serial number
 686 # 18 - 20 Residue name
 687 # 22 Chain identifier
 688 # 23 - 26 Residue sequence number
 689 # 27 Insertion code
 690 #
 691 sub ParseTerRecordLine {
 692   my($Line) = @_;
 693 
 694   if ($Line !~ /^TER/i) {
 695     return ((undef) x 5);
 696   }
 697   my($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $Length);
 698 
 699   ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = ('') x 5;
 700 
 701   $Length = length $Line;
 702 
 703   if ($Length <= 17) {
 704     ($SerialNumber, $ResidueName) = map {s/ //g; $_} unpack("x6A5", $Line);
 705   }
 706   elsif ($Length <= 21) {
 707     ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = map {s/ //g; $_} unpack("x6A5x6A3", $Line);
 708   }
 709   else {
 710     ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = map {s/ //g; $_} unpack("x6A5x6A3xA1A4A1", $Line);
 711   }
 712 
 713   return ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode);
 714 }
 715 
 716 # Generate TER record line...
 717 sub GenerateTerRecordLine {
 718   my($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $Line) = ('') x 6;
 719 
 720   if (@_ == 5) {
 721     ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = @_;
 722   }
 723   elsif (@_ == 4) {
 724     ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber) = @_;
 725   }
 726   elsif (@_ == 3) {
 727     ($SerialNumber, $ResidueName) = @_;
 728   }
 729   elsif (@_ == 2) {
 730     ($SerialNumber, $ResidueName) = @_;
 731   }
 732   elsif (@_ == 1) {
 733     ($SerialNumber) = @_;
 734   }
 735   $Line = sprintf "TER   %5.5s      %-3.3s %1.1s%4.4s%1.1s", $SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode;
 736 
 737   return $Line;
 738 }
 739 
 740 #
 741 # Parse MASTER record line...
 742 #
 743 # MASTER record format:
 744 #
 745 #1 - 6 Record name "MASTER"
 746 # 11 - 15 Number of REMARK records
 747 # 16 - 20 "0"
 748 # 21 - 25 Number of HET records
 749 # 26 - 30 Number of HELIX records
 750 # 31 - 35 Number of SHEET records
 751 # 36 - 40 Number of TURN records
 752 # 41 - 45 Number of SITE records
 753 # 46 - 50 Number of coordinate transformation records (ORIGXn+SCALEn+MTRIXn)
 754 # 51 - 55 Number of atomic coordinate records (ATOM+HETATM)
 755 # 56 - 60 Number of TER records
 756 # 61 - 65 Number of CONECT records
 757 # 66 - 70 Number of SEQRES records
 758 #
 759 sub ParseMasterRecordLine {
 760   my($Line) = @_;
 761 
 762   if ($Line !~ /^MASTER/i) {
 763     return ((undef) x 11);
 764   }
 765   my($NumOfRemarkRecords, $NumOfHetRecords, $NumOfHelixRecords, $NumOfSheetRecords, $NumOfTurnRecords, $NumOfSiteRecords, $NumOfTransformationsRecords, $NumOfAtomAndHetatmRecords, $NumOfTerRecords, $NumOfConectRecords, $NumOfSeqresRecords) = map {s/ //g; $_} unpack("x6x4A5x5A5A5A5A5A5A5A5A5A5A5", $Line);
 766 
 767   return ($NumOfRemarkRecords, $NumOfHetRecords, $NumOfHelixRecords, $NumOfSheetRecords, $NumOfTurnRecords, $NumOfSiteRecords, $NumOfTransformationsRecords, $NumOfAtomAndHetatmRecords, $NumOfTerRecords, $NumOfConectRecords, $NumOfSeqresRecords);
 768 }
 769 
 770 # End record...
 771 sub GenerateEndRecordLine {
 772   my($Line);
 773   $Line = 'END   ';
 774   return $Line;
 775 }
 776 
 777 # ATOM/HETATM record format:
 778 #
 779 # 1 - 6 Record name
 780 # 7 - 11  Atom serial number - right justified
 781 # 13 - 16 Atom name
 782 # 17 Alternate location indicator.
 783 # 18 - 20 Residue name - right justified
 784 # 22 Chain identifier.
 785 # 23 - 26 Residue sequence number - right justified
 786 # 27 Code for insertion of residues.
 787 # 31 - 38 Real(8.3), Orthogonal coordinates for X in Angstroms.
 788 # 39 - 46 Real(8.3), Orthogonal coordinates for Y in Angstroms.
 789 # 47 - 54 Real(8.3), Orthogonal coordinates for Z in Angstroms.
 790 # 55 - 60 Real(6.2), Occupancy
 791 # 61 - 66 Real(6.2), Temperature factor
 792 # 73 - 76 LString(4), Segment identifier, left-justified.
 793 # 77 - 78 LString(2), Element symbol, right-justified.
 794 #79 - 80 LString(2), Charge on the atom.
 795 #
 796 # Notes:
 797 #  . Atom names starting with C, N, O and S are left justified starting with column 14
 798 #    and others are left justified starting with column 13.
 799 #
 800 #  . Six characters (columns) are reserved for atom names, assigned as follows:
 801 #
 802 #   13 - 14 Chemical symbol - right justified, except for hydrogen atoms
 803 #
 804 #   And for amino acids:
 805 #
 806 #   15 Remoteness indicator (alphabetic) (A, B, G, D, E, Z and so on)
 807 #   16 Branch designator (numeric)
 808 #
 809 sub _ParseAtomOrHetatmRecordLine {
 810   my($Line) = @_;
 811 
 812   if ($Line !~ /^(ATOM|HETATM)/i) {
 813     return ((undef) x 15);
 814   }
 815   my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $Length);
 816 
 817   ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ('') x 15;
 818 
 819   $Length = length $Line;
 820 
 821   if ($Length <= 72) {
 822     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor) = map {s/ //g; $_} unpack("x6A5xA4A1A3xA1A4A1x3A8A8A8A6A6", $Line);
 823   }
 824   else {
 825     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = map {s/ //g; $_} unpack("x6A5xA4A1A3xA1A4A1x3A8A8A8A6A6x6A4A2A2", $Line);
 826   }
 827   return($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 828 }
 829 
 830 # Generate ATOM/HETATM record line...
 831 sub _GenerateAtomOrHetatmRecordLine {
 832   my($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 833   my($Line, $AtomNameFormat);
 834 
 835   if (length($AtomName) >= 4) {
 836     # Left justified starting at column 13 for all atom names of length 4...
 837     $AtomNameFormat = "%-4.4s";
 838   }
 839   elsif (IsEmpty($ElementSymbol)) {
 840     # No element symbol specified; just guess from atom name to cover most likely cases...
 841     $AtomNameFormat = ($AtomName =~ /^(C|N|O|S)/i) ? " %-3.3s" : "%-4.4s";
 842   }
 843   else {
 844     # Element symbol specified...
 845     if ($ElementSymbol =~ /^H$/i) {
 846       # Hydrogen atom name with <=3 characters is left justified starting at column 14;
 847       # Otherwise, left justified starting at column 13.
 848       $AtomNameFormat = (length($AtomName) <= 3) ? " %-3.3s" : "%-4.4s";
 849     }
 850     else {
 851       # Non-hydrogen atom name...
 852       $AtomNameFormat = (length($ElementSymbol) == 1) ? " %-3.3s" : "%-4.4s";
 853     }
 854   }
 855 
 856   $Line = sprintf "%-6.6s%5.5s ${AtomNameFormat}%1.1s%3.3s %1.1s%4.4s%1.1s   %8.8s%8.8s%8.8s%6.6s%6.6s      %-4.4s%2.2s%2.2s", $RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge;
 857 
 858   return $Line;
 859 }
 860 
 861 # Check record type...
 862 sub _IsRecordType {
 863   my($Line, $SpecifiedType) = @_;
 864   my($Type, $Status);
 865 
 866   ($Type) = map {s/ //g; $_} unpack("A6", $Line);
 867 
 868   $Status = ($SpecifiedType eq $Type) ? 1 : 0;
 869 
 870   return $Status;
 871 }
 872 
 873 # Get record type...
 874 sub _GetRecordType {
 875   my($Line) = @_;
 876   my($Type);
 877 
 878   ($Type) = map {s/ //g; $_} unpack("A6", $Line);
 879 
 880   return $Type;
 881 }
 882 
 883 # Get chains and residues data using ATOM/HETATM records...
 884 #
 885 sub _GetChainsAndResiduesFromAtomHetatmRecords {
 886   my($PDBRecordLinesRef, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag) = @_;
 887 
 888   my($LineCount, $TotalChainCount, $PreviousResidueNumber, $ChainCount, $DefaultChainID, $DefaultChainLabel, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, %ChainsDataMap);
 889 
 890   # Do a quick chain count using TER record...
 891   $TotalChainCount = 0;
 892   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 893     if (IsEndmdlRecordType($RecordLine)) {
 894       last LINE;
 895     }
 896     if (IsTerRecordType($RecordLine)) {
 897       $TotalChainCount++;
 898     }
 899   }
 900 
 901   %ChainsDataMap = ();
 902   @{$ChainsDataMap{ChainIDs}} = ();
 903   %{$ChainsDataMap{Residues}} = ();
 904   %{$ChainsDataMap{ResidueNumbers}} = ();
 905   %{$ChainsDataMap{Lines}} = ();
 906   %{$ChainsDataMap{ResidueCount}} = ();
 907 
 908   $LineCount = 0;
 909   $ChainCount = 0;
 910   $DefaultChainLabel = 'None';
 911   $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
 912   $PreviousResidueNumber = 0;
 913 
 914   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 915       $LineCount++;
 916       if (IsTerRecordType($RecordLine)) {
 917         $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
 918         $ChainCount++;
 919         if ($ChainCount == $TotalChainCount) {
 920           last LINE;
 921         }
 922         else {
 923           next LINE;
 924         }
 925       }
 926       elsif (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 927         next LINE;
 928       }
 929       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 930 
 931       if (IsEmpty($ChainID)) {
 932         $ChainID = $DefaultChainID;
 933       }
 934       if (exists $ChainsDataMap{Residues}{$ChainID}) {
 935         # Data for existing chain...
 936         if ($GetRecordLinesFlag) {
 937           push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine;
 938         }
 939 
 940         if ($ResidueNumber != $PreviousResidueNumber) {
 941           # Next residue with in the chain...
 942           push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName;
 943           push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber;
 944 
 945           if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) {
 946             $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1;
 947           }
 948           else {
 949             $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
 950           }
 951           $PreviousResidueNumber = $ResidueNumber;
 952         }
 953       }
 954       else {
 955         # Data for new chain...
 956         push @{$ChainsDataMap{ChainIDs}}, $ChainID;
 957 
 958         @{$ChainsDataMap{Residues}{$ChainID}} = ();
 959         push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName;
 960 
 961         @{$ChainsDataMap{ResidueNumbers}{$ChainID}} = ();
 962         push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber;
 963 
 964         @{$ChainsDataMap{Lines}{$ChainID}} = ();
 965         if ($GetRecordLinesFlag) {
 966           push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine;
 967         }
 968 
 969         %{$ChainsDataMap{ResidueCount}{$ChainID}} = ();
 970         $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
 971         $PreviousResidueNumber = $ResidueNumber;
 972       }
 973   }
 974   if (!$GetChainResiduesBeyondTERFlag) {
 975     return \%ChainsDataMap;
 976   }
 977   # Look for any HETATM residues specified outside TER records which could belong to an existing chain...
 978   my($LineIndex, $PreviousChainID);
 979   $PreviousChainID = '';
 980   $PreviousResidueNumber = 0;
 981   LINE: for $LineIndex (($LineCount - 1) .. $#{$PDBRecordLinesRef}) {
 982     $RecordLine = $PDBRecordLinesRef->[$LineIndex];
 983     if (IsEndmdlRecordType($RecordLine)) {
 984       last LINE;
 985     }
 986     if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 987       next LINE;
 988     }
 989     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 990     if (IsEmpty($ChainID)) {
 991       # Ignore the chains with no ids...
 992       next LINE;
 993     }
 994     if (! exists($ChainsDataMap{Residues}{$ChainID})) {
 995       # Don't collect any new chains after TER record...
 996       next LINE;
 997     }
 998     if ($GetRecordLinesFlag) {
 999       push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine;
1000     }
1001     if ($ResidueNumber != $PreviousResidueNumber || $ChainID ne $PreviousChainID) {
1002 
1003       push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName;
1004       push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber;
1005 
1006       if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) {
1007         $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1;
1008       }
1009       else {
1010         $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
1011       }
1012       $PreviousChainID = $ChainID;
1013       $PreviousResidueNumber = $ResidueNumber;
1014     }
1015   }
1016   return \%ChainsDataMap;
1017 }
1018 
1019 # Get chains and residues data using SEQRES records...
1020 #
1021 sub _GetChainsAndResiduesFromSeqresRecords {
1022   my($PDBRecordLinesRef) = @_;
1023 
1024   my($ChainCount, $DefaultChainLabel, $DefaultChainID, $RecordLine, $RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueName, $ResidueNamesString, @ResidueNamesList, %ChainsDataMap);
1025 
1026   %ChainsDataMap = ();
1027   @{$ChainsDataMap{ChainIDs}} = ();
1028   %{$ChainsDataMap{Residues}} = ();
1029   %{$ChainsDataMap{ResidueNumbers}} = ();
1030   %{$ChainsDataMap{ResidueCount}} = ();
1031 
1032   $ChainCount = 0;
1033   $DefaultChainLabel = 'None';
1034   $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
1035 
1036   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
1037       if (!IsSeqresRecordType($RecordLine)) {
1038         next LINE;
1039       }
1040       ($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNamesString) = ParseSeqresRecordLine($RecordLine);
1041       if ($RecordSerialNumber == 1) {
1042         # Indicates start of a new chain...
1043         $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
1044         $ChainCount++;
1045       }
1046       if (IsEmpty($ChainID)) {
1047         $ChainID = $DefaultChainID;
1048       }
1049       # Process the residues...
1050       @ResidueNamesList = split /[ ]+/, $ResidueNamesString;
1051 
1052       if (exists $ChainsDataMap{Residues}{$ChainID}) {
1053         # Data for existing chain...
1054         push @{$ChainsDataMap{Residues}{$ChainID}}, @ResidueNamesList;
1055       }
1056       else {
1057         # Data for new chain...
1058         push @{$ChainsDataMap{ChainIDs}}, $ChainID;
1059         @{$ChainsDataMap{Residues}{$ChainID}} = ();
1060         push @{$ChainsDataMap{Residues}{$ChainID}}, @ResidueNamesList;
1061       }
1062 
1063       # Setup residue count...
1064       for $ResidueName (@ResidueNamesList) {
1065         if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) {
1066           $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1;
1067         }
1068         else {
1069           $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
1070         }
1071       }
1072   }
1073   return \%ChainsDataMap;
1074 }
1075