1 package PDBFileUtil; 2 # 3 # $RCSfile: PDBFileUtil.pm,v $ 4 # $Date: 2015/02/28 20:47:18 $ 5 # $Revision: 1.36 $ 6 # 7 # Author: Manish Sud <msud@san.rr.com> 8 # 9 # Copyright (C) 2015 Manish Sud. All rights reserved. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 use strict; 30 use Exporter; 31 use Text::ParseWords; 32 use TextUtil; 33 use FileUtil; 34 use TimeUtil (); 35 36 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 37 38 @ISA = qw(Exporter); 39 @EXPORT = qw(GetPDBRecordType GetRecordTypesCount GetAllResidues GetConectRecordLines GetChainsAndResidues GetExperimentalTechnique GetExperimentalTechniqueResolution GetMinMaxCoords IsPDBFile IsAtomRecordType IsConectRecordType IsHeaderRecordType IsHetatmRecordType IsSeqresRecordType IsModelRecordType IsEndmdlRecordType IsTerRecordType IsMasterRecordType ReadPDBFile ParseHeaderRecordLine GenerateHeaderRecordLine GenerateHeaderRecordTimeStamp ParseAtomRecordLine GenerateAtomRecordLine ParseAtomOrHetatmRecordLine GenerateAtomOrHetatmRecordLine GenerateHetatmRecordLine ParseHetatmRecordLine ParseConectRecordLine GenerateConectRecordLine ParseExpdtaRecordLine ParseRemark2ResolutionRecordLine ParseSeqresRecordLine ParseTerRecordLine GenerateTerRecordLine ParseMasterRecordLine GenerateEndRecordLine); 40 @EXPORT_OK = qw(); 41 42 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 43 44 # Get PDB record type... 45 sub GetPDBRecordType { 46 my($Line) = @_; 47 48 return _GetRecordType($Line); 49 } 50 51 # Is it a PDB file? 52 sub IsPDBFile { 53 my($PDBFile) = @_; 54 my($Line, $Status); 55 56 $Status = 0; 57 open PDBFILE, "$PDBFile" or die "Can't open $PDBFile: $!\n"; 58 $Line = GetTextLine(\*PDBFILE); 59 $Status = ($Line =~ /^HEADER/i) ? 1 : 0; 60 close PDBFILE; 61 62 return $Status; 63 } 64 65 # Is it a atom record type? 66 sub IsAtomRecordType { 67 my($Line) = @_; 68 69 return _IsRecordType($Line, 'ATOM'); 70 } 71 72 # Is it a connect record type? 73 sub IsConectRecordType { 74 my($Line) = @_; 75 76 return _IsRecordType($Line, 'CONECT'); 77 } 78 79 # Is it a header atom record type? 80 sub IsHeaderRecordType { 81 my($Line) = @_; 82 83 return _IsRecordType($Line, 'HEADER'); 84 } 85 86 # Is it a hetro atom record type? 87 sub IsHetatmRecordType { 88 my($Line) = @_; 89 90 return _IsRecordType($Line, 'HETATM'); 91 } 92 93 # Is it a seqres record type? 94 sub IsSeqresRecordType { 95 my($Line) = @_; 96 97 return _IsRecordType($Line, 'SEQRES'); 98 } 99 100 # Is it a MODEL record type? 101 sub IsModelRecordType { 102 my($Line) = @_; 103 104 return _IsRecordType($Line, 'MODEL'); 105 } 106 107 # Is it a ENDMDL record type? 108 sub IsEndmdlRecordType { 109 my($Line) = @_; 110 111 return _IsRecordType($Line, 'ENDMDL'); 112 } 113 114 # Is it a TER record type? 115 sub IsTerRecordType { 116 my($Line) = @_; 117 118 return _IsRecordType($Line, 'TER'); 119 } 120 121 # Is it a MASTER record type? 122 sub IsMasterRecordType { 123 my($Line) = @_; 124 125 return _IsRecordType($Line, 'MASTER'); 126 } 127 128 # Count the number of each record type and a reference to data type with these key/value pairs: 129 # {RecordTypes} - An array of unique record types in order of their presence in the file 130 # {Count}{$RecordType} - Count of each record type 131 # {Lines}{$RecordType} - Optional lines data for a specific record type. 132 # 133 sub GetRecordTypesCount { 134 my($PDBRecordLinesRef, $SpecifiedRecordType, $GetRecordLinesFlag, $RecordType, $RecordLine, %RecordTypeDataMap); 135 136 %RecordTypeDataMap = (); 137 @{$RecordTypeDataMap{RecordTypes}} = (); 138 %{$RecordTypeDataMap{Count}} = (); 139 %{$RecordTypeDataMap{Lines}} = (); 140 141 $SpecifiedRecordType = ''; 142 $GetRecordLinesFlag = 0; 143 if (@_ == 3) { 144 ($PDBRecordLinesRef, $SpecifiedRecordType, $GetRecordLinesFlag) = @_; 145 $SpecifiedRecordType = uc $SpecifiedRecordType; 146 } 147 elsif (@_ == 2) { 148 ($PDBRecordLinesRef, $SpecifiedRecordType) = @_; 149 $SpecifiedRecordType = uc $SpecifiedRecordType; 150 } 151 else { 152 ($PDBRecordLinesRef) = @_; 153 } 154 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 155 $RecordType = _GetRecordType($RecordLine); 156 if ($SpecifiedRecordType && ($SpecifiedRecordType ne $RecordType)) { 157 next LINE; 158 } 159 if (exists $RecordTypeDataMap{Count}{$RecordType}) { 160 # Update count... 161 $RecordTypeDataMap{Count}{$RecordType} += 1; 162 163 if ($GetRecordLinesFlag) { 164 push @{$RecordTypeDataMap{Lines}{$RecordType}}, $RecordLine; 165 } 166 } 167 else { 168 # New record type... 169 push @{$RecordTypeDataMap{RecordTypes}}, $RecordType; 170 $RecordTypeDataMap{Count}{$RecordType} = 1; 171 172 if ($GetRecordLinesFlag) { 173 @{$RecordTypeDataMap{Lines}{$RecordType}} = (); 174 push @{$RecordTypeDataMap{Lines}{$RecordType}}, $RecordLine; 175 } 176 } 177 } 178 return (\%RecordTypeDataMap); 179 } 180 181 # Collect CONECT record lines for specific atom number, modified specified data to exclude any atom 182 # number not present in the list of specified atom numbers and return a reference to list of 183 # CONECT record lines. 184 # 185 sub GetConectRecordLines { 186 my($PDBRecordLinesRef, $AtomNumbersMapRef) = @_; 187 my($AtomNumber, $ConectAtomNumber, $RecordLine, @ConectRecordAtomNums, @ConectRecordLines); 188 189 @ConectRecordLines = (); 190 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 191 if (!IsConectRecordType($RecordLine)) { 192 next LINE; 193 } 194 @ConectRecordAtomNums = (); 195 push @ConectRecordAtomNums, ParseConectRecordLine($RecordLine); 196 ATOMNUMBER: for $ConectAtomNumber (@ConectRecordAtomNums) { 197 if (defined $ConectAtomNumber) { 198 $AtomNumber = $ConectAtomNumber; 199 if ($AtomNumber) { 200 if (! exists $AtomNumbersMapRef->{$AtomNumber}) { 201 next LINE; 202 } 203 } 204 } 205 } 206 push @ConectRecordLines, $RecordLine; 207 } 208 return \@ConectRecordLines; 209 } 210 211 # Get chains and residue information using ATOM/HETATM or SEQRES records. And return a reference to a 212 # hash with these keys: 213 # 214 # @{$ChainsDataMap{ChainIDs}} - List of chain IDs with 'None' for no chain identification 215 # @{$ChainsDataMap{Residues}{$ChainID}} - List of residues in order of their appearance in a chain 216 # @{$ChainsDataMap{ResidueNumbers}{$ChainID}} - List of residue numbers in order of their appearance in a chain 217 # %{$ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}} - Count of specific residues in a chain 218 # 219 # Notes: 220 # . Chains and residue data can be extacted using either ATOM/HETATM records or SEQRES records. 221 # . In addition to a different chain ID in ATOM/HETATM a TER record also indicates end of an existing chain 222 # and start of a new one: ChainID in ATOM/HETATM records might still be emtpy. 223 # . ATOM/HETATM records after the first ENDMDL records are simply ingnored. 224 # 225 sub GetChainsAndResidues { 226 my($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag); 227 228 $RecordsSource = 'AtomAndHetatm'; 229 $GetChainResiduesBeyondTERFlag = 0; 230 $GetRecordLinesFlag = 0; 231 232 if (@_ == 4) { 233 ($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag) = @_; 234 } 235 elsif (@_ == 3) { 236 ($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag) = @_; 237 } 238 elsif (@_ == 2) { 239 ($PDBRecordLinesRef, $RecordsSource) = @_; 240 } 241 else { 242 ($PDBRecordLinesRef) = @_; 243 } 244 245 if ($RecordsSource =~ /^AtomAndHetatm$/i) { 246 return _GetChainsAndResiduesFromAtomHetatmRecords($PDBRecordLinesRef, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag); 247 } 248 elsif ($RecordsSource =~ /^Seqres$/i) { 249 return _GetChainsAndResiduesFromSeqresRecords($PDBRecordLinesRef); 250 } 251 else { 252 my(%ChainsDataMap); 253 %ChainsDataMap = (); 254 @{$ChainsDataMap{ChainIDs}} = (); 255 %{$ChainsDataMap{Residues}} = (); 256 %{$ChainsDataMap{ResidueNumbers}} = (); 257 %{$ChainsDataMap{ResidueCount}} = (); 258 259 return \%ChainsDataMap; 260 } 261 } 262 263 264 # Get residue information using ATOM/HETATM records and return a reference to a hash with 265 # these keys: 266 # 267 # @{$ResiduesDataMap{ResidueNames}} - List of all the residues 268 # %{$ResiduesDataMap{ResidueCount}{$ResidueName}} - Count of residues 269 # @{$ResiduesDataMap{AtomResidueNames}} - List of all the residues 270 # %{$ResiduesDataMap{AtomResidueCount}{$ResidueName}} - Count of residues in ATOM records 271 # @{$ResiduesDataMap{HetatomResidueNames}} - List of all the residues 272 # %{$ResiduesDataMap{HetatmResidueCount}{$ResidueName}} - Count of residues HETATM records 273 # 274 # Notes: 275 # . ATOM/HETATM records after the first ENDMDL records are simply ingnored. 276 # 277 sub GetAllResidues { 278 my($PDBRecordLinesRef) = @_; 279 280 my($PreviousChainID, $PreviousResidueNumber, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, %ResiduesDataMap); 281 282 %ResiduesDataMap = (); 283 @{$ResiduesDataMap{ResidueNames}} = (); 284 %{$ResiduesDataMap{ResidueCount}} = (); 285 @{$ResiduesDataMap{AtomResidueNames}} = (); 286 %{$ResiduesDataMap{AtomResidueCount}} = (); 287 @{$ResiduesDataMap{HetatmResidueNames}} = (); 288 %{$ResiduesDataMap{HetatmResidueCount}} = (); 289 290 $PreviousChainID = ''; 291 $PreviousResidueNumber = 0; 292 293 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 294 if (IsEndmdlRecordType($RecordLine)) { 295 last LINE; 296 } 297 if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) { 298 next LINE; 299 } 300 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine); 301 302 if ($PreviousChainID eq $ChainID) { 303 if ($ResidueNumber == $PreviousResidueNumber) { 304 next LINE; 305 } 306 $PreviousResidueNumber = $ResidueNumber; 307 } 308 else { 309 # New chain... 310 $PreviousChainID = $ChainID; 311 $PreviousResidueNumber = $ResidueNumber; 312 } 313 314 # Store the residue and update its count... 315 push @{$ResiduesDataMap{ResidueNames}}, $ResidueName; 316 if (exists $ResiduesDataMap{ResidueCount}{$ResidueName}) { 317 $ResiduesDataMap{ResidueCount}{$ResidueName} += 1; 318 } 319 else { 320 $ResiduesDataMap{ResidueCount}{$ResidueName} = 1; 321 } 322 # Update ATOM residue data... 323 if (IsAtomRecordType($RecordLine)) { 324 push @{$ResiduesDataMap{AtomResidueNames}}, $ResidueName; 325 if (exists $ResiduesDataMap{AtomResidueCount}{$ResidueName}) { 326 $ResiduesDataMap{AtomResidueCount}{$ResidueName} += 1; 327 } 328 else { 329 $ResiduesDataMap{AtomResidueCount}{$ResidueName} = 1; 330 } 331 } 332 # Update HETATM residue data... 333 if (IsHetatmRecordType($RecordLine)) { 334 push @{$ResiduesDataMap{HetatmResidueNames}}, $ResidueName; 335 if (exists $ResiduesDataMap{HetatmResidueCount}{$ResidueName}) { 336 $ResiduesDataMap{HetatmResidueCount}{$ResidueName} += 1; 337 } 338 else { 339 $ResiduesDataMap{HetatmResidueCount}{$ResidueName} = 1; 340 } 341 } 342 } 343 344 return \%ResiduesDataMap; 345 } 346 347 # Return min/max XYZ coordinates for ATOM/HETATM records... 348 sub GetMinMaxCoords { 349 my($PDBRecordLinesRef) = @_; 350 351 my($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge); 352 353 ($XMin, $YMin, $ZMin) = (99999) x 3; 354 ($XMax, $YMax, $ZMax) = (-99999) x 3; 355 356 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 357 if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) { 358 next LINE; 359 } 360 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine); 361 362 $XMin = ($X < $XMin) ? $X : $XMin; 363 $YMin = ($Y < $YMin) ? $Y : $YMin; 364 $ZMin = ($Z < $ZMin) ? $Z : $ZMin; 365 366 $XMax = ($X > $XMax) ? $X : $XMax; 367 $YMax = ($Y > $YMax) ? $Y : $YMax; 368 $ZMax = ($Z > $ZMax) ? $Z : $ZMax; 369 } 370 371 if ($XMin == 99999) { $XMin = undef; } 372 if ($YMin == 99999) { $YMin = undef; } 373 if ($ZMin == 99999) { $ZMin = undef; } 374 if ($XMax == -99999) { $XMax = undef; } 375 if ($YMax == -99999) { $YMax = undef; } 376 if ($ZMax == -99999) { $ZMax = undef; } 377 378 return ($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax); 379 } 380 381 # Read PDB file and return reference to record lines.. 382 sub ReadPDBFile { 383 my($PDBFile) = @_; 384 385 my($Line, @PDBRecordLines); 386 387 @PDBRecordLines = (); 388 open PDBFILE, "$PDBFile" or die "Can't open $PDBFile: $!\n"; 389 while ($Line = GetTextLine(\*PDBFILE)) { 390 push @PDBRecordLines, $Line; 391 } 392 393 close PDBFILE; 394 395 return (\@PDBRecordLines); 396 } 397 398 # 399 # Get experimental technique information... 400 # 401 sub GetExperimentalTechnique { 402 my($PDBRecordLinesRef) = @_; 403 my($RecordLine, $ContinuationNum, $ExperimentalTechnique); 404 405 $ExperimentalTechnique = undef; 406 407 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 408 if (_IsRecordType($RecordLine, 'EXPDTA')) { 409 ($ContinuationNum, $ExperimentalTechnique) = ParseExpdtaRecordLine($RecordLine); 410 last LINE; 411 } 412 } 413 414 return $ExperimentalTechnique; 415 } 416 417 # 418 # Get experimental technique resolution information... 419 # 420 sub GetExperimentalTechniqueResolution { 421 my($PDBRecordLinesRef) = @_; 422 my($RecordLine, $Resolution, $ResolutionUnits); 423 424 ($Resolution, $ResolutionUnits) = ((undef) x 2); 425 426 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 427 if ($RecordLine =~ /^REMARK 2 RESOLUTION./i) { 428 ($Resolution, $ResolutionUnits) = ParseRemark2ResolutionRecordLine($RecordLine); 429 last LINE; 430 } 431 } 432 433 return ($Resolution, $ResolutionUnits); 434 } 435 436 # 437 # Parse HEADER record line... 438 sub ParseHeaderRecordLine { 439 my($Line) = @_; 440 my($Classification, $DepositionDate, $IDCode) = (undef, undef, undef); 441 442 if ($Line !~ /^HEADER/i) { 443 return ($Classification, $DepositionDate, $IDCode); 444 } 445 my($Length); 446 447 ($Classification, $DepositionDate, $IDCode) = ('') x 3; 448 449 $Length = length $Line; 450 451 if ($Length <= 62) { 452 ($Classification, $DepositionDate) = unpack("x10A40A9", $Line); 453 } 454 else { 455 ($Classification, $DepositionDate, $IDCode) = unpack("x10A40A9x3A4", $Line); 456 } 457 458 $Classification = RemoveLeadingAndTrailingWhiteSpaces($Classification); 459 $DepositionDate =~ s/ //g; 460 $IDCode =~ s/ //g; 461 462 return ($Classification, $DepositionDate, $IDCode); 463 } 464 465 # 466 # Generate HEADER record line... 467 sub GenerateHeaderRecordLine { 468 my($Classification, $Date, $IDCode, $Line); 469 470 $Classification = "Created using MayaChemTools"; 471 $Date = GenerateHeaderRecordTimeStamp(); 472 if (@_ == 3) { 473 ($IDCode, $Classification, $Date) = @_; 474 } 475 elsif (@_ == 2) { 476 ($IDCode, $Classification) = @_; 477 } 478 elsif (@_ == 1) { 479 ($IDCode) = @_; 480 } 481 482 $Line = sprintf "HEADER %-40.40s%9.9s %4.4s", $Classification, $Date, $IDCode; 483 return $Line; 484 } 485 486 # Generate PDB header time stamp... 487 sub GenerateHeaderRecordTimeStamp { 488 return TimeUtil::PDBFileTimeStamp(); 489 } 490 491 # 492 # Parse ATOM record line. 493 # 494 sub ParseAtomRecordLine { 495 my($Line) = @_; 496 497 return _ParseAtomOrHetatmRecordLine($Line); 498 } 499 500 # Generate ATOM record line... 501 sub GenerateAtomRecordLine { 502 my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_; 503 504 return _GenerateAtomOrHetatmRecordLine('ATOM', $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge); 505 } 506 507 # 508 # Parse ATOM/HETATm record line. 509 # 510 sub ParseAtomOrHetatmRecordLine { 511 my($Line) = @_; 512 513 return _ParseAtomOrHetatmRecordLine($Line); 514 } 515 516 # Generate ATOM/HETATM record line... 517 sub GenerateAtomOrHetatmRecordLine { 518 my($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_; 519 520 return _GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge); 521 } 522 # 523 # Parse HETATM record line... 524 # 525 sub ParseHetatmRecordLine { 526 my($Line) = @_; 527 528 return _ParseAtomOrHetatmRecordLine($Line); 529 } 530 531 # Generate HETATM record line... 532 sub GenerateHetatmRecordLine { 533 my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_; 534 535 return _GenerateAtomOrHetatmRecordLine('HETATM', $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge); 536 } 537 538 # Parse EXPDTA record line... 539 # 540 # EXPDTA format: 541 # 542 #1 - 6 Record name "EXPDTA" 543 # 9 - 10 Continuation continuation Allows concatenation of multiple records. 544 # 11 - 70 SList technique The experimental technique(s) with optional comment describing the sample or experiment. 545 # 546 # The EXPDTA record identifies the experimental technique used. This may refer to the type of radiation and 547 # sample, or include the spectroscopic or modeling technique. Permitted values include: 548 # 549 # ELECTRON DIFFRACTION 550 # FIBER DIFFRACTION 551 # FLUORESCENCE TRANSFER 552 # NEUTRON DIFFRACTION 553 # NMR 554 # THEORETICAL MODEL 555 # X-RAY DIFFRACTION 556 # 557 sub ParseExpdtaRecordLine { 558 my($Line) = @_; 559 560 if ($Line !~ /^EXPDTA/i) { 561 return ((undef) x 2); 562 } 563 564 my($ContinuationNum, $ExperimentalTechnique) = unpack("x8A2A60" , $Line); 565 566 $ContinuationNum =~ s/ //g; 567 $ExperimentalTechnique = RemoveLeadingAndTrailingWhiteSpaces($ExperimentalTechnique); 568 569 return ($ContinuationNum, $ExperimentalTechnique); 570 } 571 572 # Parse REMARK 2 record line... 573 # 574 # REMARK 2 format: 575 # 576 # The second REMARK 2 record has one of two formats. The first is used for diffraction studies, the second 577 # for other types of experiments in which resolution is not relevant, e.g., NMR and theoretical modeling. 578 # 579 #For diffraction experiments: 580 # 581 # 1 - 6 Record name "REMARK" 582 # 10 LString(1) "2" 583 # 12 - 22 LString(11) "RESOLUTION." 584 # 23 - 27 Real(5.2) resolution Resolution. 585 # 29 - 38 LString(10) "ANGSTROMS." 586 # 587 # REMARK 2 when not a diffraction experiment: 588 # 589 # 1 - 6 Record name "REMARK" 590 # 10 LString(1) "2" 591 # 12 - 38 LString(28) "RESOLUTION. NOT APPLICABLE." 592 # 41 - 70 String comment Comment. 593 # 594 sub ParseRemark2ResolutionRecordLine { 595 my($Line) = @_; 596 597 if ($Line !~ /^REMARK 2 RESOLUTION./i) { 598 return ((undef) x 2); 599 } 600 601 my($Resolution, $ResolutionUnits); 602 603 if ($Line =~ /NOT APPLICABLE/i) { 604 ($Resolution, $ResolutionUnits) = ("NOT APPLICABLE", ""); 605 } 606 else { 607 ($Resolution, $ResolutionUnits) = unpack("x22A5x1A10" , $Line); 608 } 609 610 $Resolution = RemoveLeadingAndTrailingWhiteSpaces($Resolution); 611 612 $ResolutionUnits = RemoveLeadingAndTrailingWhiteSpaces($ResolutionUnits); 613 $ResolutionUnits =~ s/\.$//; 614 615 return ($Resolution, $ResolutionUnits); 616 } 617 618 # 619 # Parse SEQRES record line... 620 # 621 # SEQRES format: 622 # 623 # 1 - 6 Record name "SEQRES" 624 # 9 - 10 Serial number of the SEQRES record for the current chain. Starts at 1 and increments by one each line. Reset to 1 for each chain. 625 # 12 - Chain identifier 626 # 14 - 17 Integer numRes Number of residues in the chain 627 # 20 - 22 24 -26 ... ... 68 - 70 Residue name resName Residue name. 628 # 629 sub ParseSeqresRecordLine { 630 my($Line) = @_; 631 632 if ($Line !~ /^SEQRES/i) { 633 return ((undef) x 5); 634 } 635 my($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNames) = unpack("x8A2x1A1x1A4x2A51" , $Line); 636 $RecordSerialNumber =~ s/ //g; 637 $ChainID =~ s/ //g; 638 $NumOfResidues =~ s/ //g; 639 $ResidueNames = RemoveLeadingAndTrailingWhiteSpaces($ResidueNames); 640 641 return ($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNames); 642 } 643 644 # 645 # Parse CONECT record line... 646 # 647 # CONECT format: 648 # 649 # 1 - 6 Record name "CONECT" 650 # 7 - 11 Atom number 651 # 12 - 16, 17 - 21, 22 - 26, 27 - 31 Atom number of bonded atom 652 # 653 # 32 - 36, 37 - 41 Atom number of hydrogen bonded atom 654 # 42 - 46 Atom number of salt bridged atom 655 # 47 - 51, 52 -56 Atom number of hydrogen bonded atom 656 # 57 - 61 Atom number of salt bridged atom 657 # 658 sub ParseConectRecordLine { 659 my($Line) = @_; 660 661 if ($Line !~ /^CONECT/i) { 662 return ((undef) x 11); 663 } 664 my($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2) = map {s/ //g; $_} unpack("x6A5A5A5A5A5A5A5A5A5A5A5", $Line); 665 666 return ($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2); 667 } 668 669 # Generate CONECT record line... 670 sub GenerateConectRecordLine { 671 my($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2) = @_; 672 my($Line); 673 674 $Line = sprintf "CONECT%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s", $AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2; 675 676 return $Line; 677 } 678 679 # 680 # Parse TER record line... 681 # 682 # TER format: 683 # 684 #1 - 6 Record name "TER " 685 # 7 - 11 Serial number 686 # 18 - 20 Residue name 687 # 22 Chain identifier 688 # 23 - 26 Residue sequence number 689 # 27 Insertion code 690 # 691 sub ParseTerRecordLine { 692 my($Line) = @_; 693 694 if ($Line !~ /^TER/i) { 695 return ((undef) x 5); 696 } 697 my($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $Length); 698 699 ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = ('') x 5; 700 701 $Length = length $Line; 702 703 if ($Length <= 17) { 704 ($SerialNumber, $ResidueName) = map {s/ //g; $_} unpack("x6A5", $Line); 705 } 706 elsif ($Length <= 21) { 707 ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = map {s/ //g; $_} unpack("x6A5x6A3", $Line); 708 } 709 else { 710 ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = map {s/ //g; $_} unpack("x6A5x6A3xA1A4A1", $Line); 711 } 712 713 return ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode); 714 } 715 716 # Generate TER record line... 717 sub GenerateTerRecordLine { 718 my($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $Line) = ('') x 6; 719 720 if (@_ == 5) { 721 ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = @_; 722 } 723 elsif (@_ == 4) { 724 ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber) = @_; 725 } 726 elsif (@_ == 3) { 727 ($SerialNumber, $ResidueName) = @_; 728 } 729 elsif (@_ == 2) { 730 ($SerialNumber, $ResidueName) = @_; 731 } 732 elsif (@_ == 1) { 733 ($SerialNumber) = @_; 734 } 735 $Line = sprintf "TER %5.5s %-3.3s %1.1s%4.4s%1.1s", $SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode; 736 737 return $Line; 738 } 739 740 # 741 # Parse MASTER record line... 742 # 743 # MASTER record format: 744 # 745 #1 - 6 Record name "MASTER" 746 # 11 - 15 Number of REMARK records 747 # 16 - 20 "0" 748 # 21 - 25 Number of HET records 749 # 26 - 30 Number of HELIX records 750 # 31 - 35 Number of SHEET records 751 # 36 - 40 Number of TURN records 752 # 41 - 45 Number of SITE records 753 # 46 - 50 Number of coordinate transformation records (ORIGXn+SCALEn+MTRIXn) 754 # 51 - 55 Number of atomic coordinate records (ATOM+HETATM) 755 # 56 - 60 Number of TER records 756 # 61 - 65 Number of CONECT records 757 # 66 - 70 Number of SEQRES records 758 # 759 sub ParseMasterRecordLine { 760 my($Line) = @_; 761 762 if ($Line !~ /^MASTER/i) { 763 return ((undef) x 11); 764 } 765 my($NumOfRemarkRecords, $NumOfHetRecords, $NumOfHelixRecords, $NumOfSheetRecords, $NumOfTurnRecords, $NumOfSiteRecords, $NumOfTransformationsRecords, $NumOfAtomAndHetatmRecords, $NumOfTerRecords, $NumOfConectRecords, $NumOfSeqresRecords) = map {s/ //g; $_} unpack("x6x4A5x5A5A5A5A5A5A5A5A5A5A5", $Line); 766 767 return ($NumOfRemarkRecords, $NumOfHetRecords, $NumOfHelixRecords, $NumOfSheetRecords, $NumOfTurnRecords, $NumOfSiteRecords, $NumOfTransformationsRecords, $NumOfAtomAndHetatmRecords, $NumOfTerRecords, $NumOfConectRecords, $NumOfSeqresRecords); 768 } 769 770 # End record... 771 sub GenerateEndRecordLine { 772 my($Line); 773 $Line = 'END '; 774 return $Line; 775 } 776 777 # ATOM/HETATM record format: 778 # 779 # 1 - 6 Record name 780 # 7 - 11 Atom serial number - right justified 781 # 13 - 16 Atom name 782 # 17 Alternate location indicator. 783 # 18 - 20 Residue name - right justified 784 # 22 Chain identifier. 785 # 23 - 26 Residue sequence number - right justified 786 # 27 Code for insertion of residues. 787 # 31 - 38 Real(8.3), Orthogonal coordinates for X in Angstroms. 788 # 39 - 46 Real(8.3), Orthogonal coordinates for Y in Angstroms. 789 # 47 - 54 Real(8.3), Orthogonal coordinates for Z in Angstroms. 790 # 55 - 60 Real(6.2), Occupancy 791 # 61 - 66 Real(6.2), Temperature factor 792 # 73 - 76 LString(4), Segment identifier, left-justified. 793 # 77 - 78 LString(2), Element symbol, right-justified. 794 #79 - 80 LString(2), Charge on the atom. 795 # 796 # Notes: 797 # . Atom names starting with C, N, O and S are left justified starting with column 14 798 # and others are left justified starting with column 13. 799 # 800 # . Six characters (columns) are reserved for atom names, assigned as follows: 801 # 802 # 13 - 14 Chemical symbol - right justified, except for hydrogen atoms 803 # 804 # And for amino acids: 805 # 806 # 15 Remoteness indicator (alphabetic) (A, B, G, D, E, Z and so on) 807 # 16 Branch designator (numeric) 808 # 809 sub _ParseAtomOrHetatmRecordLine { 810 my($Line) = @_; 811 812 if ($Line !~ /^(ATOM|HETATM)/i) { 813 return ((undef) x 15); 814 } 815 my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $Length); 816 817 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ('') x 15; 818 819 $Length = length $Line; 820 821 if ($Length <= 72) { 822 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor) = map {s/ //g; $_} unpack("x6A5xA4A1A3xA1A4A1x3A8A8A8A6A6", $Line); 823 } 824 else { 825 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = map {s/ //g; $_} unpack("x6A5xA4A1A3xA1A4A1x3A8A8A8A6A6x6A4A2A2", $Line); 826 } 827 return($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge); 828 } 829 830 # Generate ATOM/HETATM record line... 831 sub _GenerateAtomOrHetatmRecordLine { 832 my($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_; 833 my($Line, $AtomNameFormat); 834 835 if (length($AtomName) >= 4) { 836 # Left justified starting at column 13 for all atom names of length 4... 837 $AtomNameFormat = "%-4.4s"; 838 } 839 elsif (IsEmpty($ElementSymbol)) { 840 # No element symbol specified; just guess from atom name to cover most likely cases... 841 $AtomNameFormat = ($AtomName =~ /^(C|N|O|S)/i) ? " %-3.3s" : "%-4.4s"; 842 } 843 else { 844 # Element symbol specified... 845 if ($ElementSymbol =~ /^H$/i) { 846 # Hydrogen atom name with <=3 characters is left justified starting at column 14; 847 # Otherwise, left justified starting at column 13. 848 $AtomNameFormat = (length($AtomName) <= 3) ? " %-3.3s" : "%-4.4s"; 849 } 850 else { 851 # Non-hydrogen atom name... 852 $AtomNameFormat = (length($ElementSymbol) == 1) ? " %-3.3s" : "%-4.4s"; 853 } 854 } 855 856 $Line = sprintf "%-6.6s%5.5s ${AtomNameFormat}%1.1s%3.3s %1.1s%4.4s%1.1s %8.8s%8.8s%8.8s%6.6s%6.6s %-4.4s%2.2s%2.2s", $RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge; 857 858 return $Line; 859 } 860 861 # Check record type... 862 sub _IsRecordType { 863 my($Line, $SpecifiedType) = @_; 864 my($Type, $Status); 865 866 ($Type) = map {s/ //g; $_} unpack("A6", $Line); 867 868 $Status = ($SpecifiedType eq $Type) ? 1 : 0; 869 870 return $Status; 871 } 872 873 # Get record type... 874 sub _GetRecordType { 875 my($Line) = @_; 876 my($Type); 877 878 ($Type) = map {s/ //g; $_} unpack("A6", $Line); 879 880 return $Type; 881 } 882 883 # Get chains and residues data using ATOM/HETATM records... 884 # 885 sub _GetChainsAndResiduesFromAtomHetatmRecords { 886 my($PDBRecordLinesRef, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag) = @_; 887 888 my($LineCount, $TotalChainCount, $PreviousResidueNumber, $ChainCount, $DefaultChainID, $DefaultChainLabel, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, %ChainsDataMap); 889 890 # Do a quick chain count using TER record... 891 $TotalChainCount = 0; 892 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 893 if (IsEndmdlRecordType($RecordLine)) { 894 last LINE; 895 } 896 if (IsTerRecordType($RecordLine)) { 897 $TotalChainCount++; 898 } 899 } 900 901 %ChainsDataMap = (); 902 @{$ChainsDataMap{ChainIDs}} = (); 903 %{$ChainsDataMap{Residues}} = (); 904 %{$ChainsDataMap{ResidueNumbers}} = (); 905 %{$ChainsDataMap{Lines}} = (); 906 %{$ChainsDataMap{ResidueCount}} = (); 907 908 $LineCount = 0; 909 $ChainCount = 0; 910 $DefaultChainLabel = 'None'; 911 $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1); 912 $PreviousResidueNumber = 0; 913 914 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 915 $LineCount++; 916 if (IsTerRecordType($RecordLine)) { 917 $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1); 918 $ChainCount++; 919 if ($ChainCount == $TotalChainCount) { 920 last LINE; 921 } 922 else { 923 next LINE; 924 } 925 } 926 elsif (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) { 927 next LINE; 928 } 929 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine); 930 931 if (IsEmpty($ChainID)) { 932 $ChainID = $DefaultChainID; 933 } 934 if (exists $ChainsDataMap{Residues}{$ChainID}) { 935 # Data for existing chain... 936 if ($GetRecordLinesFlag) { 937 push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine; 938 } 939 940 if ($ResidueNumber != $PreviousResidueNumber) { 941 # Next residue with in the chain... 942 push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName; 943 push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber; 944 945 if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) { 946 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1; 947 } 948 else { 949 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1; 950 } 951 $PreviousResidueNumber = $ResidueNumber; 952 } 953 } 954 else { 955 # Data for new chain... 956 push @{$ChainsDataMap{ChainIDs}}, $ChainID; 957 958 @{$ChainsDataMap{Residues}{$ChainID}} = (); 959 push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName; 960 961 @{$ChainsDataMap{ResidueNumbers}{$ChainID}} = (); 962 push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber; 963 964 @{$ChainsDataMap{Lines}{$ChainID}} = (); 965 if ($GetRecordLinesFlag) { 966 push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine; 967 } 968 969 %{$ChainsDataMap{ResidueCount}{$ChainID}} = (); 970 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1; 971 $PreviousResidueNumber = $ResidueNumber; 972 } 973 } 974 if (!$GetChainResiduesBeyondTERFlag) { 975 return \%ChainsDataMap; 976 } 977 # Look for any HETATM residues specified outside TER records which could belong to an existing chain... 978 my($LineIndex, $PreviousChainID); 979 $PreviousChainID = ''; 980 $PreviousResidueNumber = 0; 981 LINE: for $LineIndex (($LineCount - 1) .. $#{$PDBRecordLinesRef}) { 982 $RecordLine = $PDBRecordLinesRef->[$LineIndex]; 983 if (IsEndmdlRecordType($RecordLine)) { 984 last LINE; 985 } 986 if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) { 987 next LINE; 988 } 989 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine); 990 if (IsEmpty($ChainID)) { 991 # Ignore the chains with no ids... 992 next LINE; 993 } 994 if (! exists($ChainsDataMap{Residues}{$ChainID})) { 995 # Don't collect any new chains after TER record... 996 next LINE; 997 } 998 if ($GetRecordLinesFlag) { 999 push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine; 1000 } 1001 if ($ResidueNumber != $PreviousResidueNumber || $ChainID ne $PreviousChainID) { 1002 1003 push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName; 1004 push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber; 1005 1006 if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) { 1007 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1; 1008 } 1009 else { 1010 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1; 1011 } 1012 $PreviousChainID = $ChainID; 1013 $PreviousResidueNumber = $ResidueNumber; 1014 } 1015 } 1016 return \%ChainsDataMap; 1017 } 1018 1019 # Get chains and residues data using SEQRES records... 1020 # 1021 sub _GetChainsAndResiduesFromSeqresRecords { 1022 my($PDBRecordLinesRef) = @_; 1023 1024 my($ChainCount, $DefaultChainLabel, $DefaultChainID, $RecordLine, $RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueName, $ResidueNamesString, @ResidueNamesList, %ChainsDataMap); 1025 1026 %ChainsDataMap = (); 1027 @{$ChainsDataMap{ChainIDs}} = (); 1028 %{$ChainsDataMap{Residues}} = (); 1029 %{$ChainsDataMap{ResidueNumbers}} = (); 1030 %{$ChainsDataMap{ResidueCount}} = (); 1031 1032 $ChainCount = 0; 1033 $DefaultChainLabel = 'None'; 1034 $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1); 1035 1036 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 1037 if (!IsSeqresRecordType($RecordLine)) { 1038 next LINE; 1039 } 1040 ($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNamesString) = ParseSeqresRecordLine($RecordLine); 1041 if ($RecordSerialNumber == 1) { 1042 # Indicates start of a new chain... 1043 $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1); 1044 $ChainCount++; 1045 } 1046 if (IsEmpty($ChainID)) { 1047 $ChainID = $DefaultChainID; 1048 } 1049 # Process the residues... 1050 @ResidueNamesList = split /[ ]+/, $ResidueNamesString; 1051 1052 if (exists $ChainsDataMap{Residues}{$ChainID}) { 1053 # Data for existing chain... 1054 push @{$ChainsDataMap{Residues}{$ChainID}}, @ResidueNamesList; 1055 } 1056 else { 1057 # Data for new chain... 1058 push @{$ChainsDataMap{ChainIDs}}, $ChainID; 1059 @{$ChainsDataMap{Residues}{$ChainID}} = (); 1060 push @{$ChainsDataMap{Residues}{$ChainID}}, @ResidueNamesList; 1061 } 1062 1063 # Setup residue count... 1064 for $ResidueName (@ResidueNamesList) { 1065 if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) { 1066 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1; 1067 } 1068 else { 1069 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1; 1070 } 1071 } 1072 } 1073 return \%ChainsDataMap; 1074 } 1075