comparison mayachemtool/mayachemtools/bin/InfoPDBFiles.pl @ 0:68300206e90d draft default tip

Uploaded
author deepakjadmin
date Thu, 05 Nov 2015 02:41:30 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:68300206e90d
1 #!/usr/bin/perl -w
2 #
3 # $RCSfile: InfoPDBFiles.pl,v $
4 # $Date: 2015/02/28 20:46:20 $
5 # $Revision: 1.36 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability or fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use FindBin; use lib "$FindBin::Bin/../lib";
31 use Getopt::Long;
32 use File::Basename;
33 use Text::ParseWords;
34 use Benchmark;
35 use FileUtil;
36 use TextUtil;
37 use PDBFileUtil;
38
# File-scoped state shared with the subroutines defined below: the script
# name, the raw command line options and the benchmark timestamps...
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
# Direct method call instead of the ambiguous indirect object form "new Benchmark"...
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

# Remaining command line arguments name the input PDB files...
my(@PDBFilesList);
@PDBFilesList = ExpandFileNames(\@ARGV, "pdb");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
my(%PDBFilesInfo);
print "Checking input PDB file(s)...\n";
RetrievePDBFilesInfo();

# Process input files; files which failed the checks above are skipped...
my($FileIndex);
if (@PDBFilesList > 1) {
  print "\nProcessing PDB files...\n";
}
for $FileIndex (0 .. $#PDBFilesList) {
  if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
    ListPDBFileInfo($FileIndex);
  }
}
ListTotalSizeOfFiles();

print "\n$ScriptName:Done...\n\n";

# Report elapsed time...
$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
86
87 ###############################################################################
88
# List the requested information for one input PDB file...
#
# Takes the index of the file in @PDBFilesList. The file is read once via
# ReadPDBFile and the resulting record lines reference is handed to each
# reporting routine enabled in %OptionsInfo. Total record count, record type
# counts, file size and modification time are always printed.
#
sub ListPDBFileInfo {
  my($Index) = @_;

  my $RecordLinesRef = ReadPDBFile($PDBFilesList[$Index]);

  # Header and experiment information...
  ListHeaderInfo($RecordLinesRef) if $OptionsInfo{ListHeaderInfo};
  ListExperimentalTechniqueInfo($RecordLinesRef) if $OptionsInfo{ListExperimentalTechniqueInfo};

  # Total number of records, followed by the record type counts...
  print "\nTotal number of records: ", scalar(@{$RecordLinesRef}), "\n";
  ListRecordTypesInfo($RecordLinesRef);

  # Chain and residue level reports...
  my $ChainsInfoWanted = $OptionsInfo{CountChains} || $OptionsInfo{CountResiduesInChains} || $OptionsInfo{ResiduesFrequencyInChains};
  ListChainsAndResiduesInfo($RecordLinesRef) if $ChainsInfoWanted;

  my $AllResiduesInfoWanted = $OptionsInfo{CountResiduesAll} || $OptionsInfo{ResiduesFrequencyAll};
  ListAllResiduesInfo($RecordLinesRef) if $AllResiduesInfoWanted;

  ListResidueNumbersInfo($RecordLinesRef) if $OptionsInfo{ResidueNumbersInfo};
  ListBoundingBox($RecordLinesRef) if $OptionsInfo{CalculateBoundingBox};

  # File size and modification information collected by RetrievePDBFilesInfo...
  print "\nFile size: ", FormatFileSize($PDBFilesInfo{FileSize}[$Index]), " \n";
  print "Last modified: ", $PDBFilesInfo{FileLastModified}[$Index], " \n";
}
131
# List classification, ID code and deposition date from the header record...
#
# Only the first record line is examined. Any value that is missing, or the
# whole set when the first line is not a header record, is reported as
# 'Not available'.
#
sub ListHeaderInfo {
  my($PDBRecordLinesRef) = @_;
  my($Classification, $DepositionDate, $IDCode);

  # An empty record list has no first line; don't hand an undefined value
  # to the record type check...
  my $HeaderRecordLine = $PDBRecordLinesRef->[0];
  if (defined($HeaderRecordLine) && IsHeaderRecordType($HeaderRecordLine)) {
    ($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine);
  }

  # The loop variable aliases each scalar, so the assignment updates it in
  # place. IsEmpty presumably treats undefined values as empty; the values
  # are undefined here whenever no header record was parsed.
  for my $Value ($Classification, $DepositionDate, $IDCode) {
    $Value = 'Not available' if IsEmpty($Value);
  }

  print "\nClassification: $Classification\nID: $IDCode\nDeposition date: $DepositionDate\n";
}
148
# List experimental technique and resolution...
#
# Either value is shown as "Not available" when the corresponding lookup
# returns a false value.
#
sub ListExperimentalTechniqueInfo {
  my($PDBRecordLinesRef) = @_;

  my $Technique = GetExperimentalTechnique($PDBRecordLinesRef);
  my $TechniqueLabel = "Not available";
  if ($Technique) {
    $TechniqueLabel = $Technique;
  }
  print "\nExperimental technique: $TechniqueLabel\n";

  my($Resolution, $ResolutionUnits) = GetExperimentalTechniqueResolution($PDBRecordLinesRef);
  my $ResolutionLabel = "Not available";
  if ($Resolution) {
    $ResolutionLabel = "$Resolution $ResolutionUnits";
  }
  print "Resolution: $ResolutionLabel\n";
}
161
# List the number of records for each record type of interest...
#
# With a CountRecordType of "All" (case insensitive) every record type
# returned by GetRecordTypesCount is listed in the order given by its
# RecordTypes list; otherwise only the explicitly specified record types are
# listed and a type missing from the file is shown with a count of 0. The
# MASTER record check runs afterwards when enabled.
#
sub ListRecordTypesInfo {
  my($PDBRecordLinesRef) = @_;

  my $CountsRef = GetRecordTypesCount($PDBRecordLinesRef);
  my $ListAllTypes = ($OptionsInfo{CountRecordType} =~ /^All$/i);

  my @CountLabels = ();
  if ($ListAllTypes) {
    for my $Type (@{$CountsRef->{RecordTypes}}) {
      push @CountLabels, join(' - ', $Type, $CountsRef->{Count}{$Type});
    }
  }
  else {
    for my $Type (@{$OptionsInfo{SpecifiedRecordTypes}}) {
      my $Count = 0;
      if (exists $CountsRef->{Count}{$Type}) {
        $Count = $CountsRef->{Count}{$Type};
      }
      push @CountLabels, join(' - ', $Type, $Count);
    }
  }
  print "Number of individual records: ", JoinWords(\@CountLabels, '; ', 0), "\n";

  CheckMasterRecord($CountsRef, $PDBRecordLinesRef) if $OptionsInfo{CheckMasterRecord};
}
188
# List information about residues and chains...
#
# Depending on %OptionsInfo, prints:
#   . Number of chains and their IDs (CountChains)
#   . Number of residues per chain; the residue names as well at detail
#     level 3 or higher (CountResiduesInChains)
#   . Residue frequency per chain and, for multiple chains, across all the
#     chains; percent values are added at detail level 2 or higher
#     (ResiduesFrequencyInChains)
#
# Chain IDs are always passed through CleanupChainID before being displayed.
# Frequency lists are ordered by descending count, with residue names in
# ascending order for equal counts, both for individual chains and for the
# all chains total.
#
sub ListChainsAndResiduesInfo {
  my($PDBRecordLinesRef) = @_;

  my $CollectChainResiduesBeyondTER = 1;
  my $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'AtomAndHetatm', $CollectChainResiduesBeyondTER);

  my @ChainIDs = @{$ChainsAndResiduesInfoRef->{ChainIDs}};
  my $ChainCount = @ChainIDs;

  if ($OptionsInfo{CountChains}) {
    print "\nNumber of chains: $ChainCount \n";
    my @ChainIDsList = map { CleanupChainID($_) } @ChainIDs;
    print "Chain IDs: ", JoinWords(\@ChainIDsList, ', ', 0),"\n";
  }

  if ($OptionsInfo{CountResiduesInChains}) {
    my $TotalResiduesCount = 0;
    my @ResiduesCountInfo = ();
    for my $ChainID (@ChainIDs) {
      my $ResidueCount = @{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}};
      $TotalResiduesCount += $ResidueCount;
      # Display the cleaned up chain ID, like every other chain listing...
      push @ResiduesCountInfo, "Chain " . CleanupChainID($ChainID) . " - ${ResidueCount}";
    }
    print "\nNumber of residues in chain(s): ";
    if ($ChainCount > 1) {
      print "Total - $TotalResiduesCount; ", JoinWords(\@ResiduesCountInfo, '; ', 0),"\n";
    }
    else {
      print "$TotalResiduesCount\n";
    }

    # List of residues in each chain...
    if ($OptionsInfo{DetailLevel} >= 3) {
      print "List of residues in chain(s): \n";
      for my $ChainID (@ChainIDs) {
        if ($ChainCount > 1) {
          print "Chain ", CleanupChainID($ChainID), ": ";
        }
        print JoinWords(\@{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}}, ', ', 0),"\n";
      }
    }
  }

  if ($OptionsInfo{ResiduesFrequencyInChains}) {
    # Percent value rounded to one decimal place; adding 0 drops a trailing ".0"...
    my $FormatPercent = sub {
      my($Count, $Total) = @_;
      return sprintf("%.1f", (($Count/$Total)*100)) + 0;
    };

    # Setup distribution data for individual chains and collect counts for
    # the grand total as well...
    my(%ChainFrequency, %ChainPercentFrequency, %AllChainsResidueCount);
    my $TotalResidueCount = 0;

    for my $ChainID (@ChainIDs) {
      my $ResidueCountRef = $ChainsAndResiduesInfoRef->{ResidueCount}{$ChainID} || {};
      my $ChainResidueCount = @{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}};
      $TotalResidueCount += $ChainResidueCount;

      $ChainFrequency{$ChainID} = [];
      $ChainPercentFrequency{$ChainID} = [];

      my @SortedResidueNames = sort { $ResidueCountRef->{$b} <=> $ResidueCountRef->{$a} || $a cmp $b } keys %{$ResidueCountRef};
      for my $ResidueName (@SortedResidueNames) {
        my $ResidueCount = $ResidueCountRef->{$ResidueName};
        my $PercentResidueCount = $FormatPercent->($ResidueCount, $ChainResidueCount);

        push @{$ChainFrequency{$ChainID}}, "${ResidueName} - ${ResidueCount}";
        push @{$ChainPercentFrequency{$ChainID}}, "${ResidueName} - ${PercentResidueCount}%";

        $AllChainsResidueCount{$ResidueName} += $ResidueCount;
      }
    }

    # Distribution across all the chains is only reported for multiple chains...
    my(@AllChainsFrequency, @AllChainsPercentFrequency);
    if ($ChainCount > 1) {
      my @SortedResidueNames = sort { $AllChainsResidueCount{$b} <=> $AllChainsResidueCount{$a} || $a cmp $b } keys %AllChainsResidueCount;
      for my $ResidueName (@SortedResidueNames) {
        my $ResidueCount = $AllChainsResidueCount{$ResidueName};
        my $PercentResidueCount = $FormatPercent->($ResidueCount, $TotalResidueCount);

        push @AllChainsFrequency, "${ResidueName} - ${ResidueCount}";
        push @AllChainsPercentFrequency, "${ResidueName} - ${PercentResidueCount}%";
      }
    }

    # List distribution of residues
    print "\nDistribution of residues in chain(s): \n";
    for my $ChainID (@ChainIDs) {
      if ($ChainCount > 1) {
        print "Chain ", CleanupChainID($ChainID), ": ";
      }
      print JoinWords($ChainFrequency{$ChainID}, '; ', 0), "\n";
    }
    if ($OptionsInfo{DetailLevel} >= 2) {
      print "\nPercent distribution of residues in chain(s): \n";
      for my $ChainID (@ChainIDs) {
        if ($ChainCount > 1) {
          print "Chain ", CleanupChainID($ChainID), ": ";
        }
        print JoinWords($ChainPercentFrequency{$ChainID}, '; ', 0), "\n";
      }
    }
    if ($ChainCount > 1) {
      print "\nDistribution of residues across all chains: \n";
      print JoinWords(\@AllChainsFrequency, '; ', 0), "\n";

      if ($OptionsInfo{DetailLevel} >= 2) {
        print "\nPercent distribution of residues across all chains: \n";
        print JoinWords(\@AllChainsPercentFrequency, '; ', 0), "\n";
      }
    }
  }
}
354
# List information about all the residues...
#
# Prints the total, ATOM and HETATM residue counts (CountResiduesAll), with
# the residue names added at detail level 3 or higher, and the residue
# frequency distributions (ResiduesFrequencyAll). ATOM and HETATM
# distributions are included at detail level 1 or higher and percent values
# at detail level 2 or higher. All percent values are relative to the total
# number of residues.
#
sub ListAllResiduesInfo {
  my($PDBRecordLinesRef) = @_;

  my $ResiduesInfoRef = GetAllResidues($PDBRecordLinesRef);
  my $TotalResidueCount = @{$ResiduesInfoRef->{ResidueNames}};
  my $AtomResiduesCount = @{$ResiduesInfoRef->{AtomResidueNames}};
  my $HetatmResiduesCount = @{$ResiduesInfoRef->{HetatmResidueNames}};

  if ($OptionsInfo{CountResiduesAll}) {
    print "\nTotal number of residues: Total - $TotalResidueCount; ATOM residues - $AtomResiduesCount; HETATM residues - $HetatmResiduesCount\n";

    if ($OptionsInfo{DetailLevel} >= 3) {
      print "List of residues: \n";
      print "ATOM residues: ", JoinWords(\@{$ResiduesInfoRef->{AtomResidueNames}}, ', ', 0), "\n" if $AtomResiduesCount;
      print "HETATM residues: ", JoinWords(\@{$ResiduesInfoRef->{HetatmResidueNames}}, ', ', 0), "\n" if $HetatmResiduesCount;
    }
  }

  return unless $OptionsInfo{ResiduesFrequencyAll};

  # Turn a residue name to count map into frequency and percent frequency
  # label lists. Names are grouped by count in the order the map yields them
  # and the groups are then emitted by descending count...
  my $BuildDistributionInfo = sub {
    my($NameToCountRef) = @_;
    my(%NamesByCount, @Frequency, @PercentFrequency);

    for my $Name (keys %{$NameToCountRef}) {
      push @{$NamesByCount{$NameToCountRef->{$Name}}}, $Name;
    }
    for my $Count (sort {$b <=> $a} keys %NamesByCount) {
      my $Percent = sprintf("%.1f", (($Count/$TotalResidueCount)*100)) + 0;
      for my $Name (@{$NamesByCount{$Count}}) {
        push @Frequency, "${Name} - ${Count}";
        push @PercentFrequency, "${Name} - ${Percent}";
      }
    }
    return (\@Frequency, \@PercentFrequency);
  };

  my $ListAtomAndHetatm = ($OptionsInfo{DetailLevel} >= 1);
  my $ListPercent = ($OptionsInfo{DetailLevel} >= 2);

  # Setup distribution of residues info before printing anything...
  my($FrequencyRef, $PercentFrequencyRef) = $BuildDistributionInfo->($ResiduesInfoRef->{ResidueCount});
  my($AtomFrequencyRef, $AtomPercentFrequencyRef, $HetatmFrequencyRef, $HetatmPercentFrequencyRef);
  if ($ListAtomAndHetatm) {
    ($AtomFrequencyRef, $AtomPercentFrequencyRef) = $BuildDistributionInfo->($ResiduesInfoRef->{AtomResidueCount});
    ($HetatmFrequencyRef, $HetatmPercentFrequencyRef) = $BuildDistributionInfo->($ResiduesInfoRef->{HetatmResidueCount});
  }

  # List distribution of residues
  print "\nDistribution of residues: ", JoinWords($FrequencyRef,'; ', 0), "\n";
  print "\nPercent distribution of residues: ", JoinWords($PercentFrequencyRef,'; ', 0), "\n" if $ListPercent;

  if ($ListAtomAndHetatm) {
    print "\nDistribution of ATOM residues: ", JoinWords($AtomFrequencyRef,'; ', 0), "\n";
    print "\nPercent distribution of ATOM residues: ", JoinWords($AtomPercentFrequencyRef,'; ', 0), "\n" if $ListPercent;

    print "\nDistribution of HETATM residues: ", JoinWords($HetatmFrequencyRef,'; ', 0), "\n";
    print "\nPercent distribution of HETATM residues: ", JoinWords($HetatmPercentFrequencyRef,'; ', 0), "\n" if $ListPercent;
  }
}
482
# List information about residue numbers for each chain...
#
# For every chain, prints the number of residues along with the first and
# last residue numbers, and then reports each pair of consecutive residues
# whose numbers don't increase by exactly one. Every such pair is listed as
# a gap; pairs with a decreasing residue number are additionally listed as
# being in descending order.
#
sub ListResidueNumbersInfo {
  my($PDBRecordLinesRef) = @_;

  my $CollectChainResiduesBeyondTER = 0;
  my $ChainsInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'AtomAndHetatm', $CollectChainResiduesBeyondTER);

  print "\nATOM/HETATM residue numbers information for chains:\n";

  for my $ChainID (@{$ChainsInfoRef->{ChainIDs}}) {
    my $NumbersRef = $ChainsInfoRef->{ResidueNumbers}{$ChainID};
    my $NamesRef = $ChainsInfoRef->{Residues}{$ChainID};
    my $ResidueCount = @{$NumbersRef};

    # Start and end residue numbers...
    my $StartResidueNum = $NumbersRef->[0];
    my $EndResidueNum = $NumbersRef->[$ResidueCount - 1];
    print "\nChain ID - ", CleanupChainID($ChainID), "";
    print "; Number of residues: $ResidueCount; Start residue number - $StartResidueNum; End residue number - $EndResidueNum\n";

    # Identify any gaps in residue numbers or non-ascending order residue numbers...
    my @GapPairs = ();
    my @DescendingPairs = ();

    for my $Position (1 .. ($ResidueCount - 1)) {
      my $CurrentNum = $NumbersRef->[$Position];
      my $PreviousNum = $NumbersRef->[$Position - 1];

      # Consecutive residue numbers need no reporting...
      next if $CurrentNum == ($PreviousNum + 1);

      my $PairLabel = $NamesRef->[$Position - 1] . $PreviousNum . " - " . $NamesRef->[$Position] . $CurrentNum;

      if ($CurrentNum < $PreviousNum) {
        push @DescendingPairs, "<${PairLabel}>";
      }

      my $GapLength = abs($CurrentNum - $PreviousNum) - 1;
      push @GapPairs, "<${PairLabel}; $GapLength>";
    }

    my $GapPairsCount = @GapPairs;
    my $DescendingPairsCount = @DescendingPairs;

    # Print gaps information...
    print "Gaps in residue numbers: ", ($GapPairsCount ? "Yes" : "None");
    if ($GapPairsCount) {
      print "; Number of gap residue number pairs: $GapPairsCount; Gap residue pairs: <StartRes-EndRes; GapLength> - ", JoinWords(\@GapPairs, "; ", 0);
    }
    print "\n";

    # Print descending residue order information...
    print "Residue numbers in descending order: ", ($DescendingPairsCount ? "Yes" : "None");
    if ($DescendingPairsCount) {
      print "; Number of descending residue number pairs: $DescendingPairsCount; Descending residue number pairs: <StartRes-EndRes> ", JoinWords(\@DescendingPairs, "; ", 0);
    }
    print "\n";
  }
}
550
# List min/max XYZ coordinates and the size of the bounding box...
#
# Sizes are calculated from the coordinates returned by GetMinMaxCoords
# before any rounding; all the printed values are rounded to three decimal
# places with trailing zeros dropped.
#
sub ListBoundingBox {
  my($PDBRecordLinesRef) = @_;

  my($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax) = GetMinMaxCoords($PDBRecordLinesRef);

  # Adding 0 converts the formatted string back to a number...
  my $RoundValue = sub { return sprintf("%.3f", $_[0]) + 0; };

  my($XSize, $YSize, $ZSize) = map { $RoundValue->($_) } (abs($XMax - $XMin), abs($YMax - $YMin), abs($ZMax - $ZMin));
  ($XMin, $XMax, $YMin, $YMax, $ZMin, $ZMax) = map { $RoundValue->($_) } ($XMin, $XMax, $YMin, $YMax, $ZMin, $ZMax);

  print "\nBounding box coordinates: <XMin, XMax> - <$XMin, $XMax>; <YMin, YMax> - <$YMin, $YMax>; <ZMin, ZMax> - <$ZMin, $ZMax>;\n";
  print "Bounding box size in angstroms: XSize - $XSize; YSize - $YSize; ZSize - $ZSize\n";
}
573
# Check master record counts against actual record counts...
#
# Parameters:
#   $RecordTypesCountRef - reference to a hash whose Count entry maps record
#                          type names to the number of such records found
#   $PDBRecordLinesRef   - reference to the list of record lines, searched
#                          for the first MASTER record
#
# Prints a warning and returns when no MASTER record is present. Otherwise
# prints either the list of count mismatches or a message that all counts
# match.
#
# REMARK, HET, HELIX, SHEET, TURN, SITE, TER, CONECT and SEQRES counts are
# compared only when at least one record of that type was found. The
# transformation (ORIGXn + SCALEn + MTRIXn) and ATOM + HETATM totals are
# always compared.
#
sub CheckMasterRecord {
  my($RecordTypesCountRef, $PDBRecordLinesRef) = @_;

  # Names for the values returned by ParseMasterRecordLine, in order...
  my @MasterFields = qw(REMARK HET HELIX SHEET TURN SITE Transformations AtomAndHetatm TER CONECT SEQRES);

  # Get master record information...
  my(%MasterCount);
  my $MasterRecordFound = 0;

  LINE: for my $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsMasterRecordType($RecordLine)) {
      @MasterCount{@MasterFields} = ParseMasterRecordLine($RecordLine);
      $MasterRecordFound = 1;
      last LINE;
    }
  }
  if (!$MasterRecordFound) {
    print "\nWarning: MASTER record is missing.\n";
    return;
  }

  # Force numeric values so that comparisons and messages use plain numbers...
  for my $Field (@MasterFields) {
    $MasterCount{$Field} += 0;
  }

  my $ExplicitCountRef = $RecordTypesCountRef->{Count} || {};
  my @MasterRecordValidationInfo = ();

  # Compare the count for a record type which maps directly to a MASTER
  # record value; types without any records in the file are skipped...
  my $CheckRecordType = sub {
    my($RecordType) = @_;

    return unless exists $ExplicitCountRef->{$RecordType};
    my $ExplicitCount = $ExplicitCountRef->{$RecordType};
    if ($MasterCount{$RecordType} != $ExplicitCount) {
      push @MasterRecordValidationInfo, "Number of $RecordType records, $MasterCount{$RecordType}, specified in MASTER record doen't match its explict count, $ExplicitCount.";
    }
    return;
  };

  # Add up explicit counts over record types, ignoring missing types...
  my $SumExplicitCounts = sub {
    my(@RecordTypes) = @_;
    my $Sum = 0;

    for my $RecordType (@RecordTypes) {
      $Sum += $ExplicitCountRef->{$RecordType} if exists $ExplicitCountRef->{$RecordType};
    }
    return $Sum;
  };

  for my $RecordType (qw(REMARK HET HELIX SHEET TURN SITE)) {
    $CheckRecordType->($RecordType);
  }

  # Transformation records: ORIGX1-3, SCALE1-3 and MTRIX1-3...
  my @TransformationRecordTypes = ();
  for my $RecordLabel ('ORIGX', 'SCALE', 'MTRIX') {
    for my $ID (1 .. 3) {
      push @TransformationRecordTypes, "${RecordLabel}${ID}";
    }
  }
  my $RecordsCount = $SumExplicitCounts->(@TransformationRecordTypes);
  if ($MasterCount{Transformations} != $RecordsCount) {
    push @MasterRecordValidationInfo, "Number of transformation records (ORIGXn+SCALEn+MTRIXn), $MasterCount{Transformations}, specified in MASTER record doen't match its explict count, $RecordsCount.";
  }

  $RecordsCount = $SumExplicitCounts->('ATOM', 'HETATM');
  if ($MasterCount{AtomAndHetatm} != $RecordsCount) {
    push @MasterRecordValidationInfo, "Number of ATOM + HETATM records, $MasterCount{AtomAndHetatm}, specified in MASTER record doen't match its explict count, $RecordsCount.";
  }

  for my $RecordType (qw(TER CONECT SEQRES)) {
    $CheckRecordType->($RecordType);
  }

  if (@MasterRecordValidationInfo) {
    print "\nMASTER record validation: Count mismatches found:\n";
    print JoinWords(\@MasterRecordValidationInfo, "\n", 0), "\n";
  }
  else {
    print "\nMASTER record validation: Count values match with the explicit count of the corresponding records.\n";
  }
}
667
# List the combined size of all the files which passed the input checks...
#
# Nothing is printed unless more than one file was okay.
#
sub ListTotalSizeOfFiles {
  my @OkayFileIndices = grep { $PDBFilesInfo{FileOkay}[$_] } (0 .. $#PDBFilesList);

  my $TotalSize = 0;
  for my $Index (@OkayFileIndices) {
    $TotalSize += $PDBFilesInfo{FileSize}[$Index];
  }

  my $FileOkayCount = @OkayFileIndices;
  if ($FileOkayCount > 1) {
    print "\nTotal size of $FileOkayCount files: ", FormatFileSize($TotalSize), "\n";
  }
}
686
# Empty chain IDs are replaced with "None[1-9]". For display purposes, any
# chain ID starting with "None" (case insensitive) is shown as plain 'None';
# all other chain IDs are returned unchanged...
sub CleanupChainID {
  my($ChainID) = @_;

  return ($ChainID =~ /^None/i) ? 'None' : $ChainID;
}
697
# Process option values...
#
# Translates the raw command line options in %Options into the flags and
# values in %OptionsInfo which drive the reporting routines. The "all"
# option switches on every optional report.
#
sub ProcessOptions {
  %OptionsInfo = ();

  # A report is on when its own option or the "all" option is specified...
  my $IsEnabled = sub {
    my($OptionName) = @_;
    return ($Options{$OptionName} || $Options{all}) ? 1 : 0;
  };

  # Setup record types to count...
  my $CountRecordType = $Options{count};
  if (!$CountRecordType) {
    $CountRecordType = $Options{all} ? 'All' : 'ATOM,HETATM';
  }
  $OptionsInfo{CountRecordType} = $CountRecordType;
  $OptionsInfo{SpecifiedRecordTypes} = ($CountRecordType =~ /^All$/i) ? [] : [split /\,/, $CountRecordType];

  $OptionsInfo{CountChains} = $IsEnabled->('chains');
  $OptionsInfo{CheckMasterRecord} = $IsEnabled->('mastercheck');

  # Residues mode selects per chain reports, all residues reports or both...
  my $InChainsMode = ($Options{residuesmode} =~ /^(InChains|Both)$/i) ? 1 : 0;
  my $AllResiduesMode = ($Options{residuesmode} =~ /^(All|Both)$/i) ? 1 : 0;

  # Residue count is the default. So $Options{residues} is simply ignored
  # and only the residues mode decides which counts are listed.
  $OptionsInfo{CountResiduesInChains} = $InChainsMode;
  $OptionsInfo{CountResiduesAll} = $AllResiduesMode;

  my $FrequencyEnabled = $IsEnabled->('frequency');
  $OptionsInfo{ResiduesFrequencyInChains} = ($FrequencyEnabled && $InChainsMode) ? 1 : 0;
  $OptionsInfo{ResiduesFrequencyAll} = ($FrequencyEnabled && $AllResiduesMode) ? 1 : 0;

  $OptionsInfo{ResidueNumbersInfo} = $IsEnabled->('residuenumbers');
  $OptionsInfo{CalculateBoundingBox} = $IsEnabled->('boundingbox');
  $OptionsInfo{ListHeaderInfo} = $IsEnabled->('header');
  $OptionsInfo{ListExperimentalTechniqueInfo} = $IsEnabled->('experiment');

  $OptionsInfo{DetailLevel} = $Options{detail};
}
736
# Retrieve information about PDB files...
#
# Resets %PDBFilesInfo and, for each entry of @PDBFilesList, records at the same index:
#
#   FileOkay          - 1 when the file exists, passes CheckFileType as "pdb" and can
#                       be opened for reading; 0 otherwise
#   FileSize          - value returned by FileSize for a usable file; 0 otherwise
#   FileLastModified  - "time; date" string built from
#                       FormattedFileModificationTimeAndDate; '' otherwise
#
# A warning is issued for each file which is ignored. Takes no arguments.
sub RetrievePDBFilesInfo {
  my($Index, $PDBFile, $ModifiedTimeString, $ModifiedDateString);

  %PDBFilesInfo = ();
  @{$PDBFilesInfo{FileOkay}} = ();
  @{$PDBFilesInfo{FileSize}} = ();
  @{$PDBFilesInfo{FileLastModified}} = ();

  FILELIST: for $Index (0 .. $#PDBFilesList) {
    # Assume the worst until the file has passed all the checks...
    $PDBFilesInfo{FileOkay}[$Index] = 0;
    $PDBFilesInfo{FileSize}[$Index] = 0;
    $PDBFilesInfo{FileLastModified}[$Index] = '';

    $PDBFile = $PDBFilesList[$Index];
    if (!(-e $PDBFile)) {
      warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($PDBFile, "pdb")) {
      warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
      next FILELIST;
    }

    # Make sure the file is readable. The three argument form of open is used with an
    # explicit read mode so that characters in the file name such as a leading ">" or
    # "|", or surrounding white space, are never interpreted as an open mode...
    my($PDBFileHandle);
    if (!open $PDBFileHandle, "<", $PDBFile) {
      warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $PDBFileHandle;

    $PDBFilesInfo{FileOkay}[$Index] = 1;
    $PDBFilesInfo{FileSize}[$Index] = FileSize($PDBFile);
    ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($PDBFile);
    $PDBFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString";
  }
}
772
773
# Define the command line interface of the script: seed %Options with defaults, let
# GetOptions fill in the values specified by the user, and validate the values which
# can be checked right away. Any problem is fatal...
sub SetupScriptUsage {
  my(@OptionSpecs);

  # Defaults for the options which take a value...
  %Options = (count => '', detail => 1, residuesmode => 'Both');

  # Retrieve all the options...
  @OptionSpecs = ("all|a", "boundingbox|b", "count|c=s", "chains", "detail|d=i", "experiment|e", "frequency|f", "mastercheck|m", "header", "help|h", "residues", "residuesmode=s", "residuenumbers", "workingdir|w=s");
  GetOptions(\%Options, @OptionSpecs)
    or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

  # Switch to the working directory when one is specified...
  if ($Options{workingdir}) {
    (-d $Options{workingdir})
      or die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validate detail level...
  IsPositiveInteger($Options{detail})
    or die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Allowed values: > 0\n";

  # Validate residues mode...
  unless ($Options{residuesmode} =~ /^(InChains|All|Both)$/i) {
    die "Error: The value specified, $Options{residuesmode}, for option \"--ResiduesMode\" is not valid. Allowed values: InChains, All, or Both\n";
  }
}
799
800 __END__
801
802 =head1 NAME
803
804 InfoPDBFiles.pl - List information about PDB files
805
806 =head1 SYNOPSIS
807
808 InfoPDBFiles.pl PDBFile(s) PDB(s)...
809
810 InfoPDBFiles.pl [B<-a, --all>] [B<-b, --BoundingBox>]
811 [B<-c, --count> "RecordType, [RecordType,...]" | All] [B<--chains>]
812 [B<-d, --detail> infolevel] [B<-e, --experiment>] [B<-f, --frequency>]
813 [B<-h, --help>] [B<--header>] [B<-m, --MasterCheck>] [B<--residues>]
814 [B<--ResiduesMode> InChains | All | Both] [B<--ResidueNumbers>]
815 [B<-w, --WorkingDir> dirname] PDBFile(s)...
816
817 =head1 DESCRIPTION
818
819 List information about contents of I<PDBFile(s)>: number of each record type, number of chains,
820 count and percent distribution of residues in each chain, bounding box and so on.
821 Multiple PDBFile names are separated by spaces. The valid file extension is I<.pdb>.
822 All other file name extensions are ignored during the wild card expansion. All the PDB files
823 in a current directory can be specified either by I<*.pdb> or the current directory name.
824
825 In PDB files containing data for multiple models, all ATOM/HETATM records for chains after the first model
826 are ignored.
827
828 =head1 OPTIONS
829
830 =over 4
831
832 =item B<-a, --all>
833
834 List all the available information.
835
836 =item B<-b, --BoundingBox>
837
838 List min/max XYZ coordinates of ATOM/HETATM records.
839
840 =item B<-c, --count> I<RecordType,[RecordType,...]|All>
841
842 Types of PDB records to count in I<PDBFile(s)>. You can specify a list of any valid PDB
843 record type or count all record types found in the files. Possible values: Comma delimited list
844 of valid I<RecordTypes> or I<All>. Default: I<ATOM,HETATM>. And this is also B<default behavior>.
845
846 The list of valid PDB record types includes: I<HEADER, OBSLTE, TITLE, CAVEAT, COMPND, SOURCE, KEYWDS,
847 EXPDTA, AUTHOR, REVDAT, SPRSDE, JRN, REMARK, DBREF, SEQADV, SEQRES, MODRES, HET, HETNAM, HETSYN,
848 FORMUL, HELIX, SHEET, TURN, SSBOND, LINK, HYDBND, SLTBRG, CISPEP, SITE, CRYST1, ORIGX1, ORIGX2, ORIGX3,
849 SCALE1, SCALE2, SCALE3, MTRIX1, MTRIX2, MTRIX3, TVECT, MODEL, ATOM, SIGATM, ANISOU, SIGUIJ, TER,
850 HETATM, ENDMDL, CONECT, MASTER, END>.
851
852 =item B<--chains>
853
854 Count number of chains.
855
856 =item B<-d, --detail> I<infolevel>
857
858 Level of information to print about PDB during various options. Default: I<1>.
859 Possible values: I<1, 2 or 3>.
860
861 =item B<-e, --experiment>
862
863 List experimental technique information along with any applicable resolution.
864
865 =item B<-f, --frequency>
866
867 List distribution of residues: report count and percent of residues in individual chains and
868 across all the chains, or for all the residues in the file. The value of option B<--residuesmode>
869 determines how residues are counted and what is listed. The list is sorted by frequency in
870 descending order. By default, only residue count values are reported. To list percent distribution
871 of residues, specify B<-d, --detail> value of I<2> or higher.
872
873 =item B<-h, --help>
874
875 Print this help message.
876
877 =item B<--header>
878
879 List header information.
880
881 =item B<-m, --MasterCheck>
882
883 Check master record by explicitly counting the number of REMARK, HET, HELIX, SHEET, TURN, SITE,
884 ORIGX, SCALE, MTRIX, ATOM, HETATM, TER, CONECT and SEQRES records and comparing their
885 values against contents of master record.
886
887 =item B<--residues>
888
889 Count residues in I<PDBFile(s)>. This is also B<default behavior>.
890
891 By default, only residue count values are reported. To list percent distribution of residues,
892 specify B<-d, --detail> value of I<2> or higher.
893
894 =item B<--ResiduesMode> <InChains | All | Both>
895
896 Specify how to count residues in I<PDBFile(s)>: Count residue in each chain and across all the chains,
897 list count of all the residues in the file, or list both. Possible values: I<InChains, All, or Both>.
898 Default: I<Both>.
899
900 =item B<--ResidueNumbers>
901
902 List information about ATOM residue numbers in each chain before TER record: start and end residue
903 number; gaps in residue numbers corresponding to non-sequential residue numbers; residue
904 numbers not in ascending order.
905
906 =item B<-w, --WorkingDir> I<dirname>
907
908 Location of working directory. Default: current directory.
909
910 =back
911
912 =head1 EXAMPLES
913
914 To list the total number of ATOM/HETATM records and the number of residues in chain(s) in PDB files, type:
915
916 % InfoPDBFiles.pl Sample1.pdb
917 % InfoPDBFiles.pl Sample2.pdb
918
919 To list all available information for PDB file Sample2.pdb, type:
920
921 % InfoPDBFiles.pl -a Sample2.pdb
922
923 To list all available information for PDB file Sample2.pdb with all available details, type:
924
925 % InfoPDBFiles.pl -a -d 3 Sample2.pdb
926
927 To count ATOM and HETATM records in Sample2.pdb file, type:
928
929 % InfoPDBFiles.pl -c "ATOM,HETATM" Sample2.pdb
930
931 To list distribution of residues in chains across the whole PDB file Sample2.pdb along with
932 percent distribution, type
933
934 % InfoPDBFiles.pl --frequency -d 2 Sample2.pdb
935
936 To list distribution of residues only across chains in PDB file Sample2.pdb along with
937 percent distribution, type
938
939 % InfoPDBFiles.pl --frequency -d 2 --ResiduesMode InChains Sample2.pdb
940
941 To list min/max coordinates of the bounding box which encompasses the structure in Sample1.pdb
942 file, type:
943
944 % InfoPDBFiles.pl -b Sample1.pdb
945
946 =head1 AUTHOR
947
948 Manish Sud <msud@san.rr.com>
949
950 =head1 SEE ALSO
951
952 ExtractFromPDBFiles.pl, InfoAminoAcids.pl, InfoNucleicAcids.pl, InfoSequenceFiles.pl, ModifyPDBFiles.pl
953
954 =head1 COPYRIGHT
955
956 Copyright (C) 2015 Manish Sud. All rights reserved.
957
958 This file is part of MayaChemTools.
959
960 MayaChemTools is free software; you can redistribute it and/or modify it under
961 the terms of the GNU Lesser General Public License as published by the Free
962 Software Foundation; either version 3 of the License, or (at your option)
963 any later version.
964
965 =cut