comparison bin/ModifyPDBFiles.pl @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 #!/usr/bin/perl -w
2 #
3 # $RCSfile: ModifyPDBFiles.pl,v $
4 # $Date: 2015/02/28 20:46:20 $
5 # $Revision: 1.25 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use FindBin; use lib "$FindBin::Bin/../lib";
31 use Getopt::Long;
32 use File::Basename;
33 use Text::ParseWords;
34 use Benchmark;
35 use FileUtil;
36 use TextUtil;
37 use PDBFileUtil;
38
39 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
40
41 # Autoflush STDOUT
42 $| = 1;
43
44 # Starting message...
45 $ScriptName = basename($0);
46 print "\n$ScriptName: Starting...\n\n";
47 $StartTime = new Benchmark;
48
49 # Get the options and setup script...
50 SetupScriptUsage();
51 if ($Options{help} || @ARGV < 1) {
52 die GetUsageFromPod("$FindBin::Bin/$ScriptName");
53 }
54
55 my(@PDBFilesList);
56 @PDBFilesList = ExpandFileNames(\@ARGV, "pdb");
57
58 # Process options...
59 print "Processing options...\n";
60 my(%OptionsInfo);
61 ProcessOptions();
62
63 # Setup information about input files...
64 print "Checking input PDB file(s)...\n";
65 my(%PDBFilesInfo);
66 RetrievePDBFilesInfo();
67
68 # Process input files..
69 my($FileIndex);
70 if (@PDBFilesList > 1) {
71 print "\nProcessing PDB files...\n";
72 }
73 for $FileIndex (0 .. $#PDBFilesList) {
74 if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
75 print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
76 ModifyPDBFiles($FileIndex);
77 }
78 }
79 print "\n$ScriptName:Done...\n\n";
80
81 $EndTime = new Benchmark;
82 $TotalTime = timediff ($EndTime, $StartTime);
83 print "Total time: ", timestr($TotalTime), "\n";
84
85 ###############################################################################
86
87 # Modify appropriate information...
88 sub ModifyPDBFiles {
89 my($FileIndex) = @_;
90 my($PDBFile, $PDBRecordLinesRef);
91
92 # Get PDB data...
93 $PDBFile = $PDBFilesList[$FileIndex];
94 $PDBRecordLinesRef = ReadPDBFile($PDBFile);
95
96 if ($OptionsInfo{Mode} =~ /^RenumberAtoms$/i) {
97 RenumberAtoms($FileIndex, $PDBRecordLinesRef);
98 }
99 elsif ($OptionsInfo{Mode} =~ /^RenumberResidues$/i) {
100 RenumberResidues($FileIndex, $PDBRecordLinesRef);
101 }
102 elsif ($OptionsInfo{Mode} =~ /^RenumberWaters$/i) {
103 RenumberWaters($FileIndex, $PDBRecordLinesRef);
104 }
105 elsif ($OptionsInfo{Mode} =~ /^RenameChainIDs$/i) {
106 RenameChainsIDs($FileIndex, $PDBRecordLinesRef);
107 }
108 }
109
110 # Renumber atom and hetro atom numbers...
111 sub RenumberAtoms {
112 my($FileIndex, $PDBRecordLinesRef) = @_;
113 my($PDBFileName, $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewAtomNumber, $RecordType, %OldToNewAtomNumbersMap);
114
115 $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
116 print "Generating PDBFileName file $PDBFileName...\n";
117 open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
118
119 # Write out header and other older recors...
120 WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
121
122 # Write out all ATOM records along with TER and model records to indicate
123 # chains and multiple models..
124 %OldToNewAtomNumbersMap = ();
125 $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
126 for $RecordLine (@{$PDBRecordLinesRef}) {
127 if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
128 $RecordType = GetPDBRecordType($RecordLine);
129
130 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
131
132 print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $NewAtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
133
134 $OldToNewAtomNumbersMap{$AtomNumber} = $NewAtomNumber;
135 $NewAtomNumber++;
136 }
137 elsif (IsTerRecordType($RecordLine)) {
138 $NewAtomNumber++;
139 print OUTFILE GenerateTerRecordLine($NewAtomNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode), "\n";
140 }
141 elsif (IsModelRecordType($RecordLine)) {
142 print OUTFILE "$RecordLine\n";
143 }
144 elsif (IsEndmdlRecordType($RecordLine)) {
145 print OUTFILE "$RecordLine\n";
146 # Restart numbering...
147 $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
148 }
149 }
150
151 # Write out modified CONECT records...
152 my($ModifiedConectAtomNum, $ConectAtomNum, @ConectAtomNums, @ModifiedConectAtomNums);
153 LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
154 if (!IsConectRecordType($RecordLine)) {
155 next LINE;
156 }
157 @ConectAtomNums = ();
158 @ModifiedConectAtomNums = ();
159 push @ConectAtomNums, ParseConectRecordLine($RecordLine);
160 ATOMNUMBER: for $ConectAtomNum (@ConectAtomNums) {
161 $ModifiedConectAtomNum = $ConectAtomNum;
162 if (defined($ConectAtomNum)) {
163 $AtomNumber = $ConectAtomNum;
164 if ($AtomNumber) {
165 if (exists $OldToNewAtomNumbersMap{$AtomNumber}) {
166 $ModifiedConectAtomNum = $OldToNewAtomNumbersMap{$AtomNumber};
167 }
168 }
169 }
170 push @ModifiedConectAtomNums, $ModifiedConectAtomNum;
171 }
172 # Write out the record...
173 print OUTFILE GenerateConectRecordLine(@ModifiedConectAtomNums), "\n";
174 }
175
176 # Write out END record...
177 print OUTFILE GenerateEndRecordLine(), "\n";
178
179 close OUTFILE;
180 }
181
182 # Renumber residues...
183 sub RenumberResidues {
184 my($FileIndex, $PDBRecordLinesRef) = @_;
185 my($PDBFileName, $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $NewHetatmResidueNumber, $TERCount, $TotalTERCount, $PreviousResidueNumber, $PreviousHetatmResidueNumber, $RecordType);
186
187 $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
188 print "Generating PDBFileName file $PDBFileName...\n";
189 open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
190
191 # Write out header and other older recors...
192 WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
193
194 # Do a quick count of all TER records...
195 $TotalTERCount = 0;
196 for $RecordLine (@{$PDBRecordLinesRef}) {
197 if (IsTerRecordType($RecordLine)) {
198 $TotalTERCount++;
199 }
200 }
201
202 # Write out all ATOM records along with TER and model records to indicate
203 # chains and multiple models..
204 $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
205 $NewHetatmResidueNumber = $OptionsInfo{StartingHetatmResidueNumber};
206
207 $TERCount = 0;
208 $PreviousResidueNumber = 0;
209 $PreviousHetatmResidueNumber = 0;
210
211 for $RecordLine (@{$PDBRecordLinesRef}) {
212 if (IsAtomRecordType($RecordLine) || (IsHetatmRecordType($RecordLine) && ($TERCount < $TotalTERCount || $OptionsInfo{HetatmResidueNumberMode} =~ /^Automatic$/i))) {
213 $RecordType = GetPDBRecordType($RecordLine);
214 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
215
216 if ($PreviousResidueNumber && $PreviousResidueNumber != $ResidueNumber) {
217 $PreviousResidueNumber = $ResidueNumber;
218 $NewResidueNumber++;
219 }
220 else {
221 # First residue in a chain...
222 $PreviousResidueNumber = $ResidueNumber;
223 }
224 print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
225
226 }
227 elsif (IsHetatmRecordType($RecordLine)) {
228 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseHetatmRecordLine($RecordLine);
229
230 # User HETATM residue numbers...
231 if ($PreviousHetatmResidueNumber && $PreviousHetatmResidueNumber != $ResidueNumber) {
232 $PreviousHetatmResidueNumber = $ResidueNumber;
233 $NewHetatmResidueNumber++;
234 }
235 else {
236 # First HETATM residue outside a chain...
237 $PreviousHetatmResidueNumber = $ResidueNumber;
238 }
239
240 print OUTFILE GenerateHetatmRecordLine($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewHetatmResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
241 }
242 elsif (IsTerRecordType($RecordLine)) {
243 $TERCount++;
244 $AtomNumber++;
245 print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode), "\n";
246 # For per chain numbering, start over again...
247 if ($OptionsInfo{ResidueNumberMode} =~ /^PerChain$/i) {
248 if ($TERCount < $TotalTERCount ) {
249 $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
250 }
251 $PreviousResidueNumber = 0;
252 }
253 }
254 elsif (IsModelRecordType($RecordLine)) {
255 print OUTFILE "$RecordLine\n";
256 }
257 elsif (IsEndmdlRecordType($RecordLine)) {
258 print OUTFILE "$RecordLine\n";
259 }
260 }
261
262 # Write out CONECT records...
263 for $RecordLine (@{$PDBRecordLinesRef}) {
264 if (IsConectRecordType($RecordLine)) {
265 print OUTFILE "$RecordLine\n";
266 }
267 }
268
269 # Write out END record...
270 print OUTFILE GenerateEndRecordLine(), "\n";
271
272 close OUTFILE;
273 }
274
275 # Renumber water residues...
276 sub RenumberWaters {
277 my($FileIndex, $PDBRecordLinesRef) = @_;
278 my($PDBFileName, $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $RecordType);
279
280 $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
281 print "Generating PDBFileName file $PDBFileName...\n";
282 open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
283
284 # Write out header and other older recors...
285 WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
286
287 # Write out all ATOM records along with TER and model records to indicate
288 # chains and multiple models..
289 $NewResidueNumber = $OptionsInfo{StartingWaterResidueNumber};
290 for $RecordLine (@{$PDBRecordLinesRef}) {
291 if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
292 $RecordType = GetPDBRecordType($RecordLine);
293
294 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
295
296 if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
297 $ResidueNumber = $NewResidueNumber;
298 print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
299 $NewResidueNumber++;
300 }
301 else {
302 print OUTFILE "$RecordLine\n";
303 }
304 }
305 elsif (IsTerRecordType($RecordLine)) {
306 print OUTFILE "$RecordLine\n";
307 }
308 elsif (IsModelRecordType($RecordLine)) {
309 print OUTFILE "$RecordLine\n";
310 }
311 elsif (IsEndmdlRecordType($RecordLine)) {
312 print OUTFILE "$RecordLine\n";
313 }
314 }
315
316 # Write out CONECT records...
317 for $RecordLine (@{$PDBRecordLinesRef}) {
318 if (IsConectRecordType($RecordLine)) {
319 print OUTFILE "$RecordLine\n";
320 }
321 }
322
323 # Write out END record...
324 print OUTFILE GenerateEndRecordLine(), "\n";
325
326 close OUTFILE;
327 }
328
329 # Rename chain IDs...
330 sub RenameChainsIDs {
331 my($FileIndex, $PDBRecordLinesRef) = @_;
332 my($PDBFileName, $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $RecordType, $PreviousChainID, $FirstChainID, $NewChainID, $NewChainIDCounter, %OldToNewChainIDsMap);
333
334 $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
335 print "Generating PDBFileName file $PDBFileName...\n";
336 open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
337
338 # Write out header and other older recors...
339 WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
340
341 # Write out all ATOM records along with TER and model records to indicate
342 # chains and multiple models..
343 %OldToNewChainIDsMap = ();
344 $NewChainIDCounter = $OptionsInfo{StartingChainID};
345 $FirstChainID = 1;
346 $PreviousChainID = '';
347 LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
348 if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
349 $RecordType = GetPDBRecordType($RecordLine);
350
351 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
352
353 if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
354 # Chain IDs are not assigned to water residues...
355 print OUTFILE "$RecordLine\n";
356 next LINE;
357 }
358
359 if ($FirstChainID) {
360 $FirstChainID = 0;
361 $PreviousChainID = $ChainID;
362 if ($ChainID || (!$ChainID && $OptionsInfo{RenameEmptyChainIDs})) {
363 $NewChainID = $NewChainIDCounter;
364 $OldToNewChainIDsMap{$ChainID} = $NewChainID;
365 }
366 else {
367 $NewChainID = '';
368 }
369 }
370 elsif ($PreviousChainID ne $ChainID) {
371 if ($ChainID || (!$ChainID && $OptionsInfo{RenameEmptyChainIDs})) {
372 $PreviousChainID = $ChainID;
373 if (exists $OldToNewChainIDsMap{$ChainID}) {
374 $NewChainID = $OldToNewChainIDsMap{$ChainID};
375 }
376 else {
377 $NewChainIDCounter++;
378 $NewChainID = $NewChainIDCounter;
379 $OldToNewChainIDsMap{$ChainID} = $NewChainID;
380 }
381 }
382 else {
383 $NewChainID = '';
384 }
385 }
386
387 print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
388 }
389 elsif (IsTerRecordType($RecordLine)) {
390 $AtomNumber++;
391 print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode), "\n";
392 }
393 elsif (IsModelRecordType($RecordLine)) {
394 print OUTFILE "$RecordLine\n";
395 }
396 elsif (IsEndmdlRecordType($RecordLine)) {
397 print OUTFILE "$RecordLine\n";
398 }
399 }
400
401 # Write out CONECT records...
402 for $RecordLine (@{$PDBRecordLinesRef}) {
403 if (IsConectRecordType($RecordLine)) {
404 print OUTFILE "$RecordLine\n";
405 }
406 }
407
408 # Write out END record...
409 print OUTFILE GenerateEndRecordLine(), "\n";
410
411 close OUTFILE;
412 }
413
414
415 # Write out modifed header and other older records...
416 sub WriteHeaderAndOlderRecords {
417 my($OutFileRef, $PDBRecordLinesRef) = @_;
418
419 if ($OptionsInfo{ModifyHeaderRecord}) {
420 # Write out modified HEADER record...
421 my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
422 $Classification = 'Data modified using MayaChemTools';
423 print $OutFileRef GenerateHeaderRecordLine($IDCode, $Classification), "\n";
424 }
425 else {
426 print $OutFileRef $PDBRecordLinesRef->[0], "\n";
427 }
428
429 # Write out any old records...
430 if ($OptionsInfo{KeepOldRecords}) {
431 my($RecordLineIndex, $RecordLine);
432 # Skip HEADER record and write out older records all the way upto first MODEL/ATOM/HETATM records from input file...
433 RECORDLINE: for $RecordLineIndex (1 .. $#{$PDBRecordLinesRef}) {
434 $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
435 if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
436 last RECORDLINE;
437 }
438 print $OutFileRef "$RecordLine\n";
439 }
440 }
441 }
442
443 # Get header record information assuming it's the first record...
444 sub GetHeaderRecordInformation {
445 my($PDBRecordLinesRef) = @_;
446 my($Classification, $DepositionDate, $IDCode, $HeaderRecordLine);
447
448 ($Classification, $DepositionDate, $IDCode) = ('') x 3;
449 $HeaderRecordLine = $PDBRecordLinesRef->[0];
450 if (IsHeaderRecordType($HeaderRecordLine)) {
451 ($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine);
452 }
453 return ($Classification, $DepositionDate, $IDCode);
454 }
455
456
457 # Process option values...
458 sub ProcessOptions {
459 %OptionsInfo = ();
460 $OptionsInfo{Mode} = $Options{mode};
461
462 $OptionsInfo{StartingAtomNumber} = $Options{atomnumberstart};
463 $OptionsInfo{StartingChainID} = $Options{chainidstart};
464 $OptionsInfo{RenameEmptyChainIDs} = ($Options{chainidrenameempty} =~ /^Yes$/i) ? 1 : 0;
465
466 $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0;
467 $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0;
468
469 $OptionsInfo{ResidueNumberMode} = $Options{residuenumbermode};
470 $OptionsInfo{StartingResidueNumber} = $Options{residuenumberstart};
471
472 $OptionsInfo{HetatmResidueNumberMode} = $Options{residuenumberhetatmmode};
473 $OptionsInfo{StartingHetatmResidueNumber} = $Options{residuenumberstarthetatm};
474
475 $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
476 $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
477
478 $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
479 $OptionsInfo{StartingWaterResidueNumber} = $Options{waterresiduestart};
480 @{$OptionsInfo{SpecifiedWaterResiduesList}} = ();
481 %{$OptionsInfo{SpecifiedWaterResiduesMap}} = ();
482
483 my(@SpecifiedWaterResiduesList);
484 @SpecifiedWaterResiduesList = ();
485 my($WaterResidueName);
486 if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) {
487 push @SpecifiedWaterResiduesList, ('HOH', 'WAT', 'H2O');
488 }
489 else {
490 @SpecifiedWaterResiduesList = split /\,/, $Options{waterresiduenames};
491 }
492 for $WaterResidueName (@SpecifiedWaterResiduesList) {
493 $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName;
494 }
495 push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList;
496 }
497
498 # Retrieve information about PDB files...
499 sub RetrievePDBFilesInfo {
500 my($Index, $PDBFile, $PDBRecordLinesRef, $ChainsAndResiduesInfoRef, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot, $Mode, $OutFileMode, @OutFileNames);
501
502 %PDBFilesInfo = ();
503 @{$PDBFilesInfo{FileOkay}} = ();
504 @{$PDBFilesInfo{OutFileRoot}} = ();
505 @{$PDBFilesInfo{OutFileNames}} = ();
506
507 FILELIST: for $Index (0 .. $#PDBFilesList) {
508 $PDBFilesInfo{FileOkay}[$Index] = 0;
509
510 $PDBFilesInfo{OutFileRoot}[$Index] = '';
511 @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
512 @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
513
514 $PDBFile = $PDBFilesList[$Index];
515 if (!(-e $PDBFile)) {
516 warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
517 next FILELIST;
518 }
519 if (!CheckFileType($PDBFile, "pdb")) {
520 warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
521 next FILELIST;
522 }
523 if (! open PDBFILE, "$PDBFile") {
524 warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
525 next FILELIST;
526 }
527 close PDBFILE;
528
529 # Get PDB data...
530 $PDBRecordLinesRef = ReadPDBFile($PDBFile);
531 $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
532 if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
533 warn "Warning: Ignoring file $PDBFile: No chains found \n";
534 next FILELIST;
535 }
536
537 # Setup output file names...
538 @OutFileNames = ();
539 $FileDir = ""; $FileName = ""; $FileExt = "";
540 ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
541 if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
542 my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
543 if ($RootFileName && $RootFileExt) {
544 $FileName = $RootFileName;
545 }
546 else {
547 $FileName = $OptionsInfo{OutFileRoot};
548 }
549 $OutFileRoot = $FileName;
550 }
551 else {
552 $OutFileRoot = $FileName;
553 }
554 $Mode = $OptionsInfo{Mode};
555 MODE: {
556 if ($Mode =~ /^RenumberAtoms$/i) { $OutFileMode = 'RenumberAtoms'; last MODE;}
557 if ($Mode =~ /^RenumberResidues$/i) { $OutFileMode = 'RenumberResidues'; last MODE;}
558 if ($Mode =~ /^RenumberWaters$/i) { $OutFileMode = 'RenumberWaters'; last MODE;}
559 if ($Mode =~ /^RenameChainIDs$/i) { $OutFileMode = 'RenameChainIDs'; last MODE;}
560 $OutFileMode = '';
561 }
562 $OutFileName = "${OutFileRoot}${OutFileMode}.pdb";
563 push @OutFileNames, $OutFileName;
564
565 $PDBFilesInfo{FileOkay}[$Index] = 1;
566 $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
567
568 push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames;
569 }
570 }
571
572 # Setup script usage and retrieve command line arguments specified using various options...
573 sub SetupScriptUsage {
574
575 # Retrieve all the options...
576 %Options = ();
577 $Options{atomnumberstart} = 1;
578 $Options{chainidstart} = 'A';
579 $Options{chainidrenameempty} = 'No';
580 $Options{keepoldrecords} = 'no';
581 $Options{mode} = 'RenumberResidues';
582 $Options{modifyheader} = 'yes';
583 $Options{residuenumbermode} = 'PerChain';
584 $Options{residuenumberstart} = 1;
585 $Options{residuenumberhetatmmode} = 'Automatic';
586 $Options{residuenumberstarthetatm} = 6000;
587 $Options{waterresiduenames} = 'Automatic';
588 $Options{waterresiduestart} = 8000;
589
590 if (!GetOptions(\%Options, "help|h", "atomnumberstart|a=i", "chainidstart|c=s", "chainidrenameempty=s", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "overwrite|o", "residuenumbermode=s", "residuenumberstart=i", "residuenumberhetatmmode=s", "residuenumberstarthetatm=i", "root|r=s", "sequencelength=i", "waterresiduenames=s", "waterresiduestart=i", "workingdir|w=s")) {
591 die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
592 }
593 if ($Options{workingdir}) {
594 if (! -d $Options{workingdir}) {
595 die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
596 }
597 chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
598 }
599 if (!IsPositiveInteger($Options{atomnumberstart})) {
600 die "Error: The value specified, $Options{atomnumberstart}, for option \"-a, --AtomNumberStart\" is not valid. Allowed values: >0\n";
601 }
602 if ((length($Options{chainidstart}) > 1) || ($Options{chainidstart} !~ /[A-Z]/i)) {
603 die "Error: The value specified, $Options{chainidstart}, for option \"-c, --ChainIDStart\" is not valid. Allowed values: a single character from A to Z\n";
604 }
605 if ($Options{chainidrenameempty} !~ /^(yes|no)$/i) {
606 die "Error: The value specified, $Options{chainidrenameempty}, for option \"--chainidrenameempty\" is not valid. Allowed values: yes or no\n";
607 }
608 if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
609 die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
610 }
611 if ($Options{mode} !~ /^(RenumberAtoms|RenumberResidues|RenumberWaters|RenameChainIDs)$/i) {
612 die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: RenumberAtoms, RenumberResidues, RenumberWaters or RenameChainIDs\n";
613 }
614 if ($Options{modifyheader} !~ /^(yes|no)$/i) {
615 die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
616 }
617 if ($Options{residuenumbermode} !~ /^(Sequential|PerChain)$/i) {
618 die "Error: The value specified, $Options{residuenumbermode}, for option \"--ResidueNumberMode\" is not valid. Allowed values: Sequential or PerChain\n";
619 }
620 if (!IsPositiveInteger($Options{residuenumberstart})) {
621 die "Error: The value specified, $Options{residuenumberstart}, for option \"--ResidueNumberStart\" is not valid. Allowed values: >0\n";
622 }
623 if ($Options{residuenumberhetatmmode} !~ /^(automatic|specify)$/i) {
624 die "Error: The value specified, $Options{residuenumberhetatmmode}, for option \"--residuenumbermode\" is not valid. Allowed values: automatic or specify\n";
625 }
626 if (!IsPositiveInteger($Options{residuenumberstarthetatm})) {
627 die "Error: The value specified, $Options{residuenumberstarthetatm}, for option \"--residuenumberstartHetatm\" is not valid. Allowed values: >0\n";
628 }
629 if (!IsPositiveInteger $Options{waterresiduestart}) {
630 die "Error: The value specified, $Options{waterresiduestart}, for option \"--waterresiduestart\" is not valid. Allowed values: >0\n";
631 }
632 }
633
634 __END__
635
636 =head1 NAME
637
638 ModifyPDBFiles.pl - Modify data in PDBFile(s)
639
640 =head1 SYNOPSIS
641
642 ModifyPDBFiles.pl PDBFile(s)...
643
644 ModifyPDBFiles.pl [B<-a, --AtomNumberStart> number] [B<-c, --ChainIDStart> character]
645 [B<--ChainIDRenameEmpty> yes | no] [B<-h, --help>] [B<-k, --KeepOldRecords> yes | no]
646 [B<-m, --mode > RenumberAtoms | RenumberResidues | RenumberWaters | RenameChainIDs]
647 [B<--ModifyHeader> yes | no] [B<-o, --overwrite>] [B<--ResidueNumberMode> Sequential | PerChain]
648 [B<--ResidueNumberStart> number] [B<--ResidueNumberHetatmMode> automatic | specify]
649 [B<--ResidueNumberStarHetatm> number] [B<-r, --root> rootname]
650 [B<--WaterResidueNames> Automatic | "ResidueName, [ResidueName,...]"] [B<--WaterResidueStart> number]
651 [B<-w, --WorkingDir> dirname] PDBFile(s)...
652
653 =head1 DESCRIPTION
654
655 Modify data in I<PDBFile(s)>: renumber atoms, residues, and water residues or assign new
656 chain IDs. Multiple PDBFile names are separated by spaces. The valid file extension is I<.pdb>.
657 All other file name extensions are ignored during the wild card expansion. All the PDB files
658 in a current directory can be specified either by I<*.pdb> or the current directory name.
659
660 =head1 OPTIONS
661
662 =over 4
663
664 =item B<-a, --AtomNumberStart> I<number>
665
666 Starting atom number to use during I<RenumberAtoms> value of B<-m, --mode> option. Default: I<1>.
667 Valid values: positive integers.
668
669 =item B<-c, --ChainIDStart> I<character>
670
671 A single character to use for starting IDs for chains during I<RenameChainIDs> value of B<-m, --mode> option.
672 Default: I<A>. Valid values: I<A to Z>.
673
674 =item B<--ChainIDRenameEmpty> I<Yes | No>
675
676 Specify whether to rename empty chain IDs during I<RenameChainIDs> B<-m, --mode> value. By
677 default, ATOM and HETATM records with no chain IDs are left unchanged. Possible values:
678 I<yes | no>. Default: I<No>.
679
680 =item B<-h, --help>
681
682 Print this help message.
683
684 =item B<-k, --KeepOldRecords> I<yes | no>
685
686 Specify whether to transfer old non ATOM and HETATM records from input PDBFile(s) to new
687 PDBFile(s). By default, except for the HEADER record, all records other than ATOM/HETATM
688 are dropped during the generation of new PDB files. Possible values: I<yes | no>.
689 Default: I<no>.
690
691 =item B<-m, --mode > I<RenumberAtoms | RenumberResidues | RenumberWaters | RenameChainIDs>
692
693 Specify how to modify I<PDBFile(s)>. Possible values: I<RenumberAtoms | RenumberResidues
694 | RenumberWaters | RenameChainIDs>. Default: I<RenumberResidues>.
695
696 For I<RenumberAtoms> mode, residue number in ATOM and HETATM records are reassigned
697 sequentially starting using value of B<-a, --AtomNumberStart> option.
698
699 For I<RenumberResidues> mode, serial number in ATOM and HETATM records are reassigned
700 either sequentially or statring from specified values for ATOM and HETATM records in each
701 chain.
702
703 For I<RenumberWaters> mode, residue number for waters are reassigned starting from a specific
704 value.
705
706 For I<RenameChainIDs> mode, all the chain IDs are reassigned starting from a specific chain ID.
707
708 During the generation of new PDB files, unnecessary CONECT records are dropped.
709
710 =item B<--ModifyHeader> I<yes | no>
711
712 Specify whether to modify HEADER record during the generation of new PDB files
713 Possible values: I<yes | no>. Default: I<yes>. By defailt, Classification data is replaced
714 by I<Data modified using MayaChemTools> before writing out HEADER record.
715
716 =item B<-o, --overwrite>
717
718 Overwrite existing files
719
720 =item B<--ResidueNumberMode> I<Sequential | PerChain>
721
722 Specify how to renumber residues: renumber residues sequentially across all the chains
723 or start from the begining for each chain. Possible values: I<Sequential | PerChain>. Default:
724 I<PerChain>.
725
726 =item B<--ResidueNumberStart> I<number>
727
728 Starting residue number to use for ATOM records in chains. Default: I<1>. Valid values
729 positive integers.
730
731 For I<Sequential> value of B<--ResidueNumberMode> option, residue numbers are
732 assigned sequentially across all the chains starting from the specified value.
733
734 For I<PerChain> value of B<--ResidueNumberMode> option, residue numbers are
735 starting again from the specified value for each chain.
736
737 HETATM residues with in the chains are numbered using this value as well
738
739 =item B<--ResidueNumberHetatmMode> I<automatic | specify>
740
741 Specify how to start residue number for HETATM records: use the next sequential
742 residue number after the last residue number from ATOM records or start from a
743 specific residue number. Possible values: I<automatic | specify>. Default:
744 I<automatic>
745
746 For I<automatic> , residue number after highest residue number of ATOM
747 records is used as the starting residue number for HETATM records.
748
749 For I<specify>, value of option B<--ResidueNumberStarHetatm> is used as the
750 starting residue number for HETATM records.
751
752 This option along with B<--ResidueNumberStartHetatm> only applies to HETATM records
753 outside the chains.
754
755 =item B<--ResidueNumberStartHetatm> I<number>
756
757 Starting residue number to use for HETATM records. Default: I<6000>. Valid values
758 positive integers.
759
760 =item B<-r, --root> I<rootname>
761
762 New PDB and sequence file name is generated using the root: <Root><Mode>.<Ext>.
763 Default new file name: <PDBFileName><Mode>.pdb. This option is ignored for multiple
764 input files.
765
766 =item B<--WaterResidueNames> I<Automatic | "ResidueName,[ResidueName,...]">
767
768 Identification of water residues during I<RenumberWaters> value of B<-m, --mode> option. Possible
769 values: I<Automatic | "ResidueName,[ResidueName,...]">. Default: I<Automatic> which corresponds
770 to "HOH,WAT,H20". You can also specify a different comma delimited list of residue names
771 to use for water.
772
773 =item B<--WaterResidueStart> I<number>
774
775 Starting water residue number to use during I<RenumberWaters> B<-m, --mode> value.
776 Default: I<8000>. Valid values: positive integers.
777
778 =item B<-w, --WorkingDir> I<dirname>
779
780 Location of working directory. Default: current directory.
781
782 =back
783
784 =head1 EXAMPLES
785
786 To renumber ATOM and HETATM residues starting from 1 for each chain with continuation to
787 HETATM residues outside TER records in Sample2.pdb and generate
788 Sample2RenumberResidues.pdb file, type:
789
790 % ModifyPDBFiles.pl Sample1.pdb
791
792 To renumber ATOM and HETATM residues sequentially across all chains starting from 1 with
793 continuation to HETATM residues outside TER records in Sample2.pdb and generate
794 Sample2RenumberResidues.pdb file, type:
795
796 % ModifyPDBFiles.pl --ResidueNumberMode Sequential -o Sample1.pdb
797
798 To renumber ATOM and HETATM residues sequentially across all chains starting from 1 and
799 HETATM residues outside TER records starting from 6000 in Sample2.pdb and generate
800 Sample2RenumberResidues.pdb file, type:
801
802 % ModifyPDBFiles.pl --ResidueNumberMode Sequential
803 --ResidueNumberHetatmMode Specify -o Sample1.pdb
804
805
806 To renumber ATOM and HETATM residues sequentially across all chains starting from 100 for
807 ATOM/HETATM residues with in TER records and starting from 999 for HETATM residues
808 outside TER records in Sample2.pdb and generate Sample2RenumberResidues.pdb file, type:
809
810 % ModifyPDBFiles.pl --ResidueNumberMode Sequential
811 --ResidueNumberHetatmMode Specify --ResidueNumberStart 100
812 --ResidueNumberStartHetatm 999 -o Sample2.pdb
813
814 To renumber ATOM and HETATM residues from 100 for each chain and starting from 999 for
815 HETATM residues outside TER records in Sample2.pdb and generate Sample2RenumberResidues.pdb
816 file, type:
817
818 % ModifyPDBFiles.pl --ResidueNumberMode PerChain
819 --ResidueNumberHetatmMode Specify --ResidueNumberStart 100
820 --ResidueNumberStartHetatm 999 -o Sample2.pdb
821
822 To renumber ATOM serial numbers sequentially starting from 100 in Sample1.pdb file and generate
823 Sample1RenumberAtoms.pdb file, type:
824
825 % ModifyPDBFiles.pl -m RenumberAtoms --AtomNumberStart 100
826 -o Sample1.pdb
827
828 To renumber water residues identified by "HOH,WAT" starting from residue number 1000
829 in Sample2.pdb file and generate Sample2RenumberWaters.pdb file, type:
830
831 % ModifyPDBFiles.pl -m RenumberWaters --WaterResidueNames "HOH,WAT"
832 -o --WaterResidueStart 950 Sample2.pdb
833
834 To rename all chain IDs starting from A in Sample1.pdb file and generate
835 Sample1RenameChainIDs.pdb file, type:
836
837 % ModifyPDBFiles.pl -m RenameChainIDs -o Sample1.pdb
838
839 To rename all chain IDs starting from B without assigning any chain IDs to ATOM/HETATOM
840 with no chain IDs in Sample2.pdb file and generate Sample2RenameChainIDs.pdb file, type:
841
842 % ModifyPDBFiles.pl l -m RenameChainIDs -c B --ChainIDRenameEmpty No
843 -o Sample2.pdb
844
845
846 =head1 AUTHOR
847
848 Manish Sud <msud@san.rr.com>
849
850 =head1 SEE ALSO
851
852 ExtractFromPDBFiles.pl, InfoPDBFiles.pl
853
854 =head1 COPYRIGHT
855
856 Copyright (C) 2015 Manish Sud. All rights reserved.
857
858 This file is part of MayaChemTools.
859
860 MayaChemTools is free software; you can redistribute it and/or modify it under
861 the terms of the GNU Lesser General Public License as published by the Free
862 Software Foundation; either version 3 of the License, or (at your option)
863 any later version.
864
865 =cut