Mercurial > repos > deepakjadmin > mayatool3_test2
comparison bin/ModifyPDBFiles.pl @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 #!/usr/bin/perl -w | |
2 # | |
3 # $RCSfile: ModifyPDBFiles.pl,v $ | |
4 # $Date: 2015/02/28 20:46:20 $ | |
5 # $Revision: 1.25 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability of fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use FindBin; use lib "$FindBin::Bin/../lib"; | |
31 use Getopt::Long; | |
32 use File::Basename; | |
33 use Text::ParseWords; | |
34 use Benchmark; | |
35 use FileUtil; | |
36 use TextUtil; | |
37 use PDBFileUtil; | |
38 | |
39 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime); | |
40 | |
41 # Autoflush STDOUT | |
42 $| = 1; | |
43 | |
44 # Starting message... | |
45 $ScriptName = basename($0); | |
46 print "\n$ScriptName: Starting...\n\n"; | |
47 $StartTime = new Benchmark; | |
48 | |
49 # Get the options and setup script... | |
50 SetupScriptUsage(); | |
51 if ($Options{help} || @ARGV < 1) { | |
52 die GetUsageFromPod("$FindBin::Bin/$ScriptName"); | |
53 } | |
54 | |
55 my(@PDBFilesList); | |
56 @PDBFilesList = ExpandFileNames(\@ARGV, "pdb"); | |
57 | |
58 # Process options... | |
59 print "Processing options...\n"; | |
60 my(%OptionsInfo); | |
61 ProcessOptions(); | |
62 | |
63 # Setup information about input files... | |
64 print "Checking input PDB file(s)...\n"; | |
65 my(%PDBFilesInfo); | |
66 RetrievePDBFilesInfo(); | |
67 | |
68 # Process input files.. | |
69 my($FileIndex); | |
70 if (@PDBFilesList > 1) { | |
71 print "\nProcessing PDB files...\n"; | |
72 } | |
73 for $FileIndex (0 .. $#PDBFilesList) { | |
74 if ($PDBFilesInfo{FileOkay}[$FileIndex]) { | |
75 print "\nProcessing file $PDBFilesList[$FileIndex]...\n"; | |
76 ModifyPDBFiles($FileIndex); | |
77 } | |
78 } | |
79 print "\n$ScriptName:Done...\n\n"; | |
80 | |
81 $EndTime = new Benchmark; | |
82 $TotalTime = timediff ($EndTime, $StartTime); | |
83 print "Total time: ", timestr($TotalTime), "\n"; | |
84 | |
85 ############################################################################### | |
86 | |
87 # Modify appropriate information... | |
88 sub ModifyPDBFiles { | |
89 my($FileIndex) = @_; | |
90 my($PDBFile, $PDBRecordLinesRef); | |
91 | |
92 # Get PDB data... | |
93 $PDBFile = $PDBFilesList[$FileIndex]; | |
94 $PDBRecordLinesRef = ReadPDBFile($PDBFile); | |
95 | |
96 if ($OptionsInfo{Mode} =~ /^RenumberAtoms$/i) { | |
97 RenumberAtoms($FileIndex, $PDBRecordLinesRef); | |
98 } | |
99 elsif ($OptionsInfo{Mode} =~ /^RenumberResidues$/i) { | |
100 RenumberResidues($FileIndex, $PDBRecordLinesRef); | |
101 } | |
102 elsif ($OptionsInfo{Mode} =~ /^RenumberWaters$/i) { | |
103 RenumberWaters($FileIndex, $PDBRecordLinesRef); | |
104 } | |
105 elsif ($OptionsInfo{Mode} =~ /^RenameChainIDs$/i) { | |
106 RenameChainsIDs($FileIndex, $PDBRecordLinesRef); | |
107 } | |
108 } | |
109 | |
110 # Renumber atom and hetro atom numbers... | |
111 sub RenumberAtoms { | |
112 my($FileIndex, $PDBRecordLinesRef) = @_; | |
113 my($PDBFileName, $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewAtomNumber, $RecordType, %OldToNewAtomNumbersMap); | |
114 | |
115 $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0]; | |
116 print "Generating PDBFileName file $PDBFileName...\n"; | |
117 open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n"; | |
118 | |
119 # Write out header and other older recors... | |
120 WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef); | |
121 | |
122 # Write out all ATOM records along with TER and model records to indicate | |
123 # chains and multiple models.. | |
124 %OldToNewAtomNumbersMap = (); | |
125 $NewAtomNumber = $OptionsInfo{StartingAtomNumber}; | |
126 for $RecordLine (@{$PDBRecordLinesRef}) { | |
127 if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) { | |
128 $RecordType = GetPDBRecordType($RecordLine); | |
129 | |
130 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine); | |
131 | |
132 print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $NewAtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n"; | |
133 | |
134 $OldToNewAtomNumbersMap{$AtomNumber} = $NewAtomNumber; | |
135 $NewAtomNumber++; | |
136 } | |
137 elsif (IsTerRecordType($RecordLine)) { | |
138 $NewAtomNumber++; | |
139 print OUTFILE GenerateTerRecordLine($NewAtomNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode), "\n"; | |
140 } | |
141 elsif (IsModelRecordType($RecordLine)) { | |
142 print OUTFILE "$RecordLine\n"; | |
143 } | |
144 elsif (IsEndmdlRecordType($RecordLine)) { | |
145 print OUTFILE "$RecordLine\n"; | |
146 # Restart numbering... | |
147 $NewAtomNumber = $OptionsInfo{StartingAtomNumber}; | |
148 } | |
149 } | |
150 | |
151 # Write out modified CONECT records... | |
152 my($ModifiedConectAtomNum, $ConectAtomNum, @ConectAtomNums, @ModifiedConectAtomNums); | |
153 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { | |
154 if (!IsConectRecordType($RecordLine)) { | |
155 next LINE; | |
156 } | |
157 @ConectAtomNums = (); | |
158 @ModifiedConectAtomNums = (); | |
159 push @ConectAtomNums, ParseConectRecordLine($RecordLine); | |
160 ATOMNUMBER: for $ConectAtomNum (@ConectAtomNums) { | |
161 $ModifiedConectAtomNum = $ConectAtomNum; | |
162 if (defined($ConectAtomNum)) { | |
163 $AtomNumber = $ConectAtomNum; | |
164 if ($AtomNumber) { | |
165 if (exists $OldToNewAtomNumbersMap{$AtomNumber}) { | |
166 $ModifiedConectAtomNum = $OldToNewAtomNumbersMap{$AtomNumber}; | |
167 } | |
168 } | |
169 } | |
170 push @ModifiedConectAtomNums, $ModifiedConectAtomNum; | |
171 } | |
172 # Write out the record... | |
173 print OUTFILE GenerateConectRecordLine(@ModifiedConectAtomNums), "\n"; | |
174 } | |
175 | |
176 # Write out END record... | |
177 print OUTFILE GenerateEndRecordLine(), "\n"; | |
178 | |
179 close OUTFILE; | |
180 } | |
181 | |
182 # Renumber residues... | |
183 sub RenumberResidues { | |
184 my($FileIndex, $PDBRecordLinesRef) = @_; | |
185 my($PDBFileName, $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $NewHetatmResidueNumber, $TERCount, $TotalTERCount, $PreviousResidueNumber, $PreviousHetatmResidueNumber, $RecordType); | |
186 | |
187 $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0]; | |
188 print "Generating PDBFileName file $PDBFileName...\n"; | |
189 open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n"; | |
190 | |
191 # Write out header and other older recors... | |
192 WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef); | |
193 | |
194 # Do a quick count of all TER records... | |
195 $TotalTERCount = 0; | |
196 for $RecordLine (@{$PDBRecordLinesRef}) { | |
197 if (IsTerRecordType($RecordLine)) { | |
198 $TotalTERCount++; | |
199 } | |
200 } | |
201 | |
202 # Write out all ATOM records along with TER and model records to indicate | |
203 # chains and multiple models.. | |
204 $NewResidueNumber = $OptionsInfo{StartingResidueNumber}; | |
205 $NewHetatmResidueNumber = $OptionsInfo{StartingHetatmResidueNumber}; | |
206 | |
207 $TERCount = 0; | |
208 $PreviousResidueNumber = 0; | |
209 $PreviousHetatmResidueNumber = 0; | |
210 | |
211 for $RecordLine (@{$PDBRecordLinesRef}) { | |
212 if (IsAtomRecordType($RecordLine) || (IsHetatmRecordType($RecordLine) && ($TERCount < $TotalTERCount || $OptionsInfo{HetatmResidueNumberMode} =~ /^Automatic$/i))) { | |
213 $RecordType = GetPDBRecordType($RecordLine); | |
214 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine); | |
215 | |
216 if ($PreviousResidueNumber && $PreviousResidueNumber != $ResidueNumber) { | |
217 $PreviousResidueNumber = $ResidueNumber; | |
218 $NewResidueNumber++; | |
219 } | |
220 else { | |
221 # First residue in a chain... | |
222 $PreviousResidueNumber = $ResidueNumber; | |
223 } | |
224 print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n"; | |
225 | |
226 } | |
227 elsif (IsHetatmRecordType($RecordLine)) { | |
228 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseHetatmRecordLine($RecordLine); | |
229 | |
230 # User HETATM residue numbers... | |
231 if ($PreviousHetatmResidueNumber && $PreviousHetatmResidueNumber != $ResidueNumber) { | |
232 $PreviousHetatmResidueNumber = $ResidueNumber; | |
233 $NewHetatmResidueNumber++; | |
234 } | |
235 else { | |
236 # First HETATM residue outside a chain... | |
237 $PreviousHetatmResidueNumber = $ResidueNumber; | |
238 } | |
239 | |
240 print OUTFILE GenerateHetatmRecordLine($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewHetatmResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n"; | |
241 } | |
242 elsif (IsTerRecordType($RecordLine)) { | |
243 $TERCount++; | |
244 $AtomNumber++; | |
245 print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode), "\n"; | |
246 # For per chain numbering, start over again... | |
247 if ($OptionsInfo{ResidueNumberMode} =~ /^PerChain$/i) { | |
248 if ($TERCount < $TotalTERCount ) { | |
249 $NewResidueNumber = $OptionsInfo{StartingResidueNumber}; | |
250 } | |
251 $PreviousResidueNumber = 0; | |
252 } | |
253 } | |
254 elsif (IsModelRecordType($RecordLine)) { | |
255 print OUTFILE "$RecordLine\n"; | |
256 } | |
257 elsif (IsEndmdlRecordType($RecordLine)) { | |
258 print OUTFILE "$RecordLine\n"; | |
259 } | |
260 } | |
261 | |
262 # Write out CONECT records... | |
263 for $RecordLine (@{$PDBRecordLinesRef}) { | |
264 if (IsConectRecordType($RecordLine)) { | |
265 print OUTFILE "$RecordLine\n"; | |
266 } | |
267 } | |
268 | |
269 # Write out END record... | |
270 print OUTFILE GenerateEndRecordLine(), "\n"; | |
271 | |
272 close OUTFILE; | |
273 } | |
274 | |
275 # Renumber water residues... | |
276 sub RenumberWaters { | |
277 my($FileIndex, $PDBRecordLinesRef) = @_; | |
278 my($PDBFileName, $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $RecordType); | |
279 | |
280 $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0]; | |
281 print "Generating PDBFileName file $PDBFileName...\n"; | |
282 open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n"; | |
283 | |
284 # Write out header and other older recors... | |
285 WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef); | |
286 | |
287 # Write out all ATOM records along with TER and model records to indicate | |
288 # chains and multiple models.. | |
289 $NewResidueNumber = $OptionsInfo{StartingWaterResidueNumber}; | |
290 for $RecordLine (@{$PDBRecordLinesRef}) { | |
291 if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) { | |
292 $RecordType = GetPDBRecordType($RecordLine); | |
293 | |
294 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine); | |
295 | |
296 if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) { | |
297 $ResidueNumber = $NewResidueNumber; | |
298 print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n"; | |
299 $NewResidueNumber++; | |
300 } | |
301 else { | |
302 print OUTFILE "$RecordLine\n"; | |
303 } | |
304 } | |
305 elsif (IsTerRecordType($RecordLine)) { | |
306 print OUTFILE "$RecordLine\n"; | |
307 } | |
308 elsif (IsModelRecordType($RecordLine)) { | |
309 print OUTFILE "$RecordLine\n"; | |
310 } | |
311 elsif (IsEndmdlRecordType($RecordLine)) { | |
312 print OUTFILE "$RecordLine\n"; | |
313 } | |
314 } | |
315 | |
316 # Write out CONECT records... | |
317 for $RecordLine (@{$PDBRecordLinesRef}) { | |
318 if (IsConectRecordType($RecordLine)) { | |
319 print OUTFILE "$RecordLine\n"; | |
320 } | |
321 } | |
322 | |
323 # Write out END record... | |
324 print OUTFILE GenerateEndRecordLine(), "\n"; | |
325 | |
326 close OUTFILE; | |
327 } | |
328 | |
329 # Rename chain IDs... | |
330 sub RenameChainsIDs { | |
331 my($FileIndex, $PDBRecordLinesRef) = @_; | |
332 my($PDBFileName, $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $RecordType, $PreviousChainID, $FirstChainID, $NewChainID, $NewChainIDCounter, %OldToNewChainIDsMap); | |
333 | |
334 $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0]; | |
335 print "Generating PDBFileName file $PDBFileName...\n"; | |
336 open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n"; | |
337 | |
338 # Write out header and other older recors... | |
339 WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef); | |
340 | |
341 # Write out all ATOM records along with TER and model records to indicate | |
342 # chains and multiple models.. | |
343 %OldToNewChainIDsMap = (); | |
344 $NewChainIDCounter = $OptionsInfo{StartingChainID}; | |
345 $FirstChainID = 1; | |
346 $PreviousChainID = ''; | |
347 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { | |
348 if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) { | |
349 $RecordType = GetPDBRecordType($RecordLine); | |
350 | |
351 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine); | |
352 | |
353 if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) { | |
354 # Chain IDs are not assigned to water residues... | |
355 print OUTFILE "$RecordLine\n"; | |
356 next LINE; | |
357 } | |
358 | |
359 if ($FirstChainID) { | |
360 $FirstChainID = 0; | |
361 $PreviousChainID = $ChainID; | |
362 if ($ChainID || (!$ChainID && $OptionsInfo{RenameEmptyChainIDs})) { | |
363 $NewChainID = $NewChainIDCounter; | |
364 $OldToNewChainIDsMap{$ChainID} = $NewChainID; | |
365 } | |
366 else { | |
367 $NewChainID = ''; | |
368 } | |
369 } | |
370 elsif ($PreviousChainID ne $ChainID) { | |
371 if ($ChainID || (!$ChainID && $OptionsInfo{RenameEmptyChainIDs})) { | |
372 $PreviousChainID = $ChainID; | |
373 if (exists $OldToNewChainIDsMap{$ChainID}) { | |
374 $NewChainID = $OldToNewChainIDsMap{$ChainID}; | |
375 } | |
376 else { | |
377 $NewChainIDCounter++; | |
378 $NewChainID = $NewChainIDCounter; | |
379 $OldToNewChainIDsMap{$ChainID} = $NewChainID; | |
380 } | |
381 } | |
382 else { | |
383 $NewChainID = ''; | |
384 } | |
385 } | |
386 | |
387 print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n"; | |
388 } | |
389 elsif (IsTerRecordType($RecordLine)) { | |
390 $AtomNumber++; | |
391 print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode), "\n"; | |
392 } | |
393 elsif (IsModelRecordType($RecordLine)) { | |
394 print OUTFILE "$RecordLine\n"; | |
395 } | |
396 elsif (IsEndmdlRecordType($RecordLine)) { | |
397 print OUTFILE "$RecordLine\n"; | |
398 } | |
399 } | |
400 | |
401 # Write out CONECT records... | |
402 for $RecordLine (@{$PDBRecordLinesRef}) { | |
403 if (IsConectRecordType($RecordLine)) { | |
404 print OUTFILE "$RecordLine\n"; | |
405 } | |
406 } | |
407 | |
408 # Write out END record... | |
409 print OUTFILE GenerateEndRecordLine(), "\n"; | |
410 | |
411 close OUTFILE; | |
412 } | |
413 | |
414 | |
415 # Write out modifed header and other older records... | |
416 sub WriteHeaderAndOlderRecords { | |
417 my($OutFileRef, $PDBRecordLinesRef) = @_; | |
418 | |
419 if ($OptionsInfo{ModifyHeaderRecord}) { | |
420 # Write out modified HEADER record... | |
421 my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef); | |
422 $Classification = 'Data modified using MayaChemTools'; | |
423 print $OutFileRef GenerateHeaderRecordLine($IDCode, $Classification), "\n"; | |
424 } | |
425 else { | |
426 print $OutFileRef $PDBRecordLinesRef->[0], "\n"; | |
427 } | |
428 | |
429 # Write out any old records... | |
430 if ($OptionsInfo{KeepOldRecords}) { | |
431 my($RecordLineIndex, $RecordLine); | |
432 # Skip HEADER record and write out older records all the way upto first MODEL/ATOM/HETATM records from input file... | |
433 RECORDLINE: for $RecordLineIndex (1 .. $#{$PDBRecordLinesRef}) { | |
434 $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex]; | |
435 if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) { | |
436 last RECORDLINE; | |
437 } | |
438 print $OutFileRef "$RecordLine\n"; | |
439 } | |
440 } | |
441 } | |
442 | |
443 # Get header record information assuming it's the first record... | |
444 sub GetHeaderRecordInformation { | |
445 my($PDBRecordLinesRef) = @_; | |
446 my($Classification, $DepositionDate, $IDCode, $HeaderRecordLine); | |
447 | |
448 ($Classification, $DepositionDate, $IDCode) = ('') x 3; | |
449 $HeaderRecordLine = $PDBRecordLinesRef->[0]; | |
450 if (IsHeaderRecordType($HeaderRecordLine)) { | |
451 ($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine); | |
452 } | |
453 return ($Classification, $DepositionDate, $IDCode); | |
454 } | |
455 | |
456 | |
457 # Process option values... | |
458 sub ProcessOptions { | |
459 %OptionsInfo = (); | |
460 $OptionsInfo{Mode} = $Options{mode}; | |
461 | |
462 $OptionsInfo{StartingAtomNumber} = $Options{atomnumberstart}; | |
463 $OptionsInfo{StartingChainID} = $Options{chainidstart}; | |
464 $OptionsInfo{RenameEmptyChainIDs} = ($Options{chainidrenameempty} =~ /^Yes$/i) ? 1 : 0; | |
465 | |
466 $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0; | |
467 $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0; | |
468 | |
469 $OptionsInfo{ResidueNumberMode} = $Options{residuenumbermode}; | |
470 $OptionsInfo{StartingResidueNumber} = $Options{residuenumberstart}; | |
471 | |
472 $OptionsInfo{HetatmResidueNumberMode} = $Options{residuenumberhetatmmode}; | |
473 $OptionsInfo{StartingHetatmResidueNumber} = $Options{residuenumberstarthetatm}; | |
474 | |
475 $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0; | |
476 $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0; | |
477 | |
478 $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames}; | |
479 $OptionsInfo{StartingWaterResidueNumber} = $Options{waterresiduestart}; | |
480 @{$OptionsInfo{SpecifiedWaterResiduesList}} = (); | |
481 %{$OptionsInfo{SpecifiedWaterResiduesMap}} = (); | |
482 | |
483 my(@SpecifiedWaterResiduesList); | |
484 @SpecifiedWaterResiduesList = (); | |
485 my($WaterResidueName); | |
486 if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) { | |
487 push @SpecifiedWaterResiduesList, ('HOH', 'WAT', 'H2O'); | |
488 } | |
489 else { | |
490 @SpecifiedWaterResiduesList = split /\,/, $Options{waterresiduenames}; | |
491 } | |
492 for $WaterResidueName (@SpecifiedWaterResiduesList) { | |
493 $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName; | |
494 } | |
495 push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList; | |
496 } | |
497 | |
498 # Retrieve information about PDB files... | |
499 sub RetrievePDBFilesInfo { | |
500 my($Index, $PDBFile, $PDBRecordLinesRef, $ChainsAndResiduesInfoRef, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot, $Mode, $OutFileMode, @OutFileNames); | |
501 | |
502 %PDBFilesInfo = (); | |
503 @{$PDBFilesInfo{FileOkay}} = (); | |
504 @{$PDBFilesInfo{OutFileRoot}} = (); | |
505 @{$PDBFilesInfo{OutFileNames}} = (); | |
506 | |
507 FILELIST: for $Index (0 .. $#PDBFilesList) { | |
508 $PDBFilesInfo{FileOkay}[$Index] = 0; | |
509 | |
510 $PDBFilesInfo{OutFileRoot}[$Index] = ''; | |
511 @{$PDBFilesInfo{OutFileNames}[$Index]} = (); | |
512 @{$PDBFilesInfo{OutFileNames}[$Index]} = (); | |
513 | |
514 $PDBFile = $PDBFilesList[$Index]; | |
515 if (!(-e $PDBFile)) { | |
516 warn "Warning: Ignoring file $PDBFile: It doesn't exist\n"; | |
517 next FILELIST; | |
518 } | |
519 if (!CheckFileType($PDBFile, "pdb")) { | |
520 warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n"; | |
521 next FILELIST; | |
522 } | |
523 if (! open PDBFILE, "$PDBFile") { | |
524 warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n"; | |
525 next FILELIST; | |
526 } | |
527 close PDBFILE; | |
528 | |
529 # Get PDB data... | |
530 $PDBRecordLinesRef = ReadPDBFile($PDBFile); | |
531 $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef); | |
532 if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) { | |
533 warn "Warning: Ignoring file $PDBFile: No chains found \n"; | |
534 next FILELIST; | |
535 } | |
536 | |
537 # Setup output file names... | |
538 @OutFileNames = (); | |
539 $FileDir = ""; $FileName = ""; $FileExt = ""; | |
540 ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile); | |
541 if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) { | |
542 my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot}); | |
543 if ($RootFileName && $RootFileExt) { | |
544 $FileName = $RootFileName; | |
545 } | |
546 else { | |
547 $FileName = $OptionsInfo{OutFileRoot}; | |
548 } | |
549 $OutFileRoot = $FileName; | |
550 } | |
551 else { | |
552 $OutFileRoot = $FileName; | |
553 } | |
554 $Mode = $OptionsInfo{Mode}; | |
555 MODE: { | |
556 if ($Mode =~ /^RenumberAtoms$/i) { $OutFileMode = 'RenumberAtoms'; last MODE;} | |
557 if ($Mode =~ /^RenumberResidues$/i) { $OutFileMode = 'RenumberResidues'; last MODE;} | |
558 if ($Mode =~ /^RenumberWaters$/i) { $OutFileMode = 'RenumberWaters'; last MODE;} | |
559 if ($Mode =~ /^RenameChainIDs$/i) { $OutFileMode = 'RenameChainIDs'; last MODE;} | |
560 $OutFileMode = ''; | |
561 } | |
562 $OutFileName = "${OutFileRoot}${OutFileMode}.pdb"; | |
563 push @OutFileNames, $OutFileName; | |
564 | |
565 $PDBFilesInfo{FileOkay}[$Index] = 1; | |
566 $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot; | |
567 | |
568 push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames; | |
569 } | |
570 } | |
571 | |
572 # Setup script usage and retrieve command line arguments specified using various options... | |
573 sub SetupScriptUsage { | |
574 | |
575 # Retrieve all the options... | |
576 %Options = (); | |
577 $Options{atomnumberstart} = 1; | |
578 $Options{chainidstart} = 'A'; | |
579 $Options{chainidrenameempty} = 'No'; | |
580 $Options{keepoldrecords} = 'no'; | |
581 $Options{mode} = 'RenumberResidues'; | |
582 $Options{modifyheader} = 'yes'; | |
583 $Options{residuenumbermode} = 'PerChain'; | |
584 $Options{residuenumberstart} = 1; | |
585 $Options{residuenumberhetatmmode} = 'Automatic'; | |
586 $Options{residuenumberstarthetatm} = 6000; | |
587 $Options{waterresiduenames} = 'Automatic'; | |
588 $Options{waterresiduestart} = 8000; | |
589 | |
590 if (!GetOptions(\%Options, "help|h", "atomnumberstart|a=i", "chainidstart|c=s", "chainidrenameempty=s", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "overwrite|o", "residuenumbermode=s", "residuenumberstart=i", "residuenumberhetatmmode=s", "residuenumberstarthetatm=i", "root|r=s", "sequencelength=i", "waterresiduenames=s", "waterresiduestart=i", "workingdir|w=s")) { | |
591 die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n"; | |
592 } | |
593 if ($Options{workingdir}) { | |
594 if (! -d $Options{workingdir}) { | |
595 die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n"; | |
596 } | |
597 chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n"; | |
598 } | |
599 if (!IsPositiveInteger($Options{atomnumberstart})) { | |
600 die "Error: The value specified, $Options{atomnumberstart}, for option \"-a, --AtomNumberStart\" is not valid. Allowed values: >0\n"; | |
601 } | |
602 if ((length($Options{chainidstart}) > 1) || ($Options{chainidstart} !~ /[A-Z]/i)) { | |
603 die "Error: The value specified, $Options{chainidstart}, for option \"-c, --ChainIDStart\" is not valid. Allowed values: a single character from A to Z\n"; | |
604 } | |
605 if ($Options{chainidrenameempty} !~ /^(yes|no)$/i) { | |
606 die "Error: The value specified, $Options{chainidrenameempty}, for option \"--chainidrenameempty\" is not valid. Allowed values: yes or no\n"; | |
607 } | |
608 if ($Options{keepoldrecords} !~ /^(yes|no)$/i) { | |
609 die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n"; | |
610 } | |
611 if ($Options{mode} !~ /^(RenumberAtoms|RenumberResidues|RenumberWaters|RenameChainIDs)$/i) { | |
612 die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: RenumberAtoms, RenumberResidues, RenumberWaters or RenameChainIDs\n"; | |
613 } | |
614 if ($Options{modifyheader} !~ /^(yes|no)$/i) { | |
615 die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n"; | |
616 } | |
617 if ($Options{residuenumbermode} !~ /^(Sequential|PerChain)$/i) { | |
618 die "Error: The value specified, $Options{residuenumbermode}, for option \"--ResidueNumberMode\" is not valid. Allowed values: Sequential or PerChain\n"; | |
619 } | |
620 if (!IsPositiveInteger($Options{residuenumberstart})) { | |
621 die "Error: The value specified, $Options{residuenumberstart}, for option \"--ResidueNumberStart\" is not valid. Allowed values: >0\n"; | |
622 } | |
623 if ($Options{residuenumberhetatmmode} !~ /^(automatic|specify)$/i) { | |
624 die "Error: The value specified, $Options{residuenumberhetatmmode}, for option \"--residuenumbermode\" is not valid. Allowed values: automatic or specify\n"; | |
625 } | |
626 if (!IsPositiveInteger($Options{residuenumberstarthetatm})) { | |
627 die "Error: The value specified, $Options{residuenumberstarthetatm}, for option \"--residuenumberstartHetatm\" is not valid. Allowed values: >0\n"; | |
628 } | |
629 if (!IsPositiveInteger $Options{waterresiduestart}) { | |
630 die "Error: The value specified, $Options{waterresiduestart}, for option \"--waterresiduestart\" is not valid. Allowed values: >0\n"; | |
631 } | |
632 } | |
633 | |
634 __END__ | |
635 | |
636 =head1 NAME | |
637 | |
638 ModifyPDBFiles.pl - Modify data in PDBFile(s) | |
639 | |
640 =head1 SYNOPSIS | |
641 | |
642 ModifyPDBFiles.pl PDBFile(s)... | |
643 | |
644 ModifyPDBFiles.pl [B<-a, --AtomNumberStart> number] [B<-c, --ChainIDStart> character] | |
645 [B<--ChainIDRenameEmpty> yes | no] [B<-h, --help>] [B<-k, --KeepOldRecords> yes | no] | |
646 [B<-m, --mode > RenumberAtoms | RenumberResidues | RenumberWaters | RenameChainIDs] | |
647 [B<--ModifyHeader> yes | no] [B<-o, --overwrite>] [B<--ResidueNumberMode> Sequential | PerChain] | |
648 [B<--ResidueNumberStart> number] [B<--ResidueNumberHetatmMode> automatic | specify] | |
649 [B<--ResidueNumberStarHetatm> number] [B<-r, --root> rootname] | |
650 [B<--WaterResidueNames> Automatic | "ResidueName, [ResidueName,...]"] [B<--WaterResidueStart> number] | |
651 [B<-w, --WorkingDir> dirname] PDBFile(s)... | |
652 | |
653 =head1 DESCRIPTION | |
654 | |
655 Modify data in I<PDBFile(s)>: renumber atoms, residues, and water residues or assign new | |
656 chain IDs. Multiple PDBFile names are separated by spaces. The valid file extension is I<.pdb>. | |
657 All other file name extensions are ignored during the wild card expansion. All the PDB files | |
658 in a current directory can be specified either by I<*.pdb> or the current directory name. | |
659 | |
660 =head1 OPTIONS | |
661 | |
662 =over 4 | |
663 | |
664 =item B<-a, --AtomNumberStart> I<number> | |
665 | |
666 Starting atom number to use during I<RenumberAtoms> value of B<-m, --mode> option. Default: I<1>. | |
667 Valid values: positive integers. | |
668 | |
669 =item B<-c, --ChainIDStart> I<character> | |
670 | |
671 A single character to use for starting IDs for chains during I<RenameChainIDs> value of B<-m, --mode> option. | |
672 Default: I<A>. Valid values: I<A to Z>. | |
673 | |
674 =item B<--ChainIDRenameEmpty> I<Yes | No> | |
675 | |
676 Specify whether to rename empty chain IDs during I<RenameChainIDs> B<-m, --mode> value. By | |
677 default, ATOM and HETATM records with no chain IDs are left unchanged. Possible values: | |
678 I<yes | no>. Default: I<No>. | |
679 | |
680 =item B<-h, --help> | |
681 | |
682 Print this help message. | |
683 | |
684 =item B<-k, --KeepOldRecords> I<yes | no> | |
685 | |
686 Specify whether to transfer old non ATOM and HETATM records from input PDBFile(s) to new | |
687 PDBFile(s). By default, except for the HEADER record, all records other than ATOM/HETATM | |
688 are dropped during the generation of new PDB files. Possible values: I<yes | no>. | |
689 Default: I<no>. | |
690 | |
691 =item B<-m, --mode > I<RenumberAtoms | RenumberResidues | RenumberWaters | RenameChainIDs> | |
692 | |
693 Specify how to modify I<PDBFile(s)>. Possible values: I<RenumberAtoms | RenumberResidues | |
694 | RenumberWaters | RenameChainIDs>. Default: I<RenumberResidues>. | |
695 | |
696 For I<RenumberAtoms> mode, residue number in ATOM and HETATM records are reassigned | |
697 sequentially starting using value of B<-a, --AtomNumberStart> option. | |
698 | |
699 For I<RenumberResidues> mode, serial number in ATOM and HETATM records are reassigned | |
700 either sequentially or statring from specified values for ATOM and HETATM records in each | |
701 chain. | |
702 | |
703 For I<RenumberWaters> mode, residue number for waters are reassigned starting from a specific | |
704 value. | |
705 | |
706 For I<RenameChainIDs> mode, all the chain IDs are reassigned starting from a specific chain ID. | |
707 | |
708 During the generation of new PDB files, unnecessary CONECT records are dropped. | |
709 | |
710 =item B<--ModifyHeader> I<yes | no> | |
711 | |
712 Specify whether to modify HEADER record during the generation of new PDB files | |
713 Possible values: I<yes | no>. Default: I<yes>. By defailt, Classification data is replaced | |
714 by I<Data modified using MayaChemTools> before writing out HEADER record. | |
715 | |
716 =item B<-o, --overwrite> | |
717 | |
718 Overwrite existing files | |
719 | |
720 =item B<--ResidueNumberMode> I<Sequential | PerChain> | |
721 | |
722 Specify how to renumber residues: renumber residues sequentially across all the chains | |
723 or start from the begining for each chain. Possible values: I<Sequential | PerChain>. Default: | |
724 I<PerChain>. | |
725 | |
726 =item B<--ResidueNumberStart> I<number> | |
727 | |
728 Starting residue number to use for ATOM records in chains. Default: I<1>. Valid values | |
729 positive integers. | |
730 | |
731 For I<Sequential> value of B<--ResidueNumberMode> option, residue numbers are | |
732 assigned sequentially across all the chains starting from the specified value. | |
733 | |
734 For I<PerChain> value of B<--ResidueNumberMode> option, residue numbers are | |
735 starting again from the specified value for each chain. | |
736 | |
737 HETATM residues with in the chains are numbered using this value as well | |
738 | |
739 =item B<--ResidueNumberHetatmMode> I<automatic | specify> | |
740 | |
741 Specify how to start residue number for HETATM records: use the next sequential | |
742 residue number after the last residue number from ATOM records or start from a | |
743 specific residue number. Possible values: I<automatic | specify>. Default: | |
744 I<automatic> | |
745 | |
746 For I<automatic> , residue number after highest residue number of ATOM | |
747 records is used as the starting residue number for HETATM records. | |
748 | |
749 For I<specify>, value of option B<--ResidueNumberStarHetatm> is used as the | |
750 starting residue number for HETATM records. | |
751 | |
752 This option along with B<--ResidueNumberStartHetatm> only applies to HETATM records | |
753 outside the chains. | |
754 | |
755 =item B<--ResidueNumberStartHetatm> I<number> | |
756 | |
757 Starting residue number to use for HETATM records. Default: I<6000>. Valid values | |
758 positive integers. | |
759 | |
760 =item B<-r, --root> I<rootname> | |
761 | |
762 New PDB and sequence file name is generated using the root: <Root><Mode>.<Ext>. | |
763 Default new file name: <PDBFileName><Mode>.pdb. This option is ignored for multiple | |
764 input files. | |
765 | |
766 =item B<--WaterResidueNames> I<Automatic | "ResidueName,[ResidueName,...]"> | |
767 | |
768 Identification of water residues during I<RenumberWaters> value of B<-m, --mode> option. Possible | |
769 values: I<Automatic | "ResidueName,[ResidueName,...]">. Default: I<Automatic> which corresponds | |
770 to "HOH,WAT,H20". You can also specify a different comma delimited list of residue names | |
771 to use for water. | |
772 | |
773 =item B<--WaterResidueStart> I<number> | |
774 | |
775 Starting water residue number to use during I<RenumberWaters> B<-m, --mode> value. | |
776 Default: I<8000>. Valid values: positive integers. | |
777 | |
778 =item B<-w, --WorkingDir> I<dirname> | |
779 | |
780 Location of working directory. Default: current directory. | |
781 | |
782 =back | |
783 | |
784 =head1 EXAMPLES | |
785 | |
786 To renumber ATOM and HETATM residues starting from 1 for each chain with continuation to | |
787 HETATM residues outside TER records in Sample2.pdb and generate | |
788 Sample2RenumberResidues.pdb file, type: | |
789 | |
790 % ModifyPDBFiles.pl Sample1.pdb | |
791 | |
792 To renumber ATOM and HETATM residues sequentially across all chains starting from 1 with | |
793 continuation to HETATM residues outside TER records in Sample2.pdb and generate | |
794 Sample2RenumberResidues.pdb file, type: | |
795 | |
796 % ModifyPDBFiles.pl --ResidueNumberMode Sequential -o Sample1.pdb | |
797 | |
798 To renumber ATOM and HETATM residues sequentially across all chains starting from 1 and | |
799 HETATM residues outside TER records starting from 6000 in Sample2.pdb and generate | |
800 Sample2RenumberResidues.pdb file, type: | |
801 | |
802 % ModifyPDBFiles.pl --ResidueNumberMode Sequential | |
803 --ResidueNumberHetatmMode Specify -o Sample1.pdb | |
804 | |
805 | |
806 To renumber ATOM and HETATM residues sequentially across all chains starting from 100 for | |
807 ATOM/HETATM residues with in TER records and starting from 999 for HETATM residues | |
808 outside TER records in Sample2.pdb and generate Sample2RenumberResidues.pdb file, type: | |
809 | |
810 % ModifyPDBFiles.pl --ResidueNumberMode Sequential | |
811 --ResidueNumberHetatmMode Specify --ResidueNumberStart 100 | |
812 --ResidueNumberStartHetatm 999 -o Sample2.pdb | |
813 | |
814 To renumber ATOM and HETATM residues from 100 for each chain and starting from 999 for | |
815 HETATM residues outside TER records in Sample2.pdb and generate Sample2RenumberResidues.pdb | |
816 file, type: | |
817 | |
818 % ModifyPDBFiles.pl --ResidueNumberMode PerChain | |
819 --ResidueNumberHetatmMode Specify --ResidueNumberStart 100 | |
820 --ResidueNumberStartHetatm 999 -o Sample2.pdb | |
821 | |
822 To renumber ATOM serial numbers sequentially starting from 100 in Sample1.pdb file and generate | |
823 Sample1RenumberAtoms.pdb file, type: | |
824 | |
825 % ModifyPDBFiles.pl -m RenumberAtoms --AtomNumberStart 100 | |
826 -o Sample1.pdb | |
827 | |
828 To renumber water residues identified by "HOH,WAT" starting from residue number 1000 | |
829 in Sample2.pdb file and generate Sample2RenumberWaters.pdb file, type: | |
830 | |
831 % ModifyPDBFiles.pl -m RenumberWaters --WaterResidueNames "HOH,WAT" | |
832 -o --WaterResidueStart 950 Sample2.pdb | |
833 | |
834 To rename all chain IDs starting from A in Sample1.pdb file and generate | |
835 Sample1RenameChainIDs.pdb file, type: | |
836 | |
837 % ModifyPDBFiles.pl -m RenameChainIDs -o Sample1.pdb | |
838 | |
839 To rename all chain IDs starting from B without assigning any chain IDs to ATOM/HETATOM | |
840 with no chain IDs in Sample2.pdb file and generate Sample2RenameChainIDs.pdb file, type: | |
841 | |
842 % ModifyPDBFiles.pl l -m RenameChainIDs -c B --ChainIDRenameEmpty No | |
843 -o Sample2.pdb | |
844 | |
845 | |
846 =head1 AUTHOR | |
847 | |
848 Manish Sud <msud@san.rr.com> | |
849 | |
850 =head1 SEE ALSO | |
851 | |
852 ExtractFromPDBFiles.pl, InfoPDBFiles.pl | |
853 | |
854 =head1 COPYRIGHT | |
855 | |
856 Copyright (C) 2015 Manish Sud. All rights reserved. | |
857 | |
858 This file is part of MayaChemTools. | |
859 | |
860 MayaChemTools is free software; you can redistribute it and/or modify it under | |
861 the terms of the GNU Lesser General Public License as published by the Free | |
862 Software Foundation; either version 3 of the License, or (at your option) | |
863 any later version. | |
864 | |
865 =cut |