Mercurial > repos > deepakjadmin > mayatool3_test3
comparison mayachemtools/bin/ExtractFromPDBFiles.pl @ 0:73ae111cf86f draft
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 11:55:01 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:73ae111cf86f |
---|---|
1 #!/usr/bin/perl -w | |
2 # | |
3 # $RCSfile: ExtractFromPDBFiles.pl,v $ | |
4 # $Date: 2015/02/28 20:46:19 $ | |
5 # $Revision: 1.39 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability of fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use FindBin; use lib "$FindBin::Bin/../lib"; | |
31 use Getopt::Long; | |
32 use File::Basename; | |
33 use Text::ParseWords; | |
34 use Benchmark; | |
35 use FileUtil; | |
36 use TextUtil; | |
37 use PDBFileUtil; | |
38 use AminoAcids; | |
39 use SequenceFileUtil; | |
40 | |
# File-scoped state shared with the subroutines defined below.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
# Direct class-method call; the indirect-object form "new Benchmark" is
# discouraged by perlobj because its parsing depends on what is in scope.
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

# Expand command line arguments into the list of PDB files to process...
my(@PDBFilesList);
@PDBFilesList = ExpandFileNames(\@ARGV, "pdb");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input PDB file(s)...\n";
my(%PDBFilesInfo);
RetrievePDBFilesInfo();

# Process input files; files not flagged as okay in %PDBFilesInfo are skipped...
my($FileIndex);
if (@PDBFilesList > 1) {
  print "\nProcessing PDB files...\n";
}
for $FileIndex (0 .. $#PDBFilesList) {
  if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
    ExtractFromPDBFiles($FileIndex);
  }
}
print "\n$ScriptName:Done...\n\n";

# Report elapsed time...
$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
86 | |
87 ############################################################################### | |
88 | |
# Read one input PDB file and hand its record lines to the extraction routine
# selected by the mode option.
#
# Parameters:
#   $FileIndex - index of the file in @PDBFilesList
#
# The mode patterns are tried in the order listed and only the first match is
# used; when no pattern matches, nothing is extracted.
#
sub ExtractFromPDBFiles {
  my($FileIndex) = @_;

  # Get PDB data...
  my $InputPDBFile = $PDBFilesList[$FileIndex];
  my $RecordLinesRef = ReadPDBFile($InputPDBFile);

  # Ordered list of [mode pattern, handler] pairs...
  my @ModeHandlers = (
    [qr/Chains/i, \&ExtractChains],
    [qr/Sequences/i, \&ExtractSequences],
    [qr/^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames)$/i, \&ExtractByAtoms],
    [qr/^(ResidueNums|ResiduesRange|ResidueNames)$/i, \&ExtractByResidues],
    [qr/Distance/i, \&ExtractByDistance],
    [qr/NonWater/i, \&ExtractNonWaterRecords],
    [qr/NonHydrogens/i, \&ExtractNonHydrogenRecords],
  );

  for my $ModeHandler (@ModeHandlers) {
    my($ModePattern, $HandlerRef) = @{$ModeHandler};
    if ($OptionsInfo{Mode} =~ $ModePattern) {
      return $HandlerRef->($FileIndex, $RecordLinesRef);
    }
  }
  return;
}
120 | |
# Extract the specified chains and write them out, either as one PDB file per
# chain or, when chains are being combined, as a single PDB file.
#
# Parameters:
#   $FileIndex         - index into @PDBFilesList and the per-file arrays in %PDBFilesInfo
#   $PDBRecordLinesRef - reference to the record lines returned by ReadPDBFile
#
# Dies when an output file can't be opened or closed.
#
sub ExtractChains {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($ChainIndex, $ChainID, $PDBFileName, $OutFH, $RecordLine, $ChainsAndResiduesInfoRef, $AtomNumber, $AtomName, $ResidueName, $AtomChainID, $ResidueNumber, $AlternateLocation, $InsertionCode, $ConectRecordLinesRef, %ChainAtomNumbersMap);

  # Get chains and residues data...
  $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'AtomAndHetatm', 0, 1);

  if ($OptionsInfo{CombineChains}) {
    # A single output file holds all the chains...
    $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
    print "Generating PDBFileName file $PDBFileName...\n";

    # Three-argument open with a lexical handle: the file name can't be
    # misread as an open mode and the handle isn't a package global...
    open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

    # Write out header and other older records...
    WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
  }

  for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
    $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];

    if (!$OptionsInfo{CombineChains}) {
      # Each chain goes into its own output file...
      $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][$ChainIndex];
      print "Generating PDBFileName file $PDBFileName...\n";

      open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

      # Write out header and other older records...
      WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
    }

    # Write out ATOM/HETATM line for chain and collect all ATOM/HETATM serial numbers
    # for writing out appropriate CONECT records...
    %ChainAtomNumbersMap = ();
    for $RecordLine (@{$ChainsAndResiduesInfoRef->{Lines}{$ChainID}}) {
      print {$OutFH} "$RecordLine\n";
      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $AtomChainID, $ResidueNumber, $InsertionCode) = ParseAtomRecordLine($RecordLine);
      $AtomNumber = int $AtomNumber;
      $ChainAtomNumbersMap{$AtomNumber} = $AtomName;
    }

    # Write out TER record using information from last chain record...
    # NOTE(review): when a chain has no record lines, these values are left over
    # from the previous chain (or undefined) - confirm that can't happen.
    $AtomNumber += 1;
    print {$OutFH} GenerateTerRecordLine($AtomNumber, $ResidueName, $AtomChainID, $ResidueNumber, $InsertionCode), "\n";

    # Write out CONECT records...
    $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%ChainAtomNumbersMap);

    for $RecordLine (@{$ConectRecordLinesRef}) {
      print {$OutFH} "$RecordLine\n";
    }

    if (!$OptionsInfo{CombineChains}) {
      # Write out END record...
      print {$OutFH} GenerateEndRecordLine(), "\n";

      # Buffered write errors surface at close, so check it...
      close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
    }
  }

  if ($OptionsInfo{CombineChains}) {
    # Write out END record...
    print {$OutFH} GenerateEndRecordLine(), "\n";

    close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
  }

}
190 | |
# Extract sequences for the specified chains and write them out as FASTA
# files: one file per chain or, when chain sequences are being combined, a
# single file holding the concatenated sequence.
#
# Parameters:
#   $FileIndex         - index into the per-file arrays in %PDBFilesInfo
#   $PDBRecordLinesRef - reference to the record lines returned by ReadPDBFile
#
# Residues which GetResidueCode reports as nonstandard are either replaced by
# the nonstandard sequence code or dropped, with a warning in both cases.
#
sub ExtractSequences {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($ChainIndex, $ChainID, $ChainLabel, $Residue, $ResidueCode, $StandardResidue, $ChainSequence, $ChainSequenceID, $ChainsAndResiduesInfoRef, %ChainSequencesDataMap);

  if ($OptionsInfo{SequenceRecordSource} =~ /^SeqRes$/i) {
    $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'SeqRes');
  }
  else {
    $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
  }

  # Generate sequence data for all the chains...
  %ChainSequencesDataMap = ();
  @{$ChainSequencesDataMap{IDs}} = ();
  %{$ChainSequencesDataMap{Sequence}} = ();
  %{$ChainSequencesDataMap{Description}} = ();

  for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
    $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
    $ChainLabel = $PDBFilesInfo{ChainLabels}[$FileIndex][$ChainIndex];

    # Setup sequence ID...
    $ChainSequenceID = $PDBFilesInfo{ChainSequenceIDs}[$FileIndex][$ChainIndex];
    push @{$ChainSequencesDataMap{IDs}}, $ChainSequenceID;
    $ChainSequencesDataMap{Description}{$ChainID} = $ChainSequenceID;

    # Setup sequence data; the residues are read from the same place
    # irrespective of the source of sequence records...
    $ChainSequence = '';
    RESIDUE: for $Residue (@{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}}) {
      ($ResidueCode, $StandardResidue) = GetResidueCode($Residue);
      if (!$StandardResidue) {
        if ($OptionsInfo{KeepNonStandardSequences}) {
          $ResidueCode = $OptionsInfo{NonStandardSequenceCode};
          warn "Warning: Keeping nonstandard residue $Residue in $ChainLabel...\n";
        }
        else {
          warn "Warning: Ignoring nonstandard residue $Residue in $ChainLabel...\n";
          next RESIDUE;
        }
      }
      $ChainSequence .= $ResidueCode;
    }
    $ChainSequencesDataMap{Sequence}{$ChainID} = $ChainSequence;
  }

  # Write a FASTA file containing exactly one sequence...
  my $WriteSequenceFile = sub {
    my($SequenceFileName, $SequenceID, $SequenceDescription, $Sequence) = @_;

    print "Generating sequence file $SequenceFileName...\n";
    my(%SequencesDataMap) = (
      IDs => [$SequenceID],
      Description => {$SequenceID => $SequenceDescription},
      Sequence => {$SequenceID => $Sequence},
    );
    WritePearsonFastaSequenceFile($SequenceFileName, \%SequencesDataMap, $OptionsInfo{MaxSequenceLength});
  };

  # Write out the sequence files...
  if ($OptionsInfo{CombineChainSequences}) {
    # Combine all the chain sequences in the order chains were specified...
    my($CombinedSequence, $CombinedSequenceID);

    $CombinedSequence = '';
    for $ChainID (@{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
      $CombinedSequence .= $ChainSequencesDataMap{Sequence}{$ChainID};
    }
    $CombinedSequenceID = $PDBFilesInfo{ChainSequenceIDsPrefix}[$FileIndex][0] . "_CombinedChains|PDB";

    # Sequence ID doubles as the description...
    $WriteSequenceFile->($PDBFilesInfo{OutFileNames}[$FileIndex][0], $CombinedSequenceID, $CombinedSequenceID, $CombinedSequence);
  }
  else {
    # For each specified chain, write out the sequences...
    for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
      $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];

      $WriteSequenceFile->(
        $PDBFilesInfo{OutFileNames}[$FileIndex][$ChainIndex],
        $ChainSequencesDataMap{IDs}[$ChainIndex],
        $ChainSequencesDataMap{Description}{$ChainID},
        $ChainSequencesDataMap{Sequence}{$ChainID}
      );
    }
  }
}
296 | |
# Extract records selected by atom criteria (all atoms, atom names, atom
# numbers or a range of atom numbers, depending on the mode) and write them
# out to a new PDB file.
#
# Parameters:
#   $FileIndex         - index into the per-file arrays in %PDBFilesInfo
#   $PDBRecordLinesRef - reference to the record lines returned by ReadPDBFile
#
# Dies when the output file can't be opened or closed.
#
sub ExtractByAtoms {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $OutFH, $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $IgnoreRecord, $ConectRecordLinesRef, %AtomNumbersMap);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  # Three-argument open with a lexical handle instead of the bareword OUTFILE...
  open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  %AtomNumbersMap = ();
  $ChainRecordCount = 0;
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (CheckRecordType($RecordLine)) {
      ($AtomNumber, $AtomName) = ParseAtomRecordLine($RecordLine);

      # Check atoms; a record is ignored unless the current mode selects it...
      $IgnoreRecord = 1;
      if ($OptionsInfo{Mode} =~ /^Atoms$/i) {
        $IgnoreRecord = 0;
      }
      elsif ($OptionsInfo{Mode} =~ /^(CAlphas|AtomNames)$/i) {
        if (exists $OptionsInfo{SpecifiedAtomNamesMap}{lc $AtomName}) {
          $IgnoreRecord = 0;
        }
      }
      elsif ($OptionsInfo{Mode} =~ /^AtomNums$/i) {
        if (exists $OptionsInfo{SpecifiedAtomNumsMap}{$AtomNumber}) {
          $IgnoreRecord = 0;
        }
      }
      elsif ($OptionsInfo{Mode} =~ /^AtomsRange$/i) {
        if ($AtomNumber >= $OptionsInfo{SpecifiedStartAtomNum} && $AtomNumber <= $OptionsInfo{SpecifiedEndAtomNum}) {
          $IgnoreRecord = 0;
        }
      }

      if (!$IgnoreRecord) {
        $ChainRecordCount++;
        print {$OutFH} "$RecordLine\n";

        # Remember the atom number for picking out CONECT records later...
        $AtomNumber = int $AtomNumber;
        $AtomNumbersMap{$AtomNumber} = $AtomName;
      }
    }
    elsif (IsTerRecordType($RecordLine)) {
      # Terminate a chain only when at least one of its records was written...
      if ($ChainRecordCount) {
        print {$OutFH} GenerateTerRecordLine(), "\n";
      }
      $ChainRecordCount = 0;
    }
    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out appropriate CONECT records...
  $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
  for $RecordLine (@{$ConectRecordLinesRef}) {
    print {$OutFH} "$RecordLine\n";
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close, so check it...
  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
}
368 | |
# Extract records selected by residue criteria (residue numbers, a range of
# residue numbers or residue names, depending on the mode) and write them out
# to a new PDB file.
#
# Parameters:
#   $FileIndex         - index into the per-file arrays in %PDBFilesInfo
#   $PDBRecordLinesRef - reference to the record lines returned by ReadPDBFile
#
# Dies when the output file can't be opened or closed.
#
sub ExtractByResidues {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $OutFH, $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $ResidueName, $ResidueNumber, $ConectRecordLinesRef, $IgnoreRecord, %AtomNumbersMap);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  # Three-argument open with a lexical handle instead of the bareword OUTFILE...
  open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Write out all ATOM records for specified residues with TER and model records to indicate
  # chains and multiple models...
  %AtomNumbersMap = ();
  $ChainRecordCount = 0;
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (CheckRecordType($RecordLine)) {
      # Alternate location and chain ID fields are not needed here...
      ($AtomNumber, $AtomName, undef, $ResidueName, undef, $ResidueNumber) = ParseAtomRecordLine($RecordLine);

      # Check residues; a record is ignored unless the current mode selects it...
      $IgnoreRecord = 1;
      if ($OptionsInfo{Mode} =~ /^ResidueNums$/i) {
        if (exists $OptionsInfo{SpecifiedResidueNumsMap}{$ResidueNumber}) {
          $IgnoreRecord = 0;
        }
      }
      elsif ($OptionsInfo{Mode} =~ /^ResiduesRange$/i) {
        if ($ResidueNumber >= $OptionsInfo{SpecifiedStartResidueNum} && $ResidueNumber <= $OptionsInfo{SpecifiedEndResidueNum}) {
          $IgnoreRecord = 0;
        }
      }
      elsif ($OptionsInfo{Mode} =~ /^ResidueNames$/i) {
        if (exists $OptionsInfo{SpecifiedResidueNamesMap}{lc $ResidueName}) {
          $IgnoreRecord = 0;
        }
      }

      if (!$IgnoreRecord) {
        $ChainRecordCount++;
        print {$OutFH} "$RecordLine\n";

        # Remember the atom number for picking out CONECT records later...
        $AtomNumber = int $AtomNumber;
        $AtomNumbersMap{$AtomNumber} = $AtomName;
      }
    }
    elsif (IsTerRecordType($RecordLine)) {
      # Terminate a chain only when at least one of its records was written...
      if ($ChainRecordCount) {
        print {$OutFH} GenerateTerRecordLine(), "\n";
      }
      $ChainRecordCount = 0;
    }
    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out appropriate CONECT records...
  $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
  for $RecordLine (@{$ConectRecordLinesRef}) {
    print {$OutFH} "$RecordLine\n";
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close, so check it...
  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
}
434 | |
# Extract all records whose residue name is not one of the specified water
# residue names and write them out to a new PDB file.
#
# Parameters:
#   $FileIndex         - index into the per-file arrays in %PDBFilesInfo
#   $PDBRecordLinesRef - reference to the record lines returned by ReadPDBFile
#
# Dies when the output file can't be opened or closed.
#
sub ExtractNonWaterRecords {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $OutFH, $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $ResidueName, $ConectRecordLinesRef, %AtomNumbersMap);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  # Three-argument open with a lexical handle instead of the bareword OUTFILE...
  open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Write out all ATOM/HETATM non water records along with TER and model records to indicate
  # chains and multiple models..
  %AtomNumbersMap = ();
  $ChainRecordCount = 0;
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (CheckRecordType($RecordLine)) {
      # Alternate location field is not needed here...
      ($AtomNumber, $AtomName, undef, $ResidueName) = ParseAtomRecordLine($RecordLine);

      # The residue name is looked up exactly as parsed; no case folding is done...
      if (! exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName} ) {
        $ChainRecordCount++;
        print {$OutFH} "$RecordLine\n";

        # Remember the atom number for picking out CONECT records later...
        $AtomNumber = int $AtomNumber;
        $AtomNumbersMap{$AtomNumber} = $AtomName;
      }
    }
    elsif (IsTerRecordType($RecordLine)) {
      # Terminate a chain only when at least one of its records was written...
      if ($ChainRecordCount) {
        print {$OutFH} GenerateTerRecordLine(), "\n";
      }
      $ChainRecordCount = 0;
    }
    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out appropriate CONECT records...
  $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
  for $RecordLine (@{$ConectRecordLinesRef}) {
    print {$OutFH} "$RecordLine\n";
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close, so check it...
  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
}
482 | |
# Extract all records whose element symbol is not H and write them out to a
# new PDB file.
#
# Parameters:
#   $FileIndex         - index into the per-file arrays in %PDBFilesInfo
#   $PDBRecordLinesRef - reference to the record lines returned by ReadPDBFile
#
# Dies when the output file can't be opened or closed.
#
sub ExtractNonHydrogenRecords {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $OutFH, $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $ElementSymbol, $ConectRecordLinesRef, %AtomNumbersMap);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  # Three-argument open with a lexical handle instead of the bareword OUTFILE...
  open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Write out all ATOM/HETATM non hydrogen records along with TER and model records to indicate
  # chains and multiple models..
  %AtomNumbersMap = ();
  $ChainRecordCount = 0;
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (CheckRecordType($RecordLine)) {
      # Only atom number, atom name and element symbol (the 1st, 2nd and 14th
      # values returned by ParseAtomRecordLine) are needed...
      ($AtomNumber, $AtomName, $ElementSymbol) = (ParseAtomRecordLine($RecordLine))[0, 1, 13];

      # NOTE(review): a hydrogen record without an element symbol would be
      # kept by this test - confirm how ParseAtomRecordLine handles such lines.
      if ($ElementSymbol !~ /^H$/i) {
        $ChainRecordCount++;
        print {$OutFH} "$RecordLine\n";

        # Remember the atom number for picking out CONECT records later...
        $AtomNumber = int $AtomNumber;
        $AtomNumbersMap{$AtomNumber} = $AtomName;
      }
    }
    elsif (IsTerRecordType($RecordLine)) {
      # Terminate a chain only when at least one of its records was written...
      if ($ChainRecordCount) {
        print {$OutFH} GenerateTerRecordLine(), "\n";
      }
      $ChainRecordCount = 0;
    }
    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out appropriate CONECT records...
  $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
  for $RecordLine (@{$ConectRecordLinesRef}) {
    print {$OutFH} "$RecordLine\n";
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close, so check it...
  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
}
530 | |
# Extract ATOM/HETATM records by distance from the origin set up for the file
# and write them out to a new PDB file. Selection is made either atom by atom
# or a whole residue at a time, depending on the distance selection mode.
#
# Parameters:
#   $FileIndex         - index into the per-file arrays in %PDBFilesInfo
#   $PDBRecordLinesRef - reference to the record lines returned by ReadPDBFile
#
# Dies when the output file can't be opened or closed.
#
sub ExtractByDistance {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $OutFH, $RecordLine, $RecordLineNum, $ChainRecordCount, $ConectRecordLinesRef, $AtomNumber, $AtomName, $ResidueName, $ChainID, $ResidueNumber, $X, $Y, $Z, $IgnoreRecord, $ResidueID, @OriginCoords, @Coords, %AtomNumbersMap, %ResidueStatusMap);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  # Three-argument open with a lexical handle instead of the bareword OUTFILE...
  open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Setup coordinates of origin to calculate distance...
  @OriginCoords = @{$PDBFilesInfo{DistanceOrigin}[$FileIndex]};

  # Write out all ATOM records which meet specified criteria along with TER and model records to indicate
  # chains and multiple models...
  %AtomNumbersMap = ();

  # Selection status of each residue already seen, keyed by residue ID...
  %ResidueStatusMap = ();

  $ChainRecordCount = 0;
  $RecordLineNum = 0;

  for $RecordLine (@{$PDBRecordLinesRef}) {
    $RecordLineNum++;
    if (CheckRecordType($RecordLine)) {
      # Alternate location and insertion code fields are not needed here...
      ($AtomNumber, $AtomName, undef, $ResidueName, $ChainID, $ResidueNumber, undef, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
      @Coords = ($X, $Y, $Z);

      $IgnoreRecord = 1;
      if ($OptionsInfo{DistanceSelectionMode} =~ /^ByResidue$/i) {
        $ResidueID = "${ResidueName}_${ResidueNumber}_${ChainID}";
        if (!exists $ResidueStatusMap{$ResidueID}) {
          # Residue hasn't been processed; decide once, starting at its first
          # record line, and reuse the decision for its remaining lines...
          $ResidueStatusMap{$ResidueID} = CheckResidueDistance($ResidueID, $RecordLineNum, $PDBRecordLinesRef, \@OriginCoords) ? 1 : 0;
        }
        if ($ResidueStatusMap{$ResidueID}) {
          $IgnoreRecord = 0;
        }
      }
      elsif ($OptionsInfo{DistanceSelectionMode} =~ /^ByAtom$/i) {
        if (CheckDistance(\@Coords, \@OriginCoords)) {
          $IgnoreRecord = 0;
        }
      }

      if (!$IgnoreRecord) {
        $ChainRecordCount++;
        print {$OutFH} "$RecordLine\n";

        # Remember the atom number for picking out CONECT records later...
        $AtomNumber = int $AtomNumber;
        $AtomNumbersMap{$AtomNumber} = $AtomName;
      }
    }
    elsif (IsTerRecordType($RecordLine)) {
      # Terminate a chain only when at least one of its records was written...
      if ($ChainRecordCount) {
        print {$OutFH} GenerateTerRecordLine(), "\n";
      }
      $ChainRecordCount = 0;
    }
    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out appropriate CONECT records...
  $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
  for $RecordLine (@{$ConectRecordLinesRef}) {
    print {$OutFH} "$RecordLine\n";
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close, so check it...
  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
}
618 | |
# Report whether a record line is of the type selected by the record mode
# option: ATOM only, HETATM only, or either of the two.
#
# Parameters:
#   $RecordLine - a PDB record line
#
# Returns 1 for a record of the selected type; 0 otherwise, including when
# the record mode is none of the three recognized values.
#
sub CheckRecordType {
  my($RecordLine) = @_;
  my $RecordMode = $OptionsInfo{RecordMode};

  if ($RecordMode =~ /^Atom$/i) {
    return IsAtomRecordType($RecordLine) ? 1 : 0;
  }
  if ($RecordMode =~ /^Hetatm$/i) {
    return IsHetatmRecordType($RecordLine) ? 1 : 0;
  }
  if ($RecordMode =~ /^AtomAndHetatm$/i) {
    return (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) ? 1 : 0;
  }

  return 0;
}
637 | |
# Does at least one record of the specified residue meet the distance
# criterion specified by the user?
#
# Parameters:
#   $SpecifiedResidueID - residue ID in ResidueName_ResidueNumber_ChainID format
#   $StartingLineNum    - 1-based number of the record line to start scanning at
#   $PDBRecordLinesRef  - reference to all the record lines
#   $OriginCoordsRef    - reference to origin coordinates passed on to CheckDistance
#
# Record lines of types not selected by CheckRecordType are skipped; scanning
# stops at the first selected record belonging to a different residue.
#
# Returns 1 when a record within the distance is found; otherwise 0.
#
sub CheckResidueDistance {
  my($SpecifiedResidueID, $StartingLineNum, $PDBRecordLinesRef, $OriginCoordsRef) = @_;
  my($Status, $RecordLine, $RecordLineIndex, $ResidueName, $ChainID, $ResidueNumber, $X, $Y, $Z, $ResidueID, @Coords);

  $Status = 0;

  RECORDLINE: for $RecordLineIndex (($StartingLineNum - 1) .. $#{$PDBRecordLinesRef}) {
    $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
    if (!CheckRecordType($RecordLine)) {
      next RECORDLINE;
    }
    # Atom number, atom name, alternate location and insertion code fields are not needed here...
    (undef, undef, undef, $ResidueName, $ChainID, $ResidueNumber, undef, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
    $ResidueID = "${ResidueName}_${ResidueNumber}_${ChainID}";

    # Compare residue IDs as case-insensitive literal strings. Interpolating the
    # ID into a regular expression would treat characters such as + or ( in
    # a residue name or chain ID as pattern syntax...
    if (lc($ResidueID) ne lc($SpecifiedResidueID)) {
      # It's a new residue line...
      last RECORDLINE;
    }

    # Check distance...
    @Coords = ($X, $Y, $Z);
    if (CheckDistance(\@Coords, $OriginCoordsRef)) {
      # Distance criterion is met for at least one record in the residue...
      $Status = 1;
      last RECORDLINE;
    }
  }
  return $Status;
}
668 | |
# Are the specified coordinates within the maximum extraction distance of
# the origin?
#
# Parameters:
#   $CoordsRef       - reference to a list holding X, Y and Z of the point to test
#   $OriginCoordsRef - reference to a flat list of origin coordinates. In
#                      Residue extraction distance mode it is walked three
#                      values at a time and any one of those points being
#                      close enough is sufficient; otherwise only its first
#                      three values are used.
#
# Returns 1 when the distance criterion is met; otherwise 0.
#
sub CheckDistance {
  my($CoordsRef, $OriginCoordsRef) = @_;
  my $MaxDistance = $OptionsInfo{MaxExtractionDistance};

  if ($OptionsInfo{ExtractionDistanceMode} !~ /^Residue$/i) {
    # Single origin point...
    my $SumOfSquares = 0;
    for my $Axis (0 .. 2) {
      $SumOfSquares += ($CoordsRef->[$Axis] - $OriginCoordsRef->[$Axis])**2;
    }
    return (sqrt($SumOfSquares) <= $MaxDistance) ? 1 : 0;
  }

  # Go over coordinates of all the atoms in the residue...
  my $OriginValuesCount = scalar @{$OriginCoordsRef};
  my $Offset = 0;
  while ($Offset < $OriginValuesCount) {
    my $SumOfSquares = ($CoordsRef->[0] - $OriginCoordsRef->[$Offset])**2 + ($CoordsRef->[1] - $OriginCoordsRef->[$Offset + 1])**2 + ($CoordsRef->[2] - $OriginCoordsRef->[$Offset + 2])**2;
    if (sqrt($SumOfSquares) <= $MaxDistance) {
      return 1;
    }
    $Offset += 3;
  }

  return 0;
}
699 | |
# Write out the first record of the output file followed, optionally, by the
# records which precede the first MODEL/ATOM/HETATM record in the input file.
#
# Parameters:
#   $OutFileRef        - output file handle (or a reference to one)
#   $PDBRecordLinesRef - reference to the record lines of the input file
#
# The first record is either a newly generated HEADER record, which keeps the
# ID code of the input file and uses a fixed classification string, or the
# first input line as is.
#
sub WriteHeaderAndOlderRecords {
  my($OutFileRef, $PDBRecordLinesRef) = @_;

  if (!$OptionsInfo{ModifyHeaderRecord}) {
    print {$OutFileRef} $PDBRecordLinesRef->[0], "\n";
  }
  else {
    # Write out modified HEADER record; only the ID code is carried over...
    my(undef, undef, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
    my $Classification = 'Data extracted using MayaChemTools';
    print {$OutFileRef} GenerateHeaderRecordLine($IDCode, $Classification), "\n";
  }

  # Nothing more to do unless old records are being kept...
  return if !$OptionsInfo{KeepOldRecords};

  # Skip the first record and write out older records all the way up to the first
  # MODEL/ATOM/HETATM record from the input file...
  my $LastLineIndex = $#{$PDBRecordLinesRef};
  my $LineIndex = 1;
  while ($LineIndex <= $LastLineIndex) {
    my $OldRecordLine = $PDBRecordLinesRef->[$LineIndex];
    last if (IsModelRecordType($OldRecordLine) || IsAtomRecordType($OldRecordLine) || IsHetatmRecordType($OldRecordLine));
    print {$OutFileRef} "$OldRecordLine\n";
    $LineIndex++;
  }
  return;
}
727 | |
# Get classification, deposition date and ID code from the first record line,
# which is taken to be the HEADER record.
#
# Parameters:
#   $PDBRecordLinesRef - reference to the record lines of the input file
#
# Returns a list of three values; all three are empty strings when the first
# line is not a HEADER record.
#
sub GetHeaderRecordInformation {
  my($PDBRecordLinesRef) = @_;
  my $FirstRecordLine = $PDBRecordLinesRef->[0];

  if (!IsHeaderRecordType($FirstRecordLine)) {
    return ('', '', '');
  }

  my($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($FirstRecordLine);
  return ($Classification, $DepositionDate, $IDCode);
}
740 | |
# Get the one letter code for a residue name.
#
# Parameters:
#   $ResidueName - residue name; three character names are treated as amino
#                  acids and one character names as nucleic acids
#
# Returns a list of two values: the residue code and a flag which is 1 for a
# recognized residue and 0 otherwise. Unrecognized residues get the
# nonstandard sequence code from the options.
#
sub GetResidueCode {
  my($ResidueName) = @_;
  my $NameLength = length($ResidueName);

  if ($NameLength == 3 && AminoAcids::IsAminoAcid($ResidueName)) {
    # Standard amino acid...
    my $OneLetterCode = AminoAcids::GetAminoAcidOneLetterCode($ResidueName);
    return ($OneLetterCode, 1);
  }

  if ($NameLength == 1 && $ResidueName =~ /^(A|G|T|U|C)$/i) {
    # Nucleic acid; the name itself is the code...
    return ($ResidueName, 1);
  }

  return ($OptionsInfo{NonStandardSequenceCode}, 0);
}
767 | |
# Transfer command line option values from %Options into %OptionsInfo,
# converting yes/no values into 1/0 flags and comma delimited values into
# lists and lookup maps.
#
# Residue, atom and distance options are handled by their own routines,
# which are called after mode and chain options have been set up.
#
sub ProcessOptions {
  # Convert a yes/no option value into a 1/0 flag...
  my $YesNoToFlag = sub {
    my($Value) = @_;
    return ($Value =~ /^Yes$/i) ? 1 : 0;
  };

  %OptionsInfo = ();
  $OptionsInfo{Mode} = $Options{mode};

  # Chains: either a First/All keyword or an explicit comma delimited list...
  my(@ChainIDs) = ();
  if ($Options{chains} !~ /^(First|All)$/i) {
    $OptionsInfo{ChainsToExtract} = 'Specified';
    @ChainIDs = split /\,/, $Options{chains};
  }
  else {
    $OptionsInfo{ChainsToExtract} = $Options{chains};
  }
  $OptionsInfo{SpecifiedChains} = [@ChainIDs];

  # Both flags are driven by the same command line option...
  $OptionsInfo{CombineChains} = $YesNoToFlag->($Options{combinechains});
  $OptionsInfo{CombineChainSequences} = $YesNoToFlag->($Options{combinechains});

  ProcessResiduesOptions();
  ProcessAtomsOptions();
  ProcessDistanceOptions();

  # Water residue names are set up only for NonWater mode...
  $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
  $OptionsInfo{SpecifiedWaterResiduesList} = [];
  $OptionsInfo{SpecifiedWaterResiduesMap} = {};

  if ($OptionsInfo{Mode} =~ /^NonWater$/i) {
    my(@WaterNames);
    if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) {
      @WaterNames = ('HOH', 'WAT', 'H2O');
    }
    else {
      @WaterNames = split /\,/, $Options{waterresiduenames};
    }
    for my $WaterName (@WaterNames) {
      $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterName} = $WaterName;
    }
    push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @WaterNames;
  }

  # Record mode defaults to Atom for atom based modes and to AtomAndHetatm for the rest...
  if ($Options{recordmode}) {
    $OptionsInfo{RecordMode} = $Options{recordmode};
  }
  elsif ($Options{mode} =~ /^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames)$/i) {
    $OptionsInfo{RecordMode} = "Atom";
  }
  else {
    $OptionsInfo{RecordMode} = "AtomAndHetatm";
  }

  $OptionsInfo{KeepOldRecords} = $YesNoToFlag->($Options{keepoldrecords});

  $OptionsInfo{ModifyHeaderRecord} = $YesNoToFlag->($Options{modifyheader});

  # Sequence related options...
  $OptionsInfo{KeepNonStandardSequences} = $YesNoToFlag->($Options{nonstandardkeep});
  $OptionsInfo{NonStandardSequenceCode} = $Options{nonstandardcode};
  $OptionsInfo{MaxSequenceLength} = $Options{sequencelength};
  $OptionsInfo{SequenceRecordSource} = $Options{sequencerecords};
  $OptionsInfo{SequenceIDPrefixSource} = $Options{sequenceidprefix};

  # Output file options...
  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
}
828 | |
# Process specified residue options...
#
# For ResidueNums, ResiduesRange and ResidueNames modes, validates the
# "--Residues" value and dies with an error message when it is missing or
# malformed. In all modes the following %OptionsInfo entries are (re)set:
#
#   SpecifiedResidueNumsList, SpecifiedResidueNumsMap
#   SpecifiedStartResidueNum, SpecifiedEndResidueNum
#   SpecifiedResidueNamesList, SpecifiedResidueNamesMap (lower case keys)
#
sub ProcessResiduesOptions {
  my($ResidueNum, $StartResidueNum, $EndResNum, $ResidueName, @SpecifiedResidueNumsList, @SpecifiedResidueNamesList);

  @SpecifiedResidueNumsList = ();
  ($StartResidueNum, $EndResNum) = (0, 0);

  @SpecifiedResidueNamesList = ();

  if ($OptionsInfo{Mode} =~ /^(ResidueNums|ResiduesRange|ResidueNames)$/i) {
    if (!$Options{residues}) {
      die "Error: You must specify a value for \"--Residues\" option in \"ResidueNums, ResiduesRange, or ResidueNames\" \"-m, --mode\". \n";
    }
    # Spaces are dropped so that "10, 20" and "10,20" are equivalent...
    $OptionsInfo{Residues} = $Options{residues};
    $OptionsInfo{Residues} =~ s/ //g;

    if ($OptionsInfo{Mode} =~ /^ResidueNames$/i) {
      @SpecifiedResidueNamesList = split /\,/, $OptionsInfo{Residues};
    }
    else {
      @SpecifiedResidueNumsList = split /\,/, $OptionsInfo{Residues};
      for $ResidueNum (@SpecifiedResidueNumsList) {
        if (!IsPositiveInteger($ResidueNum)) {
          die "Error: Invalid residue number value, $ResidueNum, for \"--Residues\" option during \"ResidueNums\" or \"ResiduesRange\" value of \"-m --mode\" option: Residue number must be a positive integer.\n";
        }
      }
      if ($OptionsInfo{Mode} =~ /^ResiduesRange$/i) {
        # A range is exactly two numbers with start not greater than end...
        if (@SpecifiedResidueNumsList != 2) {
          die "Error: Invalid number of residue number values, ", scalar(@SpecifiedResidueNumsList), ", for \"--Residues\" option during \"ResiduesRange\" value of \"-m --mode\" option: The number of values must be 2 corresponding to start and end residue numbers.\n";
        }
        if ($SpecifiedResidueNumsList[0] > $SpecifiedResidueNumsList[1]) {
          die "Error: Invalid residue number values, @SpecifiedResidueNumsList, for \"--Residues\" option during \"ResiduesRange\" value of \"-m --mode\" option: The start residue number must be less than end residue number.\n";
        }
        ($StartResidueNum, $EndResNum) = @SpecifiedResidueNumsList;
      }
    }
  }

  @{$OptionsInfo{SpecifiedResidueNumsList}} = ();
  push @{$OptionsInfo{SpecifiedResidueNumsList}}, @SpecifiedResidueNumsList;

  $OptionsInfo{SpecifiedStartResidueNum} = $StartResidueNum;
  $OptionsInfo{SpecifiedEndResidueNum} = $EndResNum;

  @{$OptionsInfo{SpecifiedResidueNamesList}} = ();
  push @{$OptionsInfo{SpecifiedResidueNamesList}}, @SpecifiedResidueNamesList;

  # Set up a specified residue numbers map...
  %{$OptionsInfo{SpecifiedResidueNumsMap}} = ();
  for $ResidueNum (@{$OptionsInfo{SpecifiedResidueNumsList}}) {
    $OptionsInfo{SpecifiedResidueNumsMap}{$ResidueNum} = $ResidueNum;
  }

  # Set up a specified residue names map keyed by lower case name...
  %{$OptionsInfo{SpecifiedResidueNamesMap}} = ();
  for $ResidueName (@{$OptionsInfo{SpecifiedResidueNamesList}}) {
    $OptionsInfo{SpecifiedResidueNamesMap}{lc $ResidueName} = lc $ResidueName;
  }

}
889 | |
# Process specified atom options...
#
# For AtomNums, AtomsRange and AtomNames modes, validates the "--Atoms"
# value and dies with an error message when it is missing or malformed.
# For CAlphas mode the atom names list is fixed to "CA". In all modes the
# following %OptionsInfo entries are (re)set:
#
#   SpecifiedAtomNumsList, SpecifiedAtomNumsMap
#   SpecifiedStartAtomNum, SpecifiedEndAtomNum
#   SpecifiedAtomNamesList, SpecifiedAtomNamesMap (lower case keys)
#
sub ProcessAtomsOptions {
  my($AtomNum, $StartAtomNum, $EndAtomNum, $AtomName, @SpecifiedAtomNumsList, @SpecifiedAtomNamesList);

  @SpecifiedAtomNumsList = ();
  ($StartAtomNum, $EndAtomNum) = (0, 0);

  @SpecifiedAtomNamesList = ();

  if ($OptionsInfo{Mode} =~ /^(AtomNums|AtomsRange|AtomNames)$/i) {
    if (!$Options{atoms}) {
      die "Error: You must specify a value for \"--Atoms\" option in \"AtomNums, AtomsRange, or AtomNames\" \"-m, --mode\". \n";
    }
    # Spaces are dropped so that "10, 20" and "10,20" are equivalent...
    $OptionsInfo{Atoms} = $Options{atoms};
    $OptionsInfo{Atoms} =~ s/ //g;

    if ($OptionsInfo{Mode} =~ /^AtomNames$/i) {
      @SpecifiedAtomNamesList = split /\,/, $OptionsInfo{Atoms};
    }
    else {
      @SpecifiedAtomNumsList = split /\,/, $OptionsInfo{Atoms};
      for $AtomNum (@SpecifiedAtomNumsList) {
        if (!IsPositiveInteger($AtomNum)) {
          die "Error: Invalid atom number value, $AtomNum, for \"--Atoms\" option during \"AtomNums\" or \"AtomsRange\" value of \"-m --mode\" option: Atom number must be a positive integer.\n";
        }
      }
      if ($OptionsInfo{Mode} =~ /^AtomsRange$/i) {
        # A range is exactly two numbers with start not greater than end...
        if (@SpecifiedAtomNumsList != 2) {
          die "Error: Invalid number of atom number values, ", scalar(@SpecifiedAtomNumsList), ", for \"--Atoms\" option during \"AtomsRange\" value of \"-m --mode\" option: The number of values must be 2 corresponding to start and end atom numbers.\n";
        }
        if ($SpecifiedAtomNumsList[0] > $SpecifiedAtomNumsList[1]) {
          die "Error: Invalid atom number values, @SpecifiedAtomNumsList, for \"--Atoms\" option during \"AtomsRange\" value of \"-m --mode\" option: The start atom number must be less than end atom number.\n";
        }
        ($StartAtomNum, $EndAtomNum) = @SpecifiedAtomNumsList;
      }
    }
  }
  elsif ($OptionsInfo{Mode} =~ /^CAlphas$/i) {
    @SpecifiedAtomNamesList = ("CA");
  }

  @{$OptionsInfo{SpecifiedAtomNumsList}} = ();
  push @{$OptionsInfo{SpecifiedAtomNumsList}}, @SpecifiedAtomNumsList;

  $OptionsInfo{SpecifiedStartAtomNum} = $StartAtomNum;
  $OptionsInfo{SpecifiedEndAtomNum} = $EndAtomNum;

  @{$OptionsInfo{SpecifiedAtomNamesList}} = ();
  push @{$OptionsInfo{SpecifiedAtomNamesList}}, @SpecifiedAtomNamesList;

  # Set up a specified atom numbers map...
  %{$OptionsInfo{SpecifiedAtomNumsMap}} = ();
  for $AtomNum (@{$OptionsInfo{SpecifiedAtomNumsList}}) {
    $OptionsInfo{SpecifiedAtomNumsMap}{$AtomNum} = $AtomNum;
  }

  # Set up a specified atom names map keyed by lower case name...
  %{$OptionsInfo{SpecifiedAtomNamesMap}} = ();
  for $AtomName (@{$OptionsInfo{SpecifiedAtomNamesList}}) {
    $OptionsInfo{SpecifiedAtomNamesMap}{lc $AtomName} = lc $AtomName;
  }

}
953 | |
# Process specified distance options...
#
# Copies the distance related command line values into %OptionsInfo and, for
# Distance mode, validates "--distanceorigin" against "--distancemode":
#
#   Atom, Hetatm : 2 values; the first must be a positive integer
#   Residue      : 2 or 3 values; the first must be a positive integer
#   XYZ          : 3 numerical values
#
# Dies with an error message on an invalid value. The split origin values
# are stored in $OptionsInfo{SpecifiedExtractionDistanceOrigin}; the list is
# empty outside Distance mode.
#
sub ProcessDistanceOptions {
  my(@SpecifiedDistanceOrigin) = ();

  $OptionsInfo{MaxExtractionDistance} = $Options{distance};
  $OptionsInfo{ExtractionDistanceMode} = $Options{distancemode};
  $OptionsInfo{ExtractionDistanceOrigin} = $Options{distanceorigin} ? $Options{distanceorigin} : '';
  $OptionsInfo{DistanceSelectionMode} = $Options{distanceselectionmode};

  if ($OptionsInfo{Mode} =~ /^Distance$/i) {
    if (!$Options{distanceorigin}) {
      die "Error: You must specify a value for \"--distanceorigin\" option in \"Distance\" \"-m, --mode\". \n";
    }
    # Drop spaces before splitting, as is done for "--Residues" and "--Atoms"
    # values, so that "10, CA" yields the name "CA" rather than " CA". The
    # value kept in ExtractionDistanceOrigin for messages is left as typed...
    my($DistanceOrigin) = $Options{distanceorigin};
    $DistanceOrigin =~ s/ //g;
    @SpecifiedDistanceOrigin = split /\,/, $DistanceOrigin;

    if ($OptionsInfo{ExtractionDistanceMode} =~ /^Atom$/i) {
      if (@SpecifiedDistanceOrigin != 2) {
        die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Atom\" value of \"--distancemode\" : The number of values must be 2.\n";
      }
      if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
        die "Error: Invalid atom number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Atom\" value of \"--distancemode\". Allowed values: > 0\n";
      }
    }
    elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Hetatm$/i) {
      if (@SpecifiedDistanceOrigin != 2) {
        die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Hetatm\" value of \"--distancemode\" : The number of values must be 2.\n";
      }
      if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
        die "Error: Invalid hetatm number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Hetatm\" value of \"--distancemode\". Allowed values: > 0\n";
      }
    }
    elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
      if (!(@SpecifiedDistanceOrigin == 2 || @SpecifiedDistanceOrigin == 3)) {
        die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Residue\" value of \"--distancemode\" : The number of values must be either 2 or 3.\n";
      }
      if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
        die "Error: Invalid residue number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Residue\" value of \"--distancemode\". Allowed values: > 0\n";
      }
    }
    elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^XYZ$/i) {
      if (@SpecifiedDistanceOrigin != 3) {
        die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"XYZ\" value of \"--distancemode\" : The number of values must be 3.\n";
      }
      my($Value);
      for $Value (@SpecifiedDistanceOrigin) {
        if (!IsNumerical($Value)) {
          # Report the coordinate that actually failed validation...
          die "Error: Invalid coordinate value, ", $Value, ", for option \"distanceorigin\" option during \"XYZ\" value of \"--distancemode\". Allowed values: numerical\n";
        }
      }
    }
  }
  @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}} = ();
  push @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}}, @SpecifiedDistanceOrigin;

}
1008 | |
# Retrieve information about PDB files...
#
# Walks @PDBFilesList and fills the file-level %PDBFilesInfo hash. Each entry
# below is an array indexed like @PDBFilesList:
#
#   FileOkay               - 1 when the file passed all checks, 0 otherwise
#   OutFileRoot            - root used to build output file names
#   OutFileNames           - list of output file names
#   ChainLabels            - "Chain<ID>" labels for the selected chains
#   ChainSequenceIDs       - sequence IDs (Sequences mode only)
#   ChainSequenceIDsPrefix - sequence ID prefixes (Sequences mode only)
#   SpecifiedChains        - chain IDs selected for extraction
#   DistanceOrigin         - origin coordinates (Distance mode only)
#
# A file is skipped with a warning, leaving FileOkay at 0, when it doesn't
# exist, isn't a PDB file, can't be opened, has no chains, lacks a specified
# chain or distance origin record, or when an output file already exists and
# overwriting has not been requested.
#
sub RetrievePDBFilesInfo {
  my($Index, $PDBFile, $PDBRecordLinesRef, $ChainID, $ChainLabel, $ChainsAndResiduesInfoRef, $Mode, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot, @SpecifiedChains, @DistanceOrigin, @OutFileNames, @ChainLabels, @ChainSequenceIDs, @ChainSequenceIDsPrefix);

  %PDBFilesInfo = ();
  @{$PDBFilesInfo{FileOkay}} = ();
  @{$PDBFilesInfo{OutFileRoot}} = ();
  @{$PDBFilesInfo{OutFileNames}} = ();
  @{$PDBFilesInfo{ChainLabels}} = ();
  @{$PDBFilesInfo{ChainSequenceIDs}} = ();
  @{$PDBFilesInfo{ChainSequenceIDsPrefix}} = ();
  @{$PDBFilesInfo{SpecifiedChains}} = ();
  @{$PDBFilesInfo{DistanceOrigin}} = ();

  FILELIST: for $Index (0 .. $#PDBFilesList) {
    # Start from a "not okay" entry; it is completed at the end of the loop...
    $PDBFilesInfo{FileOkay}[$Index] = 0;

    $PDBFilesInfo{OutFileRoot}[$Index] = '';
    @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
    @{$PDBFilesInfo{ChainLabels}[$Index]} = ();
    @{$PDBFilesInfo{ChainSequenceIDs}[$Index]} = ();
    @{$PDBFilesInfo{ChainSequenceIDsPrefix}[$Index]} = ();
    @{$PDBFilesInfo{SpecifiedChains}[$Index]} = ();
    @{$PDBFilesInfo{DistanceOrigin}[$Index]} = ();

    $PDBFile = $PDBFilesList[$Index];
    if (!(-e $PDBFile)) {
      warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($PDBFile, "pdb")) {
      warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
      next FILELIST;
    }
    # Readability check only. Three argument open with an explicit read mode
    # keeps characters in the file name from being taken as an open mode...
    my($PDBFileHandle);
    if (!open $PDBFileHandle, '<', $PDBFile) {
      warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $PDBFileHandle;

    # Get PDB data...
    $PDBRecordLinesRef = ReadPDBFile($PDBFile);
    if ($OptionsInfo{Mode} =~ /^Sequences$/i && $OptionsInfo{SequenceRecordSource} =~ /^SeqRes$/i) {
      $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'SeqRes');
    }
    else {
      $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
    }
    if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
      warn "Warning: Ignoring file $PDBFile: No chains found \n";
      next FILELIST;
    }

    # Make sure specified chains exist in PDB file...
    @SpecifiedChains = ();
    if ($OptionsInfo{ChainsToExtract} =~ /^Specified$/i) {
      for $ChainID (@{$OptionsInfo{SpecifiedChains}}) {
        if (exists $ChainsAndResiduesInfoRef->{Residues}{$ChainID}) {
          push @SpecifiedChains, $ChainID;
        }
        else {
          warn "Warning: Ignoring file $PDBFile: Specified chain, $ChainID, in \"-c, --chains\" option doesn't exist.\n";
          next FILELIST;
        }
      }
    }
    elsif ($OptionsInfo{ChainsToExtract} =~ /^First$/i) {
      push @SpecifiedChains, $ChainsAndResiduesInfoRef->{ChainIDs}[0];
    }
    elsif ($OptionsInfo{ChainsToExtract} =~ /^All$/i) {
      push @SpecifiedChains, @{$ChainsAndResiduesInfoRef->{ChainIDs}};
    }

    # Setup chain labels to use for sequence IDs and generating output files.
    # A leading "None" in a chain ID is dropped from the label...
    @ChainLabels = ();
    for $ChainID (@SpecifiedChains) {
      $ChainLabel = $ChainID; $ChainLabel =~ s/^None//ig;
      $ChainLabel = "Chain${ChainLabel}";
      push @ChainLabels, $ChainLabel;
    }

    # Make sure specified distance origin is valid...
    @DistanceOrigin = ();
    if ($OptionsInfo{Mode} =~ /^Distance$/i) {
      if ($OptionsInfo{ExtractionDistanceMode} =~ /^(Atom|Hetatm)$/i) {
        my($RecordType, $SpecifiedAtomName, $SpecifiedAtomNumber, $RecordFound, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $RecordLine);
        $RecordType = $OptionsInfo{ExtractionDistanceMode};
        ($SpecifiedAtomNumber, $SpecifiedAtomName) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
        $RecordFound = 0;
        LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
          if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
            next LINE;
          }
          ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
          $AtomName = RemoveLeadingAndTrailingWhiteSpaces($AtomName);
          if (($RecordType =~ /^Atom$/i && IsAtomRecordType($RecordLine)) || ($RecordType =~ /^Hetatm$/i && IsHetatmRecordType($RecordLine))) {
            if ($AtomNumber == $SpecifiedAtomNumber && $AtomName eq $SpecifiedAtomName) {
              # Stop here so that $X, $Y, $Z keep the matching coordinates...
              $RecordFound = 1;
              last LINE;
            }
          }
        }
        if (!$RecordFound) {
          warn "Warning: Ignoring file $PDBFile: ", uc($RecordType), " record corresponding to \"--distanceorigin\" option value, $OptionsInfo{ExtractionDistanceOrigin}, doesn't exist.\n";
          next FILELIST;
        }
        push @DistanceOrigin, ($X, $Y, $Z);
      }
      elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
        my($SpecifiedResidueNumber, $SpecifiedResidueName, $SpecifiedChainID, $RecordFound, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $RecordLine);
        $SpecifiedChainID = '';
        if (@{$OptionsInfo{SpecifiedExtractionDistanceOrigin}} == 3) {
          ($SpecifiedResidueNumber, $SpecifiedResidueName, $SpecifiedChainID) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
        }
        else {
          ($SpecifiedResidueNumber, $SpecifiedResidueName) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
        }
        $RecordFound = 0;
        LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
          if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
            next LINE;
          }
          ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
          $ResidueName = RemoveLeadingAndTrailingWhiteSpaces($ResidueName);
          $ChainID = RemoveLeadingAndTrailingWhiteSpaces($ChainID);
          # Test for a non-empty chain ID rather than a true one so that a
          # specified chain ID of "0" is still used as a filter...
          if (length($SpecifiedChainID) && ($SpecifiedChainID ne $ChainID)) {
            next LINE;
          }
          if ($ResidueNumber == $SpecifiedResidueNumber && $ResidueName eq $SpecifiedResidueName) {
            # Store coordinates for all the atoms...
            $RecordFound = 1;
            push @DistanceOrigin, ($X, $Y, $Z);
            next LINE;
          }
        }
        if (!$RecordFound) {
          warn "Warning: Ignoring file $PDBFile: ATOM/HETATM record corresponding to \"--distanceorigin\" option value, $OptionsInfo{ExtractionDistanceOrigin}, doesn't exist.\n";
          next FILELIST;
        }
      }
      elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^XYZ$/i) {
        push @DistanceOrigin, @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
      }
    }

    # Setup output file names. A specified root is used only when a single
    # PDB file is being processed...
    @OutFileNames = ();
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
    if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      if ($RootFileName && $RootFileExt) {
        $FileName = $RootFileName;
      }
      else {
        $FileName = $OptionsInfo{OutFileRoot};
      }
    }
    $OutFileRoot = $FileName;

    $Mode = $OptionsInfo{Mode};
    if ($Mode =~ /^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames|ResidueNums|ResiduesRange|ResidueNames|Distance|NonWater|NonHydrogens)$/i) {
      # One output file named after the canonical spelling of the mode...
      $OutFileName = '';
      if ($Mode =~ /^Distance$/i) {
        my($DistanceMode, $DistanceModeName);
        $DistanceMode = '';
        DISTANCEMODE: for $DistanceModeName (qw(Atom Hetatm Residue XYZ)) {
          if ($OptionsInfo{ExtractionDistanceMode} =~ /^${DistanceModeName}$/i) {
            $DistanceMode = $DistanceModeName;
            last DISTANCEMODE;
          }
        }
        $OutFileName = "${OutFileRoot}DistanceBy${DistanceMode}.pdb";
      }
      else {
        my($ModeName);
        MODENAME: for $ModeName (qw(CAlphas Atoms AtomNums AtomsRange AtomNames ResidueNums ResiduesRange ResidueNames NonWater NonHydrogens)) {
          if ($Mode =~ /^${ModeName}$/i) {
            $OutFileName = "${OutFileRoot}${ModeName}.pdb";
            last MODENAME;
          }
        }
      }
      push @OutFileNames, $OutFileName;
      if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
        warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
        next FILELIST;
      }
    }
    elsif ($Mode =~ /^(Chains|Sequences)$/i) {
      if ($OptionsInfo{CombineChainSequences}) {
        # All chains go into a single file...
        $OutFileName = ($Mode =~ /^Chains$/i) ? "${OutFileRoot}ExtractedChains.pdb" : "${OutFileRoot}SequencesChainsCombined.fasta";
        push @OutFileNames, $OutFileName;
        if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
          warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
          next FILELIST;
        }
      }
      else {
        # One file per chain...
        for $ChainLabel (@ChainLabels) {
          $OutFileName = ($Mode =~ /^Chains$/i) ? "${OutFileRoot}${ChainLabel}.pdb" : "${OutFileRoot}Sequences${ChainLabel}.fasta";
          push @OutFileNames, $OutFileName;
          if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
            warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
            next FILELIST;
          }
        }
      }
    }

    # Setup sequence IDs...
    @ChainSequenceIDs = ();
    @ChainSequenceIDsPrefix = ();
    if ($Mode =~ /^Sequences$/i) {
      my($Classification, $DepositionDate, $IDCode, $IDPrefix);
      ($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);

      if ($OptionsInfo{SequenceIDPrefixSource} =~ /^FileName$/i) {
        $IDPrefix = $FileName;
      }
      elsif ($OptionsInfo{SequenceIDPrefixSource} =~ /^HeaderRecord$/i) {
        $IDPrefix = IsNotEmpty($IDCode) ? $IDCode : '';
      }
      else {
        # Automatic: header ID code when available, otherwise file name...
        $IDPrefix = IsNotEmpty($IDCode) ? $IDCode : $FileName;
      }

      for $ChainLabel (@ChainLabels) {
        push @ChainSequenceIDsPrefix, $IDPrefix;
        push @ChainSequenceIDs, "${IDPrefix}_${ChainLabel}|PDB";
      }
    }

    $PDBFilesInfo{FileOkay}[$Index] = 1;
    $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;

    push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames;
    push @{$PDBFilesInfo{ChainLabels}[$Index]}, @ChainLabels;
    push @{$PDBFilesInfo{ChainSequenceIDsPrefix}[$Index]}, @ChainSequenceIDsPrefix;
    push @{$PDBFilesInfo{ChainSequenceIDs}[$Index]}, @ChainSequenceIDs;
    push @{$PDBFilesInfo{SpecifiedChains}[$Index]}, @SpecifiedChains;
    push @{$PDBFilesInfo{DistanceOrigin}[$Index]}, @DistanceOrigin;
  }
}
1278 | |
1279 | |
# Setup script usage and retrieve command line arguments specified using various options...
#
# Seeds the file-level %Options hash with default values, parses the command
# line with GetOptions, changes to the working directory when one is given,
# and validates option values. Dies with an error message on an unknown
# option, an invalid working directory, or an invalid option value.
#
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();
  $Options{chains} = 'First';
  $Options{combinechains} = 'no';
  $Options{distance} = 10.0;
  $Options{distancemode} = 'XYZ';
  $Options{distanceselectionmode} = 'ByAtom';
  $Options{keepoldrecords} = 'no';
  $Options{mode} = 'NonWater';
  $Options{modifyheader} = 'yes';
  $Options{nonstandardkeep} = 'yes';
  $Options{nonstandardcode} = 'X';
  $Options{sequencelength} = 80;
  $Options{sequenceidprefix} = 'Automatic';
  $Options{sequencerecords} = 'Atom';
  $Options{waterresiduenames} = 'Automatic';

  if (!GetOptions(\%Options, "atoms|a=s", "chains|c=s", "combinechains=s", "distance|d=f", "distancemode=s", "distanceorigin=s", "distanceselectionmode=s", "help|h", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "nonstandardkeep=s", "nonstandardcode=s", "overwrite|o", "root|r=s", "recordmode=s", "residues=s", "sequencelength=i", "sequenceidprefix=s", "sequencerecords=s", "waterresiduenames=s", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }

  # Switch to the working directory before any file names are resolved...
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validate option values; all comparisons are case insensitive...
  if ($Options{combinechains} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{combinechains}, for option \"--CombineChains\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{distancemode} !~ /^(Atom|Hetatm|Residue|XYZ)$/i) {
    die "Error: The value specified, $Options{distancemode}, for option \"--DistanceMode\" is not valid. Allowed values: Atom, Hetatm, Residue, or XYZ\n";
  }
  if ($Options{distanceselectionmode} !~ /^(ByAtom|ByResidue)$/i) {
    die "Error: The value specified, $Options{distanceselectionmode}, for option \"--DistanceSelectionMode\" is not valid. Allowed values: ByAtom or ByResidue\n";
  }
  if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{mode} !~ /^(Chains|Sequences|Atoms|CAlphas|AtomNums|AtomsRange|AtomNames|ResidueNums|ResidueNames|ResiduesRange|Distance|NonWater|NonHydrogens)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: Chains, Sequences, Atoms, CAlphas, AtomNums, AtomsRange, AtomNames, ResidueNums, ResiduesRange, ResidueNames, Distance, NonWater, NonHydrogens\n";
  }
  if ($Options{modifyheader} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{nonstandardkeep} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{nonstandardkeep}, for option \"--NonStandardKeep\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{nonstandardcode} !~ /^(\?|\-|X)$/i) {
    die "Error: The value specified, $Options{nonstandardcode}, for option \"--NonStandardCode\" is not valid. Allowed values: ?, -, or X\n";
  }
  # "--RecordMode" has no default here, so it is checked only when given...
  if ($Options{recordmode} && $Options{recordmode} !~ /^(Atom|Hetatm|AtomAndHetatm)$/i) {
    die "Error: The value specified, $Options{recordmode}, for option \"--RecordMode\" is not valid. Allowed values: Atom, Hetatm, AtomAndHetatm\n";
  }
  if (!IsPositiveInteger($Options{sequencelength})) {
    die "Error: The value specified, $Options{sequencelength}, for option \"--SequenceLength\" is not valid. Allowed values: >0\n";
  }
  if ($Options{sequencerecords} !~ /^(Atom|SeqRes)$/i) {
    die "Error: The value specified, $Options{sequencerecords}, for option \"--SequenceRecords\" is not valid. Allowed values: Atom or SeqRes\n";
  }
  if ($Options{sequenceidprefix} !~ /^(FileName|HeaderRecord|Automatic)$/i) {
    die "Error: The value specified, $Options{sequenceidprefix}, for option \"--SequenceIDPrefix\" is not valid. Allowed values: FileName, HeaderRecord, or Automatic\n";
  }
}
1346 | |
1347 __END__ | |
1348 | |
1349 =head1 NAME | |
1350 | |
1351 ExtractFromPDBFiles.pl - Extract specific data from PDBFile(s) | |
1352 | |
1353 =head1 SYNOPSIS | |
1354 | |
1355 ExtractFromPDBFiles.pl PDBFile(s)... | |
1356 | |
1357 ExtractFromPDBFiles.pl [B<-a, --Atoms> "AtomNum, [AtomNum...]" | "StartAtomNum, EndAtomNum" | | |
1358 "AtomName, [AtomName...]"] [B<-c, --chains> First | All | "ChainID, [ChainID,...]"] | |
 1359 [B<--CombineChains> yes | no] [B<-d, --distance> number] [B<--DistanceMode> Atom | Hetatm | Residue | XYZ] | |
 1360 [B<--DistanceOrigin> "AtomNumber, AtomName" | "HetatmNumber, HetAtmName" | "ResidueNumber, ResidueName, [ChainID]" | "X,Y,Z"] | |
 1361 [B<--DistanceSelectionMode> ByAtom | ByResidue] [B<-h, --help>] [B<-k, --KeepOldRecords> yes | no] | |
1362 [B<-m, --mode > Chains | Sequences | Atoms | CAlphas | AtomNums | AtomsRange | AtomNames | | |
1363 ResidueNums | ResiduesRange | ResidueNames | Distance | NonWater | NonHydrogens] | |
1364 [B<--ModifyHeader> yes | no] [B<--NonStandardKeep> yes | no] [B<--NonStandardCode> character] | |
 1365 [B<-o, --overwrite>] [B<-r, --root> rootname] [B<--RecordMode> I<Atom | Hetatm | AtomAndHetatm>] | |
 1366 [B<--Residues> "ResidueNum,[ResidueNum...]" | StartResidueNum,EndResidueNum ] | |
1367 [B<--SequenceLength> number] [B<--SequenceRecords> Atom | SeqRes] | |
1368 [B<--SequenceIDPrefix> FileName | HeaderRecord | Automatic] | |
1369 [B<--WaterResidueNames> Automatic | "ResidueName, [ResidueName,...]"] | |
1370 [B<-w, --WorkingDir> dirname] PDBFile(s)... | |
1371 | |
1372 =head1 DESCRIPTION | |
1373 | |
1374 Extract specific data from I<PDBFile(s)> and generate appropriate PDB or sequence file(s). | |
1375 Multiple PDBFile names are separated by spaces. The valid file extension is I<.pdb>. | |
1376 All other file name extensions are ignored during the wild card expansion. All the PDB files | |
1377 in a current directory can be specified either by I<*.pdb> or the current directory name. | |
1378 | |
 1379 During I<Chains> and I<Sequences> values of B<-m, --mode> option, all ATOM/HETATM records | |
 1380 for chains after the first model in PDB files containing data for multiple models are ignored. | |
1381 | |
1382 =head1 OPTIONS | |
1383 | |
1384 =over 4 | |
1385 | |
1386 =item B<-a, --Atoms> I<"AtomNum,[AtomNum...]" | "StartAtomNum,EndAtomNum" | "AtomName,[AtomName...]"> | |
1387 | |
1388 Specify which atom records to extract from I<PDBFiles(s)> during I<AtomNums>, | |
1389 I<AtomsRange>, and I<AtomNames> value of B<-m, --mode> option: extract records | |
1390 corresponding to atom numbers specified in a comma delimited list of atom numbers/names, | |
 1391 or within the range of start and end atom numbers. Possible values: I<"AtomNum[,AtomNum,..]">, | |
1392 I<StartAtomNum,EndAtomNum>, or I<"AtomName[,AtomName,..]">. Default: I<None>. Examples: | |
1393 | |
1394 10 | |
1395 15,20 | |
1396 N,CA,C,O | |
1397 | |
1398 =item B<-c, --chains> I<First | All | ChainID,[ChainID,...]> | |
1399 | |
1400 Specify which chains to extract from I<PDBFile(s)> during I<Chains | Sequences> value of | |
1401 B<-m, --mode> option: first chain, all chains, or a specific list of comma delimited chain IDs. | |
1402 Possible values: I<First | All | ChainID,[ChainID,...]>. Default: I<First>. Examples: | |
1403 | |
1404 A | |
1405 A,B | |
1406 All | |
1407 | |
1408 =item B<--CombineChains> I<yes | no> | |
1409 | |
1410 Specify whether to combine extracted chains data into a single file during I<Chains> or | |
1411 I<Sequences> value of B<-m, --mode> option. Possible values: I<yes | no>. Default: I<no>. | |
1412 | |
1413 During I<Chains> value of B<-m, --mode> option with I<Yes> value of B<--CombineChains>, | |
1414 extracted data for specified chains is written into a single file instead of individual file for each | |
1415 chain. | |
1416 | |
1417 During I<Sequences> value of B<-m, --mode> option with I<Yes> value of B<--CombineChains>, | |
1418 residues sequences for specified chains are extracted and concatenated into a single sequence | |
1419 file instead of individual file for each chain. | |
1420 | |
1421 =item B<-d, --distance> I<number> | |
1422 | |
1423 Specify distance used to extract ATOM/HETATM records during I<Distance> value of | |
1424 B<-m, --mode> option. Default: I<10.0> angstroms. | |
1425 | |
1426 B<--RecordMode> option controls type of record lines to extract from I<PDBFile(s)>: | |
1427 ATOM, HETATM or both. | |
1428 | |
1429 =item B<--DistanceMode> I<Atom | Hetatm | Residue | XYZ> | |
1430 | |
1431 Specify how to extract ATOM/HETATM records from I<PDBFile(s)> during I<Distance> value of | |
1432 B<-m, --mode> option: extract all the records within a certain distance specified by B<-d, --distance> | |
1433 from an atom or hetero atom record, a residue, or any arbitrary point. Possible values: I<Atom | | |
1434 Hetatm | Residue | XYZ>. Default: I<XYZ>. | |
1435 | |
1436 During I<Residue> value of B<--distancemode>, distance of ATOM/HETATM records is calculated from | |
1437 all the atoms in the residue and the records are selected as long as any atom of the residue lies with | |
1438 in the distance specified using B<-d, --distance> option. | |
1439 | |
1440 B<--RecordMode> option controls type of record lines to extract from I<PDBFile(s)>: | |
1441 ATOM, HETATM or both. | |
1442 | |
1443 =item B<--DistanceSelectionMode> I<ByAtom | ByResidue> | |
1444 | |
1445 Specify how to extract ATOM/HETATM records from I<PDBFile(s)> during I<Distance> value of | |
1446 B<-m, --mode> option for all values of B<--DistanceMode> option: extract only those ATOM/HETATM | |
1447 records that meet specified distance criterion; extract all records corresponding to a residue as | |
1448 long as one of the ATOM/HETATM record in the residue satisfies specified distance criterion. Possible | |
1449 values: I<ByAtom, ByResidue>. Default value: I<ByAtom>. | |
1450 | |
1451 =item B<--DistanceOrigin> I<"AtomNumber,AtomName" | "HetatmNumber,HetAtmName" | "ResidueNumber,ResidueName[,ChainID]" | "X,Y,Z"> | |
1452 | |
1453 This value is B<--distancemode> specific. In general, it identifies a point used to select | |
1454 other ATOM/HETATMS with in a specific distance from this point. | |
1455 | |
1456 For I<Atom> value of B<--distancemode>, this option corresponds to an atom specification. | |
1457 Format: I<AtomNumber,AtomName>. Example: | |
1458 | |
1459 455,CA | |
1460 | |
1461 For I<Hetatm> value of B<--distancemode>, this option corresponds to a hetatm specification. | |
1462 Format: I<HetatmNumber,HetAtmName>. Example: | |
1463 | |
1464 5295,C1 | |
1465 | |
1466 For I<Residue> value of B<--distancemode>, this option corresponds to a residue specification. | |
1467 Format: I<ResidueNumber, ResidueName[,ChainID]>. Example: | |
1468 | |
1469 78,MSE | |
1470 977,RET,A | |
1471 978,RET,B | |
1472 | |
1473 For I<XYZ> value of B<--distancemode>, this option corresponds to a coordinate of an | |
1474 arbitrary point. Format: I<X,Y,Z>. Example: | |
1475 | |
1476 10.044,19.261,-4.292 | |
1477 | |
1478 B<--RecordMode> option controls type of record lines to extract from I<PDBFile(s)>: | |
1479 ATOM, HETATM or both. | |
1480 | |
1481 =item B<-h, --help> | |
1482 | |
1483 Print this help message. | |
1484 | |
1485 =item B<-k, --KeepOldRecords> I<yes | no> | |
1486 | |
1487 Specify whether to transfer old non ATOM and HETATM records from input PDBFile(s) to new | |
1488 PDBFile(s) during I<Chains | Atoms | HetAtms | CAlphas | Distance| NonWater | NonHydrogens> | |
1489 value of B<-m --mode> option. By default, except for the HEADER record, all | |
1490 other unnecessary non ATOM/HETATM records are dropped during the | |
1491 generation of new PDB files. Possible values: I<yes | no>. Default: I<no>. | |
1492 | |
1493 =item B<-m, --mode > I<Chains | Sequences | Atoms | CAlphas | AtomNums | AtomsRange | AtomNames | ResidueNums | ResiduesRange | ResidueNames | Distance | NonWater | NonHydrogens> | |
1494 | |
1495 Specify what to extract from I<PDBFile(s)>: I<Chains> - retrieve records for | |
1496 specified chains; I<Sequences> - generate sequence files for specific chains; | |
1497 I<Atoms> - extract atom records; I<CAlphas> - extract atom records for alpha | |
1498 carbon atoms; I<AtomNums> - extract atom records for specified atom numbers; | |
1499 I<AtomsRange> - extract atom records between specified atom number range; | |
1500 I<AtomNames> - extract atom records for specified atom names; I<ResidueNums> | |
1501 - extract records for specified residue numbers; I<ResiduesRange> - extract records | |
1502 for residues between specified residue number range; I<ResidueNames> - extract | |
1503 records for specified residue names; I<Distance> - extract records with in a | |
1504 certain distance from a specific position; I<NonWater> - extract records corresponding | |
1505 to residues other than water; I<NonHydrogens> - extract non-hydrogen records. | |
1506 | |
1507 Possible values: I<Chains, Sequences, Atoms, CAlphas, AtomNums, AtomsRange, | |
1508 AtomNames, ResidueNums, ResiduesRange, ResidueNames, Distance, NonWater, | |
1509 NonHydrogens>. Default value: I<NonWater> | |
1510 | |
1511 During the generation of new PDB files, unnecessary CONECT records are dropped. | |
1512 | |
1513 For I<Chains> mode, data for appropriate chains specified by B<-c, --chains> option | |
1514 is extracted from I<PDBFile(s)> and placed into new PDB file(s). | |
1515 | |
1516 For I<Sequences> mode, residues names using various sequence related options are | |
1517 extracted for chains specified by B<-c, --chains> option from I<PDBFile(s)> and | |
1518 FASTA sequence file(s) are generated. | |
1519 | |
1520 For I<Distance> mode, all ATOM/HETATM records with in a distance specified | |
1521 by B<-d --distance> option from a specific atom, residue or a point indicated by | |
1522 B<--distancemode> are extracted and placed into new PDB file(s). | |
1523 | |
1524 For I<NonWater> mode, non water ATOM/HETATM record lines, identified using value of | |
1525 B<--WaterResidueNames>, are extracted and written to new PDB file(s). | |
1526 | |
1527 For I<NonHydrogens> mode, ATOM/HETATM record lines containing element symbol | |
1528 other than I<H> are extracted and written to new PDB file(s). | |
1529 | |
1530 For all other options, appropriate ATOM/HETATM records are extracted to generate new | |
1531 PDB file(s). | |
1532 | |
1533 B<--RecordMode> option controls type of record lines to extract and process from | |
1534 I<PDBFile(s)>: ATOM, HETATM or both. | |
1535 | |
1536 =item B<--ModifyHeader> I<yes | no> | |
1537 | |
1538 Specify whether to modify HEADER record during the generation of new PDB files | |
1539 for B<-m, --mode> values of I<Chains | Atoms | CAlphas | Distance>. Possible values: | |
1540 I<yes | no>. Default: I<yes>. By default, Classification data is replaced by I<Data extracted | |
1541 using MayaChemTools> before writing out HEADER record. | |
1542 | |
1543 =item B<--NonStandardKeep> I<yes | no> | |
1544 | |
1545 Specify whether to include and convert non-standard three letter residue codes into | |
1546 a code specified using B<--nonstandardcode> option and include them into sequence file(s) | |
1547 generated during I<Sequences> value of B<-m, --mode> option. Possible values: I<yes | no>. | |
1548 Default: I<yes>. | |
1549 | |
1550 A warning is also printed about the presence of non-standard residues. Any residue other | |
1551 than standard 20 amino acids and 5 nucleic acids is considered non-standard; additionally, | |
1552 HETATM residues in chains are also tagged as non-standard. | |
1553 | |
1554 =item B<--NonStandardCode> I<character> | |
1555 | |
1556 A single character code to use for non-standard residues. Default: I<X>. Possible values: | |
1557 I<?, -, or X>. | |
1558 | |
1559 =item B<-o, --overwrite> | |
1560 | |
1561 Overwrite existing files. | |
1562 | |
1563 =item B<-r, --root> I<rootname> | |
1564 | |
1565 New PDB and sequence file name is generated using the root: <Root><Mode>.<Ext>. | |
1566 Default new file name: <PDBFileName>Chain<ChainID>.pdb for I<Chains> B<mode>; | |
1567 <PDBFileName>SequenceChain<ChainID>.fasta for I<Sequences> B<mode>; | |
1568 <PDBFileName>DistanceBy<DistanceMode>.pdb for I<Distance> B<-m, --mode>; | |
1569 <PDBFileName><Mode>.pdb for I<Atoms | CAlphas | NonWater | NonHydrogens> B<-m, --mode> | |
1570 values. This option is ignored for multiple input files. | |
1571 | |
1572 =item B<--RecordMode> I<Atom | Hetatm | AtomAndHetatm> | |
1573 | |
1574 Specify type of record lines to extract and process from I<PDBFile(s)> during various | |
1575 values of B<-m, --mode> option: extract only ATOM record lines; extract only HETATM | |
1576 record lines; extract both ATOM and HETATM lines. Possible values: I<Atom | Hetatm | |
1577 | AtomAndHetatm>. Default during I<Atoms, CAlphas, AtomNums, AtomsRange, | |
1578 AtomNames> values of B<-m, --mode> option: I<Atom>; otherwise: I<AtomAndHetatm>. | |
1579 | |
1580 This option is ignored during I<Chains, Sequences> values of B<-m, --mode> option. | |
1581 | |
1582 =item B<--Residues> I<"ResidueNum,[ResidueNum...]" | "StartResidueNum,EndResidueNum" | "ResidueName,[ResidueName...]"> | |
1583 | |
1584 Specify which residue records to extract from I<PDBFiles(s)> during I<ResidueNums>, | |
1585 I<ResiduesRange>, and I<ResidueNames> value of B<-m, --mode> option: extract records | |
1586 corresponding to residue numbers specified in a comma delimited list of residue numbers/names, | |
1587 or with in the range of start and end residue numbers. Possible values: I<"ResidueNum[,ResidueNum,..]">, | |
1588 I<StartResidueNum,EndResidueNum>, or I<"ResidueName[,ResidueName,..]">. Default: I<None>. Examples: | |
1589 | |
1590 20 | |
1591 5,10 | |
1592 TYR,SER,THR | |
1593 | |
1594 B<--RecordMode> option controls type of record lines to extract from I<PDBFile(s)>: | |
1595 ATOM, HETATM or both. | |
1596 | |
1597 =item B<--SequenceLength> I<number> | |
1598 | |
1599 Maximum sequence length per line in sequence file(s). Default: I<80>. | |
1600 | |
1601 =item B<--SequenceRecords> I<Atom | SeqRes> | |
1602 | |
1603 Specify which records to use for extracting residue names from I<PDBFiles(s)> during | |
1604 I<Sequences> value of B<-m, --mode> option: use ATOM records to compile a list | |
1605 of residues in a chain or parse SEQRES record to get a list of residues. Possible values: | |
1606 I<Atom | SeqRes>. Default: I<Atom>. | |
1607 | |
1608 =item B<--SequenceIDPrefix> I<FileName | HeaderRecord | Automatic> | |
1609 | |
1610 Specify how to generate a prefix for sequence IDs during I<Sequences> value | |
1611 of B<-m, --mode> option: use input file name prefix; retrieve PDB ID from HEADER record; | |
1612 or automatically decide the method for generating the prefix. The chain IDs are also | |
1613 appended to the prefix. Possible values: I<FileName | HeaderRecord | Automatic>. | |
1614 Default: I<Automatic> | |
1615 | |
1616 =item B<--WaterResidueNames> I<Automatic | "ResidueName,[ResidueName,...]"> | |
1617 | |
1618 Identification of water residues during I<NonWater> value of B<-m, --mode> option. Possible values: | |
1619 I<Automatic | "ResidueName,[ResidueName,...]">. Default: I<Automatic> - corresponds | |
1620 to "HOH,WAT,H20". You can also specify a different comma delimited list of residue names | |
1621 to use for water. | |
1622 | |
1623 =item B<-w, --WorkingDir> I<dirname> | |
1624 | |
1625 Location of working directory. Default: current directory. | |
1626 | |
1627 =back | |
1628 | |
1629 =head1 EXAMPLES | |
1630 | |
1631 To extract non-water records from Sample2.pdb file and generate Sample2NonWater.pdb | |
1632 file, type: | |
1633 | |
1634 % ExtractFromPDBFiles.pl Sample2.pdb | |
1635 | |
1636 To extract non-water records corresponding to only ATOM records from Sample2.pdb file | |
1637 and generate Sample2NonWater.pdb file, type: | |
1638 | |
1639 % ExtractFromPDBFiles.pl --RecordMode Atom Sample2.pdb | |
1640 | |
1641 To extract non-water records from Sample2.pdb file using HOH or WAT residue name for water along | |
1642 with all old non-coordinate records and generate Sample2NewNonWater.pdb file, type: | |
1643 | |
1644 % ExtractFromPDBFiles.pl -m NonWater --WaterResidueNames "HOH,WAT" | |
1645 --KeepOldRecords Yes -r Sample2New -o Sample2.pdb | |
1646 | |
1647 To extract non-hydrogens records from Sample2.pdb file and generate Sample2NonHydrogens.pdb | |
1648 file, type: | |
1649 | |
1650 % ExtractFromPDBFiles.pl -m NonHydrogens Sample2.pdb | |
1651 | |
1652 To extract data for first chain in Sample2.pdb and generate Sample2ChainA.pdb | |
1653 file, type: | |
1654 | |
1655 % ExtractFromPDBFiles.pl -m chains -o Sample2.pdb | |
1656 | |
1657 To extract data for both chains in Sample2.pdb and generate Sample2ChainA.pdb and | |
1658 Sample2ChainB.pdb, type: | |
1659 | |
1660 % ExtractFromPDBFiles.pl -m chains -c All -o Sample2.pdb | |
1661 | |
1662 To extract data for alpha carbons in Sample2.pdb and generate Sample2CAlphas.pdb, type: | |
1663 | |
1664 % ExtractFromPDBFiles.pl -m CAlphas -o Sample2.pdb | |
1665 | |
1666 To extract records for specific residue numbers in all chains from Sample2.pdb file and generate | |
1667 Sample2ResidueNums.pdb file, type: | |
1668 | |
1669 % ExtractFromPDBFiles.pl -m ResidueNums --Residues "3,6" | |
1670 Sample2.pdb | |
1671 | |
1672 To extract records for a specific range of residue number in all chains from Sample2.pdb | |
1673 file and generate Sample2ResiduesRange.pdb file, type: | |
1674 | |
1675 % ExtractFromPDBFiles.pl -m ResiduesRange --Residues "10,30" | |
1676 Sample2.pdb | |
1677 | |
1678 To extract data for all ATOM and HETATM records with in 10 angstrom of an atom specified by | |
1679 atom serial number and name "1,N" in Sample2.pdb file and generate Sample2DistanceByAtom.pdb, | |
1680 type: | |
1681 | |
1682 % ExtractFromPDBFiles.pl -m Distance --DistanceMode Atom | |
1683 --DistanceOrigin "1,N" -k No --distance 10 -o Sample2.pdb | |
1684 | |
1685 To extract data for all ATOM and HETATM records for complete residues with any atom or hetatm | |
1686 less than 10 angstrom of an atom specified by atom serial number and name "1,N" in Sample2.pdb | |
1687 file and generate Sample2DistanceByAtom.pdb, type: | |
1688 | |
1689 % ExtractFromPDBFiles.pl -m Distance --DistanceMode Atom | |
1690 --DistanceOrigin "1,N" --DistanceSelectionMode ByResidue | |
1691 -k No --distance 10 -o Sample2.pdb | |
1692 | |
1693 To extract data for all ATOM and HETATM records with in 25 angstrom of an arbitrary point "0,0,0" | |
1694 in Sample2.pdb file and generate Sample2DistanceByXYZ.pdb, type: | |
1695 | |
1696 % ExtractFromPDBFiles.pl -m Distance --DistanceMode XYZ | |
1697 --DistanceOrigin "0,0,0" -k No --distance 25 -o Sample2.pdb | |
1698 | |
1699 =head1 AUTHOR | |
1700 | |
1701 Manish Sud <msud@san.rr.com> | |
1702 | |
1703 =head1 SEE ALSO | |
1704 | |
1705 InfoPDBFiles.pl, ModifyPDBFiles.pl | |
1706 | |
1707 =head1 COPYRIGHT | |
1708 | |
1709 Copyright (C) 2015 Manish Sud. All rights reserved. | |
1710 | |
1711 This file is part of MayaChemTools. | |
1712 | |
1713 MayaChemTools is free software; you can redistribute it and/or modify it under | |
1714 the terms of the GNU Lesser General Public License as published by the Free | |
1715 Software Foundation; either version 3 of the License, or (at your option) | |
1716 any later version. | |
1717 | |
1718 =cut |